feature: force stateless for fill buffer

Related-to: NEO-6075

Signed-off-by: Damian Tomczak <damian.tomczak@intel.com>
This commit is contained in:
Damian Tomczak
2025-09-10 21:28:13 +00:00
committed by Compute-Runtime-Automation
parent fe8cb28efe
commit 33db4ed0de
3 changed files with 56 additions and 5 deletions

View File

@@ -52,9 +52,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize);
}
const bool useStateless = forceStateless(buffer->getSize());
const bool isStateless = isForceStateless || forceStateless(buffer->getSize());
const bool useHeapless = this->getHeaplessModeEnabled();
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(useStateless, useHeapless);
auto builtInType = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(isStateless, useHeapless);
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType,
this->getClDevice());

View File

@@ -16,6 +16,7 @@
#include "shared/test/common/helpers/gtest_helpers.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_builtins.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
@@ -25,6 +26,7 @@
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
#include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_builder.h"
using namespace NEO;
@@ -97,7 +99,14 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenIndirectDataGetsAdded)
EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(adjustBuiltInType(EBuiltInOps::fillBuffer),
auto builtInType = EBuiltInOps::fillBuffer;
auto &compilerProductHelper = pDevice->getCompilerProductHelper();
if (compilerProductHelper.isForceToStatelessRequired()) {
builtInType = EBuiltInOps::fillBufferStateless;
}
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(adjustBuiltInType(builtInType),
pCmdQ->getClDevice());
ASSERT_NE(nullptr, &builder);
@@ -673,7 +682,7 @@ HWTEST_F(EnqueueFillBufferStatelessTest, givenBuffersWhenFillingBufferStatelessT
using EnqueueFillBufferStatefulTest = EnqueueFillBufferHw;
HWTEST_F(EnqueueFillBufferStatefulTest, givenBuffersWhenFillingBufferStatefulThenSuccessIsReturned) {
HWTEST2_F(EnqueueFillBufferStatefulTest, givenBuffersWhenFillingBufferStatefulThenSuccessIsReturned, IsStatefulBufferPreferredForProduct) {
auto pCmdQ = std::make_unique<CommandQueueStateful<FamilyType>>(context.get(), device.get());
if (pCmdQ->getHeaplessModeEnabled()) {
GTEST_SKIP();
@@ -691,3 +700,46 @@ HWTEST_F(EnqueueFillBufferStatefulTest, givenBuffersWhenFillingBufferStatefulThe
ASSERT_EQ(CL_SUCCESS, retVal);
}
// Checks that filling a buffer which reports a 4 GB size goes through the builder
// registered for the stateless variant of the fillBuffer builtin, even though
// isForceStateless is explicitly cleared on the command queue.
HWTEST_F(EnqueueFillBufferCmdTests, given4gbBufferAndIsForceStatelessIsFalseWhenEnqueueFillBufferCallThenStatelessIsUsed) {
    // Not exercised on 32-bit builds.
    if (is32bit) {
        GTEST_SKIP();
    }

    // Buffer mock that only overrides the reported size (4 GB).
    struct FourGbMockBuffer : MockBuffer {
        size_t getSize() const override { return static_cast<size_t>(4ull * MemoryConstants::gigaByte); }
    };

    auto hwQueueMock = static_cast<MockCommandQueueHw<FamilyType> *>(pCmdQ);
    hwQueueMock->isForceStateless = false;

    // Builtin type expected for a stateless fill (first argument: stateless = true).
    const bool heaplessEnabled = pCmdQ->getHeaplessModeEnabled();
    EBuiltInOps::Type statelessFillBuiltIn = EBuiltInOps::adjustBuiltinType<EBuiltInOps::fillBuffer>(true, heaplessEnabled);

    // Install mock builtins in the root device environment used by the queue's device.
    auto mockBuiltIns = new MockBuiltins();
    const auto queueRootDeviceIndex = pCmdQ->getDevice().getRootDeviceIndex();
    auto rootDeviceEnvironment = pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[queueRootDeviceIndex].get();
    MockRootDeviceEnvironment::resetBuiltins(rootDeviceEnvironment, mockBuiltIns);

    // Swap the real builder for a mock one, keeping the original so it can be put back.
    std::unique_ptr<NEO::BuiltinDispatchInfoBuilder> replacementBuilder(new MockBuilder(*mockBuiltIns, pCmdQ->getClDevice()));
    auto originalBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder(
        rootDeviceIndex,
        statelessFillBuiltIn,
        std::move(replacementBuilder));

    FourGbMockBuffer fourGbBuffer;

    // Look the builder up again through the regular path; it must be the mock just registered.
    auto &registeredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(
        statelessFillBuiltIn,
        *pClDevice);
    auto builderMock = static_cast<MockBuilder *>(&registeredBuilder);

    EXPECT_FALSE(builderMock->wasBuildDispatchInfosWithBuiltinOpParamsCalled);

    EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, &fourGbBuffer);

    EXPECT_TRUE(builderMock->wasBuildDispatchInfosWithBuiltinOpParamsCalled);

    // Put the original builder back; the call hands back the builder it replaced,
    // which is expected to be the mock.
    auto replacedBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder(
        rootDeviceIndex,
        statelessFillBuiltIn,
        std::move(originalBuilder));
    EXPECT_EQ(builderMock, replacedBuilder.get());
}

View File

@@ -20,7 +20,6 @@ HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenDeviceWhenAskingForSubGroupSizesT
HWTEST_EXCLUDE_PRODUCT(ClGfxCoreHelperTest, givenKernelInfoWhenCheckingRequiresAuxResolvesThenCorrectValuesAreReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToLocalBufferStatefulBuilderThenSuccessIsReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(EnqueueFillBufferStatefulTest, givenBuffersWhenFillingBufferStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(EnqueueWriteBufferStatefulTest, WhenWritingBufferStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(EnqueueReadBufferStatefulTest, WhenReadingBufferStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE);
HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode, IGFX_XE_HPC_CORE);