From a58c56c0c1e13a16484e00619ec7c6d1507ea547 Mon Sep 17 00:00:00 2001 From: Damian Tomczak Date: Thu, 11 Sep 2025 19:28:11 +0000 Subject: [PATCH] feature: force stateless for copy buffer to buffer Related-to: NEO-6075 Signed-off-by: Damian Tomczak --- .../command_queue/enqueue_read_buffer.h | 4 +- .../command_queue/enqueue_write_buffer.h | 4 +- .../command_queue_hw_2_tests.cpp | 11 +++- .../enqueue_read_buffer_tests.cpp | 56 ++++++++++++++++++- .../enqueue_write_buffer_tests.cpp | 55 +++++++++++++++++- .../xe_hpc_core/excludes_ocl_xe_hpc_core.cpp | 2 - 6 files changed, 120 insertions(+), 12 deletions(-) diff --git a/opencl/source/command_queue/enqueue_read_buffer.h b/opencl/source/command_queue/enqueue_read_buffer.h index 95e0ecd120..7ae7d7f212 100644 --- a/opencl/source/command_queue/enqueue_read_buffer.h +++ b/opencl/source/command_queue/enqueue_read_buffer.h @@ -87,9 +87,9 @@ cl_int CommandQueueHw::enqueueReadBufferImpl( numEventsInWaitList, eventWaitList, event); } - const bool useStateless = forceStateless(buffer->getSize()); + const bool isStateless = isForceStateless || forceStateless(buffer->getSize()); const bool useHeapless = this->getHeaplessModeEnabled(); - auto builtInType = EBuiltInOps::adjustBuiltinType(useStateless, useHeapless); + auto builtInType = EBuiltInOps::adjustBuiltinType(isStateless, useHeapless); void *dstPtr = ptr; diff --git a/opencl/source/command_queue/enqueue_write_buffer.h b/opencl/source/command_queue/enqueue_write_buffer.h index 0458a6fe9f..37a404a38a 100644 --- a/opencl/source/command_queue/enqueue_write_buffer.h +++ b/opencl/source/command_queue/enqueue_write_buffer.h @@ -80,9 +80,9 @@ cl_int CommandQueueHw::enqueueWriteBufferImpl( numEventsInWaitList, eventWaitList, event); } - const bool useStateless = forceStateless(buffer->getSize()); + const bool isStateless = isForceStateless || forceStateless(buffer->getSize()); const bool useHeapless = this->getHeaplessModeEnabled(); - auto builtInType = EBuiltInOps::adjustBuiltinType(useStateless, useHeapless); + auto builtInType = EBuiltInOps::adjustBuiltinType(isStateless, useHeapless); void *srcPtr = const_cast(ptr); diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp index d701a4e091..6d99c5c2eb 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp @@ -137,9 +137,16 @@ struct BuiltinParamsCommandQueueHwTests : public CommandQueueHwTest { HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadWriteBufferCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) { + auto builtInType = EBuiltInOps::copyBufferToBuffer; + auto &compilerProductHelper = pDevice->getCompilerProductHelper(); - auto builtIn = compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo) ? EBuiltInOps::copyBufferToBufferStatelessHeapless : EBuiltInOps::copyBufferToBuffer; - setUpImpl(builtIn); + if (compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo)) { + builtInType = EBuiltInOps::copyBufferToBufferStatelessHeapless; + } else if (compilerProductHelper.isForceToStatelessRequired()) { + builtInType = EBuiltInOps::copyBufferToBufferStateless; + } + + setUpImpl(builtInType); BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); diff --git a/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp index 46a39e35e1..b95863df08 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp @@ -10,6 +10,7 @@ #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/utilities/base_object_utils.h" @@ -20,6 +21,7 @@ #include "opencl/test/unit_test/command_queue/enqueue_read_buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" +#include "opencl/test/unit_test/mocks/mock_builder.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; @@ -153,7 +155,14 @@ HWTEST_F(EnqueueReadBufferTypeTest, WhenReadingBufferThenIndirectDataIsAdded) { srcBuffer->forceDisallowCPUCopy = true; EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_TRUE); - auto builtInType = pCmdQ->getHeaplessModeEnabled() ? EBuiltInOps::copyBufferToBufferStatelessHeapless : EBuiltInOps::copyBufferToBuffer; + auto builtInType = EBuiltInOps::copyBufferToBuffer; + + if (pCmdQ->getHeaplessModeEnabled()) { + builtInType = EBuiltInOps::copyBufferToBufferStatelessHeapless; + } else if (static_cast *>(pCmdQ)->isForceStateless) { + builtInType = EBuiltInOps::copyBufferToBufferStateless; + } + auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); @@ -800,7 +809,7 @@ HWTEST_F(EnqueueReadBufferStatelessTest, WhenReadingBufferStatelessThenSuccessIs using EnqueueReadBufferStatefulTest = EnqueueReadBufferHw; -HWTEST_F(EnqueueReadBufferStatefulTest, WhenReadingBufferStatefulThenSuccessIsReturned) { +HWTEST2_F(EnqueueReadBufferStatefulTest, WhenReadingBufferStatefulThenSuccessIsReturned, IsStatefulBufferPreferredForProduct) { auto pCmdQ = std::make_unique>(context.get(), device.get()); if (pCmdQ->getHeaplessModeEnabled()) { @@ -1006,4 +1015,47 @@ HWTEST_F(ReadBufferStagingBufferTest, whenIsValidForStagingTransferCalledAndCpuC unsigned char ptr[16]; EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, 16, CL_COMMAND_READ_BUFFER, true, false)); +} + +HWTEST_F(EnqueueReadBufferTypeTest, given4gbBufferAndIsForceStatelessIsFalseWhenEnqueueReadBufferCallThenStatelessIsUsed) { + struct FourGbMockBuffer : MockBuffer { + size_t getSize() const override { return static_cast(4ull * MemoryConstants::gigaByte); } + }; + + if (is32bit) { + GTEST_SKIP(); + } + + auto mockCmdQ = static_cast *>(pCmdQ); + mockCmdQ->isForceStateless = false; + + EBuiltInOps::Type copyBuiltIn = EBuiltInOps::adjustBuiltinType(true, pCmdQ->getHeaplessModeEnabled()); + + auto builtIns = new MockBuiltins(); + MockRootDeviceEnvironment::resetBuiltins(pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()].get(), builtIns); + + // substitute original builder with mock builder + auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::unique_ptr(new MockBuilder(*builtIns, pCmdQ->getClDevice()))); + + FourGbMockBuffer buffer; + + auto mockBuilder = static_cast(&BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( + copyBuiltIn, + *pClDevice)); + + EXPECT_FALSE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + + EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, &buffer); + + EXPECT_TRUE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + + // restore original builder and retrieve mock builder + auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::move(oldBuilder)); + EXPECT_EQ(mockBuilder, newBuilder.get()); } \ No newline at end of file diff --git a/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp index d9eb562b1f..6b1f848337 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp @@ -10,6 +10,7 @@ #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/utilities/base_object_utils.h" @@ -20,6 +21,7 @@ #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" +#include "opencl/test/unit_test/mocks/mock_builder.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; @@ -158,10 +160,16 @@ HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenIndirectDataIsAdded) { srcBuffer->forceDisallowCPUCopy = true; EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), EnqueueWriteBufferTraits::blocking); + auto builtInType = EBuiltInOps::copyBufferToBuffer; + + if (static_cast *>(pCmdQ)->isForceStateless) { + builtInType = EBuiltInOps::copyBufferToBufferStateless; + } + auto &compilerProductHelper = pDevice->getCompilerProductHelper(); auto heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo); - auto builtInType = adjustBuiltInType(heaplessEnabled, EBuiltInOps::copyBufferToBuffer); + builtInType = adjustBuiltInType(heaplessEnabled, builtInType); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, pCmdQ->getClDevice()); @@ -580,7 +588,7 @@ HWTEST_F(EnqueueReadWriteStatelessTest, WhenWritingBufferStatelessThenSuccessIsR using EnqueueWriteBufferStatefulTest = EnqueueWriteBufferHw; -HWTEST_F(EnqueueWriteBufferStatefulTest, WhenWritingBufferStatefulThenSuccessIsReturned) { +HWTEST2_F(EnqueueWriteBufferStatefulTest, WhenWritingBufferStatefulThenSuccessIsReturned, IsStatefulBufferPreferredForProduct) { auto pCmdQ = std::make_unique>(context.get(), device.get()); if (pCmdQ->getHeaplessModeEnabled()) { @@ -786,4 +794,47 @@ HWTEST_F(WriteBufferStagingBufferTest, whenIsValidForStagingTransferCalledAndCpu unsigned char ptr[16]; EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, 16, CL_COMMAND_WRITE_BUFFER, true, false)); +} + +HWTEST_F(EnqueueWriteBufferTypeTest, given4gbBufferAndIsForceStatelessIsFalseWhenEnqueueWriteBufferCallThenStatelessIsUsed) { + struct FourGbMockBuffer : MockBuffer { + size_t getSize() const override { return static_cast(4ull * MemoryConstants::gigaByte); } + }; + + if (is32bit) { + GTEST_SKIP(); + } + + auto mockCmdQ = static_cast *>(pCmdQ); + mockCmdQ->isForceStateless = false; + + EBuiltInOps::Type copyBuiltIn = EBuiltInOps::adjustBuiltinType(true, pCmdQ->getHeaplessModeEnabled()); + + auto builtIns = new MockBuiltins(); + MockRootDeviceEnvironment::resetBuiltins(pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()].get(), builtIns); + + // substitute original builder with mock builder + auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::unique_ptr(new MockBuilder(*builtIns, pCmdQ->getClDevice()))); + + FourGbMockBuffer buffer; + + auto mockBuilder = static_cast(&BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( + copyBuiltIn, + *pClDevice)); + + EXPECT_FALSE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + + EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, &buffer); + + EXPECT_TRUE(mockBuilder->wasBuildDispatchInfosWithBuiltinOpParamsCalled); + + // restore original builder and retrieve mock builder + auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( + rootDeviceIndex, + copyBuiltIn, + std::move(oldBuilder)); + EXPECT_EQ(mockBuilder, newBuilder.get()); } \ No newline at end of file diff --git a/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp index 74668f8ae0..fa6afb1d42 100644 --- a/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp @@ -20,6 +20,4 @@ HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenDeviceWhenAskingForSubGroupSizesT HWTEST_EXCLUDE_PRODUCT(ClGfxCoreHelperTest, givenKernelInfoWhenCheckingRequiresAuxResolvesThenCorrectValuesAreReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToLocalBufferStatefulBuilderThenSuccessIsReturned, IGFX_XE_HPC_CORE); -HWTEST_EXCLUDE_PRODUCT(EnqueueWriteBufferStatefulTest, WhenWritingBufferStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE); -HWTEST_EXCLUDE_PRODUCT(EnqueueReadBufferStatefulTest, WhenReadingBufferStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode, IGFX_XE_HPC_CORE);