diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 503b6c2342..c12db990f6 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -27,6 +27,7 @@ #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/gtpin/gtpin_notify.h" +#include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/convert_color.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" @@ -711,14 +712,27 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const { } bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const { - if (cmdType == CL_COMMAND_COPY_BUFFER) { - return DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() == 1; + bool isLocalToLocal = false; + + if (cmdType == CL_COMMAND_COPY_BUFFER && + builtinOpParams.srcMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool() && + builtinOpParams.dstMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool()) { + isLocalToLocal = true; } if (cmdType == CL_COMMAND_SVM_MEMCPY && builtinOpParams.srcSvmAlloc->isAllocatedInLocalMemoryPool() && builtinOpParams.dstSvmAlloc->isAllocatedInLocalMemoryPool()) { - return DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() == 1; + isLocalToLocal = true; } + + if (isLocalToLocal) { + if (DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() != -1) { + return static_cast<bool>(DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get()); + } + const auto &clHwHelper = ClHwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); + return clHwHelper.preferBlitterForLocalToLocalTransfers(); + } + return true; } diff --git a/opencl/source/helpers/cl_hw_helper.h b/opencl/source/helpers/cl_hw_helper.h 
index 17445e6e96..7864844b3b 100644 --- a/opencl/source/helpers/cl_hw_helper.h +++ b/opencl/source/helpers/cl_hw_helper.h @@ -30,6 +30,7 @@ class ClHwHelper { virtual cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const = 0; virtual bool getQueueFamilyName(std::string &name, EngineGroupType type) const = 0; virtual cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const = 0; + virtual bool preferBlitterForLocalToLocalTransfers() const = 0; protected: virtual bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const = 0; @@ -51,6 +52,7 @@ class ClHwHelperHw : public ClHwHelper { cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const override; bool getQueueFamilyName(std::string &name, EngineGroupType type) const override; cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const override; + bool preferBlitterForLocalToLocalTransfers() const override; protected: bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const override; diff --git a/opencl/source/helpers/cl_hw_helper_base.inl b/opencl/source/helpers/cl_hw_helper_base.inl index 4783c91806..0a49bb7de0 100644 --- a/opencl/source/helpers/cl_hw_helper_base.inl +++ b/opencl/source/helpers/cl_hw_helper_base.inl @@ -47,4 +47,9 @@ inline bool ClHwHelperHw<GfxFamily>::getQueueFamilyName(std::string &name, Engin return false; } +template <typename GfxFamily> +inline bool ClHwHelperHw<GfxFamily>::preferBlitterForLocalToLocalTransfers() const { + return false; +} + } // namespace NEO diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index b5e310c5bf..51621437fc 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -18,6 +18,7 @@ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include 
"opencl/source/event/user_event.h" +#include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" @@ -1211,21 +1212,62 @@ TEST(CommandQueue, givenRegularClCommandWhenCallingBlitEnqueuePreferredThenRetur EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_IMAGE, builtinOpParams)); } -TEST(CommandQueue, givenCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlag) { +TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) { + const bool preferBlitterHw = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers(); DebugManagerStateRestore restore{}; MockContext context{}; MockCommandQueue queue{context}; BuiltinOpParams builtinOpParams{}; + MockGraphicsAllocation srcGraphicsAllocation{}; + MockGraphicsAllocation dstGraphicsAllocation{}; + MockBuffer srcMemObj{srcGraphicsAllocation}; + MockBuffer dstMemObj{dstGraphicsAllocation}; + builtinOpParams.srcMemObj = &srcMemObj; + builtinOpParams.dstMemObj = &dstMemObj; + srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; + dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); - EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); } -TEST(CommandQueue, 
givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlag) { +TEST(CommandQueue, givenNotLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) { + DebugManagerStateRestore restore{}; + MockContext context{}; + MockCommandQueue queue{context}; + BuiltinOpParams builtinOpParams{}; + MockGraphicsAllocation srcGraphicsAllocation{}; + MockGraphicsAllocation dstGraphicsAllocation{}; + MockBuffer srcMemObj{srcGraphicsAllocation}; + MockBuffer dstMemObj{dstGraphicsAllocation}; + builtinOpParams.srcMemObj = &srcMemObj; + builtinOpParams.dstMemObj = &dstMemObj; + + srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; + dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; + DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); + EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); + EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); + EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + + srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; + dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; + DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); + EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); + EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); + EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); +} + +TEST(CommandQueue, givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) { + const bool preferBlitterHw = 
ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers(); DebugManagerStateRestore restore{}; MockContext context{}; MockCommandQueue queue{context}; @@ -1238,7 +1280,7 @@ TEST(CommandQueue, givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferre srcSvmAlloc.memoryPool = MemoryPool::LocalMemory; dstSvmAlloc.memoryPool = MemoryPool::LocalMemory; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); - EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); + EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index e4176c7a01..c01077518f 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -14,8 +14,10 @@ #include "opencl/source/api/api.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/cl_blit_properties.h" +#include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" +#include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" @@ -1329,25 +1331,35 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedEnqueueWhenUsingBcsThenWaitForVal EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } -HWTEST_TEMPLATED_F(BcsBufferTests, givenDebugFlagSetToOneWhenEnqueueingCopyBufferToBufferThenUseBlitter) { +HWTEST_TEMPLATED_F(BcsBufferTests, 
givenDebugFlagSetToOneWhenEnqueueingCopyLocalBufferToLocalBufferThenUseBlitter) { auto bcsCsr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); - auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); - auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + MockGraphicsAllocation srcGraphicsAllocation{}; + MockGraphicsAllocation dstGraphicsAllocation{}; + MockBuffer srcMemObj{srcGraphicsAllocation}; + MockBuffer dstMemObj{dstGraphicsAllocation}; + srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; + dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; + const bool preferBlitterHw = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers(); + uint32_t expectedBlitBufferCalled = 0; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); - EXPECT_EQ(0u, bcsCsr->blitBufferCalled); - commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); - EXPECT_EQ(0u, bcsCsr->blitBufferCalled); + EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); + commandQueue->enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); + if (preferBlitterHw) { + expectedBlitBufferCalled++; + } + EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); - EXPECT_EQ(0u, bcsCsr->blitBufferCalled); - commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); - EXPECT_EQ(0u, bcsCsr->blitBufferCalled); + EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); + commandQueue->enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); + EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); - EXPECT_EQ(0u, bcsCsr->blitBufferCalled); - 
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); - EXPECT_EQ(1u, bcsCsr->blitBufferCalled); + EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); + commandQueue->enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); + expectedBlitBufferCalled++; + EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsQueueWhenEnqueueingCopyBufferToBufferThenUseBlitterRegardlessOfPreference) { @@ -1362,21 +1374,25 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsQueueWhenEnqueueingCopyBufferToBuffer }; MockCommandQueueHw queue(bcsMockContext.get(), device.get(), properties); auto bcsCsr = static_cast *>(queue.getBcsCommandStreamReceiver()); - auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); - auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + MockGraphicsAllocation srcGraphicsAllocation{}; + MockGraphicsAllocation dstGraphicsAllocation{}; + MockBuffer srcMemObj{srcGraphicsAllocation}; + MockBuffer dstMemObj{dstGraphicsAllocation}; + srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; + dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); EXPECT_EQ(0u, bcsCsr->blitBufferCalled); - queue.enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + queue.enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(1u, bcsCsr->blitBufferCalled); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); EXPECT_EQ(1u, bcsCsr->blitBufferCalled); - queue.enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + queue.enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(2u, bcsCsr->blitBufferCalled); 
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); EXPECT_EQ(2u, bcsCsr->blitBufferCalled); - queue.enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + queue.enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(3u, bcsCsr->blitBufferCalled); }