Add HW-specific preference for usage of blitter in local->local transfers

Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
Resolves: NEO-5549
This commit is contained in:
Maciej Dziuban 2021-02-12 12:25:51 +00:00 committed by Compute-Runtime-Automation
parent 25c696ae85
commit 6eebf82ec5
5 changed files with 103 additions and 24 deletions

View File

@ -27,6 +27,7 @@
#include "opencl/source/event/event_builder.h" #include "opencl/source/event/event_builder.h"
#include "opencl/source/event/user_event.h" #include "opencl/source/event/user_event.h"
#include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/gtpin/gtpin_notify.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/helpers/convert_color.h" #include "opencl/source/helpers/convert_color.h"
#include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/helpers/mipmap.h" #include "opencl/source/helpers/mipmap.h"
@ -711,14 +712,27 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
} }
// Decides whether the blitter (copy engine) is preferred for the given command type.
// NOTE(review): this span looks like a flattened side-by-side diff - lines that repeat a
// statement show the pre-change text followed by the post-change text, while lines with a
// single statement exist on one side only.
// Post-change logic as shown here:
//   - a CL_COMMAND_COPY_BUFFER whose source and destination graphics allocations (looked up
//     by the device's root device index) are both in the local memory pool, or a
//     CL_COMMAND_SVM_MEMCPY whose source and destination SVM allocations are both in the
//     local memory pool, is classified as local-to-local;
//   - for local-to-local transfers PreferCopyEngineForCopyBufferToBuffer decides when it is
//     not -1, otherwise ClHwHelper::preferBlitterForLocalToLocalTransfers() decides;
//   - every other case returns true.
bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const { bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const {
if (cmdType == CL_COMMAND_COPY_BUFFER) { bool isLocalToLocal = false;
return DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() == 1;
// NOTE(review): srcMemObj and dstMemObj are dereferenced without a null check; the command
// type is tested first, so they only need to be populated for CL_COMMAND_COPY_BUFFER.
if (cmdType == CL_COMMAND_COPY_BUFFER &&
builtinOpParams.srcMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool() &&
builtinOpParams.dstMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool()) {
isLocalToLocal = true;
} }
if (cmdType == CL_COMMAND_SVM_MEMCPY && if (cmdType == CL_COMMAND_SVM_MEMCPY &&
builtinOpParams.srcSvmAlloc->isAllocatedInLocalMemoryPool() && builtinOpParams.srcSvmAlloc->isAllocatedInLocalMemoryPool() &&
builtinOpParams.dstSvmAlloc->isAllocatedInLocalMemoryPool()) { builtinOpParams.dstSvmAlloc->isAllocatedInLocalMemoryPool()) {
return DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() == 1; isLocalToLocal = true;
} }
if (isLocalToLocal) {
// Any flag value other than -1 overrides the hardware preference; the cast maps 0 to false
// and every other value to true.
if (DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() != -1) {
return static_cast<bool>(DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get());
}
// Flag left at -1: ask the helper selected by the device's render core family.
const auto &clHwHelper = ClHwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily);
return clHwHelper.preferBlitterForLocalToLocalTransfers();
}
return true; return true;
} }

View File

@ -30,6 +30,7 @@ class ClHwHelper {
virtual cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const = 0; virtual cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const = 0;
virtual bool getQueueFamilyName(std::string &name, EngineGroupType type) const = 0; virtual bool getQueueFamilyName(std::string &name, EngineGroupType type) const = 0;
virtual cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const = 0; virtual cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const = 0;
// Reports whether the blitter is preferred for local-to-local memory transfers; pure virtual,
// so each concrete helper supplies the answer.
virtual bool preferBlitterForLocalToLocalTransfers() const = 0;
protected: protected:
virtual bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const = 0; virtual bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const = 0;
@ -51,6 +52,7 @@ class ClHwHelperHw : public ClHwHelper {
cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const override; cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const override;
bool getQueueFamilyName(std::string &name, EngineGroupType type) const override; bool getQueueFamilyName(std::string &name, EngineGroupType type) const override;
cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const override; cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const override;
// Override of ClHwHelper::preferBlitterForLocalToLocalTransfers() for this helper template.
bool preferBlitterForLocalToLocalTransfers() const override;
protected: protected:
bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const override; bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const override;

View File

@ -47,4 +47,9 @@ inline bool ClHwHelperHw<GfxFamily>::getQueueFamilyName(std::string &name, Engin
return false; return false;
} }
// Generic implementation of the local-to-local blitter preference: always reports false.
// NOTE(review): presumably individual GfxFamily specializations override this elsewhere - confirm.
template <typename GfxFamily>
inline bool ClHwHelperHw<GfxFamily>::preferBlitterForLocalToLocalTransfers() const {
    constexpr bool blitterPreferredByDefault = false;
    return blitterPreferredByDefault;
}
} // namespace NEO } // namespace NEO

View File

@ -18,6 +18,7 @@
#include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/event/event.h" #include "opencl/source/event/event.h"
#include "opencl/source/event/user_event.h" #include "opencl/source/event/user_event.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h"
@ -1211,21 +1212,62 @@ TEST(CommandQueue, givenRegularClCommandWhenCallingBlitEnqueuePreferredThenRetur
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_IMAGE, builtinOpParams)); EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_IMAGE, builtinOpParams));
} }
// Local-to-local buffer copy: with the debug flag at -1 the result must equal the hardware
// helper's preference; 0 forces false and 1 forces true.
// NOTE(review): lines that repeat a statement show the pre-change text followed by the
// post-change text of a side-by-side diff.
TEST(CommandQueue, givenCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlag) { TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) {
// Expected answer when the debug flag is left at -1.
const bool preferBlitterHw = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers();
DebugManagerStateRestore restore{}; DebugManagerStateRestore restore{};
MockContext context{}; MockContext context{};
MockCommandQueue queue{context}; MockCommandQueue queue{context};
BuiltinOpParams builtinOpParams{}; BuiltinOpParams builtinOpParams{};
MockGraphicsAllocation srcGraphicsAllocation{};
MockGraphicsAllocation dstGraphicsAllocation{};
MockBuffer srcMemObj{srcGraphicsAllocation};
MockBuffer dstMemObj{dstGraphicsAllocation};
builtinOpParams.srcMemObj = &srcMemObj;
builtinOpParams.dstMemObj = &dstMemObj;
// Both allocations are placed in the local memory pool so the copy counts as local-to-local.
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
} }
// Buffer copies where only one side is in the local memory pool: blitEnqueuePreferred is
// expected to return true for every value of the debug flag (-1, 0 and 1).
// NOTE(review): the first line below also carries the pre-change name of a different test
// from the left column of a side-by-side diff; the body belongs to the second TEST header.
TEST(CommandQueue, givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlag) { TEST(CommandQueue, givenNotLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) {
DebugManagerStateRestore restore{};
MockContext context{};
MockCommandQueue queue{context};
BuiltinOpParams builtinOpParams{};
MockGraphicsAllocation srcGraphicsAllocation{};
MockGraphicsAllocation dstGraphicsAllocation{};
MockBuffer srcMemObj{srcGraphicsAllocation};
MockBuffer dstMemObj{dstGraphicsAllocation};
builtinOpParams.srcMemObj = &srcMemObj;
builtinOpParams.dstMemObj = &dstMemObj;
// Case 1: source in the System4KBPages pool, destination in the local memory pool.
srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
// Case 2: pools swapped - source in the local memory pool, destination in System4KBPages.
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams));
}
TEST(CommandQueue, givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) {
const bool preferBlitterHw = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers();
DebugManagerStateRestore restore{}; DebugManagerStateRestore restore{};
MockContext context{}; MockContext context{};
MockCommandQueue queue{context}; MockCommandQueue queue{context};
@ -1238,7 +1280,7 @@ TEST(CommandQueue, givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferre
srcSvmAlloc.memoryPool = MemoryPool::LocalMemory; srcSvmAlloc.memoryPool = MemoryPool::LocalMemory;
dstSvmAlloc.memoryPool = MemoryPool::LocalMemory; dstSvmAlloc.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams));
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);

View File

@ -14,8 +14,10 @@
#include "opencl/source/api/api.h" #include "opencl/source/api/api.h"
#include "opencl/source/event/user_event.h" #include "opencl/source/event/user_event.h"
#include "opencl/source/helpers/cl_blit_properties.h" #include "opencl/source/helpers/cl_blit_properties.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h"
@ -1329,25 +1331,35 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedEnqueueWhenUsingBcsThenWaitForVal
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
} }
// Enqueues a buffer copy between two local-memory mock buffers for debug flag values -1, 0
// and 1, and checks the blitBufferCalled counter on the BCS command stream receiver: -1 adds
// a call only when the hardware helper prefers the blitter, 0 adds none, 1 always adds one.
// NOTE(review): lines that repeat a statement show the pre-change text followed by the
// post-change text of a side-by-side diff.
// NOTE(review): no DebugManagerStateRestore is visible in this test; presumably the fixture
// restores the debug flags - confirm.
HWTEST_TEMPLATED_F(BcsBufferTests, givenDebugFlagSetToOneWhenEnqueueingCopyBufferToBufferThenUseBlitter) { HWTEST_TEMPLATED_F(BcsBufferTests, givenDebugFlagSetToOneWhenEnqueueingCopyLocalBufferToLocalBufferThenUseBlitter) {
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getBcsCommandStreamReceiver()); auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getBcsCommandStreamReceiver());
auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); MockGraphicsAllocation srcGraphicsAllocation{};
auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); MockGraphicsAllocation dstGraphicsAllocation{};
MockBuffer srcMemObj{srcGraphicsAllocation};
MockBuffer dstMemObj{dstGraphicsAllocation};
// Both allocations are placed in the local memory pool.
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
const bool preferBlitterHw = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers();
// Running total of blitBuffer calls the test expects so far.
uint32_t expectedBlitBufferCalled = 0;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_EQ(0u, bcsCsr->blitBufferCalled); EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(0u, bcsCsr->blitBufferCalled); if (preferBlitterHw) {
expectedBlitBufferCalled++;
}
EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled);
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_EQ(0u, bcsCsr->blitBufferCalled); EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(0u, bcsCsr->blitBufferCalled); EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled);
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_EQ(0u, bcsCsr->blitBufferCalled); EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(1u, bcsCsr->blitBufferCalled); expectedBlitBufferCalled++;
EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled);
} }
HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsQueueWhenEnqueueingCopyBufferToBufferThenUseBlitterRegardlessOfPreference) { HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsQueueWhenEnqueueingCopyBufferToBufferThenUseBlitterRegardlessOfPreference) {
@ -1362,21 +1374,25 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsQueueWhenEnqueueingCopyBufferToBuffer
}; };
MockCommandQueueHw<FamilyType> queue(bcsMockContext.get(), device.get(), properties); MockCommandQueueHw<FamilyType> queue(bcsMockContext.get(), device.get(), properties);
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(queue.getBcsCommandStreamReceiver()); auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(queue.getBcsCommandStreamReceiver());
auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); MockGraphicsAllocation srcGraphicsAllocation{};
auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); MockGraphicsAllocation dstGraphicsAllocation{};
MockBuffer srcMemObj{srcGraphicsAllocation};
MockBuffer dstMemObj{dstGraphicsAllocation};
srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
EXPECT_EQ(0u, bcsCsr->blitBufferCalled); EXPECT_EQ(0u, bcsCsr->blitBufferCalled);
queue.enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); queue.enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(1u, bcsCsr->blitBufferCalled); EXPECT_EQ(1u, bcsCsr->blitBufferCalled);
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
EXPECT_EQ(1u, bcsCsr->blitBufferCalled); EXPECT_EQ(1u, bcsCsr->blitBufferCalled);
queue.enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); queue.enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(2u, bcsCsr->blitBufferCalled); EXPECT_EQ(2u, bcsCsr->blitBufferCalled);
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
EXPECT_EQ(2u, bcsCsr->blitBufferCalled); EXPECT_EQ(2u, bcsCsr->blitBufferCalled);
queue.enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); queue.enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(3u, bcsCsr->blitBufferCalled); EXPECT_EQ(3u, bcsCsr->blitBufferCalled);
} }