From c381f371cde67025af25a8b58a9a2ef9c7a209bc Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Tue, 15 Sep 2020 15:45:26 +0200 Subject: [PATCH] Use BCS to copy ISA if required Change-Id: I181e09d0356718c25162efdddede39b13399ed3b Signed-off-by: Bartosz Dunajski --- opencl/source/program/kernel_info.cpp | 11 +++++ .../command_queue/blit_enqueue_tests.cpp | 45 ++++++++++++++++++- .../unit_test/mem_obj/buffer_bcs_tests.cpp | 2 +- shared/source/helpers/blit_commands_helper.h | 6 +-- .../helpers/blit_commands_helper_extra.cpp | 2 +- 5 files changed, 60 insertions(+), 6 deletions(-) diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index ff3804835a..94b3cc0334 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -7,6 +7,8 @@ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/aligned_memory.h" +#include "shared/source/helpers/blit_commands_helper.h" +#include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" @@ -427,6 +429,15 @@ bool KernelInfo::createKernelAllocation(const Device &device) { if (!kernelAllocation) { return false; } + + auto &hwInfo = device.getHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + + if (kernelAllocation->isAllocatedInLocalMemoryPool() && hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo)) { + auto status = BlitHelperFunctions::blitMemoryToAllocation(device, kernelAllocation, 0, heapInfo.pKernelHeap, {kernelIsaSize, 1, 1}); + return (status == BlitOperationResult::Success); + } + return device.getMemoryManager()->copyMemoryToAllocation(kernelAllocation, heapInfo.pKernelHeap, kernelIsaSize); } diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index 0ca0878b32..80b64dc1f4 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -36,7 +36,7 @@ struct BlitEnqueueTests : public ::testing::Test { bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); - auto mockBlitMemoryToAllocation = [this](Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, + auto mockBlitMemoryToAllocation = [this](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) -> BlitOperationResult { auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, memory, nullptr, @@ -70,6 +70,7 @@ struct BlitEnqueueTests : public ::testing::Test { DebugManager.flags.ForceAuxTranslationMode.set(1); DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); + DebugManager.flags.EnableLocalMemory.set(1); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; bool createBcsEngine = !capabilityTable.blitterOperationsSupported; @@ -1461,4 +1462,46 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionT } } +using BlitCopyTests = BlitEnqueueTests<1>; + +HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) { + DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed)); + DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(GraphicsAllocation::AllocationType::KERNEL_ISA) - 1)); + + uint32_t kernelHeap = 0; + KernelInfo kernelInfo; + kernelInfo.heapInfo.KernelHeapSize = 1; + kernelInfo.heapInfo.pKernelHeap = &kernelHeap; + + auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); + + kernelInfo.createKernelAllocation(device->getDevice()); + + if (kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool()) { + EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount()); + } else { + EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount()); + } + + device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation); +} + +HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) { + DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessAllowed)); + DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(GraphicsAllocation::AllocationType::KERNEL_ISA) - 1)); + + uint32_t kernelHeap = 0; + KernelInfo kernelInfo; + kernelInfo.heapInfo.KernelHeapSize = 1; + kernelInfo.heapInfo.pKernelHeap = &kernelHeap; + + auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); + + kernelInfo.createKernelAllocation(device->getDevice()); + + EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount()); + + device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation); +} + } // namespace NEO diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 1714b1faef..203ae01b16 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -36,7 +36,7 @@ struct BcsBufferTests : public ::testing::Test { bcsCsr->initializeTagAllocation(); bcsCsr->createGlobalFenceAllocation(); - auto mockBlitMemoryToAllocation = [this](Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, + auto mockBlitMemoryToAllocation = [this](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) -> BlitOperationResult { auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, memory, nullptr, diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index a53e258188..486fa20d8a 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -86,7 +86,7 @@ enum class BlitOperationResult { }; namespace BlitHelperFunctions { -using BlitMemoryToAllocationFunc = std::function size); - static BlitOperationResult blitAllocationToMemory(Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, + static BlitOperationResult blitAllocationToMemory(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size); }; diff --git a/shared/source/helpers/blit_commands_helper_extra.cpp b/shared/source/helpers/blit_commands_helper_extra.cpp index e1257a9f58..f556ea6bb1 100644 --- a/shared/source/helpers/blit_commands_helper_extra.cpp +++ b/shared/source/helpers/blit_commands_helper_extra.cpp @@ -9,7 +9,7 @@ namespace NEO { -BlitOperationResult BlitHelper::blitMemoryToAllocation(Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, +BlitOperationResult BlitHelper::blitMemoryToAllocation(const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) { return BlitOperationResult::Unsupported; }