diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 6969c5e8e9..1ec222cc7a 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -8,7 +8,7 @@ #include "level_zero/core/source/kernel/kernel_imp.h" #include "shared/source/helpers/basic_math.h" -#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/kernel_helpers.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/string.h" @@ -105,8 +105,17 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, NEO::MemoryMan auto allocation = memoryManager.allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), kernelIsaSize, NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, device->getDeviceBitfield()}); UNRECOVERABLE_IF(allocation == nullptr); + + auto &hwInfo = device->getHardwareInfo(); + auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); + if (kernelInfo->heapInfo.pKernelHeap != nullptr) { - memoryManager.copyMemoryToAllocation(allocation, kernelInfo->heapInfo.pKernelHeap, kernelIsaSize); + if (allocation->isAllocatedInLocalMemoryPool() && hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo)) { + auto status = NEO::BlitHelperFunctions::blitMemoryToAllocation(*device, allocation, 0, kernelInfo->heapInfo.pKernelHeap, {kernelIsaSize, 1, 1}); + UNRECOVERABLE_IF(status != NEO::BlitOperationResult::Success); + } else { + memoryManager.copyMemoryToAllocation(allocation, kernelInfo->heapInfo.pKernelHeap, kernelIsaSize); + } } isaGraphicsAllocation.reset(allocation); diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 51700c92e4..fc1661dbc8 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ 
b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -455,5 +455,73 @@ HWTEST_F(KernelLocalIdsTest, WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRu EXPECT_TRUE(kernel->requiresGenerationOfLocalIdsByRuntime()); } /* Fixture for the kernel ISA transfer tests. SetUp runs the base SetUp and then forces capabilityTable.blitterOperationsSupported to true. If that flag was false beforehand, it creates an OsContext for aub_stream::ENGINE_BCS, stores it in bcsOsContext, points engine 0 at it and calls setupContext on that engine command stream receiver. NOTE(review): Test, std::unique_ptr and static_cast appear without template arguments in this text, presumably lost during extraction; confirm against the upstream file before applying. */ +struct KernelIsaTests : Test { + void SetUp() override { + Test::SetUp(); + + auto &capabilityTable = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; + bool createBcsEngine = !capabilityTable.blitterOperationsSupported; + capabilityTable.blitterOperationsSupported = true; + /* Only build a BCS context when the capability table did not already report blitter support. */ + if (createBcsEngine) { + auto &engine = device->getNEODevice()->getEngine(0); + bcsOsContext.reset(OsContext::create(nullptr, 1, device->getNEODevice()->getDeviceBitfield(), aub_stream::ENGINE_BCS, PreemptionMode::Disabled, + false, false, false)); + engine.osContext = bcsOsContext.get(); + engine.commandStreamReceiver->setupContext(*bcsOsContext); + } + } + + std::unique_ptr bcsOsContext; +}; + /* Sets ForceLocalMemoryAccessMode to CpuAccessDisallowed and ForceNonSystemMemoryPlacement to a bit derived from AllocationType::KERNEL_ISA, then initializes a KernelImmutableData from a KernelInfo whose heap size is 1. Expects the task count of the ENGINE_BCS command stream receiver to grow by exactly one when the ISA allocation reports isAllocatedInLocalMemoryPool(), and to stay unchanged otherwise. The DebugManagerStateRestore object presumably restores the debug flags at scope exit; verify. */ +TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) { + DebugManagerStateRestore restore; + DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed)); + DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1)); + + uint32_t kernelHeap = 0; + KernelInfo kernelInfo; + kernelInfo.heapInfo.KernelHeapSize = 1; + kernelInfo.heapInfo.pKernelHeap = &kernelHeap; + + KernelImmutableData kernelImmutableData(device); + + auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver; + auto initialTaskCount = bcsCsr->peekTaskCount(); + + kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr); + + if (kernelImmutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) { + EXPECT_EQ(initialTaskCount + 1,
bcsCsr->peekTaskCount()); + } else { + EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount()); + } + /* NOTE(review): kernelInfo.kernelAllocation is never assigned inside this test; confirm what this call is expected to release. */ + device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation); +} + /* Same setup as the previous test but with ForceLocalMemoryAccessMode set to CpuAccessAllowed. Expects the task count of the ENGINE_BCS command stream receiver to be unchanged after initialize. NOTE(review): as in the previous test, kernelInfo.kernelAllocation is not assigned here before it is passed to freeGraphicsMemory; confirm intent. */ +TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) { + DebugManagerStateRestore restore; + DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessAllowed)); + DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1)); + + uint32_t kernelHeap = 0; + KernelInfo kernelInfo; + kernelInfo.heapInfo.KernelHeapSize = 1; + kernelInfo.heapInfo.pKernelHeap = &kernelHeap; + + KernelImmutableData kernelImmutableData(device); + + auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver; + auto initialTaskCount = bcsCsr->peekTaskCount(); + + kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr); + + EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount()); + + device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation); +} + } // namespace ult } // namespace L0