Fall back to a CPU copy of kernel ISA when the blitter reports Unsupported

When the kernel ISA allocation lives in the local memory pool and
HwHelper::isBlitCopyRequiredForLocalMemory() is true, both
KernelImmutableData::initialize (Level Zero) and
KernelInfo::createKernelAllocation (OpenCL) upload the ISA through
BlitHelperFunctions::blitMemoryToAllocation. With this patch:

  * BlitOperationResult::Unsupported no longer ends the upload; both paths
    continue to MemoryManager::copyMemoryToAllocation.
  * KernelImmutableData::initialize hits UNRECOVERABLE_IF only for
    BlitOperationResult::Fail (previously for anything but Success).
  * KernelInfo::createKernelAllocation returns (status == Success) for every
    result other than Unsupported.

Tests: the BlitEnqueueTests blit mock returns Unsupported when
capabilityTable.blitterOperationsSupported is false, and one test per API
checks that the BCS command stream receiver's task count is unchanged after
the kernel ISA upload with the blitter disabled.

diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp
index 1ec222cc7a..79be3b5432 100644
--- a/level_zero/core/source/kernel/kernel_imp.cpp
+++ b/level_zero/core/source/kernel/kernel_imp.cpp
@@ -110,13 +110,20 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, NEO::MemoryMan
     auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
 
     if (kernelInfo->heapInfo.pKernelHeap != nullptr) {
+        bool doCpuIsaCopy = true;
+
         if (allocation->isAllocatedInLocalMemoryPool() && hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo)) {
             auto status = NEO::BlitHelperFunctions::blitMemoryToAllocation(*device, allocation, 0, kernelInfo->heapInfo.pKernelHeap, {kernelIsaSize, 1, 1});
-            UNRECOVERABLE_IF(status != NEO::BlitOperationResult::Success);
-        } else {
+            UNRECOVERABLE_IF(status == NEO::BlitOperationResult::Fail);
+
+            doCpuIsaCopy = (status == NEO::BlitOperationResult::Unsupported);
+        }
+
+        if (doCpuIsaCopy) {
             memoryManager.copyMemoryToAllocation(allocation, kernelInfo->heapInfo.pKernelHeap, kernelIsaSize);
         }
     }
+
     isaGraphicsAllocation.reset(allocation);
 
     this->crossThreadDataSize = this->kernelDescriptor->kernelAttributes.crossThreadDataSize;
diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
index fc1661dbc8..86f5c80dfb 100644
--- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
+++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
@@ -523,5 +523,29 @@ TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowed
     device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
 }
 
+TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
+    DebugManagerStateRestore restore;
+    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
+    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA) - 1));
+
+    device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
+
+    uint32_t kernelHeap = 0;
+    KernelInfo kernelInfo;
+    kernelInfo.heapInfo.KernelHeapSize = 1;
+    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
+
+    KernelImmutableData kernelImmutableData(device);
+
+    auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver;
+    auto initialTaskCount = bcsCsr->peekTaskCount();
+
+    kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr);
+
+    EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
+
+    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
+}
+
 } // namespace ult
 } // namespace L0
diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp
index 94b3cc0334..3fb3793740 100644
--- a/opencl/source/program/kernel_info.cpp
+++ b/opencl/source/program/kernel_info.cpp
@@ -435,7 +435,9 @@ bool KernelInfo::createKernelAllocation(const Device &device) {
 
     if (kernelAllocation->isAllocatedInLocalMemoryPool() && hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo)) {
         auto status = BlitHelperFunctions::blitMemoryToAllocation(device, kernelAllocation, 0, heapInfo.pKernelHeap, {kernelIsaSize, 1, 1});
-        return (status == BlitOperationResult::Success);
+        if (status != BlitOperationResult::Unsupported) {
+            return status == BlitOperationResult::Success;
+        }
     }
 
     return device.getMemoryManager()->copyMemoryToAllocation(kernelAllocation, heapInfo.pKernelHeap, kernelIsaSize);
diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
index 333a63357d..09d13356c8 100644
--- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
+++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
@@ -38,6 +38,10 @@ struct BlitEnqueueTests : public ::testing::Test {
 
         auto mockBlitMemoryToAllocation = [this](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr,
                                                  Vec3<size_t> size) -> BlitOperationResult {
+            if (!device.getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported) {
+                return BlitOperationResult::Unsupported;
+            }
+
             auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
                                                                                         *bcsCsr, memory, nullptr,
                                                                                         hostPtr,
@@ -1504,4 +1508,24 @@ HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreating
     device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
 }
 
+HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
+    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::CpuAccessDisallowed));
+    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast<int64_t>(GraphicsAllocation::AllocationType::KERNEL_ISA) - 1));
+
+    device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
+
+    uint32_t kernelHeap = 0;
+    KernelInfo kernelInfo;
+    kernelInfo.heapInfo.KernelHeapSize = 1;
+    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
+
+    auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
+
+    kernelInfo.createKernelAllocation(device->getDevice());
+
+    EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount());
+
+    device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
+}
+
 } // namespace NEO