// Patch overview (commentary only; every byte from the first "diff --git" onwards is unchanged).
//
// level_zero/core/source/cmdlist/cmdlist_hw_immediate.h
//   * appendMemoryCopy: the result of performCpuMemcpy() is stored in `ret` and returned only when it is
//     ZE_RESULT_SUCCESS or ZE_RESULT_ERROR_DEVICE_LOST; any other result falls through to the code that
//     follows the `if` (isAppendSplitNeeded(...) onwards).
//   * performCpuMemcpy: both obtainLockedPtrFromDevice() calls move to the top of the function, ahead of the
//     wait-event barrier, and the function returns ZE_RESULT_ERROR_UNKNOWN as soon as `lockingFailed` is set.
//   * obtainLockedPtrFromDevice: gains a `bool &lockingFailed` parameter. When the allocation is still not
//     locked after lockResource(), the flag is set to true and nullptr is returned. The visible hunks only
//     ever write `true` to the flag, so callers are expected to initialise it to false.
//
// level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp (four new tests)
//   * givenImmediateCommandListAndFailedToLockPtrThenUseGpuMemcpy: after a first copy that expects
//     appendMemoryCopyKernelWithGACalled == 0, the allocation is unlocked, failLockResource is set, and a
//     second copy expects the counter to be 1.
//   * givenAllocationDataWhenFailingToObtainLockedPtrFromDeviceThenNullptrIsReturned: with failLockResource
//     set, expects nullptr, lockingFailed == true and the allocation still unlocked.
//   * givenNullAllocationDataWhenObtainLockedPtrFromDeviceCalledThenNullptrIsReturned: expects nullptr and
//     lockingFailed left at false.
//   * givenFailedToObtainLockedPtrWhenPerformingCpuMemoryCopyThenErrorIsReturned: expects
//     ZE_RESULT_ERROR_UNKNOWN from performCpuMemcpy() both before and after swapping srcAllocData/dstAllocData.
//
// level_zero/core/test/unit_tests/sources/event/test_event.cpp
//   * Adds LocalMemoryEnabledDeviceFixture, whose setUp() sets DebugManager.flags.EnableLocalMemory to 1
//     before calling DeviceFixture::setUp(); the EventTimestampTest alias and the test name are changed.
//
// shared/test/common/mocks/mock_memory_manager.h
//   * Adds `bool failLockResource = false`. When it is true, lockResourceImpl() skips the
//     OsAgnosticMemoryManager::lockResourceImpl() call and records/returns nullptr.
//
// NOTE(review): original line breaks are collapsed and text between angle brackets appears to be missing
//   (e.g. `const_cast(srcptr)`, `static_cast(device->...)`, `using EventTimestampTest = Test;`) -- presumably
//   an extraction artifact; confirm against the original commit before applying.
// NOTE(review): the first hunk header reads `-158,7 +158,7` while the visible hunk body removes one line and
//   adds four; part of the patch text may have been dropped -- TODO confirm.
// NOTE(review): in appendMemoryCopy every performCpuMemcpy() result other than SUCCESS/DEVICE_LOST now falls
//   through to the remaining copy path; verify that this is intended for results other than the lock failure.
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index edcc73e43e..14fdb6e335 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -158,7 +158,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::appendMemoryCopy( this->device->getDriverHandle()->findAllocationDataForRange(const_cast(srcptr), size, &cpuMemCopyInfo.srcAllocData); this->device->getDriverHandle()->findAllocationDataForRange(dstptr, size, &cpuMemCopyInfo.dstAllocData); if (preferCopyThroughLockedPtr(cpuMemCopyInfo)) { - return performCpuMemcpy(cpuMemCopyInfo, hSignalEvent, numWaitEvents, phWaitEvents); + ret = performCpuMemcpy(cpuMemCopyInfo, hSignalEvent, numWaitEvents, phWaitEvents); + if (ret == ZE_RESULT_SUCCESS || ret == ZE_RESULT_ERROR_DEVICE_LOST) { + return ret; + } } auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size); @@ -677,6 +680,17 @@ bool CommandListCoreFamilyImmediate::isSuitableUSMSharedAlloc(NEO template ze_result_t CommandListCoreFamilyImmediate::performCpuMemcpy(const CpuMemCopyInfo &cpuMemCopyInfo, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + bool lockingFailed = false; + auto srcLockPointer = obtainLockedPtrFromDevice(cpuMemCopyInfo.srcAllocData, const_cast(cpuMemCopyInfo.srcPtr), lockingFailed); + if (lockingFailed) { + return ZE_RESULT_ERROR_UNKNOWN; + } + + auto dstLockPointer = obtainLockedPtrFromDevice(cpuMemCopyInfo.dstAllocData, const_cast(cpuMemCopyInfo.dstPtr), lockingFailed); + if (lockingFailed) { + return ZE_RESULT_ERROR_UNKNOWN; + } + bool needsBarrier = (numWaitEvents > 0); if (needsBarrier) { this->appendBarrier(nullptr, numWaitEvents, phWaitEvents); @@ -694,9 +708,6 @@ ze_result_t CommandListCoreFamilyImmediate::performCpuMemcpy(cons signalEvent = Event::fromHandle(hSignalEvent); } - auto srcLockPointer = 
obtainLockedPtrFromDevice(cpuMemCopyInfo.srcAllocData, const_cast(cpuMemCopyInfo.srcPtr)); - auto dstLockPointer = obtainLockedPtrFromDevice(cpuMemCopyInfo.dstAllocData, cpuMemCopyInfo.dstPtr); - const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr; void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr; @@ -724,7 +735,7 @@ ze_result_t CommandListCoreFamilyImmediate::performCpuMemcpy(cons } template -void *CommandListCoreFamilyImmediate::obtainLockedPtrFromDevice(NEO::SvmAllocationData *allocData, void *ptr) { +void *CommandListCoreFamilyImmediate::obtainLockedPtrFromDevice(NEO::SvmAllocationData *allocData, void *ptr, bool &lockingFailed) { if (!allocData) { return nullptr; } @@ -736,6 +747,10 @@ void *CommandListCoreFamilyImmediate::obtainLockedPtrFromDevice(N if (!alloc->isLocked()) { this->device->getDriverHandle()->getMemoryManager()->lockResource(alloc); + if (!alloc->isLocked()) { + lockingFailed = true; + return nullptr; + } } auto gpuAddress = allocData->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->getGpuAddress(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index d0625f011d..c186d7de9a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -2373,6 +2373,27 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmDstHostP EXPECT_GE(cmdList.appendMemoryCopyKernelWithGACalled, 1u); } +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndFailedToLockPtrThenUseGpuMemcpy, IsAtLeastSkl) { + MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; + + cmdList.appendMemoryCopy(devicePtr, 
nonUsmHostPtr, 1 * MemoryConstants::megaByte, nullptr, 0, nullptr); + ASSERT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u); + + NEO::SvmAllocationData *dstAllocData; + ASSERT_TRUE(device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1 * MemoryConstants::megaByte, &dstAllocData)); + ASSERT_NE(dstAllocData, nullptr); + auto mockMemoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); + auto graphicsAllocation = dstAllocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + mockMemoryManager->unlockResource(graphicsAllocation); + mockMemoryManager->failLockResource = true; + ASSERT_FALSE(graphicsAllocation->isLocked()); + + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1 * MemoryConstants::megaByte, nullptr, 0, nullptr); + EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 1u); +} + HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndD2HCopyWhenSizeTooLargeButFlagSetThenUseCpuMemcpy, IsAtLeastSkl) { DebugManager.flags.ExperimentalD2HCpuCopyThreshold.set(2048); MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; @@ -2478,5 +2499,52 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndTimestampFlagN EXPECT_EQ(0u, reinterpret_cast(event.get())->gpuEndTimestamp); } +HWTEST2_F(AppendMemoryLockedCopyTest, givenAllocationDataWhenFailingToObtainLockedPtrFromDeviceThenNullptrIsReturned, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + NEO::SvmAllocationData *dstAllocData = nullptr; + EXPECT_TRUE(device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData)); + ASSERT_NE(dstAllocData, nullptr); + auto graphicsAllocation = dstAllocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + ASSERT_FALSE(graphicsAllocation->isLocked()); + + auto mockMemoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); + mockMemoryManager->failLockResource = 
true; + + bool lockingFailed = false; + void *lockedPtr = cmdList.obtainLockedPtrFromDevice(dstAllocData, devicePtr, lockingFailed); + EXPECT_FALSE(graphicsAllocation->isLocked()); + EXPECT_TRUE(lockingFailed); + EXPECT_EQ(lockedPtr, nullptr); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenNullAllocationDataWhenObtainLockedPtrFromDeviceCalledThenNullptrIsReturned, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + bool lockingFailed = false; + EXPECT_EQ(cmdList.obtainLockedPtrFromDevice(nullptr, devicePtr, lockingFailed), nullptr); + EXPECT_FALSE(lockingFailed); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenFailedToObtainLockedPtrWhenPerformingCpuMemoryCopyThenErrorIsReturned, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + CpuMemCopyInfo cpuMemCopyInfo(nullptr, nullptr, 1024); + auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &cpuMemCopyInfo.srcAllocData); + auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData); + ASSERT_TRUE(srcFound != dstFound); + ze_result_t returnValue = ZE_RESULT_SUCCESS; + + auto mockMemoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); + mockMemoryManager->failLockResource = true; + + returnValue = cmdList.performCpuMemcpy(cpuMemCopyInfo, nullptr, 1, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, returnValue); + + std::swap(cpuMemCopyInfo.srcAllocData, cpuMemCopyInfo.dstAllocData); + returnValue = cmdList.performCpuMemcpy(cpuMemCopyInfo, nullptr, 1, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, returnValue); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 39b9fc5f3a..8a8c7c56a3 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ 
b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -3131,8 +3131,19 @@ TEST_F(ImmediateEventAllPacketSignalSinglePacketUseTest, givenSignalAllEventPack testQueryAllPackets(event.get(), true); } -using EventTimestampTest = Test; -HWTEST2_F(EventTimestampTest, givenAppendMemoryCopyRegionsIsCalledWhenCopyTimeIsLessThanDeviceTimestampResolutionThenReturnTimstampDifferenceAsOne, IsXeHpcCore) { +struct LocalMemoryEnabledDeviceFixture : public DeviceFixture { + void setUp() { + DebugManager.flags.EnableLocalMemory.set(1); + DeviceFixture::setUp(); + } + void tearDown() { + DeviceFixture::tearDown(); + } + DebugManagerStateRestore restore; +}; + +using EventTimestampTest = Test; +HWTEST2_F(EventTimestampTest, givenAppendMemoryCopyIsCalledWhenCpuCopyIsUsedAndCopyTimeIsLessThanDeviceTimestampResolutionThenReturnTimstampDifferenceAsOne, IsXeHpcCore) { MockCommandListImmediateHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h index 23674ea699..7cc7463e8b 100644 --- a/shared/test/common/mocks/mock_memory_manager.h +++ b/shared/test/common/mocks/mock_memory_manager.h @@ -91,7 +91,10 @@ class MockMemoryManager : public MemoryManagerCreate { void *lockResourceImpl(GraphicsAllocation &gfxAllocation) override { lockResourceCalled++; - auto pLockedMemory = OsAgnosticMemoryManager::lockResourceImpl(gfxAllocation); + void *pLockedMemory = nullptr; + if (!failLockResource) { + pLockedMemory = OsAgnosticMemoryManager::lockResourceImpl(gfxAllocation); + } lockResourcePointers.push_back(pLockedMemory); return pLockedMemory; } @@ -239,6 +242,7 @@ class MockMemoryManager : public MemoryManagerCreate { bool failReserveAddress = false; bool failAllocateSystemMemory = false; bool failAllocate32Bit = false; + bool failLockResource = false; bool failSetMemAdvise = false; 
bool setMemPrefetchCalled = false; bool cpuCopyRequired = false;