diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index 7e0c1e8aac..4586264148 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -3969,7 +3969,7 @@ HWTEST2_F(MultipleDevicePeerAllocationTest, auto allocationData1 = svmManager->getSVMAlloc(ptr1); TaskCountType prevPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId()); - svmManager->prepareIndirectAllocationForDestruction(allocationData1); + svmManager->prepareIndirectAllocationForDestruction(allocationData1, false); TaskCountType postPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId()); EXPECT_EQ(postPeekTaskCount1, prevPeekTaskCount1); diff --git a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp index 180bab22a3..02358f1feb 100644 --- a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -722,9 +722,16 @@ TEST(UnifiedMemoryTest, givenInternalAllocationsWhenTheyArePreparedForFreeingThe auto allocationData = unifiedMemoryManager->getSVMAlloc(ptr); - unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData); + unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData, false); EXPECT_EQ(124u, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getTaskCount(commandStreamReceiver.getOsContext().getContextId())); EXPECT_EQ(124u, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(commandStreamReceiver.getOsContext().getContextId())); + + 
graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateTaskCount(1u, commandStreamReceiver.getOsContext().getContextId()); + graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, commandStreamReceiver.getOsContext().getContextId()); + unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData, true); + EXPECT_EQ(GraphicsAllocation::objectNotUsed, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getTaskCount(commandStreamReceiver.getOsContext().getContextId())); + EXPECT_EQ(GraphicsAllocation::objectNotResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(commandStreamReceiver.getOsContext().getContextId())); + unifiedMemoryManager->freeSVMAlloc(ptr); } diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 2982e51efe..c6f3d327c8 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -496,7 +496,8 @@ bool SVMAllocsManager::freeSVMAllocDefer(void *ptr) { } void SVMAllocsManager::freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData) { - this->prepareIndirectAllocationForDestruction(svmData); + auto allowNonBlockingFree = policy == FreePolicyType::none; + this->prepareIndirectAllocationForDestruction(svmData, allowNonBlockingFree); if (policy == FreePolicyType::blocking) { if (svmData->cpuAllocation) { @@ -769,7 +770,7 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co } } -void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData) { +void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData, bool isNonBlockingFree) { std::unique_lock lock(mtx); if (this->indirectAllocationsResidency.size() > 0u) { for 
(auto &internalAllocationsHandling : this->indirectAllocationsResidency) { @@ -778,7 +779,13 @@ void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData if (gpuAllocation == nullptr) { continue; } - auto desiredTaskCount = std::max(internalAllocationsHandling.second.latestSentTaskCount, gpuAllocation->getTaskCount(commandStreamReceiver->getOsContext().getContextId())); + + // For a non-blocking free (isNonBlockingFree), the task count is set to objectNotUsed so that we do not wait for GPU completion. + // For a blocking free, we must instead wait on the "safest" task count: the larger of the latest sent task count and the allocation's own task count. + TaskCountType desiredTaskCount = std::max(internalAllocationsHandling.second.latestSentTaskCount, gpuAllocation->getTaskCount(commandStreamReceiver->getOsContext().getContextId())); + if (isNonBlockingFree) { + desiredTaskCount = GraphicsAllocation::objectNotUsed; + } if (gpuAllocation->isAlwaysResident(commandStreamReceiver->getOsContext().getContextId())) { gpuAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, commandStreamReceiver->getOsContext().getContextId()); gpuAllocation->updateResidencyTaskCount(desiredTaskCount, commandStreamReceiver->getOsContext().getContextId()); diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index abce9ebefd..76789958a9 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -229,7 +229,7 @@ class SVMAllocsManager { bool hasHostAllocations(); std::atomic allocationsCounter = 0; MOCKABLE_VIRTUAL void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount); - void prepareIndirectAllocationForDestruction(SvmAllocationData *); + void prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData, bool isNonBlockingFree); MOCKABLE_VIRTUAL void prefetchMemory(Device &device, CommandStreamReceiver &commandStreamReceiver, 
SvmAllocationData &svmData); void prefetchSVMAllocs(Device &device, CommandStreamReceiver &commandStreamReceiver); std::unique_lock obtainOwnership();