diff --git a/level_zero/core/source/context/context_imp.cpp b/level_zero/core/source/context/context_imp.cpp index 6b100e8cae..a701128789 100644 --- a/level_zero/core/source/context/context_imp.cpp +++ b/level_zero/core/source/context/context_imp.cpp @@ -28,6 +28,9 @@ namespace L0 { ze_result_t ContextImp::destroy() { + while (driverHandle->svmAllocsManager->getNumDeferFreeAllocs() > 0) { + this->driverHandle->svmAllocsManager->freeSVMAllocDeferImpl(); + } delete this; return ZE_RESULT_SUCCESS; @@ -94,6 +97,15 @@ ze_result_t ContextImp::allocHostMem(const ze_host_mem_alloc_desc_t *hostDesc, auto usmPtr = this->driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties); if (usmPtr == nullptr) { + if (driverHandle->svmAllocsManager->getNumDeferFreeAllocs() > 0) { + this->driverHandle->svmAllocsManager->freeSVMAllocDeferImpl(); + usmPtr = this->driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size, + unifiedMemoryProperties); + if (usmPtr) { + *ptr = usmPtr; + return ZE_RESULT_SUCCESS; + } + } return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } @@ -191,6 +203,15 @@ ze_result_t ContextImp::allocDeviceMem(ze_device_handle_t hDevice, void *usmPtr = this->driverHandle->svmAllocsManager->createUnifiedMemoryAllocation(size, unifiedMemoryProperties); if (usmPtr == nullptr) { + if (driverHandle->svmAllocsManager->getNumDeferFreeAllocs() > 0) { + this->driverHandle->svmAllocsManager->freeSVMAllocDeferImpl(); + usmPtr = + this->driverHandle->svmAllocsManager->createUnifiedMemoryAllocation(size, unifiedMemoryProperties); + if (usmPtr) { + *ptr = usmPtr; + return ZE_RESULT_SUCCESS; + } + } return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } *ptr = usmPtr; @@ -292,6 +313,16 @@ ze_result_t ContextImp::allocSharedMem(ze_device_handle_t hDevice, unifiedMemoryProperties, static_cast(neoDevice->getSpecializedDevice())); if (usmPtr == nullptr) { + if (driverHandle->svmAllocsManager->getNumDeferFreeAllocs() > 0) { + this->driverHandle->svmAllocsManager->freeSVMAllocDeferImpl(); + usmPtr = this->driverHandle->svmAllocsManager->createSharedUnifiedMemoryAllocation(size, + unifiedMemoryProperties, + static_cast(neoDevice->getSpecializedDevice())); + if (usmPtr) { + *ptr = usmPtr; + return ZE_RESULT_SUCCESS; + } + } return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } *ptr = usmPtr; @@ -349,7 +380,17 @@ ze_result_t ContextImp::freeMemExt(const ze_memory_free_ext_desc_t *pMemFreeDesc return this->freeMem(ptr, true); } if (pMemFreeDesc->freePolicy == ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE) { - return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + auto allocation = this->driverHandle->svmAllocsManager->getSVMAlloc(ptr); + if (allocation == nullptr) { + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + + for (auto pairDevice : this->devices) { + this->freePeerAllocations(ptr, false, Device::fromHandle(pairDevice.second)); + } + + this->driverHandle->svmAllocsManager->freeSVMAllocDefer(const_cast(ptr)); + return ZE_RESULT_SUCCESS; } return this->freeMem(ptr, false); } diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index 5db9e2aa24..30c9f52c0c 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -955,7 +955,18 @@ struct SVMAllocsManagerFreeExtMock : public NEO::SVMAllocsManager { } return SVMAllocsManager::freeSVMAlloc(ptr, blocking); } - uint32_t blockingCallsMade = 0; + + bool freeSVMAllocDefer(void *ptr) override { + deferFreeCallsMade++; + return SVMAllocsManager::freeSVMAllocDefer(ptr); + } + + uint32_t numDeferFreeAllocs() { + return static_cast(SVMAllocsManager::getNumDeferFreeAllocs()); + } + + uint32_t blockingCallsMade = 0u; + uint32_t deferFreeCallsMade = 0u; }; struct FreeExtTests : public ::testing::Test { @@ -974,7 +985,11 @@ struct FreeExtTests : public ::testing::Test { driverHandle->svmAllocsManager = currSvmAllocsManager; device = driverHandle->devices[0]; - context = std::make_unique(driverHandle.get()); + ze_context_handle_t hContext; + ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; + ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + context = static_cast(Context::fromHandle(hContext)); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->getRootDeviceIndex(), device->toHandle())); auto neoDevice = device->getNEODevice(); @@ -983,6 +998,9 @@ struct FreeExtTests : public ::testing::Test { } void TearDown() override { + if (context) { + context->destroy(); + } driverHandle->svmAllocsManager = prevSvmAllocsManager; delete currSvmAllocsManager; } @@ -991,7 +1009,7 @@ struct FreeExtTests : public ::testing::Test { std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; - std::unique_ptr context; + L0::ContextImp *context = nullptr; }; TEST_F(FreeExtTests, @@ -1033,7 +1051,7 @@ TEST_F(FreeExtTests, } TEST_F(FreeExtTests, - whenFreeMemExtIsCalledWithDeferFreePolicyThenUnsuportedIsReturned) { + whenFreeMemExtIsCalledWithDeferFreePolicyThenBlockingCallIsNotMade) { size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; @@ -1047,10 +1065,361 @@ TEST_F(FreeExtTests, ze_memory_free_ext_desc_t memFreeDesc = {}; memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; result = context->freeMemExt(&memFreeDesc, ptr); - EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); - - result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); + EXPECT_EQ(0u, memManager->blockingCallsMade); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); +} + +TEST_F(FreeExtTests, + whenFreeMemExtIsCalledWithDeferFreePolicyAndInvalidPtrThenReturnInvalidArgument) { + void *ptr = nullptr; + + ze_memory_free_ext_desc_t memFreeDesc = {}; + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + ze_result_t result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); +} + +TEST_F(FreeExtTests, + whenFreeMemExtIsCalledWithDeferFreePolicyAndAllocationNotInUseThenMemoryFreeNotDeferred) { + size_t size = 1024; + size_t alignment = 1u; + void *ptr = nullptr; + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_result_t result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); + ze_memory_free_ext_desc_t memFreeDesc = {}; + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); + EXPECT_EQ(0u, memManager->blockingCallsMade); +} + +TEST_F(FreeExtTests, + whenFreeMemExtIsCalledWithDeferFreePolicyAndAllocationInUseThenMemoryFreeDeferred) { + size_t size = 1024; + size_t alignment = 1u; + void *ptr = nullptr; + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_result_t result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); + ze_memory_free_ext_desc_t memFreeDesc = {}; + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + void *ptr2 = nullptr; + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr2); + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_BLOCKING_FREE; + result = context->freeMemExt(&memFreeDesc, ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); + EXPECT_EQ(1u, memManager->blockingCallsMade); + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(2u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr2); + result = context->freeMemExt(&memFreeDesc, ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(3u, memManager->deferFreeCallsMade); +} + +TEST_F(FreeExtTests, + whenFreeMemExtIsCalledMultipleTimesForSameAllocationWithDeferFreePolicyAndAllocationInUseThenMemoryFreeDeferredOnlyOnce) { + size_t size = 1024; + size_t alignment = 1u; + void *ptr = nullptr; + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_result_t result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); + ze_memory_free_ext_desc_t memFreeDesc = {}; + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); + + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(2u, memManager->deferFreeCallsMade); + + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(3u, memManager->deferFreeCallsMade); + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(4u, memManager->deferFreeCallsMade); +} + +TEST_F(FreeExtTests, + whenFreeMemIsCalledWithDeferredFreeAllocationThenMemoryFreed) { + size_t size = 1024; + size_t alignment = 1u; + void *ptr = nullptr; + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_result_t result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); + ze_memory_free_ext_desc_t memFreeDesc = {}; + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + + void *ptr2 = nullptr; + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr2); + result = context->freeMem(ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); +} + +TEST_F(FreeExtTests, + whenallocMemFailsWithDeferredFreeAllocationThenMemoryFreed) { + size_t size = 1024; + size_t alignment = 1u; + void *ptr = nullptr; + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_result_t result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); + ze_memory_free_ext_desc_t memFreeDesc = {}; + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + static_cast(driverHandle->getMemoryManager())->isMockHostMemoryManager = true; + static_cast(driverHandle->getMemoryManager())->forceFailureInPrimaryAllocation = true; + + void *ptr2 = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr2); + EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, result); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + + static_cast(driverHandle->getMemoryManager())->forceFailureInPrimaryAllocation = false; + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(2u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + + static_cast(driverHandle->getMemoryManager())->forceFailureInPrimaryAllocation = true; + result = context->allocDeviceMem(device, + &deviceDesc, + size, alignment, &ptr2); + EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, result); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + + static_cast(driverHandle->getMemoryManager())->forceFailureInPrimaryAllocation = false; + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(3u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + + static_cast(driverHandle->getMemoryManager())->forceFailureInPrimaryAllocation = true; + result = context->allocSharedMem(device->toHandle(), + &deviceDesc, + &hostDesc, + size, alignment, &ptr2); + EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, result); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); +} + +TEST_F(FreeExtTests, + whenallocMemFailsWithDeferredFreeAllocationThenMemoryFreedAndRetrySucceeds) { + size_t size = 1024; + size_t alignment = 1u; + void *ptr = nullptr; + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_result_t result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); + ze_memory_free_ext_desc_t memFreeDesc = {}; + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + static_cast(driverHandle->getMemoryManager())->isMockHostMemoryManager = true; + static_cast(driverHandle->getMemoryManager())->forceFailureInPrimaryAllocation = true; + static_cast(driverHandle->getMemoryManager())->singleFailureInPrimaryAllocation = true; + + void *ptr2 = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr2); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + result = context->freeMemExt(&memFreeDesc, ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(2u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + + static_cast(driverHandle->getMemoryManager())->forceFailureInPrimaryAllocation = true; + static_cast(driverHandle->getMemoryManager())->singleFailureInPrimaryAllocation = true; + + result = context->allocDeviceMem(device, + &deviceDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(3u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + + static_cast(driverHandle->getMemoryManager())->forceFailureInPrimaryAllocation = true; + static_cast(driverHandle->getMemoryManager())->singleFailureInPrimaryAllocation = true; + + result = context->allocSharedMem(device->toHandle(), + &deviceDesc, + &hostDesc, + size, alignment, &ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr2); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + result = context->freeMemExt(&memFreeDesc, ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(4u, memManager->deferFreeCallsMade); +} + +TEST_F(FreeExtTests, + whenDestroyContextAnyRemainingDeferFreeMemoryAllocationsAreFreed) { + size_t size = 1024; + size_t alignment = 1u; + void *ptr = nullptr; + + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = true; + + ze_host_mem_alloc_desc_t hostDesc = {}; + ze_result_t result = context->allocHostMem(&hostDesc, + size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + void *ptr2 = nullptr; + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr2); + void *ptr3 = nullptr; + result = context->allocHostMem(&hostDesc, + size, alignment, &ptr3); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr3); + SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); + ze_memory_free_ext_desc_t memFreeDesc = {}; + memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + result = context->freeMemExt(&memFreeDesc, ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(1u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(1u, memManager->deferFreeCallsMade); + result = context->freeMemExt(&memFreeDesc, ptr2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(2u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(2u, memManager->deferFreeCallsMade); + result = context->freeMemExt(&memFreeDesc, ptr3); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(3u, memManager->numDeferFreeAllocs()); + EXPECT_EQ(3u, memManager->deferFreeCallsMade); + static_cast(driverHandle->getMemoryManager())->deferAllocInUse = false; + context->destroy(); + context = nullptr; + EXPECT_EQ(0u, memManager->numDeferFreeAllocs()); } TEST_F(FreeExtTests, diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 56145a8cf6..19535d9485 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -780,6 +780,19 @@ void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocat } } +bool MemoryManager::allocInUse(GraphicsAllocation &graphicsAllocation) { + for (auto &engine : getRegisteredEngines()) { + auto osContextId = engine.osContext->getContextId(); + auto allocationTaskCount = graphicsAllocation.getTaskCount(osContextId); + if (graphicsAllocation.isUsedByOsContext(osContextId) && + engine.commandStreamReceiver->getTagAllocation() != nullptr && + allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) { + return true; + } + } + return false; +} + void MemoryManager::cleanTemporaryAllocationListOnAllEngines(bool waitForCompletion) { for (auto &engine : getRegisteredEngines()) { auto csr = engine.commandStreamReceiver; diff --git a/shared/source/memory_manager/memory_manager.h b/shared/source/memory_manager/memory_manager.h index 8b3fd222a0..0d58f9f3e6 100644 --- a/shared/source/memory_manager/memory_manager.h +++ b/shared/source/memory_manager/memory_manager.h @@ -166,6 +166,7 @@ class MemoryManager { void waitForDeletions(); MOCKABLE_VIRTUAL void waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation); + MOCKABLE_VIRTUAL bool allocInUse(GraphicsAllocation &graphicsAllocation); void cleanTemporaryAllocationListOnAllEngines(bool waitForCompletion); bool isAsyncDeleterEnabled() const; diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 899798bec6..1e2acfa609 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -62,7 +62,7 @@ void SVMAllocsManager::SvmAllocationCache::trim(SVMAllocsManager *svmAllocsManag for (auto &cachedAllocationInfo : this->allocations) { SvmAllocationData *svmData = svmAllocsManager->getSVMAlloc(cachedAllocationInfo.allocation); DEBUG_BREAK_IF(nullptr == svmData); - svmAllocsManager->freeSVMAllocImpl(cachedAllocationInfo.allocation, false, svmData); + svmAllocsManager->freeSVMAllocImpl(cachedAllocationInfo.allocation, FreePolicyType::POLICY_NONE, svmData); } this->allocations.clear(); } @@ -391,6 +391,11 @@ SvmAllocationData *SVMAllocsManager::getSVMAlloc(const void *ptr) { return SVMAllocs.get(ptr); } +SvmAllocationData *SVMAllocsManager::getSVMDeferFreeAlloc(const void *ptr) { + std::shared_lock lock(mtx); + return SVMDeferFreeAllocs.get(ptr); +} + void SVMAllocsManager::insertSVMAlloc(const SvmAllocationData &svmAllocData) { std::unique_lock lock(mtx); SVMAllocs.insert(svmAllocData); @@ -402,6 +407,11 @@ void SVMAllocsManager::removeSVMAlloc(const SvmAllocationData &svmAllocData) { } bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) { + + if (SVMDeferFreeAllocs.allocations.size() > 0) { + this->freeSVMAllocDeferImpl(); + } + SvmAllocationData *svmData = getSVMAlloc(ptr); if (svmData) { if (InternalMemoryType::DEVICE_UNIFIED_MEMORY == svmData->memoryType && @@ -409,16 +419,39 @@ bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) { this->usmDeviceAllocationsCache.insert(svmData->size, ptr); return true; } - this->freeSVMAllocImpl(ptr, blocking, svmData); + if (blocking) { + this->freeSVMAllocImpl(ptr, FreePolicyType::POLICY_BLOCKING, svmData); + } else { + this->freeSVMAllocImpl(ptr, FreePolicyType::POLICY_NONE, svmData); + } return true; } return false; } -void SVMAllocsManager::freeSVMAllocImpl(void *ptr, bool blocking, SvmAllocationData *svmData) { +bool SVMAllocsManager::freeSVMAllocDefer(void *ptr) { + + if (SVMDeferFreeAllocs.allocations.size() > 0) { + this->freeSVMAllocDeferImpl(); + } + + SvmAllocationData *svmData = getSVMAlloc(ptr); + if (svmData) { + if (InternalMemoryType::DEVICE_UNIFIED_MEMORY == svmData->memoryType && + this->usmDeviceAllocationsCacheEnabled) { + this->usmDeviceAllocationsCache.insert(svmData->size, ptr); + return true; + } + this->freeSVMAllocImpl(ptr, FreePolicyType::POLICY_DEFER, svmData); + return true; + } + return false; +} + +void SVMAllocsManager::freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData) { this->prepareIndirectAllocationForDestruction(svmData); - if (blocking) { + if (policy == FreePolicyType::POLICY_BLOCKING) { if (svmData->cpuAllocation) { this->memoryManager->waitForEnginesCompletion(*svmData->cpuAllocation); } @@ -428,8 +461,26 @@ void SVMAllocsManager::freeSVMAllocImpl(void *ptr, bool blocking, SvmAllocationD this->memoryManager->waitForEnginesCompletion(*gpuAllocation); } } + } else if (policy == FreePolicyType::POLICY_DEFER) { + if (svmData->cpuAllocation) { + if (this->memoryManager->allocInUse(*svmData->cpuAllocation)) { + if (getSVMDeferFreeAlloc(svmData) == nullptr) { + this->SVMDeferFreeAllocs.insert(*svmData); + } + return; + } + } + for (auto &gpuAllocation : svmData->gpuAllocations.getGraphicsAllocations()) { + if (gpuAllocation) { + if (this->memoryManager->allocInUse(*gpuAllocation)) { + if (getSVMDeferFreeAlloc(svmData) == nullptr) { + this->SVMDeferFreeAllocs.insert(*svmData); + } + return; + } + } + } } - auto pageFaultManager = this->memoryManager->getPageFaultManager(); if (svmData->cpuAllocation && pageFaultManager) { pageFaultManager->removeAllocation(svmData->cpuAllocation->getUnderlyingBuffer()); @@ -441,6 +492,22 @@ void SVMAllocsManager::freeSVMAllocImpl(void *ptr, bool blocking, SvmAllocationD } } +void SVMAllocsManager::freeSVMAllocDeferImpl() { + + std::vector freedPtr; + for (auto iter = SVMDeferFreeAllocs.allocations.begin(); iter != SVMDeferFreeAllocs.allocations.end(); ++iter) { + void *ptr = reinterpret_cast(iter->second.gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()); + this->freeSVMAllocImpl(ptr, FreePolicyType::POLICY_DEFER, this->getSVMAlloc(ptr)); + + if (this->getSVMAlloc(ptr) == nullptr) { + freedPtr.push_back(ptr); + } + } + for (uint32_t i = 0; i < freedPtr.size(); ++i) { + SVMDeferFreeAllocs.allocations.erase(freedPtr[i]); + } +} + void SVMAllocsManager::trimUSMDeviceAllocCache() { this->usmDeviceAllocationsCache.trim(this); } diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 5000860422..c8aebf27c3 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -145,6 +145,12 @@ class SVMAllocsManager { std::mutex mtx; }; + enum class FreePolicyType : uint32_t { + POLICY_NONE = 0, + POLICY_BLOCKING = 1, + POLICY_DEFER = 2 + }; + SVMAllocsManager(MemoryManager *memoryManager, bool multiOsContextSupport); MOCKABLE_VIRTUAL ~SVMAllocsManager(); void *createSVMAlloc(size_t size, @@ -163,13 +169,17 @@ class SVMAllocsManager { const UnifiedMemoryProperties &unifiedMemoryProperties); void setUnifiedAllocationProperties(GraphicsAllocation *allocation, const SvmAllocationProperties &svmProperties); SvmAllocationData *getSVMAlloc(const void *ptr); + SvmAllocationData *getSVMDeferFreeAlloc(const void *ptr); MOCKABLE_VIRTUAL bool freeSVMAlloc(void *ptr, bool blocking); - MOCKABLE_VIRTUAL void freeSVMAllocImpl(void *ptr, bool blocking, SvmAllocationData *svmData); + MOCKABLE_VIRTUAL bool freeSVMAllocDefer(void *ptr); + MOCKABLE_VIRTUAL void freeSVMAllocDeferImpl(); + MOCKABLE_VIRTUAL void freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData); bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); } void trimUSMDeviceAllocCache(); void insertSVMAlloc(const SvmAllocationData &svmData); void removeSVMAlloc(const SvmAllocationData &svmData); size_t getNumAllocs() const { return SVMAllocs.getNumAllocs(); } + MOCKABLE_VIRTUAL size_t getNumDeferFreeAllocs() const { return SVMDeferFreeAllocs.getNumAllocs(); } MapBasedAllocationTracker *getSVMAllocs() { return &SVMAllocs; } MOCKABLE_VIRTUAL void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap); @@ -206,6 +216,7 @@ class SVMAllocsManager { MapBasedAllocationTracker SVMAllocs; MapOperationsTracker svmMapOperations; + MapBasedAllocationTracker SVMDeferFreeAllocs; MemoryManager *memoryManager; std::shared_mutex mtx; std::mutex mtxForIndirectAccess; diff --git a/shared/test/common/mocks/mock_memory_manager.cpp b/shared/test/common/mocks/mock_memory_manager.cpp index 97fed54709..b56f948ea2 100644 --- a/shared/test/common/mocks/mock_memory_manager.cpp +++ b/shared/test/common/mocks/mock_memory_manager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -44,6 +44,9 @@ GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(cons if (isMockHostMemoryManager) { allocateGraphicsMemoryWithPropertiesCount++; if (forceFailureInPrimaryAllocation) { + if (singleFailureInPrimaryAllocation) { + forceFailureInPrimaryAllocation = false; + } return nullptr; } return NEO::MemoryManager::allocateGraphicsMemoryWithProperties(properties); diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h index 7b8fc8d320..b0dd8baa22 100644 --- a/shared/test/common/mocks/mock_memory_manager.h +++ b/shared/test/common/mocks/mock_memory_manager.h @@ -114,6 +114,14 @@ class MockMemoryManager : public MemoryManagerCreate { OsAgnosticMemoryManager::unlockResourceImpl(gfxAllocation); } + bool allocInUse(GraphicsAllocation &graphicsAllocation) override { + allocInUseCalled++; + if (deferAllocInUse) { + return true; + } + return false; + } + void waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation) override { waitForEnginesCompletionCalled++; if (waitAllocations.get()) { @@ -223,6 +231,7 @@ class MockMemoryManager : public MemoryManagerCreate { uint32_t unlockResourceCalled = 0u; uint32_t lockResourceCalled = 0u; uint32_t createGraphicsAllocationFromExistingStorageCalled = 0u; + uint32_t allocInUseCalled = 0u; int32_t overrideAllocateAsPackReturn = -1; std::vector allocationsFromExistingStorage{}; AllocationData alignAllocationData; @@ -252,8 +261,10 @@ class MockMemoryManager : public MemoryManagerCreate { bool cpuCopyRequired = false; bool forceCompressed = false; bool forceFailureInPrimaryAllocation = false; + bool singleFailureInPrimaryAllocation = false; bool forceFailureInAllocationWithHostPointer = false; bool isMockHostMemoryManager = false; + bool deferAllocInUse = false; bool isMockEventPoolCreateMemoryManager = false; bool limitedGPU = false; bool returnFakeAllocation = false; diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp index e3aeeafc6e..90570dcab5 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -288,20 +288,23 @@ TEST(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCache auto svmManager = std::make_unique(memoryManager, false); ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); - memoryManager->capacity = MemoryConstants::pageSize64k * 2; + memoryManager->capacity = MemoryConstants::pageSize64k * 3; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.device = device; auto allocationInCache = svmManager->createUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); auto allocationInCache2 = svmManager->createUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); + auto allocationInCache3 = svmManager->createUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); svmManager->freeSVMAlloc(allocationInCache); svmManager->freeSVMAlloc(allocationInCache2); + svmManager->freeSVMAllocDefer(allocationInCache3); - ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 2u); + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 3u); ASSERT_NE(svmManager->getSVMAlloc(allocationInCache), nullptr); ASSERT_NE(svmManager->getSVMAlloc(allocationInCache2), nullptr); + ASSERT_NE(svmManager->getSVMAlloc(allocationInCache3), nullptr); auto ptr = svmManager->createUnifiedMemoryAllocation(MemoryConstants::pageSize64k * 2, unifiedMemoryProperties); EXPECT_NE(ptr, nullptr); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp index 9d1b93a164..02f53cd6ae 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -61,6 +61,65 @@ TEST_F(SVMLocalMemoryAllocatorTest, whenFreeSharedAllocWithOffsetPointerThenReso EXPECT_EQ(pageFaultMemoryData, mockPageFaultManager->memoryData.end()); } +TEST_F(SVMLocalMemoryAllocatorTest, whenFreeSVMAllocIsDeferredThenFreedSubsequently) { + DebugManagerStateRestore restore; + DebugManager.flags.EnableLocalMemory.set(1); + + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + auto csr = std::make_unique(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); + csr->setupContext(*device->getDefaultEngine().osContext); + void *cmdQ = reinterpret_cast(0x12345); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + + auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096, unifiedMemoryProperties, &cmdQ); + EXPECT_NE(nullptr, ptr); + + auto memoryManager = static_cast(device->getMemoryManager()); + memoryManager->deferAllocInUse = true; + svmManager->freeSVMAllocDefer(ptr); + memoryManager->deferAllocInUse = false; + svmManager->freeSVMAllocDefer(ptr); + ASSERT_EQ(svmManager->getSVMAlloc(ptr), nullptr); +} + +TEST_F(SVMLocalMemoryAllocatorTest, whenMultipleFreeSVMAllocDeferredThenFreedSubsequently) { + + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + auto csr = std::make_unique(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); + csr->setupContext(*device->getDefaultEngine().osContext); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + + auto ptr = svmManager->createUnifiedMemoryAllocation(4096, unifiedMemoryProperties); + EXPECT_NE(nullptr, ptr); + auto ptr1 = svmManager->createUnifiedMemoryAllocation(4096, unifiedMemoryProperties); + EXPECT_NE(nullptr, ptr1); + auto ptr2 = svmManager->createUnifiedMemoryAllocation(4096, unifiedMemoryProperties); + EXPECT_NE(nullptr, ptr2); + + auto memoryManager = static_cast(device->getMemoryManager()); + memoryManager->deferAllocInUse = true; + svmManager->freeSVMAllocDefer(ptr); + ASSERT_NE(svmManager->getSVMAlloc(ptr), nullptr); + EXPECT_EQ(1ul, svmManager->getNumDeferFreeAllocs()); + svmManager->freeSVMAllocDefer(ptr1); + ASSERT_NE(svmManager->getSVMAlloc(ptr1), nullptr); + EXPECT_EQ(2ul, svmManager->getNumDeferFreeAllocs()); + memoryManager->deferAllocInUse = false; + svmManager->freeSVMAlloc(ptr2, true); + EXPECT_EQ(0ul, svmManager->getNumDeferFreeAllocs()); + ASSERT_EQ(svmManager->getSVMAlloc(ptr), nullptr); + ASSERT_EQ(svmManager->getSVMAlloc(ptr1), nullptr); + ASSERT_EQ(svmManager->getSVMAlloc(ptr2), nullptr); +} + TEST_F(SVMLocalMemoryAllocatorTest, givenKmdMigratedSharedAllocationWhenPrefetchMemoryIsCalledForMultipleActivePartitionsThenPrefetchAllocationToSubDevices) { DebugManagerStateRestore restore; DebugManager.flags.UseKmdMigration.set(1);