From 1252b10ba9fe6d6c31f44b0168876d16092c64a4 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Wed, 4 Dec 2024 07:07:13 +0000 Subject: [PATCH] fix: limit usm device reuse based on used memory Calculate the memory available for USM device allocation reuse as (total device memory - used memory) * fraction for reuse. On devices without local memory, used system memory is also subtracted. Related-To: NEO-12902 Signed-off-by: Dominik Dabek --- shared/source/device/device.h | 2 +- .../memory_manager/unified_memory_manager.cpp | 10 +- .../memory_manager/unified_memory_manager.h | 2 + shared/test/common/mocks/mock_device.h | 10 +- .../test/common/mocks/mock_memory_manager.h | 1 + .../unified_memory_manager_cache_tests.cpp | 119 ++++++++++++++++-- 6 files changed, 132 insertions(+), 12 deletions(-) diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 5ff0e4fb85..191511490b 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -181,7 +181,7 @@ class Device : public ReferenceTrackedObject { void initializeRayTracing(uint32_t maxBvhLevels); void allocateRTDispatchGlobals(uint32_t maxBvhLevels); - uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const; + MOCKABLE_VIRTUAL uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const; const std::vector &getSubDevices() const { return subdevices; } bool getUuid(std::array &uuid); void generateUuid(std::array &uuid); diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 623fe99545..64b7ace5e8 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -62,7 +62,13 @@ bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr, SvmAll if (auto device = svmData->device) { auto lock = device->obtainAllocationsReuseLock(); const auto usedSize = device->getAllocationsSavedForReuseSize(); - if (size + usedSize > this->maxSize) { + uint64_t 
availableMemory = device->getGlobalMemorySize(static_cast(device->getDeviceBitfield().to_ulong())); + availableMemory -= memoryManager->getUsedLocalMemorySize(device->getRootDeviceIndex()); + if (!localMemorySupported) { + availableMemory -= memoryManager->getUsedSystemMemorySize(); + } + const auto availableMemoryForReuse = static_cast(availableMemory * fractionOfAvailableMemoryForRecycling); + if (size + usedSize > availableMemoryForReuse) { return false; } device->recordAllocationSaveForReuse(size); @@ -756,6 +762,8 @@ void SVMAllocsManager::initUsmDeviceAllocationsCache(Device &device) { if (this->usmDeviceAllocationsCache.maxSize > 0u) { this->usmDeviceAllocationsCache.allocations.reserve(128u); } + this->usmDeviceAllocationsCache.fractionOfAvailableMemoryForRecycling = fractionOfTotalMemoryForRecycling; + this->usmDeviceAllocationsCache.localMemorySupported = memoryManager->isLocalMemorySupported(device.getRootDeviceIndex()); } void SVMAllocsManager::initUsmHostAllocationsCache() { diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index c7e5e8d647..11bb0491ed 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -172,6 +172,8 @@ class SVMAllocsManager { std::vector allocations; std::mutex mtx; + bool localMemorySupported = true; + double fractionOfAvailableMemoryForRecycling = 0.0; size_t maxSize = 0; }; diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index c10e2b14e2..f87e9c3254 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -63,7 +63,6 @@ class MockDevice : public RootDevice { using Device::deviceInfo; using Device::executionEnvironment; using Device::generateUuidFromPciBusInfo; - using Device::getGlobalMemorySize; using Device::initializeCaps; using Device::preemptionMode; using Device::regularEngineGroups; @@ 
-99,6 +98,13 @@ class MockDevice : public RootDevice { performanceCounters = std::move(perfCounters); } + uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const override { + if (callBaseGetGlobalMemorySize) { + return Device::getGlobalMemorySize(deviceBitfield); + } + return getGlobalMemorySizeReturn; + } + size_t getMaxParameterSizeFromIGC() const override { if (callBaseGetMaxParameterSizeFromIGC) { return Device::getMaxParameterSizeFromIGC(); @@ -173,6 +179,8 @@ class MockDevice : public RootDevice { bool callBaseGetMaxParameterSizeFromIGC = false; bool callBaseVerifyAdapterLuid = true; + bool callBaseGetGlobalMemorySize = true; + uint64_t getGlobalMemorySizeReturn = 0u; bool verifyAdapterLuidReturnValue = true; size_t maxParameterSizeFromIGC = 0u; bool rtDispatchGlobalsForceAllocation = true; diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h index c254a748cb..92d401cb4b 100644 --- a/shared/test/common/mocks/mock_memory_manager.h +++ b/shared/test/common/mocks/mock_memory_manager.h @@ -63,6 +63,7 @@ class MockMemoryManager : public MemoryManagerCreate { using MemoryManager::localMemorySupported; using MemoryManager::reservedMemory; using MemoryManager::secondaryEngines; + using MemoryManager::sysMemAllocsSize; static constexpr osHandle invalidSharedHandle = -1; static const unsigned int moduleId; diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp index 7ef47c3ae0..63dbccbf67 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp @@ -231,25 +231,34 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenInitializedT auto expectedMaxSize = static_cast(device->getGlobalMemorySize(static_cast(mockDeviceBitfield.to_ulong())) * 0.02); EXPECT_EQ(expectedMaxSize, 
svmManager->usmDeviceAllocationsCache.maxSize); + + EXPECT_EQ(0.02, svmManager->usmDeviceAllocationsCache.fractionOfAvailableMemoryForRecycling); + EXPECT_EQ(device->getMemoryManager()->isLocalMemorySupported(device->getRootDeviceIndex()), svmManager->usmDeviceAllocationsCache.localMemorySupported); } -TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceAllocationThenItIsPutIntoCacheOnlyIfMaxSizeWillNotBeExceeded) { +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledAndLocalMemorySupportedWhenFreeingDeviceAllocationThenItIsPutIntoCacheOnlyIfMaxSizeWillNotBeExceeded) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; DebugManagerStateRestore restore; - debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(2); auto device = deviceFactory->rootDevices[0]; + auto mockMemoryManager = reinterpret_cast(device->getMemoryManager()); + mockMemoryManager->localMemorySupported[device->getRootDeviceIndex()] = true; + + constexpr auto allocationSize = MemoryConstants::pageSize64k; + device->callBaseGetGlobalMemorySize = false; + device->getGlobalMemorySizeReturn = allocationSize * 100; + auto svmManager = std::make_unique(device->getMemoryManager(), false); svmManager->initUsmAllocationsCaches(*device); ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); - constexpr auto allocationSize = MemoryConstants::pageSize64k; - svmManager->usmDeviceAllocationsCache.maxSize = allocationSize; - SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.device = device; { + mockMemoryManager->sysMemAllocsSize = allocationSize * 50; + mockMemoryManager->localMemAllocsSize[device->getRootDeviceIndex()] = 0u; 
auto allocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); ASSERT_NE(allocation, nullptr); auto allocation2 = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); @@ -261,6 +270,8 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); EXPECT_EQ(allocationSize, device->getAllocationsSavedForReuseSize()); + mockMemoryManager->localMemAllocsSize[device->getRootDeviceIndex()] = allocationSize * 50; + svmManager->freeSVMAlloc(allocation2); EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); EXPECT_EQ(allocationSize, device->getAllocationsSavedForReuseSize()); @@ -277,6 +288,8 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic EXPECT_EQ(0u, device->getAllocationsSavedForReuseSize()); } { + mockMemoryManager->sysMemAllocsSize = allocationSize * 50; + mockMemoryManager->localMemAllocsSize[device->getRootDeviceIndex()] = 0u; auto allocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); ASSERT_NE(allocation, nullptr); auto allocation2 = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); @@ -288,6 +301,94 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); EXPECT_EQ(allocationSize, device->getAllocationsSavedForReuseSize()); + mockMemoryManager->localMemAllocsSize[device->getRootDeviceIndex()] = allocationSize * 50; + + svmManager->freeSVMAllocDefer(allocation2); + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, device->getAllocationsSavedForReuseSize()); + + auto recycledAllocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + EXPECT_EQ(recycledAllocation, allocation); + 
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, device->getAllocationsSavedForReuseSize()); + + svmManager->freeSVMAllocDefer(recycledAllocation); + + svmManager->trimUSMDeviceAllocCache(); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, device->getAllocationsSavedForReuseSize()); + } +} + +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledAndLocalMemoryNotSupportedWhenFreeingDeviceAllocationThenItIsPutIntoCacheOnlyIfMaxSizeWillNotBeExceeded) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(2); + auto device = deviceFactory->rootDevices[0]; + auto mockMemoryManager = reinterpret_cast(device->getMemoryManager()); + mockMemoryManager->localMemorySupported[device->getRootDeviceIndex()] = false; + + constexpr auto allocationSize = MemoryConstants::pageSize64k; + device->callBaseGetGlobalMemorySize = false; + device->getGlobalMemorySizeReturn = allocationSize * 100; + + auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + { + mockMemoryManager->sysMemAllocsSize = 0u; + mockMemoryManager->localMemAllocsSize[device->getRootDeviceIndex()] = 0u; + auto allocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + ASSERT_NE(allocation, nullptr); + auto allocation2 = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + 
ASSERT_NE(allocation2, nullptr); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(0u, device->getAllocationsSavedForReuseSize()); + + svmManager->freeSVMAlloc(allocation); + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, device->getAllocationsSavedForReuseSize()); + + mockMemoryManager->sysMemAllocsSize = allocationSize * 25; + mockMemoryManager->localMemAllocsSize[device->getRootDeviceIndex()] = allocationSize * 25; + + svmManager->freeSVMAlloc(allocation2); + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, device->getAllocationsSavedForReuseSize()); + + auto recycledAllocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + EXPECT_EQ(recycledAllocation, allocation); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, device->getAllocationsSavedForReuseSize()); + + svmManager->freeSVMAlloc(recycledAllocation); + + svmManager->trimUSMDeviceAllocCache(); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, device->getAllocationsSavedForReuseSize()); + } + { + mockMemoryManager->sysMemAllocsSize = 0u; + mockMemoryManager->localMemAllocsSize[device->getRootDeviceIndex()] = 0u; + auto allocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + ASSERT_NE(allocation, nullptr); + auto allocation2 = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + ASSERT_NE(allocation2, nullptr); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(0u, device->getAllocationsSavedForReuseSize()); + + svmManager->freeSVMAllocDefer(allocation); + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, device->getAllocationsSavedForReuseSize()); + + mockMemoryManager->sysMemAllocsSize 
= allocationSize * 25; + mockMemoryManager->localMemAllocsSize[device->getRootDeviceIndex()] = allocationSize * 25; + svmManager->freeSVMAllocDefer(allocation2); EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); EXPECT_EQ(allocationSize, device->getAllocationsSavedForReuseSize()); @@ -312,6 +413,10 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledAndMultipleSVMMa DebugManagerStateRestore restore; debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); auto device = deviceFactory->rootDevices[0]; + + constexpr auto allocationSize = MemoryConstants::pageSize64k; + device->callBaseGetGlobalMemorySize = false; + device->getGlobalMemorySizeReturn = allocationSize * 100; auto svmManager = std::make_unique(device->getMemoryManager(), false); auto secondSvmManager = std::make_unique(device->getMemoryManager(), false); svmManager->initUsmAllocationsCaches(*device); @@ -319,10 +424,6 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledAndMultipleSVMMa ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); ASSERT_TRUE(secondSvmManager->usmDeviceAllocationsCacheEnabled); - constexpr auto allocationSize = MemoryConstants::pageSize64k; - svmManager->usmDeviceAllocationsCache.maxSize = allocationSize; - secondSvmManager->usmDeviceAllocationsCache.maxSize = allocationSize; - SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.device = device; {