From 752f313808ac2d9d8b9dfa421744bdaa6363b3d4 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Mon, 30 Sep 2024 18:38:24 +0000 Subject: [PATCH] fix: limit allocation cache memory wastage Allocations over a certain size will be checked for memory utilization when chosen for reuse. If utilization is below a threshold, they will not be reused. Related-To: NEO-6893 Signed-off-by: Dominik Dabek --- .../memory_manager/unified_memory_manager.cpp | 11 ++ .../memory_manager/unified_memory_manager.h | 4 + .../unified_memory_manager_cache_tests.cpp | 158 +++++++++++++----- 3 files changed, 135 insertions(+), 38 deletions(-) diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index f3382f0c35..413005ba89 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -53,11 +53,22 @@ bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr) { return true; } +bool SVMAllocsManager::SvmAllocationCache::allocUtilizationAllows(size_t requestedSize, size_t reuseCandidateSize) { + if (reuseCandidateSize >= SvmAllocationCache::minimalSizeToCheckUtilization) { + const auto allocUtilization = static_cast<double>(requestedSize) / reuseCandidateSize; + return allocUtilization >= SvmAllocationCache::minimalAllocUtilization; + } + return true; +} + void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties, SVMAllocsManager *svmAllocsManager) { std::lock_guard lock(this->mtx); for (auto allocationIter = std::lower_bound(allocations.begin(), allocations.end(), size); allocationIter != allocations.end(); ++allocationIter) { + if (false == allocUtilizationAllows(size, allocationIter->allocationSize)) { + break; + } void *allocationPtr = allocationIter->allocation; SvmAllocationData *svmAllocData = svmAllocsManager->getSVMAlloc(allocationPtr); UNRECOVERABLE_IF(!svmAllocData); diff 
--git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 76789958a9..eef2eaa3eb 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/command_stream/task_count_helper.h" +#include "shared/source/helpers/constants.h" #include "shared/source/helpers/device_bitfield.h" #include "shared/source/memory_manager/multi_graphics_allocation.h" #include "shared/source/memory_manager/residency_container.h" @@ -157,8 +158,11 @@ class SVMAllocsManager { struct SvmAllocationCache { bool insert(size_t size, void *); + static bool allocUtilizationAllows(size_t requestedSize, size_t reuseCandidateSize); void *get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties, SVMAllocsManager *svmAllocsManager); void trim(SVMAllocsManager *svmAllocsManager); + static constexpr size_t minimalSizeToCheckUtilization = 4 * MemoryConstants::pageSize64k; + static constexpr double minimalAllocUtilization = 0.5; std::vector allocations; std::mutex mtx; size_t maxSize = 0; diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp index ea94ea0f68..af3b195ef1 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp @@ -88,6 +88,17 @@ TEST(SortedVectorBasedAllocationTrackerTests, givenSortedVectorBasedAllocationTr EXPECT_EQ(data1->device, addr1); } +using SvmAllocationCacheSimpleTest = ::testing::Test; + +TEST(SvmAllocationCacheSimpleTest, givenDifferentSizesWhenCheckingIfAllocUtilizationAllowedThenReturnCorrectValue) { + static constexpr size_t allocationSizeBasis = SVMAllocsManager::SvmAllocationCache::minimalSizeToCheckUtilization; + 
EXPECT_TRUE(SVMAllocsManager::SvmAllocationCache::allocUtilizationAllows(1u, allocationSizeBasis - 1)); + EXPECT_TRUE(SVMAllocsManager::SvmAllocationCache::allocUtilizationAllows(allocationSizeBasis - 1, allocationSizeBasis - 1)); + EXPECT_FALSE(SVMAllocsManager::SvmAllocationCache::allocUtilizationAllows(allocationSizeBasis / 2 - 1, allocationSizeBasis)); + EXPECT_TRUE(SVMAllocsManager::SvmAllocationCache::allocUtilizationAllows(allocationSizeBasis / 2, allocationSizeBasis)); + EXPECT_TRUE(SVMAllocsManager::SvmAllocationCache::allocUtilizationAllows(allocationSizeBasis, allocationSizeBasis)); +} + struct SvmAllocationCacheTestFixture { SvmAllocationCacheTestFixture() : executionEnvironment(defaultHwInfo.get()) {} void setUp() { @@ -98,7 +109,7 @@ struct SvmAllocationCacheTestFixture { } void tearDown() { } - + static constexpr size_t allocationSizeBasis = MemoryConstants::pageSize64k; MockExecutionEnvironment executionEnvironment; }; @@ -155,15 +166,14 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; - constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( {{1u, nullptr}, - {(allocationSizeBasis << 0) - 1, nullptr}, - {(allocationSizeBasis << 0), nullptr}, - {(allocationSizeBasis << 0) + 1, nullptr}, - {(allocationSizeBasis << 1) - 1, nullptr}, - {(allocationSizeBasis << 1), nullptr}, - {(allocationSizeBasis << 1) + 1, nullptr}}); + {(allocationSizeBasis * 1) - 1, nullptr}, + {(allocationSizeBasis * 1), nullptr}, + {(allocationSizeBasis * 1) + 1, nullptr}, + {(allocationSizeBasis * 2) - 1, nullptr}, + {(allocationSizeBasis * 2), nullptr}, + {(allocationSizeBasis * 2) + 1, nullptr}}); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); 
unifiedMemoryProperties.device = device; @@ -289,12 +299,11 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAlloc ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; - constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( { - {(allocationSizeBasis << 0), nullptr}, - {(allocationSizeBasis << 1), nullptr}, - {(allocationSizeBasis << 2), nullptr}, + {(allocationSizeBasis * 1), nullptr}, + {(allocationSizeBasis * 2), nullptr}, + {(allocationSizeBasis * 3), nullptr}, }); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); @@ -327,6 +336,46 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAlloc EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenLimitMemoryWastage) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + auto allocation = 
svmManager->createUnifiedMemoryAllocation(SVMAllocsManager::SvmAllocationCache::minimalSizeToCheckUtilization, unifiedMemoryProperties); + ASSERT_NE(allocation, nullptr); + + svmManager->freeSVMAlloc(allocation); + + ASSERT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + + constexpr auto allowedSizeForReuse = static_cast<size_t>(SVMAllocsManager::SvmAllocationCache::minimalSizeToCheckUtilization * SVMAllocsManager::SvmAllocationCache::minimalAllocUtilization); + constexpr auto notAllowedSizeDueToMemoryWastage = allowedSizeForReuse - 1u; + + auto notReusedDueToMemoryWastage = svmManager->createUnifiedMemoryAllocation(notAllowedSizeDueToMemoryWastage, unifiedMemoryProperties); + EXPECT_NE(nullptr, notReusedDueToMemoryWastage); + EXPECT_NE(notReusedDueToMemoryWastage, allocation); + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + + auto reused = svmManager->createUnifiedMemoryAllocation(allowedSizeForReuse, unifiedMemoryProperties); + EXPECT_NE(nullptr, reused); + EXPECT_EQ(reused, allocation); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.allocations.size()); + + svmManager->freeSVMAlloc(notReusedDueToMemoryWastage); + svmManager->freeSVMAlloc(reused); + svmManager->trimUSMDeviceAllocCache(); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); +} + TEST_F(SvmDeviceAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFreeThenReturnAllocationsInCacheStartingFromSmallest) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; @@ -339,12 +388,11 @@ TEST_F(SvmDeviceAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfter ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; - size_t allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( { - {(allocationSizeBasis << 0), nullptr}, 
- {(allocationSizeBasis << 1), nullptr}, - {(allocationSizeBasis << 2), nullptr}, + {(allocationSizeBasis * 1), nullptr}, + {(allocationSizeBasis * 2), nullptr}, + {(allocationSizeBasis * 3), nullptr}, }); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); @@ -418,8 +466,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAlloc ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; - constexpr auto allocationSizeBasis = MemoryConstants::kiloByte; - size_t defaultAllocSize = allocationSizeBasis << 0; + size_t defaultAllocSize = allocationSizeBasis; std::map subDeviceBitfields = {{0u, {01}}, {1u, {10}}}; SvmDeviceAllocationCacheTestDataType defaultAlloc(defaultAllocSize, @@ -616,15 +663,14 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceA ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; - constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( {{1u, nullptr}, - {(allocationSizeBasis << 0) - 1, nullptr}, - {(allocationSizeBasis << 0), nullptr}, - {(allocationSizeBasis << 0) + 1, nullptr}, - {(allocationSizeBasis << 1) - 1, nullptr}, - {(allocationSizeBasis << 1), nullptr}, - {(allocationSizeBasis << 1) + 1, nullptr}}); + {(allocationSizeBasis * 1) - 1, nullptr}, + {(allocationSizeBasis * 1), nullptr}, + {(allocationSizeBasis * 1) + 1, nullptr}, + {(allocationSizeBasis * 2) - 1, nullptr}, + {(allocationSizeBasis * 2), nullptr}, + {(allocationSizeBasis * 2) + 1, nullptr}}); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); for (auto &testData : testDataset) { @@ -748,15 +794,14 @@ TEST_F(SvmHostAllocationCacheTest, 
givenAllocationsWithDifferentSizesWhenAllocat ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; - constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( { - {(allocationSizeBasis << 0), nullptr}, - {(allocationSizeBasis << 0) + 1, nullptr}, - {(allocationSizeBasis << 1), nullptr}, - {(allocationSizeBasis << 1) + 1, nullptr}, - {(allocationSizeBasis << 2), nullptr}, - {(allocationSizeBasis << 2) + 1, nullptr}, + {(allocationSizeBasis * 1), nullptr}, + {(allocationSizeBasis * 1) + 1, nullptr}, + {(allocationSizeBasis * 2), nullptr}, + {(allocationSizeBasis * 2) + 1, nullptr}, + {(allocationSizeBasis * 3), nullptr}, + {(allocationSizeBasis * 3) + 1, nullptr}, }); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); @@ -788,6 +833,45 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocat EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); } +TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenLimitMemoryWastage) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); + ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); 
+ auto allocation = svmManager->createHostUnifiedMemoryAllocation(SVMAllocsManager::SvmAllocationCache::minimalSizeToCheckUtilization, unifiedMemoryProperties); + ASSERT_NE(allocation, nullptr); + + svmManager->freeSVMAlloc(allocation); + + ASSERT_EQ(1u, svmManager->usmHostAllocationsCache.allocations.size()); + + constexpr auto allowedSizeForReuse = static_cast<size_t>(SVMAllocsManager::SvmAllocationCache::minimalSizeToCheckUtilization * SVMAllocsManager::SvmAllocationCache::minimalAllocUtilization); + constexpr auto notAllowedSizeDueToMemoryWastage = allowedSizeForReuse - 1u; + + auto notReusedDueToMemoryWastage = svmManager->createHostUnifiedMemoryAllocation(notAllowedSizeDueToMemoryWastage, unifiedMemoryProperties); + EXPECT_NE(nullptr, notReusedDueToMemoryWastage); + EXPECT_NE(notReusedDueToMemoryWastage, allocation); + EXPECT_EQ(1u, svmManager->usmHostAllocationsCache.allocations.size()); + + auto reused = svmManager->createHostUnifiedMemoryAllocation(allowedSizeForReuse, unifiedMemoryProperties); + EXPECT_NE(nullptr, reused); + EXPECT_EQ(reused, allocation); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.allocations.size()); + + svmManager->freeSVMAlloc(notReusedDueToMemoryWastage); + svmManager->freeSVMAlloc(reused); + svmManager->trimUSMHostAllocCache(); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); +} + TEST_F(SvmHostAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFreeThenReturnAllocationsInCacheStartingFromSmallest) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; @@ -800,12 +884,11 @@ TEST_F(SvmHostAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFr ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; - size_t allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( { - {(allocationSizeBasis 
<< 0), nullptr}, - {(allocationSizeBasis << 1), nullptr}, - {(allocationSizeBasis << 2), nullptr}, + {(allocationSizeBasis * 1), nullptr}, + {(allocationSizeBasis * 2), nullptr}, + {(allocationSizeBasis * 3), nullptr}, }); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); @@ -876,8 +959,7 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAllocat ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; - constexpr auto allocationSizeBasis = MemoryConstants::kiloByte; - size_t defaultAllocSize = allocationSizeBasis << 0; + size_t defaultAllocSize = allocationSizeBasis; std::map subDeviceBitfields = {{0u, rootDevice->getDeviceBitfield()}}; RootDeviceIndicesContainer rootDeviceIndices; rootDeviceIndices.pushUnique(rootDevice->getRootDeviceIndex());