// Patch summary: bound the USM device/host allocation caches by total cached bytes.
//  - SvmAllocationCache gains maxSize/totalSize. insert() now returns bool: it refuses (returns false)
//    when size + totalSize would exceed maxSize; get() subtracts the recycled allocation's size and
//    trim() resets totalSize to 0.
//  - freeSVMAlloc()/freeSVMAllocDefer() only return early when insert() succeeded; otherwise they fall
//    through to freeSVMAllocImpl() and the allocation is released normally.
//  - Cache setup moves out of the SVMAllocsManager constructor into initUsmAllocationsCaches(Device &),
//    which DriverHandleImp::initialize() and Context::createImpl() call right after creating the manager.
//  - maxSize is a fraction of device global memory (device cache) or shared system memory (host cache):
//    0.02 by default, or flag/100 when ExperimentalEnable{Device,Host}AllocationCache is not -1.
//    The percentage is clamped to [0, 100], so a negative flag value other than -1 (which still enables
//    the cache through !!flag) yields a zero budget instead of a negative double being cast to an
//    unsigned size.
// NOTE(review): the host budget reads getSystemSharedMemory(0u) (fixed root device index 0) and the
//    Level Zero path sizes the device budget from devices[0] only - confirm this is intended for
//    multi-root-device setups.
// NOTE(review): this copy of the patch has lost its line breaks and the text inside angle brackets
//    (e.g. "static_cast(...)"); it is kept that way below and will not apply until those are restored.
diff --git a/level_zero/core/source/driver/driver_handle_imp.cpp b/level_zero/core/source/driver/driver_handle_imp.cpp index 1fa6eb335a..181d8cf661 100644 --- a/level_zero/core/source/driver/driver_handle_imp.cpp +++ b/level_zero/core/source/driver/driver_handle_imp.cpp @@ -250,6 +250,7 @@ ze_result_t DriverHandleImp::initialize(std::vector if (this->svmAllocsManager == nullptr) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } + this->svmAllocsManager->initUsmAllocationsCaches(*this->devices[0]->getNEODevice()); this->numDevices = static_cast(this->devices.size()); diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 87e8908b7a..35dc54559e 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -277,6 +277,7 @@ bool Context::createImpl(const cl_context_properties *properties, if (anySvmSupport) { this->svmAllocsManager = new SVMAllocsManager(this->memoryManager, this->areMultiStorageAllocationsPreferred()); + this->svmAllocsManager->initUsmAllocationsCaches(device->getDevice()); } } diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index ce841208a2..3e7f71d1ff 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -517,8 +517,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, UsePipeControlAfterPartitionedWalker, -1, "-1: d DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionCount, 0, "Experimental implementation: Set number of COMPUTE_WALKERs for a given Partition Type, 0 - do not set the feature.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionType, -1, "Experimental implementation: Set COMPUTE_WALKER Partition Type. Valid values for types from 1 to 3") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0, "Align local memory allocations to a given value. Works only with allocations at least as big as the value. 
0: no effect, 2097152: 2 megabytes, 1073741824: 1 gigabyte") -DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable device usm allocation cache.") -DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableHostAllocationCache, -1, "Experimentally enable host usm allocation cache.") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable device usm allocation cache. Use X% of device memory.") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableHostAllocationCache, -1, "Experimentally enable host usm allocation cache. Use X% of shared system memory.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalH2DCpuCopyThreshold, -1, "Override default threshold (in bytes) for H2D CPU copy.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalD2HCpuCopyThreshold, -1, "Override default threshold (in bytes) for D2H CPU copy.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalCopyThroughLock, -1, "Experimentally copy memory through locked ptr. 
-1: default 0: disable 1: enable ") diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index da74023002..2dc6b0b559 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -42,9 +42,14 @@ void SVMAllocsManager::MapBasedAllocationTracker::remove(const SvmAllocationData allocations.erase(iter); } -void SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr) { +bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr) { std::lock_guard lock(this->mtx); + if (size + this->totalSize > this->maxSize) { + return false; + } allocations.emplace(std::lower_bound(allocations.begin(), allocations.end(), size), size, ptr); + this->totalSize += size; + return true; } void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties, SVMAllocsManager *svmAllocsManager) { @@ -58,6 +63,7 @@ void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemory if (svmAllocData->device == unifiedMemoryProperties.device && svmAllocData->allocationFlagsProperty.allFlags == unifiedMemoryProperties.allocationFlags.allFlags && svmAllocData->allocationFlagsProperty.allAllocFlags == unifiedMemoryProperties.allocationFlags.allAllocFlags) { + totalSize -= allocationIter->allocationSize; allocations.erase(allocationIter); return allocationPtr; } @@ -73,6 +79,7 @@ void SVMAllocsManager::SvmAllocationCache::trim(SVMAllocsManager *svmAllocsManag svmAllocsManager->freeSVMAllocImpl(cachedAllocationInfo.allocation, FreePolicyType::none, svmData); } this->allocations.clear(); + this->totalSize = 0u; } SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) { @@ -166,21 +173,6 @@ void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &co SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager, bool 
multiOsContextSupport) : memoryManager(memoryManager), multiOsContextSupport(multiOsContextSupport) { - this->usmDeviceAllocationsCacheEnabled = NEO::ApiSpecificConfig::isDeviceAllocationCacheEnabled(); - if (debugManager.flags.ExperimentalEnableDeviceAllocationCache.get() != -1) { - this->usmDeviceAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableDeviceAllocationCache.get(); - } - if (this->usmDeviceAllocationsCacheEnabled) { - this->initUsmDeviceAllocationsCache(); - } - - this->usmHostAllocationsCacheEnabled = NEO::ApiSpecificConfig::isHostAllocationCacheEnabled(); - if (debugManager.flags.ExperimentalEnableHostAllocationCache.get() != -1) { - this->usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get(); - } - if (this->usmHostAllocationsCacheEnabled) { - this->initUsmHostAllocationsCache(); - } } SVMAllocsManager::~SVMAllocsManager() = default; @@ -457,13 +449,15 @@ bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) { if (svmData) { if (InternalMemoryType::deviceUnifiedMemory == svmData->memoryType && this->usmDeviceAllocationsCacheEnabled) { - this->usmDeviceAllocationsCache.insert(svmData->size, ptr); - return true; + if (this->usmDeviceAllocationsCache.insert(svmData->size, ptr)) { + return true; + } } if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType && this->usmHostAllocationsCacheEnabled) { - this->usmHostAllocationsCache.insert(svmData->size, ptr); - return true; + if (this->usmHostAllocationsCache.insert(svmData->size, ptr)) { + return true; + } } if (blocking) { this->freeSVMAllocImpl(ptr, FreePolicyType::blocking, svmData); @@ -485,13 +479,15 @@ bool SVMAllocsManager::freeSVMAllocDefer(void *ptr) { if (svmData) { if (InternalMemoryType::deviceUnifiedMemory == svmData->memoryType && this->usmDeviceAllocationsCacheEnabled) { - this->usmDeviceAllocationsCache.insert(svmData->size, ptr); - return true; + if (this->usmDeviceAllocationsCache.insert(svmData->size, ptr)) { + 
return true; + } } if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType && this->usmHostAllocationsCacheEnabled) { - this->usmHostAllocationsCache.insert(svmData->size, ptr); - return true; + if (this->usmHostAllocationsCache.insert(svmData->size, ptr)) { + return true; + } } this->freeSVMAllocImpl(ptr, FreePolicyType::defer, svmData); return true; @@ -684,12 +680,42 @@ void SVMAllocsManager::freeZeroCopySvmAllocation(SvmAllocationData *svmData) { } } -void SVMAllocsManager::initUsmDeviceAllocationsCache() { +void SVMAllocsManager::initUsmDeviceAllocationsCache(Device &device) { this->usmDeviceAllocationsCache.allocations.reserve(128u); + const auto totalDeviceMemory = device.getGlobalMemorySize(static_cast(device.getDeviceBitfield().to_ulong())); + auto fractionOfTotalMemoryForRecycling = 0.02; + if (debugManager.flags.ExperimentalEnableDeviceAllocationCache.get() != -1) { + fractionOfTotalMemoryForRecycling = 0.01 * std::max(0, std::min(100, debugManager.flags.ExperimentalEnableDeviceAllocationCache.get())); + } + this->usmDeviceAllocationsCache.maxSize = static_cast(fractionOfTotalMemoryForRecycling * totalDeviceMemory); } void SVMAllocsManager::initUsmHostAllocationsCache() { this->usmHostAllocationsCache.allocations.reserve(128u); + const auto totalSystemMemory = this->memoryManager->getSystemSharedMemory(0u); + auto fractionOfTotalMemoryForRecycling = 0.02; + if (debugManager.flags.ExperimentalEnableHostAllocationCache.get() != -1) { + fractionOfTotalMemoryForRecycling = 0.01 * std::max(0, std::min(100, debugManager.flags.ExperimentalEnableHostAllocationCache.get())); + } + this->usmHostAllocationsCache.maxSize = static_cast(fractionOfTotalMemoryForRecycling * totalSystemMemory); +} + +void SVMAllocsManager::initUsmAllocationsCaches(Device &device) { + this->usmDeviceAllocationsCacheEnabled = NEO::ApiSpecificConfig::isDeviceAllocationCacheEnabled(); + if (debugManager.flags.ExperimentalEnableDeviceAllocationCache.get() != -1) { + 
this->usmDeviceAllocationsCacheEnabled = 
!!debugManager.flags.ExperimentalEnableDeviceAllocationCache.get(); + } + if (this->usmDeviceAllocationsCacheEnabled) { + this->initUsmDeviceAllocationsCache(device); + } + + this->usmHostAllocationsCacheEnabled = NEO::ApiSpecificConfig::isHostAllocationCacheEnabled(); + if (debugManager.flags.ExperimentalEnableHostAllocationCache.get() != -1) { + this->usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get(); + } + if (this->usmHostAllocationsCacheEnabled) { + this->initUsmHostAllocationsCache(); + } } void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData) { diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 44b6bd6e27..1d50f6f4e1 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -153,11 +153,13 @@ class SVMAllocsManager { }; struct SvmAllocationCache { - void insert(size_t size, void *); + bool insert(size_t size, void *); void *get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties, SVMAllocsManager *svmAllocsManager); void trim(SVMAllocsManager *svmAllocsManager); std::vector allocations; std::mutex mtx; + size_t maxSize = 0; + size_t totalSize = 0; }; enum class FreePolicyType : uint32_t { @@ -234,6 +236,8 @@ class SVMAllocsManager { using NonGpuDomainAllocsContainer = std::vector; NonGpuDomainAllocsContainer nonGpuDomainAllocs; + void initUsmAllocationsCaches(Device &device); + protected: void *createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties, const RootDeviceIndicesContainer &rootDeviceIndices, @@ -242,7 +246,7 @@ class SVMAllocsManager { void freeZeroCopySvmAllocation(SvmAllocationData *svmData); - void initUsmDeviceAllocationsCache(); + void initUsmDeviceAllocationsCache(Device &device); void initUsmHostAllocationsCache(); void freeSVMData(SvmAllocationData *svmData); diff 
--git a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp index 6fe3ffcd0a..0ea6ab927a 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp @@ -101,12 +101,12 @@ using SvmDeviceAllocationCacheTest = Test; TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheDefaultWhenCheckingIfEnabledThenItIsDisabled) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); - RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; - std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; auto device = deviceFactory->rootDevices[0]; auto svmManager = std::make_unique(device->getMemoryManager(), false); ASSERT_EQ(debugManager.flags.ExperimentalEnableDeviceAllocationCache.get(), -1); EXPECT_FALSE(svmManager->usmDeviceAllocationsCacheEnabled); + svmManager->initUsmAllocationsCaches(*device); + EXPECT_FALSE(svmManager->usmDeviceAllocationsCacheEnabled); } struct SvmDeviceAllocationCacheSimpleTestDataType { @@ -122,7 +122,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); auto device = deviceFactory->rootDevices[0]; auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( @@ -161,6 +163,91 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } +TEST_F(SvmDeviceAllocationCacheTest, 
givenAllocationCacheEnabledWhenInitializedThenMaxSizeIsSetCorrectly) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + auto device = deviceFactory->rootDevices[0]; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(2); + auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + auto expectedMaxSize = static_cast(device->getGlobalMemorySize(static_cast(mockDeviceBitfield.to_ulong())) * 0.02); + EXPECT_EQ(expectedMaxSize, svmManager->usmDeviceAllocationsCache.maxSize); +} + +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceAllocationThenItIsPutIntoCacheOnlyIfMaxSizeWillNotBeExceeded) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + constexpr auto allocationSize = MemoryConstants::pageSize64k; + svmManager->usmDeviceAllocationsCache.maxSize = allocationSize; + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + { + auto allocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + ASSERT_NE(allocation, nullptr); + auto allocation2 = svmManager->createUnifiedMemoryAllocation(1u, unifiedMemoryProperties); + ASSERT_NE(allocation2, nullptr); + EXPECT_EQ(0u, 
svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.totalSize); + + svmManager->freeSVMAlloc(allocation); + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, svmManager->usmDeviceAllocationsCache.totalSize); + + svmManager->freeSVMAlloc(allocation2); + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, svmManager->usmDeviceAllocationsCache.totalSize); + + auto recycledAllocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + EXPECT_EQ(recycledAllocation, allocation); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.totalSize); + + svmManager->freeSVMAlloc(recycledAllocation); + + svmManager->trimUSMDeviceAllocCache(); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.totalSize); + } + { + auto allocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + ASSERT_NE(allocation, nullptr); + auto allocation2 = svmManager->createUnifiedMemoryAllocation(1u, unifiedMemoryProperties); + ASSERT_NE(allocation2, nullptr); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.totalSize); + + svmManager->freeSVMAllocDefer(allocation); + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, svmManager->usmDeviceAllocationsCache.totalSize); + + svmManager->freeSVMAllocDefer(allocation2); + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, svmManager->usmDeviceAllocationsCache.totalSize); + + auto recycledAllocation = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + 
EXPECT_EQ(recycledAllocation, allocation); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.totalSize); + + svmManager->freeSVMAllocDefer(recycledAllocation); + + svmManager->trimUSMDeviceAllocCache(); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, svmManager->usmDeviceAllocationsCache.totalSize); + } +} + TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenReturnCorrectCachedAllocation) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; @@ -169,7 +256,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAlloc debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); auto device = deviceFactory->rootDevices[0]; auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( @@ -220,7 +309,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfter debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); auto device = deviceFactory->rootDevices[0]; auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; size_t allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( @@ -297,7 +388,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAlloc auto secondRootDevice = deviceFactory->rootDevices[1]; auto subDevice1 
= deviceFactory->subDevices[0]; auto svmManager = std::make_unique(rootDevice->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*rootDevice); ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; constexpr auto allocationSizeBasis = MemoryConstants::kiloByte; size_t defaultAllocSize = allocationSizeBasis << 0; @@ -369,7 +462,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCac device->injectMemoryManager(new MockMemoryManagerWithCapacity(*device->getExecutionEnvironment())); MockMemoryManagerWithCapacity *memoryManager = static_cast(device->getMemoryManager()); auto svmManager = std::make_unique(memoryManager, false); + svmManager->initUsmAllocationsCaches(*device); ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; memoryManager->capacity = MemoryConstants::pageSize64k * 3; @@ -407,6 +502,8 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheDefaultWhenCheckingIfEnab auto svmManager = std::make_unique(device->getMemoryManager(), false); ASSERT_EQ(debugManager.flags.ExperimentalEnableHostAllocationCache.get(), -1); EXPECT_FALSE(svmManager->usmHostAllocationsCacheEnabled); + svmManager->initUsmAllocationsCaches(*device); + EXPECT_FALSE(svmManager->usmHostAllocationsCacheEnabled); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); auto allocation = svmManager->createHostUnifiedMemoryAllocation(1u, unifiedMemoryProperties); @@ -437,7 +534,9 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceA debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); auto device = deviceFactory->rootDevices[0]; auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); 
ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( @@ -475,6 +574,90 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceA EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); } +TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenInitializedThenMaxSizeIsSetCorrectly) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableHostAllocationCache.set(2); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); + ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + + auto expectedMaxSize = static_cast(svmManager->memoryManager->getSystemSharedMemory(mockRootDeviceIndex) * 0.02); + EXPECT_EQ(expectedMaxSize, svmManager->usmHostAllocationsCache.maxSize); +} + +TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceAllocationThenItIsPutIntoCacheOnlyIfMaxSizeWillNotBeExceeded) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); + ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + + constexpr auto allocationSize = MemoryConstants::pageSize64k; + svmManager->usmHostAllocationsCache.maxSize = allocationSize; + + SVMAllocsManager::UnifiedMemoryProperties 
unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + { + auto allocation = svmManager->createHostUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + ASSERT_NE(allocation, nullptr); + auto allocation2 = svmManager->createHostUnifiedMemoryAllocation(1u, unifiedMemoryProperties); + ASSERT_NE(allocation2, nullptr); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.allocations.size()); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.totalSize); + + svmManager->freeSVMAlloc(allocation); + EXPECT_EQ(1u, svmManager->usmHostAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, svmManager->usmHostAllocationsCache.totalSize); + + svmManager->freeSVMAlloc(allocation2); + EXPECT_EQ(1u, svmManager->usmHostAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, svmManager->usmHostAllocationsCache.totalSize); + + auto recycledAllocation = svmManager->createHostUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + EXPECT_EQ(recycledAllocation, allocation); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.totalSize); + + svmManager->freeSVMAlloc(recycledAllocation); + + svmManager->trimUSMHostAllocCache(); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.totalSize); + } + { + auto allocation = svmManager->createHostUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + ASSERT_NE(allocation, nullptr); + auto allocation2 = svmManager->createHostUnifiedMemoryAllocation(1u, unifiedMemoryProperties); + ASSERT_NE(allocation2, nullptr); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.allocations.size()); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.totalSize); + + svmManager->freeSVMAllocDefer(allocation); + EXPECT_EQ(1u, svmManager->usmHostAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, 
svmManager->usmHostAllocationsCache.totalSize); + + svmManager->freeSVMAllocDefer(allocation2); + EXPECT_EQ(1u, svmManager->usmHostAllocationsCache.allocations.size()); + EXPECT_EQ(allocationSize, svmManager->usmHostAllocationsCache.totalSize); + + auto recycledAllocation = svmManager->createHostUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); + EXPECT_EQ(recycledAllocation, allocation); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.totalSize); + + svmManager->freeSVMAllocDefer(recycledAllocation); + + svmManager->trimUSMHostAllocCache(); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.totalSize); + } +} + TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenReturnCorrectCachedAllocation) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; @@ -483,7 +666,9 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocat debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); auto device = deviceFactory->rootDevices[0]; auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( @@ -533,7 +718,9 @@ TEST_F(SvmHostAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFr debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); auto device = deviceFactory->rootDevices[0]; auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); 
+ svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; size_t allocationSizeBasis = MemoryConstants::pageSize64k; auto testDataset = std::vector( @@ -607,7 +794,9 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAllocat debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); auto rootDevice = deviceFactory->rootDevices[0]; auto svmManager = std::make_unique(rootDevice->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*rootDevice); ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; constexpr auto allocationSizeBasis = MemoryConstants::kiloByte; size_t defaultAllocSize = allocationSizeBasis << 0; @@ -673,7 +862,9 @@ TEST_F(SvmHostAllocationCacheTest, givenHostOutOfMemoryWhenAllocatingThenCacheIs device->injectMemoryManager(new MockMemoryManagerWithCapacity(*device->getExecutionEnvironment())); MockMemoryManagerWithCapacity *memoryManager = static_cast(device->getMemoryManager()); auto svmManager = std::make_unique(memoryManager, false); + svmManager->initUsmAllocationsCaches(*device); ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; memoryManager->capacity = MemoryConstants::pageSize64k * 3;