diff --git a/level_zero/core/source/helpers/api_specific_config_l0.cpp b/level_zero/core/source/helpers/api_specific_config_l0.cpp index 19e67ea85b..5473279411 100644 --- a/level_zero/core/source/helpers/api_specific_config_l0.cpp +++ b/level_zero/core/source/helpers/api_specific_config_l0.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -37,6 +37,10 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() { return false; } +bool ApiSpecificConfig::isHostAllocationCacheEnabled() { + return false; +} + ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() { return ApiSpecificConfig::L0; } diff --git a/opencl/source/helpers/api_specific_config_ocl.cpp b/opencl/source/helpers/api_specific_config_ocl.cpp index 53e15dac66..7ef7c0a26f 100644 --- a/opencl/source/helpers/api_specific_config_ocl.cpp +++ b/opencl/source/helpers/api_specific_config_ocl.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -36,6 +36,10 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() { return false; } +bool ApiSpecificConfig::isHostAllocationCacheEnabled() { + return false; +} + ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() { return ApiSpecificConfig::OCL; } diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 994573b000..52678de0f2 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -514,7 +514,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, UsePipeControlAfterPartitionedWalker, -1, "-1: d DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionCount, 0, "Experimental implementation: Set number of COMPUTE_WALKERs for a given Partition Type, 0 - do not set the feature.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionType, -1, "Experimental implementation: Set COMPUTE_WALKER Partition Type. Valid values for types from 1 to 3") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0, "Align local memory allocations to a given value. Works only with allocations at least as big as the value. 0: no effect, 2097152: 2 megabytes, 1073741824: 1 gigabyte") -DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable allocation cache.") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable device usm allocation cache.") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableHostAllocationCache, -1, "Experimentally enable host usm allocation cache.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalH2DCpuCopyThreshold, -1, "Override default threshold (in bytes) for H2D CPU copy.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalD2HCpuCopyThreshold, -1, "Override default threshold (in bytes) for D2H CPU copy.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalCopyThroughLock, -1, "Experimentally copy memory through locked ptr. -1: default 0: disable 1: enable ") diff --git a/shared/source/helpers/api_specific_config.h b/shared/source/helpers/api_specific_config.h index e6b18605ad..3d4faba7e1 100644 --- a/shared/source/helpers/api_specific_config.h +++ b/shared/source/helpers/api_specific_config.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ struct ApiSpecificConfig { static bool getGlobalBindlessHeapConfiguration(); static bool getBindlessMode(const ReleaseHelper *); static bool isDeviceAllocationCacheEnabled(); + static bool isHostAllocationCacheEnabled(); static ApiType getApiType(); static std::string getName(); static uint64_t getReducedMaxAllocSize(uint64_t maxAllocSize); diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 5bf8ddafbe..da74023002 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -173,6 +173,14 @@ SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager, bool multiOsCon if (this->usmDeviceAllocationsCacheEnabled) { this->initUsmDeviceAllocationsCache(); } + + this->usmHostAllocationsCacheEnabled = NEO::ApiSpecificConfig::isHostAllocationCacheEnabled(); + if (debugManager.flags.ExperimentalEnableHostAllocationCache.get() != -1) { + this->usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get(); + } + if (this->usmHostAllocationsCacheEnabled) { + this->initUsmHostAllocationsCache(); + } } SVMAllocsManager::~SVMAllocsManager() = default; @@ -221,13 +229,26 @@ void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size, unifiedMemoryProperties.flags.isUSMDeviceAllocation = false; unifiedMemoryProperties.cacheRegion = MemoryPropertiesHelper::getCacheRegion(memoryProperties.allocationFlags); + if (this->usmHostAllocationsCacheEnabled) { + void *allocationFromCache = this->usmHostAllocationsCache.get(size, memoryProperties, this); + if (allocationFromCache) { + return allocationFromCache; + } + } + auto maxRootDeviceIndex = *std::max_element(rootDeviceIndicesVector.begin(), rootDeviceIndicesVector.end(), std::less()); SvmAllocationData allocData(maxRootDeviceIndex); void *externalHostPointer = reinterpret_cast(memoryProperties.allocationFlags.hostptr); void *usmPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, unifiedMemoryProperties, allocData.gpuAllocations, externalHostPointer); if (!usmPtr) { - return nullptr; + if (this->usmHostAllocationsCacheEnabled) { + this->trimUSMHostAllocCache(); + usmPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, unifiedMemoryProperties, allocData.gpuAllocations, externalHostPointer); + } + if (!usmPtr) { + return nullptr; + } } allocData.cpuAllocation = nullptr; @@ -439,6 +460,11 @@ bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) { this->usmDeviceAllocationsCache.insert(svmData->size, ptr); return true; } + if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType && + this->usmHostAllocationsCacheEnabled) { + this->usmHostAllocationsCache.insert(svmData->size, ptr); + return true; + } if (blocking) { this->freeSVMAllocImpl(ptr, FreePolicyType::blocking, svmData); } else { @@ -462,6 +488,11 @@ bool SVMAllocsManager::freeSVMAllocDefer(void *ptr) { this->usmDeviceAllocationsCache.insert(svmData->size, ptr); return true; } + if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType && + this->usmHostAllocationsCacheEnabled) { + this->usmHostAllocationsCache.insert(svmData->size, ptr); + return true; + } this->freeSVMAllocImpl(ptr, FreePolicyType::defer, svmData); return true; } @@ -531,6 +562,10 @@ void SVMAllocsManager::trimUSMDeviceAllocCache() { this->usmDeviceAllocationsCache.trim(this); } +void SVMAllocsManager::trimUSMHostAllocCache() { + this->usmHostAllocationsCache.trim(this); +} + void *SVMAllocsManager::createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties, const RootDeviceIndicesContainer &rootDeviceIndices, const std::map &subdeviceBitfields) { @@ -653,6 +688,10 @@ void SVMAllocsManager::initUsmDeviceAllocationsCache() { this->usmDeviceAllocationsCache.allocations.reserve(128u); } +void SVMAllocsManager::initUsmHostAllocationsCache() { + this->usmHostAllocationsCache.allocations.reserve(128u); +} + void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData) { auto graphicsAllocations = svmData->gpuAllocations.getGraphicsAllocations(); GraphicsAllocation *cpuAllocation = svmData->cpuAllocation; diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index a9a493bfe1..44b6bd6e27 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -205,6 +205,7 @@ class SVMAllocsManager { MOCKABLE_VIRTUAL void freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData); bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); } void trimUSMDeviceAllocCache(); + void trimUSMHostAllocCache(); void insertSVMAlloc(const SvmAllocationData &svmData); void removeSVMAlloc(const SvmAllocationData &svmData); size_t getNumAllocs() const { return svmAllocs.getNumAllocs(); } @@ -242,6 +243,7 @@ class SVMAllocsManager { void freeZeroCopySvmAllocation(SvmAllocationData *svmData); void initUsmDeviceAllocationsCache(); + void initUsmHostAllocationsCache(); void freeSVMData(SvmAllocationData *svmData); SortedVectorBasedAllocationTracker svmAllocs; @@ -252,6 +254,8 @@ class SVMAllocsManager { std::mutex mtxForIndirectAccess; bool multiOsContextSupport; SvmAllocationCache usmDeviceAllocationsCache; + SvmAllocationCache usmHostAllocationsCache; bool usmDeviceAllocationsCacheEnabled = false; + bool usmHostAllocationsCacheEnabled = false; }; } // namespace NEO diff --git a/shared/test/common/mocks/mock_svm_manager.h b/shared/test/common/mocks/mock_svm_manager.h index 7b720c51dd..c7177b0ac1 100644 --- a/shared/test/common/mocks/mock_svm_manager.h +++ b/shared/test/common/mocks/mock_svm_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -24,6 +24,8 @@ struct MockSVMAllocsManager : public SVMAllocsManager { using SVMAllocsManager::svmMapOperations; using SVMAllocsManager::usmDeviceAllocationsCache; using SVMAllocsManager::usmDeviceAllocationsCacheEnabled; + using SVMAllocsManager::usmHostAllocationsCache; + using SVMAllocsManager::usmHostAllocationsCacheEnabled; void prefetchMemory(Device &device, CommandStreamReceiver &commandStreamReceiver, SvmAllocationData &svmData) override { SVMAllocsManager::prefetchMemory(device, commandStreamReceiver, svmData); diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index eec88f2441..6731df2b21 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -580,4 +580,5 @@ EnableHostUsmAllocationPool = -1 EnableHostAllocationMemPolicy = 0 OverrideHostAllocationMemPolicyMode = -1 SetThreadPriority = -1 +ExperimentalEnableHostAllocationCache = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/api_specific_config_ult.cpp b/shared/test/unit_test/api_specific_config_ult.cpp index fea9d2203d..3c364caf66 100644 --- a/shared/test/unit_test/api_specific_config_ult.cpp +++ b/shared/test/unit_test/api_specific_config_ult.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -40,6 +40,10 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() { return false; } +bool ApiSpecificConfig::isHostAllocationCacheEnabled() { + return false; +} + ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() { return apiTypeForUlts; } diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp index 78b43b6a20..6fe3ffcd0a 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2023 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -83,7 +83,23 @@ TEST(SortedVectorBasedAllocationTrackerTests, givenSortedVectorBasedAllocationTr EXPECT_EQ(data1->device, addr1); } -TEST(SvmDeviceAllocationCacheTest, givenAllocationCacheDefaultWhenCheckingIfEnabledThenItIsDisabled) { +struct SvmAllocationCacheTestFixture { + SvmAllocationCacheTestFixture() : executionEnvironment(defaultHwInfo.get()) {} + void setUp() { + bool svmSupported = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.ftrSvm; + if (!svmSupported) { + GTEST_SKIP(); + } + } + void tearDown() { + } + + MockExecutionEnvironment executionEnvironment; +}; + +using SvmDeviceAllocationCacheTest = Test; + +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheDefaultWhenCheckingIfEnabledThenItIsDisabled) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; @@ -98,7 +114,7 @@ struct SvmDeviceAllocationCacheSimpleTestDataType { void *allocation; }; -TEST(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceAllocationThenItIsPutIntoCache) { +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceAllocationThenItIsPutIntoCache) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; @@ -145,7 +161,7 @@ TEST(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceA EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } -TEST(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenReturnCorrectCachedAllocation) { +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenReturnCorrectCachedAllocation) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; @@ -196,7 +212,7 @@ TEST(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocat EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } -TEST(SvmDeviceAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFreeThenReturnAllocationsInCacheStartingFromSmallest) { +TEST_F(SvmDeviceAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFreeThenReturnAllocationsInCacheStartingFromSmallest) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; @@ -273,7 +289,7 @@ struct SvmDeviceAllocationCacheTestDataType { std::string name; }; -TEST(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAllocatingAfterFreeThenReturnCorrectAllocation) { +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAllocatingAfterFreeThenReturnCorrectAllocation) { std::unique_ptr deviceFactory(new UltDeviceFactory(2, 2)); DebugManagerStateRestore restore; debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); @@ -343,7 +359,7 @@ TEST(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAllocat } } -TEST(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCacheIsTrimmedAndAllocationSucceeds) { +TEST_F(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCacheIsTrimmedAndAllocationSucceeds) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; @@ -380,3 +396,306 @@ TEST(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCache svmManager->trimUSMDeviceAllocCache(); ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } + +using SvmHostAllocationCacheTest = Test; + +TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheDefaultWhenCheckingIfEnabledThenItIsDisabled) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + ASSERT_EQ(debugManager.flags.ExperimentalEnableHostAllocationCache.get(), -1); + EXPECT_FALSE(svmManager->usmHostAllocationsCacheEnabled); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + auto allocation = svmManager->createHostUnifiedMemoryAllocation(1u, unifiedMemoryProperties); + EXPECT_NE(nullptr, allocation); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.allocations.size()); + + EXPECT_TRUE(svmManager->freeSVMAlloc(allocation)); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.allocations.size()); + + allocation = svmManager->createHostUnifiedMemoryAllocation(1u, unifiedMemoryProperties); + EXPECT_NE(nullptr, allocation); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.allocations.size()); + + EXPECT_TRUE(svmManager->freeSVMAllocDefer(allocation)); + EXPECT_EQ(0u, svmManager->usmHostAllocationsCache.allocations.size()); +} + +struct SvmHostAllocationCacheSimpleTestDataType { + size_t allocationSize; + void *allocation; +}; + +TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceAllocationThenItIsPutIntoCache) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + + constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; + auto testDataset = std::vector( + {{1u, nullptr}, + {(allocationSizeBasis << 0) - 1, nullptr}, + {(allocationSizeBasis << 0), nullptr}, + {(allocationSizeBasis << 0) + 1, nullptr}, + {(allocationSizeBasis << 1) - 1, nullptr}, + {(allocationSizeBasis << 1), nullptr}, + {(allocationSizeBasis << 1) + 1, nullptr}}); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + for (auto &testData : testDataset) { + testData.allocation = svmManager->createHostUnifiedMemoryAllocation(testData.allocationSize, unifiedMemoryProperties); + ASSERT_NE(testData.allocation, nullptr); + } + size_t expectedCacheSize = 0u; + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), expectedCacheSize); + + for (auto const &testData : testDataset) { + svmManager->freeSVMAlloc(testData.allocation); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), ++expectedCacheSize); + bool foundInCache = false; + for (auto i = 0u; i < svmManager->usmHostAllocationsCache.allocations.size(); ++i) { + if (svmManager->usmHostAllocationsCache.allocations[i].allocation == testData.allocation) { + foundInCache = true; + break; + } + } + EXPECT_TRUE(foundInCache); + } + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size()); + + svmManager->trimUSMHostAllocCache(); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); +} + +TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenReturnCorrectCachedAllocation) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + + constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; + auto testDataset = std::vector( + { + {(allocationSizeBasis << 0), nullptr}, + {(allocationSizeBasis << 0) + 1, nullptr}, + {(allocationSizeBasis << 1), nullptr}, + {(allocationSizeBasis << 1) + 1, nullptr}, + {(allocationSizeBasis << 2), nullptr}, + {(allocationSizeBasis << 2) + 1, nullptr}, + }); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + for (auto &testData : testDataset) { + testData.allocation = svmManager->createHostUnifiedMemoryAllocation(testData.allocationSize, unifiedMemoryProperties); + ASSERT_NE(testData.allocation, nullptr); + } + + size_t expectedCacheSize = 0u; + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), expectedCacheSize); + + for (auto const &testData : testDataset) { + svmManager->freeSVMAlloc(testData.allocation); + } + + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size()); + + std::vector allocationsToFree; + + for (auto &testData : testDataset) { + auto secondAllocation = svmManager->createHostUnifiedMemoryAllocation(testData.allocationSize, unifiedMemoryProperties); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size() - 1); + EXPECT_EQ(secondAllocation, testData.allocation); + svmManager->freeSVMAlloc(secondAllocation); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size()); + } + + svmManager->trimUSMHostAllocCache(); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); +} + +TEST_F(SvmHostAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFreeThenReturnAllocationsInCacheStartingFromSmallest) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + + size_t allocationSizeBasis = MemoryConstants::pageSize64k; + auto testDataset = std::vector( + { + {(allocationSizeBasis << 0), nullptr}, + {(allocationSizeBasis << 1), nullptr}, + {(allocationSizeBasis << 2), nullptr}, + }); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + for (auto &testData : testDataset) { + testData.allocation = svmManager->createHostUnifiedMemoryAllocation(testData.allocationSize, unifiedMemoryProperties); + ASSERT_NE(testData.allocation, nullptr); + } + + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + + for (auto const &testData : testDataset) { + svmManager->freeSVMAlloc(testData.allocation); + } + + size_t expectedCacheSize = testDataset.size(); + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), expectedCacheSize); + + auto allocationLargerThanInCache = svmManager->createHostUnifiedMemoryAllocation(allocationSizeBasis << 3, unifiedMemoryProperties); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), expectedCacheSize); + + auto firstAllocation = svmManager->createHostUnifiedMemoryAllocation(allocationSizeBasis, unifiedMemoryProperties); + EXPECT_EQ(firstAllocation, testDataset[0].allocation); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), --expectedCacheSize); + + auto secondAllocation = svmManager->createHostUnifiedMemoryAllocation(allocationSizeBasis, unifiedMemoryProperties); + EXPECT_EQ(secondAllocation, testDataset[1].allocation); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), --expectedCacheSize); + + auto thirdAllocation = svmManager->createHostUnifiedMemoryAllocation(allocationSizeBasis, unifiedMemoryProperties); + EXPECT_EQ(thirdAllocation, testDataset[2].allocation); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + + svmManager->freeSVMAlloc(firstAllocation); + svmManager->freeSVMAlloc(secondAllocation); + svmManager->freeSVMAlloc(thirdAllocation); + svmManager->freeSVMAlloc(allocationLargerThanInCache); + + svmManager->trimUSMHostAllocCache(); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); +} + +struct SvmHostAllocationCacheTestDataType { + SvmHostAllocationCacheTestDataType(size_t allocationSize, + const RootDeviceIndicesContainer &rootDeviceIndicesArg, + std::map &subdeviceBitFields, + Device *device, + std::string name) : allocationSize(allocationSize), + unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, + 1, + rootDeviceIndicesArg, + subdeviceBitFields), + name(name){ + + }; + size_t allocationSize; + void *allocation{nullptr}; + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; + std::string name; +}; + +TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAllocatingAfterFreeThenReturnCorrectAllocation) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); + auto rootDevice = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(rootDevice->getMemoryManager(), false); + ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + + constexpr auto allocationSizeBasis = MemoryConstants::kiloByte; + size_t defaultAllocSize = allocationSizeBasis << 0; + std::map subDeviceBitfields = {{0u, rootDevice->getDeviceBitfield()}}; + RootDeviceIndicesContainer rootDeviceIndices; + rootDeviceIndices.pushUnique(rootDevice->getRootDeviceIndex()); + SvmHostAllocationCacheTestDataType + defaultAlloc(defaultAllocSize, + rootDeviceIndices, + subDeviceBitfields, + rootDevice, "defaultAlloc"), + writeOnly(defaultAllocSize, + rootDeviceIndices, + subDeviceBitfields, + rootDevice, "writeOnly"), + readOnly(defaultAllocSize, + rootDeviceIndices, + subDeviceBitfields, + rootDevice, "readOnly"), + allocWriteCombined(defaultAllocSize, + rootDeviceIndices, + subDeviceBitfields, + rootDevice, "allocWriteCombined"); + writeOnly.unifiedMemoryProperties.allocationFlags.flags.writeOnly = true; + readOnly.unifiedMemoryProperties.allocationFlags.flags.readOnly = true; + allocWriteCombined.unifiedMemoryProperties.allocationFlags.allocFlags.allocWriteCombined = true; + + auto testDataset = std::vector({defaultAlloc, writeOnly, readOnly, allocWriteCombined}); + for (auto &allocationDataToVerify : testDataset) { + + for (auto &testData : testDataset) { + testData.allocation = svmManager->createHostUnifiedMemoryAllocation(testData.allocationSize, testData.unifiedMemoryProperties); + } + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + + for (auto &testData : testDataset) { + svmManager->freeSVMAlloc(testData.allocation); + } + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size()); + + auto allocationFromCache = svmManager->createHostUnifiedMemoryAllocation(allocationDataToVerify.allocationSize, allocationDataToVerify.unifiedMemoryProperties); + EXPECT_EQ(allocationFromCache, allocationDataToVerify.allocation); + + auto allocationNotFromCache = svmManager->createHostUnifiedMemoryAllocation(allocationDataToVerify.allocationSize, allocationDataToVerify.unifiedMemoryProperties); + for (auto &cachedAllocation : testDataset) { + EXPECT_NE(allocationNotFromCache, cachedAllocation.allocation); + } + svmManager->freeSVMAlloc(allocationFromCache); + svmManager->freeSVMAlloc(allocationNotFromCache); + + svmManager->trimUSMHostAllocCache(); + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + } +} + +TEST_F(SvmHostAllocationCacheTest, givenHostOutOfMemoryWhenAllocatingThenCacheIsTrimmedAndAllocationSucceeds) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + device->injectMemoryManager(new MockMemoryManagerWithCapacity(*device->getExecutionEnvironment())); + MockMemoryManagerWithCapacity *memoryManager = static_cast(device->getMemoryManager()); + auto svmManager = std::make_unique(memoryManager, false); + ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + + memoryManager->capacity = MemoryConstants::pageSize64k * 3; + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + + auto allocationInCache = svmManager->createHostUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); + auto allocationInCache2 = svmManager->createHostUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); + auto allocationInCache3 = svmManager->createHostUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + svmManager->freeSVMAlloc(allocationInCache); + svmManager->freeSVMAlloc(allocationInCache2); + svmManager->freeSVMAllocDefer(allocationInCache3); + + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 3u); + ASSERT_NE(svmManager->getSVMAlloc(allocationInCache), nullptr); + ASSERT_NE(svmManager->getSVMAlloc(allocationInCache2), nullptr); + ASSERT_NE(svmManager->getSVMAlloc(allocationInCache3), nullptr); + auto ptr = svmManager->createHostUnifiedMemoryAllocation(MemoryConstants::pageSize64k * 2, unifiedMemoryProperties); + EXPECT_NE(ptr, nullptr); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); + svmManager->freeSVMAlloc(ptr); + + svmManager->trimUSMHostAllocCache(); + ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); +} \ No newline at end of file