From cdfe2ce8ad0c8c194a88f98d773864730bfec162 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Thu, 14 Apr 2022 15:23:52 +0000 Subject: [PATCH] Feature: Flag for device usm allocation reusing With flag enabled, when app calls freeSVMAlloc on device usm allocation, don't free it immediately but save it, and try to use it on subsequent allocations. This allocation cache will be trimmed if an allocation fails. Related-To: NEO-6893 Signed-off-by: Dominik Dabek --- .../unified_memory_manager_tests.cpp | 10 +- .../debug_settings/debug_variables_base.inl | 1 + .../memory_manager/unified_memory_manager.cpp | 119 +++++-- .../memory_manager/unified_memory_manager.h | 28 +- .../test/common/mocks/mock_memory_manager.h | 19 + shared/test/common/mocks/mock_svm_manager.h | 7 +- shared/test/common/test_files/igdrcl.config | 1 + .../unit_test/memory_manager/CMakeLists.txt | 1 + .../unified_memory_manager_cache_tests.cpp | 337 ++++++++++++++++++ 9 files changed, 491 insertions(+), 32 deletions(-) create mode 100644 shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp diff --git a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp index 06f3adfd88..69a94371e5 100644 --- a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -778,18 +778,18 @@ TEST(UnifiedMemoryTest, givenInternalAllocationWhenItIsMadeResidentThenSubsequen EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); - //now call with task count 2 , allocations shouldn't change + // now call with task count 2 , allocations shouldn't change unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 2u); auto internalEntry = unifiedMemoryManager->indirectAllocationsResidency.find(&commandStreamReceiver)->second; EXPECT_EQ(2u, internalEntry.latestSentTaskCount); EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); - //force Graphics Allocation to be non resident + // force Graphics Allocation to be non resident graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, commandStreamReceiver.getOsContext().getContextId()); EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); - //now call with task count 3 , allocations shouldn't change + // now call with task count 3 , allocations shouldn't change unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 2u); EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); unifiedMemoryManager->freeSVMAlloc(ptr); @@ -815,7 +815,7 @@ TEST(UnifiedMemoryTest, givenInternalAllocationWhenNewAllocationIsCreatedThenItI unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 1u); EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); - //force to non resident + // force to non resident graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, commandStreamReceiver.getOsContext().getContextId()); auto ptr2 = unifiedMemoryManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); @@ -825,7 +825,7 @@ TEST(UnifiedMemoryTest, givenInternalAllocationWhenNewAllocationIsCreatedThenItI EXPECT_FALSE(graphicsAllocation2->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); - //now call with task count 2, both allocations needs to be made resident + // now call with task count 2, both allocations needs to be made resident unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 2u); EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 32748a892e..eb2c45e13d 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -402,6 +402,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionCount, 0, "Experim DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionType, -1, "Experimental implementation: Set COMPUTE_WALKER Partition Type. Valid values for types from 1 to 3") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0, "Align local memory allocations to a given value. Works only with allocations at least as big as the value. 0: no effect, 2097152: 2 megabytes, 1073741824: 1 gigabyte") DECLARE_DEBUG_VARIABLE(bool, ExperimentalEnableSourceLevelDebugger, false, "Experimentally enable source level debugger.") +DECLARE_DEBUG_VARIABLE(bool, ExperimentalEnableDeviceAllocationCache, false, "Experimentally enable allocation cache.") /*DRIVER TOGGLES*/ DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version") diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 39a275ea94..201ed28ace 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -25,6 +25,38 @@ void SVMAllocsManager::MapBasedAllocationTracker::remove(SvmAllocationData alloc allocations.erase(iter); } +void SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr) { + std::lock_guard lock(this->mtx); + allocations.emplace(std::lower_bound(allocations.begin(), allocations.end(), size), size, ptr); +} + +void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties, SVMAllocsManager *svmAllocsManager) { + std::lock_guard lock(this->mtx); + for (auto allocationIter = std::lower_bound(allocations.begin(), allocations.end(), size); + allocationIter != allocations.end(); + ++allocationIter) { + void *allocationPtr = allocationIter->allocation; + SvmAllocationData *svmAllocData = svmAllocsManager->getSVMAlloc(allocationPtr); + if (svmAllocData->device == unifiedMemoryProperties.device && + svmAllocData->allocationFlagsProperty.allFlags == unifiedMemoryProperties.allocationFlags.allFlags && + svmAllocData->allocationFlagsProperty.allAllocFlags == unifiedMemoryProperties.allocationFlags.allAllocFlags) { + allocations.erase(allocationIter); + return allocationPtr; + } + } + return nullptr; +} + +void SVMAllocsManager::SvmAllocationCache::trim(SVMAllocsManager *svmAllocsManager) { + std::lock_guard lock(this->mtx); + for (auto &cachedAllocationInfo : this->allocations) { + SvmAllocationData *svmData = svmAllocsManager->getSVMAlloc(cachedAllocationInfo.allocation); + DEBUG_BREAK_IF(nullptr == svmData); + svmAllocsManager->freeSVMAllocImpl(cachedAllocationInfo.allocation, false, svmData); + } + this->allocations.clear(); +} + SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) { SvmAllocationContainer::iterator iter, end; SvmAllocationData *svmAllocData; @@ -104,6 +136,14 @@ void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &co SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager, bool multiOsContextSupport) : memoryManager(memoryManager), multiOsContextSupport(multiOsContextSupport) { + if (DebugManager.flags.ExperimentalEnableDeviceAllocationCache.get()) { + this->initUsmDeviceAllocationsCache(); + this->usmDeviceAllocationsCacheEnabled = true; + } +} + +SVMAllocsManager::~SVMAllocsManager() { + this->trimUSMDeviceAllocCache(); } void *SVMAllocsManager::createSVMAlloc(size_t size, const SvmAllocationProperties svmProperties, @@ -210,13 +250,26 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size, if (memoryProperties.memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) { unifiedMemoryProperties.flags.isUSMDeviceAllocation = true; + if (this->usmDeviceAllocationsCacheEnabled) { + void *allocationFromCache = this->usmDeviceAllocationsCache.get(size, memoryProperties, this); + if (allocationFromCache) { + return allocationFromCache; + } + } } else if (memoryProperties.memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) { unifiedMemoryProperties.flags.isUSMHostAllocation = true; } GraphicsAllocation *unifiedMemoryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(unifiedMemoryProperties); if (!unifiedMemoryAllocation) { - return nullptr; + if (memoryProperties.memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY && + this->usmDeviceAllocationsCacheEnabled) { + this->trimUSMDeviceAllocCache(); + unifiedMemoryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(unifiedMemoryProperties); + } + if (!unifiedMemoryAllocation) { + return nullptr; + } } setUnifiedAllocationProperties(unifiedMemoryAllocation, {}); @@ -340,35 +393,49 @@ void SVMAllocsManager::removeSVMAlloc(const SvmAllocationData &svmAllocData) { bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) { SvmAllocationData *svmData = getSVMAlloc(ptr); if (svmData) { - this->prepareIndirectAllocationForDestruction(svmData); - - if (blocking) { - if (svmData->cpuAllocation) { - this->memoryManager->waitForEnginesCompletion(*svmData->cpuAllocation); - } - - for (auto &gpuAllocation : svmData->gpuAllocations.getGraphicsAllocations()) { - if (gpuAllocation) { - this->memoryManager->waitForEnginesCompletion(*gpuAllocation); - } - } - } - - auto pageFaultManager = this->memoryManager->getPageFaultManager(); - if (pageFaultManager) { - pageFaultManager->removeAllocation(ptr); - } - std::unique_lock lock(mtx); - if (svmData->gpuAllocations.getAllocationType() == AllocationType::SVM_ZERO_COPY) { - freeZeroCopySvmAllocation(svmData); - } else { - freeSvmAllocationWithDeviceStorage(svmData); + if (InternalMemoryType::DEVICE_UNIFIED_MEMORY == svmData->memoryType && + this->usmDeviceAllocationsCacheEnabled) { + size_t alignedSize = alignUp(svmData->size, svmData->pageSizeForAlignment); + this->usmDeviceAllocationsCache.insert(alignedSize, ptr); + return true; } + this->freeSVMAllocImpl(ptr, blocking, svmData); return true; } return false; } +void SVMAllocsManager::freeSVMAllocImpl(void *ptr, bool blocking, SvmAllocationData *svmData) { + this->prepareIndirectAllocationForDestruction(svmData); + + if (blocking) { + if (svmData->cpuAllocation) { + this->memoryManager->waitForEnginesCompletion(*svmData->cpuAllocation); + } + + for (auto &gpuAllocation : svmData->gpuAllocations.getGraphicsAllocations()) { + if (gpuAllocation) { + this->memoryManager->waitForEnginesCompletion(*gpuAllocation); + } + } + } + + auto pageFaultManager = this->memoryManager->getPageFaultManager(); + if (pageFaultManager) { + pageFaultManager->removeAllocation(ptr); + } + std::unique_lock lock(mtx); + if (svmData->gpuAllocations.getAllocationType() == AllocationType::SVM_ZERO_COPY) { + freeZeroCopySvmAllocation(svmData); + } else { + freeSvmAllocationWithDeviceStorage(svmData); + } +} + +void SVMAllocsManager::trimUSMDeviceAllocCache() { + this->usmDeviceAllocationsCache.trim(this); +} + void *SVMAllocsManager::createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties, const RootDeviceIndicesContainer &rootDeviceIndices, const std::map &subdeviceBitfields) { @@ -476,6 +543,10 @@ void SVMAllocsManager::freeZeroCopySvmAllocation(SvmAllocationData *svmData) { } } +void SVMAllocsManager::initUsmDeviceAllocationsCache() { + this->usmDeviceAllocationsCache.allocations.reserve(128u); +} + void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData) { auto graphicsAllocations = svmData->gpuAllocations.getGraphicsAllocations(); GraphicsAllocation *cpuAllocation = svmData->cpuAllocation; diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 66a586af30..321dcc4ea2 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -123,8 +123,28 @@ class SVMAllocsManager { const std::map &subdeviceBitfields; }; + struct SvmCacheAllocationInfo { + size_t allocationSize; + void *allocation; + SvmCacheAllocationInfo(size_t allocationSize, void *allocation) : allocationSize(allocationSize), allocation(allocation) {} + bool operator<(SvmCacheAllocationInfo const &other) { + return allocationSize < other.allocationSize; + } + bool operator<(size_t const &size) { + return allocationSize < size; + } + }; + + struct SvmAllocationCache { + void insert(size_t size, void *); + void *get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties, SVMAllocsManager *svmAllocsManager); + void trim(SVMAllocsManager *svmAllocsManager); + std::vector allocations; + std::mutex mtx; + }; + SVMAllocsManager(MemoryManager *memoryManager, bool multiOsContextSupport); - MOCKABLE_VIRTUAL ~SVMAllocsManager() = default; + MOCKABLE_VIRTUAL ~SVMAllocsManager(); void *createSVMAlloc(size_t size, const SvmAllocationProperties svmProperties, const RootDeviceIndicesContainer &rootDeviceIndices, @@ -142,7 +162,9 @@ class SVMAllocsManager { void setUnifiedAllocationProperties(GraphicsAllocation *allocation, const SvmAllocationProperties &svmProperties); SvmAllocationData *getSVMAlloc(const void *ptr); MOCKABLE_VIRTUAL bool freeSVMAlloc(void *ptr, bool blocking); + MOCKABLE_VIRTUAL void freeSVMAllocImpl(void *ptr, bool blocking, SvmAllocationData *svmData); bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); } + void trimUSMDeviceAllocCache(); void insertSVMAlloc(const SvmAllocationData &svmData); void removeSVMAlloc(const SvmAllocationData &svmData); size_t getNumAllocs() const { return SVMAllocs.getNumAllocs(); } @@ -175,10 +197,14 @@ class SVMAllocsManager { void freeZeroCopySvmAllocation(SvmAllocationData *svmData); + void initUsmDeviceAllocationsCache(); + MapBasedAllocationTracker SVMAllocs; MapOperationsTracker svmMapOperations; MemoryManager *memoryManager; std::shared_mutex mtx; bool multiOsContextSupport; + SvmAllocationCache usmDeviceAllocationsCache; + bool usmDeviceAllocationsCacheEnabled = false; }; } // namespace NEO diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h index ff13c3e709..98757b14bb 100644 --- a/shared/test/common/mocks/mock_memory_manager.h +++ b/shared/test/common/mocks/mock_memory_manager.h @@ -392,4 +392,23 @@ class MockMemoryManagerOsAgnosticContext : public MockMemoryManager { } }; +class MockMemoryManagerWithCapacity : public MockMemoryManager { + public: + MockMemoryManagerWithCapacity(NEO::ExecutionEnvironment &executionEnvironment) : MockMemoryManager(executionEnvironment) {} + GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) override { + if (this->capacity >= properties.size) { + this->capacity -= properties.size; + return MockMemoryManager::allocateGraphicsMemoryWithProperties(properties); + } + return nullptr; + } + + void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override { + this->capacity += gfxAllocation->getUnderlyingBufferSize(); + MockMemoryManager::freeGraphicsMemoryImpl(gfxAllocation); + }; + + size_t capacity = 0u; +}; + } // namespace NEO diff --git a/shared/test/common/mocks/mock_svm_manager.h b/shared/test/common/mocks/mock_svm_manager.h index 3c3226f8b0..81d8557ef0 100644 --- a/shared/test/common/mocks/mock_svm_manager.h +++ b/shared/test/common/mocks/mock_svm_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,11 +8,14 @@ #pragma once #include "shared/source/memory_manager/unified_memory_manager.h" namespace NEO { -struct MockSVMAllocsManager : SVMAllocsManager { +struct MockSVMAllocsManager : public SVMAllocsManager { + public: using SVMAllocsManager::memoryManager; using SVMAllocsManager::multiOsContextSupport; using SVMAllocsManager::SVMAllocs; using SVMAllocsManager::SVMAllocsManager; using SVMAllocsManager::svmMapOperations; + using SVMAllocsManager::usmDeviceAllocationsCache; + using SVMAllocsManager::usmDeviceAllocationsCacheEnabled; }; } // namespace NEO diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 6fabb5aee4..0b4b6c322a 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -429,3 +429,4 @@ FailBuildProgramWithStatefulAccess = -1 ForceUncachedGmmUsageType = 0 OverrideDeviceName = unk EnablePrivateBO = 0 +ExperimentalEnableDeviceAllocationCache = 0 diff --git a/shared/test/unit_test/memory_manager/CMakeLists.txt b/shared/test/unit_test/memory_manager/CMakeLists.txt index 6e4827c531..ae8e39039b 100644 --- a/shared/test/unit_test/memory_manager/CMakeLists.txt +++ b/shared/test/unit_test/memory_manager/CMakeLists.txt @@ -24,6 +24,7 @@ target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/physical_address_allocator_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/special_heap_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/storage_info_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_cache_tests.cpp ) add_subdirectories() diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp new file mode 100644 index 0000000000..fc4f81ecec --- /dev/null +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp @@ -0,0 +1,337 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_device.h" +#include "shared/test/common/mocks/mock_memory_manager.h" +#include "shared/test/common/mocks/mock_svm_manager.h" +#include "shared/test/common/mocks/ult_device_factory.h" +#include "shared/test/common/test_macros/test.h" + +#include "gtest/gtest.h" + +using namespace NEO; + +struct SvmDeviceAllocationCacheSimpleTestDataType { + size_t allocationSize; + void *allocation; +}; + +TEST(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDeviceAllocationThenItIsPutIntoCache) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + DebugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; + auto testDataset = std::vector( + {{1u, nullptr}, + {(allocationSizeBasis << 0) - 1, nullptr}, + {(allocationSizeBasis << 0), nullptr}, + {(allocationSizeBasis << 0) + 1, nullptr}, + {(allocationSizeBasis << 1) - 1, nullptr}, + {(allocationSizeBasis << 1), nullptr}, + {(allocationSizeBasis << 1) + 1, nullptr}}); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + for (auto &testData : testDataset) { + testData.allocation = svmManager->createUnifiedMemoryAllocation(testData.allocationSize, unifiedMemoryProperties); + ASSERT_NE(testData.allocation, nullptr); + } + size_t expectedCacheSize = 0u; + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), expectedCacheSize); + + for (auto const &testData : testDataset) { + svmManager->freeSVMAlloc(testData.allocation); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), ++expectedCacheSize); + bool foundInCache = false; + for (auto i = 0u; i < svmManager->usmDeviceAllocationsCache.allocations.size(); ++i) { + if (svmManager->usmDeviceAllocationsCache.allocations[i].allocation == testData.allocation) { + foundInCache = true; + break; + } + } + EXPECT_TRUE(foundInCache); + } + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size()); + + svmManager->trimUSMDeviceAllocCache(); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); +} + +TEST(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocatingAfterFreeThenReturnCorrectCachedAllocation) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + DebugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; + auto testDataset = std::vector( + { + {(allocationSizeBasis << 0), nullptr}, + {(allocationSizeBasis << 1), nullptr}, + {(allocationSizeBasis << 2), nullptr}, + }); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + for (auto &testData : testDataset) { + testData.allocation = svmManager->createUnifiedMemoryAllocation(testData.allocationSize, unifiedMemoryProperties); + ASSERT_NE(testData.allocation, nullptr); + } + + size_t expectedCacheSize = 0u; + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), expectedCacheSize); + + for (auto const &testData : testDataset) { + svmManager->freeSVMAlloc(testData.allocation); + } + + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size()); + + std::vector allocationsToFree; + + for (auto &testData : testDataset) { + auto secondAllocation = svmManager->createUnifiedMemoryAllocation(testData.allocationSize, unifiedMemoryProperties); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size() - 1); + EXPECT_EQ(secondAllocation, testData.allocation); + svmManager->freeSVMAlloc(secondAllocation); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size()); + } + + svmManager->trimUSMDeviceAllocCache(); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); +} + +TEST(SvmDeviceAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFreeThenReturnAllocationsInCacheStartingFromSmallest) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + DebugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + size_t allocationSizeBasis = MemoryConstants::pageSize64k; + auto testDataset = std::vector( + { + {(allocationSizeBasis << 0), nullptr}, + {(allocationSizeBasis << 1), nullptr}, + {(allocationSizeBasis << 2), nullptr}, + }); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + for (auto &testData : testDataset) { + testData.allocation = svmManager->createUnifiedMemoryAllocation(testData.allocationSize, unifiedMemoryProperties); + ASSERT_NE(testData.allocation, nullptr); + } + + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + + for (auto const &testData : testDataset) { + svmManager->freeSVMAlloc(testData.allocation); + } + + size_t expectedCacheSize = testDataset.size(); + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), expectedCacheSize); + + auto allocationLargerThanInCache = svmManager->createUnifiedMemoryAllocation(allocationSizeBasis << 3, unifiedMemoryProperties); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), expectedCacheSize); + + auto firstAllocation = svmManager->createUnifiedMemoryAllocation(allocationSizeBasis, unifiedMemoryProperties); + EXPECT_EQ(firstAllocation, testDataset[0].allocation); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), --expectedCacheSize); + + auto secondAllocation = svmManager->createUnifiedMemoryAllocation(allocationSizeBasis, unifiedMemoryProperties); + EXPECT_EQ(secondAllocation, testDataset[1].allocation); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), --expectedCacheSize); + + auto thirdAllocation = svmManager->createUnifiedMemoryAllocation(allocationSizeBasis, unifiedMemoryProperties); + EXPECT_EQ(thirdAllocation, testDataset[2].allocation); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + + svmManager->freeSVMAlloc(firstAllocation); + svmManager->freeSVMAlloc(secondAllocation); + svmManager->freeSVMAlloc(thirdAllocation); + svmManager->freeSVMAlloc(allocationLargerThanInCache); + + svmManager->trimUSMDeviceAllocCache(); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); +} + +struct SvmDeviceAllocationCacheTestDataType { + SvmDeviceAllocationCacheTestDataType(size_t allocationSize, + const RootDeviceIndicesContainer &rootDeviceIndicesArg, + std::map &subdeviceBitFields, + Device *device, + std::string name) : allocationSize(allocationSize), + allocation(nullptr), + unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, + rootDeviceIndicesArg, + subdeviceBitFields), + name(name) { + unifiedMemoryProperties.device = device; + }; + size_t allocationSize; + void *allocation; + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; + std::string name; +}; + +TEST(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentFlagsWhenAllocatingAfterFreeThenReturnCorrectAllocation) { + std::unique_ptr deviceFactory(new UltDeviceFactory(2, 2)); + DebugManagerStateRestore restore; + DebugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto rootDevice = deviceFactory->rootDevices[0]; + auto secondRootDevice = deviceFactory->rootDevices[1]; + auto subDevice1 = deviceFactory->subDevices[0]; + auto svmManager = std::make_unique(rootDevice->getMemoryManager(), false); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; + size_t defaultAllocSize = allocationSizeBasis << 0; + std::map subDeviceBitfields = {{0u, {01}}, {1u, {10}}}; + SvmDeviceAllocationCacheTestDataType + defaultAlloc(defaultAllocSize, + {rootDevice->getRootDeviceIndex()}, + subDeviceBitfields, + rootDevice, "defaultAlloc"), + writeOnly(defaultAllocSize, + {rootDevice->getRootDeviceIndex()}, + subDeviceBitfields, + rootDevice, "writeOnly"), + readOnly(defaultAllocSize, + {rootDevice->getRootDeviceIndex()}, + subDeviceBitfields, + rootDevice, "readOnly"), + allocWriteCombined(defaultAllocSize, + {rootDevice->getRootDeviceIndex()}, + subDeviceBitfields, + rootDevice, "allocWriteCombined"), + secondDevice(defaultAllocSize, + {rootDevice->getRootDeviceIndex()}, + subDeviceBitfields, + secondRootDevice, "secondDevice"), + subDevice(defaultAllocSize, + {rootDevice->getRootDeviceIndex()}, + subDeviceBitfields, + subDevice1, "subDevice"); + writeOnly.unifiedMemoryProperties.allocationFlags.flags.writeOnly = true; + readOnly.unifiedMemoryProperties.allocationFlags.flags.readOnly = true; + allocWriteCombined.unifiedMemoryProperties.allocationFlags.allocFlags.allocWriteCombined = true; + + auto testDataset = std::vector({defaultAlloc, writeOnly, readOnly, allocWriteCombined, secondDevice, subDevice}); + for (auto &allocationDataToVerify : testDataset) { + + for (auto &testData : testDataset) { + testData.allocation = svmManager->createUnifiedMemoryAllocation(testData.allocationSize, testData.unifiedMemoryProperties); + } + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + + for (auto &testData : testDataset) { + svmManager->freeSVMAlloc(testData.allocation); + } + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size()); + + auto allocationFromCache = svmManager->createUnifiedMemoryAllocation(allocationDataToVerify.allocationSize, allocationDataToVerify.unifiedMemoryProperties); + EXPECT_EQ(allocationFromCache, allocationDataToVerify.allocation); + + auto allocationNotFromCache = svmManager->createUnifiedMemoryAllocation(allocationDataToVerify.allocationSize, allocationDataToVerify.unifiedMemoryProperties); + for (auto &cachedAllocation : testDataset) { + EXPECT_NE(allocationNotFromCache, cachedAllocation.allocation); + } + svmManager->freeSVMAlloc(allocationFromCache); + svmManager->freeSVMAlloc(allocationNotFromCache); + + svmManager->trimUSMDeviceAllocCache(); + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + } +} + +TEST(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCacheIsTrimmedAndAllocationSucceeds) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + DebugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + device->injectMemoryManager(new MockMemoryManagerWithCapacity(*device->getExecutionEnvironment())); + MockMemoryManagerWithCapacity *memoryManager = static_cast(device->getMemoryManager()); + auto svmManager = std::make_unique(memoryManager, false); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + memoryManager->capacity = MemoryConstants::pageSize64k * 2; + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + + auto allocationInCache = svmManager->createUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); + auto allocationInCache2 = svmManager->createUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + svmManager->freeSVMAlloc(allocationInCache); + svmManager->freeSVMAlloc(allocationInCache2); + + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 2u); + ASSERT_NE(svmManager->getSVMAlloc(allocationInCache), nullptr); + ASSERT_NE(svmManager->getSVMAlloc(allocationInCache2), nullptr); + auto ptr = svmManager->createUnifiedMemoryAllocation(MemoryConstants::pageSize64k * 2, unifiedMemoryProperties); + EXPECT_NE(ptr, nullptr); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + svmManager->freeSVMAlloc(ptr); + + svmManager->trimUSMDeviceAllocCache(); + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); +} + +TEST(SvmDeviceAllocationCacheTest, givenCachedAllocationsWhenDestructorIsCalledThenCacheAllocationsAreFreed) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + DebugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto memoryManager = std::make_unique(true, device->getExecutionEnvironment()); + auto svmManager = std::make_unique(memoryManager.get(), false); + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + constexpr auto allocationSizeBasis = MemoryConstants::pageSize64k; + auto testDataset = std::vector( + { + {(allocationSizeBasis << 0), nullptr}, + {(allocationSizeBasis << 1), nullptr}, + {(allocationSizeBasis << 2), nullptr}, + }); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + for (auto &testData : testDataset) { + testData.allocation = svmManager->createUnifiedMemoryAllocation(testData.allocationSize, unifiedMemoryProperties); + ASSERT_NE(testData.allocation, nullptr); + } + + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); + + for (auto const &testData : testDataset) { + svmManager->freeSVMAlloc(testData.allocation); + } + + ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size()); + ASSERT_EQ(memoryManager->freeGraphicsMemoryCalled, 0u); + svmManager.reset(); + EXPECT_EQ(memoryManager->freeGraphicsMemoryCalled, testDataset.size()); +} \ No newline at end of file