From 3f646839ca6628bbd5f7a6a3c07d4b2903974384 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Tue, 21 Jan 2025 11:30:02 +0000 Subject: [PATCH] fix: usm reuse cleaning unused allocations mechanism for freeing allocations saved for reuse that have not been used in a given time Related-To: NEO-13425 Signed-off-by: Dominik Dabek --- .../core/source/driver/driver_handle_imp.cpp | 3 +- opencl/source/context/context.cpp | 5 +- .../debug_settings/debug_variables_base.inl | 1 + .../execution_environment.cpp | 18 +++ .../execution_environment.h | 4 + shared/source/memory_manager/CMakeLists.txt | 4 +- .../memory_manager/unified_memory_manager.cpp | 70 ++++++-- .../memory_manager/unified_memory_manager.h | 10 +- .../unified_memory_reuse_cleaner.cpp | 73 +++++++++ .../unified_memory_reuse_cleaner.h | 47 ++++++ shared/test/common/mocks/CMakeLists.txt | 3 +- .../mocks/mock_usm_memory_reuse_cleaner.h | 17 ++ shared/test/common/test_files/igdrcl.config | 1 + .../execution_environment_tests.cpp | 36 ++++- .../unified_memory_manager_cache_tests.cpp | 153 ++++++++++++++++-- 15 files changed, 407 insertions(+), 38 deletions(-) create mode 100644 shared/source/memory_manager/unified_memory_reuse_cleaner.cpp create mode 100644 shared/source/memory_manager/unified_memory_reuse_cleaner.h create mode 100644 shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h diff --git a/level_zero/core/source/driver/driver_handle_imp.cpp b/level_zero/core/source/driver/driver_handle_imp.cpp index d5ba9640c0..a62f7bd3e8 100644 --- a/level_zero/core/source/driver/driver_handle_imp.cpp +++ b/level_zero/core/source/driver/driver_handle_imp.cpp @@ -191,7 +191,7 @@ DriverHandleImp::~DriverHandleImp() { if (memoryManager != nullptr) { memoryManager->peekExecutionEnvironment().prepareForCleanup(); if (this->svmAllocsManager) { - this->svmAllocsManager->trimUSMDeviceAllocCache(); + this->svmAllocsManager->cleanupUSMAllocCaches(); this->usmHostMemAllocPool.cleanup(); } } @@ -219,7 +219,6 @@ DriverHandleImp::~DriverHandleImp() { this->fabricIndirectEdges.clear(); if (this->svmAllocsManager) { - this->svmAllocsManager->trimUSMDeviceAllocCache(); delete this->svmAllocsManager; this->svmAllocsManager = nullptr; } diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 912ccf98da..dfa48c869e 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -78,8 +78,7 @@ Context::~Context() { } if (svmAllocsManager) { this->stagingBufferManager.reset(); - svmAllocsManager->trimUSMDeviceAllocCache(); - svmAllocsManager->trimUSMHostAllocCache(); + svmAllocsManager->cleanupUSMAllocCaches(); delete svmAllocsManager; } if (driverDiagnostics) { diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index fe91c7d5c3..5479bab03a 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -586,6 +586,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0, DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable device usm allocation cache. Use X% of device memory.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableHostAllocationCache, -1, "Experimentally enable host usm allocation cache. Use X% of shared system memory.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalUSMAllocationReuseVersion, -1, "Version of mechanism to use for usm allocation reuse.") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalUSMAllocationReuseCleaner, -1, "Enable usm allocation reuse cleaner. -1: default, 0: disable, 1:enable") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalH2DCpuCopyThreshold, -1, "Override default threshold (in bytes) for H2D CPU copy.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalD2HCpuCopyThreshold, -1, "Override default threshold (in bytes) for D2H CPU copy.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalCopyThroughLock, -1, "Experimentally copy memory through locked ptr. -1: default 0: disable 1: enable ") diff --git a/shared/source/execution_environment/execution_environment.cpp b/shared/source/execution_environment/execution_environment.cpp index b445c29fc0..25b73a0ea0 100644 --- a/shared/source/execution_environment/execution_environment.cpp +++ b/shared/source/execution_environment/execution_environment.cpp @@ -20,6 +20,7 @@ #include "shared/source/helpers/string_helpers.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" +#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h" #include "shared/source/os_interface/debug_env_reader.h" #include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/os_environment.h" @@ -50,6 +51,9 @@ ExecutionEnvironment::~ExecutionEnvironment() { if (directSubmissionController) { directSubmissionController->stopThread(); } + if (unifiedMemoryReuseCleaner) { + unifiedMemoryReuseCleaner->stopThread(); + } if (memoryManager) { memoryManager->commonCleanup(); for (const auto &rootDeviceEnvironment : this->rootDeviceEnvironments) { @@ -143,6 +147,20 @@ DirectSubmissionController *ExecutionEnvironment::initializeDirectSubmissionCont return directSubmissionController.get(); } +void ExecutionEnvironment::initializeUnifiedMemoryReuseCleaner() { + std::lock_guard lock(initializeUnifiedMemoryReuseCleanerMutex); + auto initializeUnifiedMemoryReuseCleaner = UnifiedMemoryReuseCleaner::isSupported(); + + if (debugManager.flags.ExperimentalUSMAllocationReuseCleaner.get() != -1) { + initializeUnifiedMemoryReuseCleaner = debugManager.flags.ExperimentalUSMAllocationReuseCleaner.get() == 1; + } + + if (initializeUnifiedMemoryReuseCleaner && nullptr == this->unifiedMemoryReuseCleaner) { + this->unifiedMemoryReuseCleaner = std::make_unique(); + this->unifiedMemoryReuseCleaner->startThread(); + } +} + void ExecutionEnvironment::prepareRootDeviceEnvironments(uint32_t numRootDevices) { if (rootDeviceEnvironments.size() < numRootDevices) { rootDeviceEnvironments.resize(numRootDevices); diff --git a/shared/source/execution_environment/execution_environment.h b/shared/source/execution_environment/execution_environment.h index 5d8dc15995..0de06b43fa 100644 --- a/shared/source/execution_environment/execution_environment.h +++ b/shared/source/execution_environment/execution_environment.h @@ -19,6 +19,7 @@ namespace NEO { class DirectSubmissionController; +class UnifiedMemoryReuseCleaner; class GfxCoreHelper; class MemoryManager; struct OsEnvironment; @@ -65,8 +66,10 @@ class ExecutionEnvironment : public ReferenceTrackedObject bool isFP64EmulationEnabled() const { return fp64EmulationEnabled; } DirectSubmissionController *initializeDirectSubmissionController(); + void initializeUnifiedMemoryReuseCleaner(); std::unique_ptr memoryManager; + std::unique_ptr unifiedMemoryReuseCleaner; std::unique_ptr directSubmissionController; std::unique_ptr osEnvironment; std::vector> rootDeviceEnvironments; @@ -91,6 +94,7 @@ class ExecutionEnvironment : public ReferenceTrackedObject DebuggingMode debuggingEnabledMode = DebuggingMode::disabled; std::unordered_map rootDeviceNumCcsMap; std::mutex initializeDirectSubmissionControllerMutex; + std::mutex initializeUnifiedMemoryReuseCleanerMutex; std::vector> deviceCcsModeVec; }; } // namespace NEO diff --git a/shared/source/memory_manager/CMakeLists.txt b/shared/source/memory_manager/CMakeLists.txt index 4f7d84d117..7e106cc1bf 100644 --- a/shared/source/memory_manager/CMakeLists.txt +++ b/shared/source/memory_manager/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2019-2023 Intel Corporation +# Copyright (C) 2019-2025 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -59,6 +59,8 @@ set(NEO_CORE_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_pooling.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_pooling.h + ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_reuse_cleaner.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_reuse_cleaner.h ${CMAKE_CURRENT_SOURCE_DIR}/page_table.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table.h ${CMAKE_CURRENT_SOURCE_DIR}/page_table.inl diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 8ab54ad495..a3a0d6ea16 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -21,6 +21,7 @@ #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/compression_selector.h" #include "shared/source/memory_manager/memory_manager.h" +#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/product_helper.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" @@ -111,15 +112,15 @@ void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemory break; } void *allocationPtr = allocationIter->allocation; - SvmAllocationData *svmAllocData = svmAllocsManager->getSVMAlloc(allocationPtr); - UNRECOVERABLE_IF(!svmAllocData); - if (svmAllocData->device == unifiedMemoryProperties.device && - svmAllocData->allocationFlagsProperty.allFlags == unifiedMemoryProperties.allocationFlags.allFlags && - svmAllocData->allocationFlagsProperty.allAllocFlags == unifiedMemoryProperties.allocationFlags.allAllocFlags && - false == isInUse(svmAllocData)) { - if (svmAllocData->device) { - auto lock = svmAllocData->device->obtainAllocationsReuseLock(); - svmAllocData->device->recordAllocationGetFromReuse(allocationIter->allocationSize); + SvmAllocationData *svmData = svmAllocsManager->getSVMAlloc(allocationPtr); + UNRECOVERABLE_IF(nullptr == svmData); + if (svmData->device == unifiedMemoryProperties.device && + svmData->allocationFlagsProperty.allFlags == unifiedMemoryProperties.allocationFlags.allFlags && + svmData->allocationFlagsProperty.allAllocFlags == unifiedMemoryProperties.allocationFlags.allAllocFlags && + false == isInUse(svmData)) { + if (svmData->device) { + auto lock = svmData->device->obtainAllocationsReuseLock(); + svmData->device->recordAllocationGetFromReuse(allocationIter->allocationSize); } else { auto lock = memoryManager->obtainHostAllocationsReuseLock(); memoryManager->recordHostAllocationGetFromReuse(allocationIter->allocationSize); @@ -135,7 +136,7 @@ void SVMAllocsManager::SvmAllocationCache::trim() { std::lock_guard lock(this->mtx); for (auto &cachedAllocationInfo : this->allocations) { SvmAllocationData *svmData = svmAllocsManager->getSVMAlloc(cachedAllocationInfo.allocation); - DEBUG_BREAK_IF(nullptr == svmData); + UNRECOVERABLE_IF(nullptr == svmData); if (svmData->device) { auto lock = svmData->device->obtainAllocationsReuseLock(); svmData->device->recordAllocationGetFromReuse(cachedAllocationInfo.allocationSize); @@ -148,6 +149,38 @@ void SVMAllocsManager::SvmAllocationCache::trim() { this->allocations.clear(); } +void SVMAllocsManager::SvmAllocationCache::cleanup() { + if (this->memoryManager) { + if (auto usmReuseCleaner = this->memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner.get()) { + usmReuseCleaner->unregisterSvmAllocationCache(this); + } + } + this->trim(); +} + +void SVMAllocsManager::SvmAllocationCache::trimOldAllocs(std::chrono::high_resolution_clock::time_point trimTimePoint) { + std::lock_guard lock(this->mtx); + for (auto allocationIter = allocations.begin(); + allocationIter != allocations.end();) { + if (allocationIter->saveTime > trimTimePoint) { + ++allocationIter; + continue; + } + void *allocationPtr = allocationIter->allocation; + SvmAllocationData *svmData = svmAllocsManager->getSVMAlloc(allocationPtr); + UNRECOVERABLE_IF(nullptr == svmData); + if (svmData->device) { + auto lock = svmData->device->obtainAllocationsReuseLock(); + svmData->device->recordAllocationGetFromReuse(allocationIter->allocationSize); + } else { + auto lock = memoryManager->obtainHostAllocationsReuseLock(); + memoryManager->recordHostAllocationGetFromReuse(allocationIter->allocationSize); + } + svmAllocsManager->freeSVMAllocImpl(allocationIter->allocation, FreePolicyType::defer, svmData); + allocationIter = allocations.erase(allocationIter); + } +} + SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) { if (allocations.size() == 0) { return nullptr; @@ -632,6 +665,11 @@ void SVMAllocsManager::freeSVMAllocDeferImpl() { } } +void SVMAllocsManager::cleanupUSMAllocCaches() { + this->usmDeviceAllocationsCache.cleanup(); + this->usmHostAllocationsCache.cleanup(); +} + void SVMAllocsManager::trimUSMDeviceAllocCache() { this->usmDeviceAllocationsCache.trim(); } @@ -774,6 +812,9 @@ void SVMAllocsManager::initUsmDeviceAllocationsCache(Device &device) { this->usmDeviceAllocationsCache.allocations.reserve(128u); this->usmDeviceAllocationsCache.svmAllocsManager = this; this->usmDeviceAllocationsCache.memoryManager = memoryManager; + if (auto usmReuseCleaner = device.getExecutionEnvironment()->unifiedMemoryReuseCleaner.get()) { + usmReuseCleaner->registerSvmAllocationCache(&this->usmDeviceAllocationsCache); + } } } @@ -786,9 +827,12 @@ void SVMAllocsManager::initUsmHostAllocationsCache() { this->usmHostAllocationsCache.maxSize = static_cast(fractionOfTotalMemoryForRecycling * totalSystemMemory); if (this->usmHostAllocationsCache.maxSize > 0u) { this->usmHostAllocationsCache.allocations.reserve(128u); + this->usmHostAllocationsCache.svmAllocsManager = this; + this->usmHostAllocationsCache.memoryManager = memoryManager; + if (auto usmReuseCleaner = this->memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner.get()) { + usmReuseCleaner->registerSvmAllocationCache(&this->usmHostAllocationsCache); + } } - this->usmHostAllocationsCache.svmAllocsManager = this; - this->usmHostAllocationsCache.memoryManager = memoryManager; } void SVMAllocsManager::initUsmAllocationsCaches(Device &device) { @@ -797,6 +841,7 @@ void SVMAllocsManager::initUsmAllocationsCaches(Device &device) { this->usmDeviceAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableDeviceAllocationCache.get(); } if (this->usmDeviceAllocationsCacheEnabled) { + device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner(); this->initUsmDeviceAllocationsCache(device); } @@ -805,6 +850,7 @@ void SVMAllocsManager::initUsmAllocationsCaches(Device &device) { this->usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get(); } if (this->usmHostAllocationsCacheEnabled) { + device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner(); this->initUsmHostAllocationsCache(); } } diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index d4d5ff3c30..b25fbc2cb9 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2024 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -150,7 +150,10 @@ class SVMAllocsManager { struct SvmCacheAllocationInfo { size_t allocationSize; void *allocation; - SvmCacheAllocationInfo(size_t allocationSize, void *allocation) : allocationSize(allocationSize), allocation(allocation) {} + std::chrono::high_resolution_clock::time_point saveTime; + SvmCacheAllocationInfo(size_t allocationSize, void *allocation) : allocationSize(allocationSize), allocation(allocation) { + saveTime = std::chrono::high_resolution_clock::now(); + } bool operator<(SvmCacheAllocationInfo const &other) const { return allocationSize < other.allocationSize; } @@ -170,6 +173,8 @@ class SVMAllocsManager { bool isInUse(SvmAllocationData *svmData); void *get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties); void trim(); + void trimOldAllocs(std::chrono::high_resolution_clock::time_point trimTimePoint); + void cleanup(); std::vector allocations; std::mutex mtx; @@ -215,6 +220,7 @@ class SVMAllocsManager { MOCKABLE_VIRTUAL void freeSVMAllocDeferImpl(); MOCKABLE_VIRTUAL void freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData); bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); } + void cleanupUSMAllocCaches(); void trimUSMDeviceAllocCache(); void trimUSMHostAllocCache(); void insertSVMAlloc(const SvmAllocationData &svmData); diff --git a/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp b/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp new file mode 100644 index 0000000000..fb0252c4e4 --- /dev/null +++ b/shared/source/memory_manager/unified_memory_reuse_cleaner.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2024-2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h" + +#include "shared/source/helpers/sleep.h" +#include "shared/source/os_interface/os_thread.h" + +#include +namespace NEO { + +UnifiedMemoryReuseCleaner::UnifiedMemoryReuseCleaner() { +} + +UnifiedMemoryReuseCleaner::~UnifiedMemoryReuseCleaner() { + UNRECOVERABLE_IF(this->unifiedMemoryReuseCleanerThread); +} + +void UnifiedMemoryReuseCleaner::stopThread() { + keepCleaning.store(false); + runCleaning.store(false); + if (unifiedMemoryReuseCleanerThread) { + unifiedMemoryReuseCleanerThread->join(); + unifiedMemoryReuseCleanerThread.reset(); + } +}; + +void *UnifiedMemoryReuseCleaner::cleanUnifiedMemoryReuse(void *self) { + auto cleaner = reinterpret_cast(self); + while (!cleaner->runCleaning.load()) { + if (!cleaner->keepCleaning.load()) { + return nullptr; + } + NEO::sleep(sleepTime); + } + + while (true) { + if (!cleaner->keepCleaning.load()) { + return nullptr; + } + NEO::sleep(sleepTime); + cleaner->trimOldInCaches(); + } +} + +void UnifiedMemoryReuseCleaner::registerSvmAllocationCache(SvmAllocationCache *cache) { + std::lock_guard lockSvmAllocationCaches(this->svmAllocationCachesMutex); + this->svmAllocationCaches.push_back(cache); + this->startCleaning(); +} + +void UnifiedMemoryReuseCleaner::unregisterSvmAllocationCache(SvmAllocationCache *cache) { + std::lock_guard lockSvmAllocationCaches(this->svmAllocationCachesMutex); + this->svmAllocationCaches.erase(std::find(this->svmAllocationCaches.begin(), this->svmAllocationCaches.end(), cache)); +} + +void UnifiedMemoryReuseCleaner::trimOldInCaches() { + const std::chrono::high_resolution_clock::time_point trimTimePoint = std::chrono::high_resolution_clock::now() - maxHoldTime; + std::lock_guard lockSvmAllocationCaches(this->svmAllocationCachesMutex); + for (auto svmAllocCache : this->svmAllocationCaches) { + svmAllocCache->trimOldAllocs(trimTimePoint); + } +} + +void UnifiedMemoryReuseCleaner::startThread() { + this->unifiedMemoryReuseCleanerThread = Thread::createFunc(cleanUnifiedMemoryReuse, reinterpret_cast(this)); +} + +} // namespace NEO \ No newline at end of file diff --git a/shared/source/memory_manager/unified_memory_reuse_cleaner.h b/shared/source/memory_manager/unified_memory_reuse_cleaner.h new file mode 100644 index 0000000000..619160d8ef --- /dev/null +++ b/shared/source/memory_manager/unified_memory_reuse_cleaner.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2024-2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/memory_manager/unified_memory_manager.h" + +#include +#include +#include +#include +namespace NEO { +class Thread; +class UnifiedMemoryReuseCleaner { + using SvmAllocationCache = SVMAllocsManager::SvmAllocationCache; + + public: + static constexpr auto sleepTime = std::chrono::seconds(2u); + static constexpr auto maxHoldTime = sleepTime; + UnifiedMemoryReuseCleaner(); + virtual ~UnifiedMemoryReuseCleaner(); + + MOCKABLE_VIRTUAL void startThread(); + void stopThread(); + + static bool isSupported() { return false; } + + void registerSvmAllocationCache(SvmAllocationCache *cache); + void unregisterSvmAllocationCache(SvmAllocationCache *cache); + + protected: + void startCleaning() { runCleaning.store(true); }; + static void *cleanUnifiedMemoryReuse(void *self); + void trimOldInCaches(); + std::unique_ptr unifiedMemoryReuseCleanerThread; + + std::vector svmAllocationCaches; + std::mutex svmAllocationCachesMutex; + + std::atomic_bool runCleaning = false; + std::atomic_bool keepCleaning = true; +}; +} // namespace NEO \ No newline at end of file diff --git a/shared/test/common/mocks/CMakeLists.txt b/shared/test/common/mocks/CMakeLists.txt index 1010b47acd..2eba8c7364 100644 --- a/shared/test/common/mocks/CMakeLists.txt +++ b/shared/test/common/mocks/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2020-2024 Intel Corporation +# Copyright (C) 2020-2025 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -95,6 +95,7 @@ set(NEO_CORE_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_csr.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_container.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_packet.h + ${CMAKE_CURRENT_SOURCE_DIR}/mock_usm_memory_reuse_cleaner.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_usm_memory_pool.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_residency_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/ult_device_factory.cpp diff --git a/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h b/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h new file mode 100644 index 0000000000..0b71766b8b --- /dev/null +++ b/shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h" +namespace NEO { +struct MockUnifiedMemoryReuseCleaner : public UnifiedMemoryReuseCleaner { + public: + using UnifiedMemoryReuseCleaner::svmAllocationCaches; + using UnifiedMemoryReuseCleaner::trimOldInCaches; + void startThread() override{}; +}; +} // namespace NEO \ No newline at end of file diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 361702d856..9104e81db6 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -662,4 +662,5 @@ DirectSubmissionRelaxedOrderingCounterHeuristic = -1 DirectSubmissionRelaxedOrderingCounterHeuristicTreshold = -1 ClearStandaloneInOrderTimestampAllocation = -1 PipelinedEuThreadArbitration = -1 +ExperimentalUSMAllocationReuseCleaner = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/execution_environment/execution_environment_tests.cpp b/shared/test/unit_test/execution_environment/execution_environment_tests.cpp index 84221d9b1e..94dd762f59 100644 --- a/shared/test/unit_test/execution_environment/execution_environment_tests.cpp +++ b/shared/test/unit_test/execution_environment/execution_environment_tests.cpp @@ -18,6 +18,7 @@ #include "shared/source/helpers/driver_model_type.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/hw_info.h" +#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/os_interface.h" @@ -297,6 +298,30 @@ TEST(ExecutionEnvironment, givenEnableDirectSubmissionControllerSetZeroWhenIniti EXPECT_EQ(controller, nullptr); } +TEST(ExecutionEnvironment, givenExperimentalUSMAllocationReuseCleanerSetWhenInitializeUnifiedMemoryReuseCleanerThenNotNull) { + DebugManagerStateRestore restorer; + debugManager.flags.ExperimentalUSMAllocationReuseCleaner.set(1); + + VariableBackup funcBackup{&NEO::Thread::createFunc, [](void *(*func)(void *), void *arg) -> std::unique_ptr { return nullptr; }}; + MockExecutionEnvironment executionEnvironment{}; + executionEnvironment.initializeUnifiedMemoryReuseCleaner(); + auto cleaner = executionEnvironment.unifiedMemoryReuseCleaner.get(); + + EXPECT_NE(cleaner, nullptr); + executionEnvironment.initializeUnifiedMemoryReuseCleaner(); + EXPECT_EQ(cleaner, executionEnvironment.unifiedMemoryReuseCleaner.get()); +} + +TEST(ExecutionEnvironment, givenExperimentalUSMAllocationReuseCleanerSetZeroWhenInitializeUnifiedMemoryReuseCleanerThenNull) { + DebugManagerStateRestore restorer; + debugManager.flags.ExperimentalUSMAllocationReuseCleaner.set(0); + + MockExecutionEnvironment executionEnvironment{}; + executionEnvironment.initializeUnifiedMemoryReuseCleaner(); + + EXPECT_EQ(nullptr, executionEnvironment.unifiedMemoryReuseCleaner.get()); +} + TEST(ExecutionEnvironment, givenNeoCalEnabledWhenCreateExecutionEnvironmentThenSetDebugVariables) { const std::unordered_map config = { {"UseKmdMigration", 0}, @@ -364,11 +389,12 @@ TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenInitializeMemoryManagerI static_assert(sizeof(ExecutionEnvironment) == sizeof(std::unique_ptr) + sizeof(std::unique_ptr) + + sizeof(std::unique_ptr) + sizeof(std::unique_ptr) + sizeof(std::vector>) + sizeof(std::unordered_map>) + sizeof(std::unordered_map) + - sizeof(std::mutex) + + 2 * sizeof(std::mutex) + 2 * sizeof(bool) + sizeof(DeviceHierarchyMode) + sizeof(DebuggingMode) + @@ -381,12 +407,15 @@ static_assert(sizeof(ExecutionEnvironment) == sizeof(std::unique_ptr { + struct MemoryMangerMock : public DestructorCounted { MemoryMangerMock(uint32_t &destructorId, ExecutionEnvironment &executionEnvironment) : DestructorCounted(destructorId, executionEnvironment) { callBaseAllocateGraphicsMemoryForNonSvmHostPtr = false; callBasePopulateOsHandles = false; } }; + struct UnifiedMemoryReuseCleanerMock : public DestructorCounted { + UnifiedMemoryReuseCleanerMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} + }; struct DirectSubmissionControllerMock : public DestructorCounted { DirectSubmissionControllerMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; @@ -418,9 +447,10 @@ TEST(ExecutionEnvironment, givenExecutionEnvironmentWithVariousMembersWhenItIsDe executionEnvironment->rootDeviceEnvironments[0]->builtins = std::make_unique(destructorId); executionEnvironment->rootDeviceEnvironments[0]->compilerInterface = std::make_unique(destructorId); executionEnvironment->directSubmissionController = std::make_unique(destructorId); + executionEnvironment->unifiedMemoryReuseCleaner = std::make_unique(destructorId); executionEnvironment.reset(nullptr); - EXPECT_EQ(8u, destructorId); + EXPECT_EQ(9u, destructorId); } TEST(ExecutionEnvironment, givenMultipleRootDevicesWhenTheyAreCreatedThenReuseMemoryManager) { diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp index 1d01e1ad68..b27c9404b9 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/helpers/api_specific_config.h" +#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/raii_product_helper.h" #include "shared/test/common/mocks/mock_ail_configuration.h" @@ -14,6 +15,7 @@ #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_product_helper.h" #include "shared/test/common/mocks/mock_svm_manager.h" +#include "shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" #include "shared/test/common/test_macros/test.h" @@ -248,7 +250,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic } EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size()); - svmManager->trimUSMDeviceAllocCache(); + svmManager->cleanupUSMAllocCaches(); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } @@ -451,7 +453,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAlloc EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size()); } - svmManager->trimUSMDeviceAllocCache(); + svmManager->cleanupUSMAllocCaches(); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } @@ -491,7 +493,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAlloc svmManager->freeSVMAlloc(notReusedDueToMemoryWastage); svmManager->freeSVMAlloc(reused); - svmManager->trimUSMDeviceAllocCache(); + svmManager->cleanupUSMAllocCaches(); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } @@ -503,9 +505,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationOverSizeLimitWhenAllocatingA debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); auto device = deviceFactory->rootDevices[0]; auto svmManager = std::make_unique(device->getMemoryManager(), false); + device->maxAllocationsSavedForReuseSize = 1 * MemoryConstants::gigaByte; svmManager->initUsmAllocationsCaches(*device); ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); - svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; const auto notAcceptedAllocSize = SVMAllocsManager::SvmAllocationCache::maxServicedSize + 1; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); @@ -580,7 +582,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfter svmManager->freeSVMAlloc(thirdAllocation); svmManager->freeSVMAlloc(allocationLargerThanInCache); - svmManager->trimUSMDeviceAllocCache(); + svmManager->cleanupUSMAllocCaches(); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } @@ -712,7 +714,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCac EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); svmManager->freeSVMAlloc(ptr); - svmManager->trimUSMDeviceAllocCache(); + svmManager->cleanupUSMAllocCaches(); ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } @@ -745,7 +747,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationWithIsInternalAllocationSetW svmManager->freeSVMAlloc(testedAllocation); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 1u); - svmManager->trimUSMDeviceAllocCache(); + svmManager->cleanupUSMAllocCaches(); } TEST_F(SvmDeviceAllocationCacheTest, givenAllocationInUsageWhenAllocatingAfterFreeThenDoNotReuseAllocation) { @@ -777,7 +779,91 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationInUsageWhenAllocatingAfterFr svmManager->freeSVMAlloc(testedAllocation); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 2u); - svmManager->trimUSMDeviceAllocCache(); + svmManager->cleanupUSMAllocCaches(); +} + +TEST_F(SvmDeviceAllocationCacheTest, givenUsmReuseCleanerWhenTrimOldInCachesCalledThenOldAllocationsAreRemoved) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + debugManager.flags.ExperimentalEnableHostAllocationCache.set(0); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + device->executionEnvironment->unifiedMemoryReuseCleaner.reset(new MockUnifiedMemoryReuseCleaner); + auto mockUnifiedMemoryReuseCleaner = reinterpret_cast(device->executionEnvironment->unifiedMemoryReuseCleaner.get()); + EXPECT_EQ(0u, mockUnifiedMemoryReuseCleaner->svmAllocationCaches.size()); + device->maxAllocationsSavedForReuseSize = 1 * MemoryConstants::gigaByte; + svmManager->initUsmAllocationsCaches(*device); + EXPECT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + EXPECT_EQ(1u, mockUnifiedMemoryReuseCleaner->svmAllocationCaches.size()); + EXPECT_EQ(&svmManager->usmDeviceAllocationsCache, mockUnifiedMemoryReuseCleaner->svmAllocationCaches[0]); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + auto allocation = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + auto allocation2 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + EXPECT_NE(allocation, nullptr); + EXPECT_NE(allocation2, nullptr); + svmManager->freeSVMAlloc(allocation); + svmManager->freeSVMAlloc(allocation2); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 2u); + + const auto baseTimePoint = std::chrono::high_resolution_clock::now(); + const auto oldTimePoint = baseTimePoint - UnifiedMemoryReuseCleaner::maxHoldTime; + const auto notTrimmedTimePoint = baseTimePoint + std::chrono::hours(24); + + svmManager->usmDeviceAllocationsCache.allocations[0].saveTime = oldTimePoint; + svmManager->usmDeviceAllocationsCache.allocations[1].saveTime = notTrimmedTimePoint; + + mockUnifiedMemoryReuseCleaner->trimOldInCaches(); + + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(notTrimmedTimePoint, svmManager->usmDeviceAllocationsCache.allocations[0].saveTime); + + svmManager->cleanupUSMAllocCaches(); + EXPECT_EQ(0u, mockUnifiedMemoryReuseCleaner->svmAllocationCaches.size()); +} + +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsInReuseWhenTrimOldAllocsCalledThenTrimAllocationsSavedBeforeTimePoint) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + device->maxAllocationsSavedForReuseSize = 1 * MemoryConstants::gigaByte; + svmManager->initUsmAllocationsCaches(*device); + EXPECT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + auto allocation = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + auto allocation2 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + auto allocation3 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + EXPECT_NE(allocation, nullptr); + EXPECT_NE(allocation2, nullptr); + EXPECT_NE(allocation3, nullptr); + svmManager->freeSVMAlloc(allocation); + svmManager->freeSVMAlloc(allocation2); + svmManager->freeSVMAlloc(allocation3); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 3u); + + const auto baseTimePoint = std::chrono::high_resolution_clock::now(); + const auto timeDiff = std::chrono::microseconds(1); + + svmManager->usmDeviceAllocationsCache.allocations[0].saveTime = baseTimePoint; + svmManager->usmDeviceAllocationsCache.allocations[1].saveTime = baseTimePoint + timeDiff; + svmManager->usmDeviceAllocationsCache.allocations[2].saveTime = baseTimePoint + timeDiff * 2; + + svmManager->usmDeviceAllocationsCache.trimOldAllocs(baseTimePoint + timeDiff); + + EXPECT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size()); + EXPECT_EQ(baseTimePoint + timeDiff * 2, svmManager->usmDeviceAllocationsCache.allocations[0].saveTime); + + svmManager->cleanupUSMAllocCaches(); } using SvmHostAllocationCacheTest = Test; @@ -877,7 +963,7 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingHostAll } EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size()); - svmManager->trimUSMHostAllocCache(); + svmManager->cleanupUSMAllocCaches(); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); } @@ -1091,7 +1177,7 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocat EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size()); } - svmManager->trimUSMHostAllocCache(); + svmManager->cleanupUSMAllocCaches(); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); } @@ -1130,7 +1216,7 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocat svmManager->freeSVMAlloc(notReusedDueToMemoryWastage); svmManager->freeSVMAlloc(reused); - svmManager->trimUSMHostAllocCache(); + svmManager->cleanupUSMAllocCaches(); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); } @@ -1217,7 +1303,7 @@ TEST_F(SvmHostAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFr svmManager->freeSVMAlloc(thirdAllocation); svmManager->freeSVMAlloc(allocationLargerThanInCache); - svmManager->trimUSMHostAllocCache(); + svmManager->cleanupUSMAllocCaches(); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); } @@ -1338,7 +1424,7 @@ TEST_F(SvmHostAllocationCacheTest, givenHostOutOfMemoryWhenAllocatingThenCacheIs EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); svmManager->freeSVMAlloc(ptr); - svmManager->trimUSMHostAllocCache(); + svmManager->cleanupUSMAllocCaches(); ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); } @@ -1370,6 +1456,45 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationInUsageWhenAllocatingAfterFree svmManager->freeSVMAlloc(testedAllocation); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 2u); - svmManager->trimUSMHostAllocCache(); + svmManager->cleanupUSMAllocCaches(); +} + +TEST_F(SvmHostAllocationCacheTest, givenAllocationsInReuseWhenTrimOldAllocsCalledThenTrimAllocationsSavedBeforeTimePoint) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); + EXPECT_TRUE(svmManager->usmHostAllocationsCacheEnabled); + svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + auto allocation = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + auto allocation2 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + auto allocation3 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + EXPECT_NE(allocation, nullptr); + EXPECT_NE(allocation2, nullptr); + EXPECT_NE(allocation3, nullptr); + svmManager->freeSVMAlloc(allocation); + svmManager->freeSVMAlloc(allocation2); + svmManager->freeSVMAlloc(allocation3); + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 3u); + + auto baseTimePoint = std::chrono::high_resolution_clock::now(); + auto timeDiff = std::chrono::microseconds(1); + + svmManager->usmHostAllocationsCache.allocations[0].saveTime = baseTimePoint; + svmManager->usmHostAllocationsCache.allocations[1].saveTime = baseTimePoint + timeDiff; + svmManager->usmHostAllocationsCache.allocations[2].saveTime = baseTimePoint + timeDiff * 2; + + svmManager->usmHostAllocationsCache.trimOldAllocs(baseTimePoint + timeDiff); + + EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 1u); + EXPECT_EQ(baseTimePoint + timeDiff * 2, svmManager->usmHostAllocationsCache.allocations[0].saveTime); + + svmManager->cleanupUSMAllocCaches(); } } // namespace NEO \ No newline at end of file