fix: usm reuse cleaning unused allocations

Add a mechanism that frees allocations saved for reuse when they have not
been reused within a given time.

Related-To: NEO-13425

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2025-01-21 11:30:02 +00:00
committed by Compute-Runtime-Automation
parent b2f8da5109
commit 3f646839ca
15 changed files with 407 additions and 38 deletions

View File

@ -191,7 +191,7 @@ DriverHandleImp::~DriverHandleImp() {
if (memoryManager != nullptr) { if (memoryManager != nullptr) {
memoryManager->peekExecutionEnvironment().prepareForCleanup(); memoryManager->peekExecutionEnvironment().prepareForCleanup();
if (this->svmAllocsManager) { if (this->svmAllocsManager) {
this->svmAllocsManager->trimUSMDeviceAllocCache(); this->svmAllocsManager->cleanupUSMAllocCaches();
this->usmHostMemAllocPool.cleanup(); this->usmHostMemAllocPool.cleanup();
} }
} }
@ -219,7 +219,6 @@ DriverHandleImp::~DriverHandleImp() {
this->fabricIndirectEdges.clear(); this->fabricIndirectEdges.clear();
if (this->svmAllocsManager) { if (this->svmAllocsManager) {
this->svmAllocsManager->trimUSMDeviceAllocCache();
delete this->svmAllocsManager; delete this->svmAllocsManager;
this->svmAllocsManager = nullptr; this->svmAllocsManager = nullptr;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2024 Intel Corporation * Copyright (C) 2018-2025 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -78,8 +78,7 @@ Context::~Context() {
} }
if (svmAllocsManager) { if (svmAllocsManager) {
this->stagingBufferManager.reset(); this->stagingBufferManager.reset();
svmAllocsManager->trimUSMDeviceAllocCache(); svmAllocsManager->cleanupUSMAllocCaches();
svmAllocsManager->trimUSMHostAllocCache();
delete svmAllocsManager; delete svmAllocsManager;
} }
if (driverDiagnostics) { if (driverDiagnostics) {

View File

@ -586,6 +586,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0,
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable device usm allocation cache. Use X% of device memory.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableDeviceAllocationCache, -1, "Experimentally enable device usm allocation cache. Use X% of device memory.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableHostAllocationCache, -1, "Experimentally enable host usm allocation cache. Use X% of shared system memory.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableHostAllocationCache, -1, "Experimentally enable host usm allocation cache. Use X% of shared system memory.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalUSMAllocationReuseVersion, -1, "Version of mechanism to use for usm allocation reuse.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalUSMAllocationReuseVersion, -1, "Version of mechanism to use for usm allocation reuse.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalUSMAllocationReuseCleaner, -1, "Enable usm allocation reuse cleaner. -1: default, 0: disable, 1:enable")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalH2DCpuCopyThreshold, -1, "Override default threshold (in bytes) for H2D CPU copy.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalH2DCpuCopyThreshold, -1, "Override default threshold (in bytes) for H2D CPU copy.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalD2HCpuCopyThreshold, -1, "Override default threshold (in bytes) for D2H CPU copy.") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalD2HCpuCopyThreshold, -1, "Override default threshold (in bytes) for D2H CPU copy.")
DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalCopyThroughLock, -1, "Experimentally copy memory through locked ptr. -1: default 0: disable 1: enable ") DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalCopyThroughLock, -1, "Experimentally copy memory through locked ptr. -1: default 0: disable 1: enable ")

View File

@ -20,6 +20,7 @@
#include "shared/source/helpers/string_helpers.h" #include "shared/source/helpers/string_helpers.h"
#include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h"
#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h"
#include "shared/source/os_interface/debug_env_reader.h" #include "shared/source/os_interface/debug_env_reader.h"
#include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/driver_info.h"
#include "shared/source/os_interface/os_environment.h" #include "shared/source/os_interface/os_environment.h"
@ -50,6 +51,9 @@ ExecutionEnvironment::~ExecutionEnvironment() {
if (directSubmissionController) { if (directSubmissionController) {
directSubmissionController->stopThread(); directSubmissionController->stopThread();
} }
if (unifiedMemoryReuseCleaner) {
unifiedMemoryReuseCleaner->stopThread();
}
if (memoryManager) { if (memoryManager) {
memoryManager->commonCleanup(); memoryManager->commonCleanup();
for (const auto &rootDeviceEnvironment : this->rootDeviceEnvironments) { for (const auto &rootDeviceEnvironment : this->rootDeviceEnvironments) {
@ -143,6 +147,20 @@ DirectSubmissionController *ExecutionEnvironment::initializeDirectSubmissionCont
return directSubmissionController.get(); return directSubmissionController.get();
} }
// Creates the USM reuse cleaner and starts its thread, at most once.
// The decision defaults to UnifiedMemoryReuseCleaner::isSupported() and is
// overridden by the ExperimentalUSMAllocationReuseCleaner debug flag when that
// flag is not -1 (1 enables, any other value disables).
void ExecutionEnvironment::initializeUnifiedMemoryReuseCleaner() {
    std::lock_guard<std::mutex> lock(initializeUnifiedMemoryReuseCleanerMutex);

    bool shouldCreateCleaner = UnifiedMemoryReuseCleaner::isSupported();
    auto &cleanerFlag = debugManager.flags.ExperimentalUSMAllocationReuseCleaner;
    if (-1 != cleanerFlag.get()) {
        shouldCreateCleaner = (1 == cleanerFlag.get());
    }

    // Nothing to do when disabled or when a cleaner already exists.
    if (!shouldCreateCleaner || this->unifiedMemoryReuseCleaner) {
        return;
    }

    this->unifiedMemoryReuseCleaner = std::make_unique<UnifiedMemoryReuseCleaner>();
    this->unifiedMemoryReuseCleaner->startThread();
}
void ExecutionEnvironment::prepareRootDeviceEnvironments(uint32_t numRootDevices) { void ExecutionEnvironment::prepareRootDeviceEnvironments(uint32_t numRootDevices) {
if (rootDeviceEnvironments.size() < numRootDevices) { if (rootDeviceEnvironments.size() < numRootDevices) {
rootDeviceEnvironments.resize(numRootDevices); rootDeviceEnvironments.resize(numRootDevices);

View File

@ -19,6 +19,7 @@
namespace NEO { namespace NEO {
class DirectSubmissionController; class DirectSubmissionController;
class UnifiedMemoryReuseCleaner;
class GfxCoreHelper; class GfxCoreHelper;
class MemoryManager; class MemoryManager;
struct OsEnvironment; struct OsEnvironment;
@ -65,8 +66,10 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
bool isFP64EmulationEnabled() const { return fp64EmulationEnabled; } bool isFP64EmulationEnabled() const { return fp64EmulationEnabled; }
DirectSubmissionController *initializeDirectSubmissionController(); DirectSubmissionController *initializeDirectSubmissionController();
void initializeUnifiedMemoryReuseCleaner();
std::unique_ptr<MemoryManager> memoryManager; std::unique_ptr<MemoryManager> memoryManager;
std::unique_ptr<UnifiedMemoryReuseCleaner> unifiedMemoryReuseCleaner;
std::unique_ptr<DirectSubmissionController> directSubmissionController; std::unique_ptr<DirectSubmissionController> directSubmissionController;
std::unique_ptr<OsEnvironment> osEnvironment; std::unique_ptr<OsEnvironment> osEnvironment;
std::vector<std::unique_ptr<RootDeviceEnvironment>> rootDeviceEnvironments; std::vector<std::unique_ptr<RootDeviceEnvironment>> rootDeviceEnvironments;
@ -91,6 +94,7 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
DebuggingMode debuggingEnabledMode = DebuggingMode::disabled; DebuggingMode debuggingEnabledMode = DebuggingMode::disabled;
std::unordered_map<uint32_t, uint32_t> rootDeviceNumCcsMap; std::unordered_map<uint32_t, uint32_t> rootDeviceNumCcsMap;
std::mutex initializeDirectSubmissionControllerMutex; std::mutex initializeDirectSubmissionControllerMutex;
std::mutex initializeUnifiedMemoryReuseCleanerMutex;
std::vector<std::tuple<std::string, uint32_t>> deviceCcsModeVec; std::vector<std::tuple<std::string, uint32_t>> deviceCcsModeVec;
}; };
} // namespace NEO } // namespace NEO

View File

@ -1,5 +1,5 @@
# #
# Copyright (C) 2019-2023 Intel Corporation # Copyright (C) 2019-2025 Intel Corporation
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# #
@ -59,6 +59,8 @@ set(NEO_CORE_MEMORY_MANAGER
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_pooling.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_pooling.cpp
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_pooling.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_pooling.h
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_reuse_cleaner.cpp
${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_reuse_cleaner.h
${CMAKE_CURRENT_SOURCE_DIR}/page_table.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table.cpp
${CMAKE_CURRENT_SOURCE_DIR}/page_table.h ${CMAKE_CURRENT_SOURCE_DIR}/page_table.h
${CMAKE_CURRENT_SOURCE_DIR}/page_table.inl ${CMAKE_CURRENT_SOURCE_DIR}/page_table.inl

View File

@ -21,6 +21,7 @@
#include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/compression_selector.h" #include "shared/source/memory_manager/compression_selector.h"
#include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h"
#include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/product_helper.h" #include "shared/source/os_interface/product_helper.h"
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h"
@ -111,15 +112,15 @@ void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemory
break; break;
} }
void *allocationPtr = allocationIter->allocation; void *allocationPtr = allocationIter->allocation;
SvmAllocationData *svmAllocData = svmAllocsManager->getSVMAlloc(allocationPtr); SvmAllocationData *svmData = svmAllocsManager->getSVMAlloc(allocationPtr);
UNRECOVERABLE_IF(!svmAllocData); UNRECOVERABLE_IF(nullptr == svmData);
if (svmAllocData->device == unifiedMemoryProperties.device && if (svmData->device == unifiedMemoryProperties.device &&
svmAllocData->allocationFlagsProperty.allFlags == unifiedMemoryProperties.allocationFlags.allFlags && svmData->allocationFlagsProperty.allFlags == unifiedMemoryProperties.allocationFlags.allFlags &&
svmAllocData->allocationFlagsProperty.allAllocFlags == unifiedMemoryProperties.allocationFlags.allAllocFlags && svmData->allocationFlagsProperty.allAllocFlags == unifiedMemoryProperties.allocationFlags.allAllocFlags &&
false == isInUse(svmAllocData)) { false == isInUse(svmData)) {
if (svmAllocData->device) { if (svmData->device) {
auto lock = svmAllocData->device->obtainAllocationsReuseLock(); auto lock = svmData->device->obtainAllocationsReuseLock();
svmAllocData->device->recordAllocationGetFromReuse(allocationIter->allocationSize); svmData->device->recordAllocationGetFromReuse(allocationIter->allocationSize);
} else { } else {
auto lock = memoryManager->obtainHostAllocationsReuseLock(); auto lock = memoryManager->obtainHostAllocationsReuseLock();
memoryManager->recordHostAllocationGetFromReuse(allocationIter->allocationSize); memoryManager->recordHostAllocationGetFromReuse(allocationIter->allocationSize);
@ -135,7 +136,7 @@ void SVMAllocsManager::SvmAllocationCache::trim() {
std::lock_guard<std::mutex> lock(this->mtx); std::lock_guard<std::mutex> lock(this->mtx);
for (auto &cachedAllocationInfo : this->allocations) { for (auto &cachedAllocationInfo : this->allocations) {
SvmAllocationData *svmData = svmAllocsManager->getSVMAlloc(cachedAllocationInfo.allocation); SvmAllocationData *svmData = svmAllocsManager->getSVMAlloc(cachedAllocationInfo.allocation);
DEBUG_BREAK_IF(nullptr == svmData); UNRECOVERABLE_IF(nullptr == svmData);
if (svmData->device) { if (svmData->device) {
auto lock = svmData->device->obtainAllocationsReuseLock(); auto lock = svmData->device->obtainAllocationsReuseLock();
svmData->device->recordAllocationGetFromReuse(cachedAllocationInfo.allocationSize); svmData->device->recordAllocationGetFromReuse(cachedAllocationInfo.allocationSize);
@ -148,6 +149,38 @@ void SVMAllocsManager::SvmAllocationCache::trim() {
this->allocations.clear(); this->allocations.clear();
} }
// Detaches this cache from the reuse cleaner (when both a memory manager and a
// cleaner are present) and then trims the cache.
void SVMAllocsManager::SvmAllocationCache::cleanup() {
    auto *usmReuseCleaner = this->memoryManager
                                ? this->memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner.get()
                                : nullptr;
    if (usmReuseCleaner) {
        // Unregister first so the cleaner no longer holds a pointer to this cache.
        usmReuseCleaner->unregisterSvmAllocationCache(this);
    }
    this->trim();
}
void SVMAllocsManager::SvmAllocationCache::trimOldAllocs(std::chrono::high_resolution_clock::time_point trimTimePoint) {
std::lock_guard<std::mutex> lock(this->mtx);
for (auto allocationIter = allocations.begin();
allocationIter != allocations.end();) {
if (allocationIter->saveTime > trimTimePoint) {
++allocationIter;
continue;
}
void *allocationPtr = allocationIter->allocation;
SvmAllocationData *svmData = svmAllocsManager->getSVMAlloc(allocationPtr);
UNRECOVERABLE_IF(nullptr == svmData);
if (svmData->device) {
auto lock = svmData->device->obtainAllocationsReuseLock();
svmData->device->recordAllocationGetFromReuse(allocationIter->allocationSize);
} else {
auto lock = memoryManager->obtainHostAllocationsReuseLock();
memoryManager->recordHostAllocationGetFromReuse(allocationIter->allocationSize);
}
svmAllocsManager->freeSVMAllocImpl(allocationIter->allocation, FreePolicyType::defer, svmData);
allocationIter = allocations.erase(allocationIter);
}
}
SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) { SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) {
if (allocations.size() == 0) { if (allocations.size() == 0) {
return nullptr; return nullptr;
@ -632,6 +665,11 @@ void SVMAllocsManager::freeSVMAllocDeferImpl() {
} }
} }
void SVMAllocsManager::cleanupUSMAllocCaches() {
this->usmDeviceAllocationsCache.cleanup();
this->usmHostAllocationsCache.cleanup();
}
void SVMAllocsManager::trimUSMDeviceAllocCache() { void SVMAllocsManager::trimUSMDeviceAllocCache() {
this->usmDeviceAllocationsCache.trim(); this->usmDeviceAllocationsCache.trim();
} }
@ -774,6 +812,9 @@ void SVMAllocsManager::initUsmDeviceAllocationsCache(Device &device) {
this->usmDeviceAllocationsCache.allocations.reserve(128u); this->usmDeviceAllocationsCache.allocations.reserve(128u);
this->usmDeviceAllocationsCache.svmAllocsManager = this; this->usmDeviceAllocationsCache.svmAllocsManager = this;
this->usmDeviceAllocationsCache.memoryManager = memoryManager; this->usmDeviceAllocationsCache.memoryManager = memoryManager;
if (auto usmReuseCleaner = device.getExecutionEnvironment()->unifiedMemoryReuseCleaner.get()) {
usmReuseCleaner->registerSvmAllocationCache(&this->usmDeviceAllocationsCache);
}
} }
} }
@ -786,9 +827,12 @@ void SVMAllocsManager::initUsmHostAllocationsCache() {
this->usmHostAllocationsCache.maxSize = static_cast<size_t>(fractionOfTotalMemoryForRecycling * totalSystemMemory); this->usmHostAllocationsCache.maxSize = static_cast<size_t>(fractionOfTotalMemoryForRecycling * totalSystemMemory);
if (this->usmHostAllocationsCache.maxSize > 0u) { if (this->usmHostAllocationsCache.maxSize > 0u) {
this->usmHostAllocationsCache.allocations.reserve(128u); this->usmHostAllocationsCache.allocations.reserve(128u);
this->usmHostAllocationsCache.svmAllocsManager = this;
this->usmHostAllocationsCache.memoryManager = memoryManager;
if (auto usmReuseCleaner = this->memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner.get()) {
usmReuseCleaner->registerSvmAllocationCache(&this->usmHostAllocationsCache);
}
} }
this->usmHostAllocationsCache.svmAllocsManager = this;
this->usmHostAllocationsCache.memoryManager = memoryManager;
} }
void SVMAllocsManager::initUsmAllocationsCaches(Device &device) { void SVMAllocsManager::initUsmAllocationsCaches(Device &device) {
@ -797,6 +841,7 @@ void SVMAllocsManager::initUsmAllocationsCaches(Device &device) {
this->usmDeviceAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableDeviceAllocationCache.get(); this->usmDeviceAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableDeviceAllocationCache.get();
} }
if (this->usmDeviceAllocationsCacheEnabled) { if (this->usmDeviceAllocationsCacheEnabled) {
device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner();
this->initUsmDeviceAllocationsCache(device); this->initUsmDeviceAllocationsCache(device);
} }
@ -805,6 +850,7 @@ void SVMAllocsManager::initUsmAllocationsCaches(Device &device) {
this->usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get(); this->usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get();
} }
if (this->usmHostAllocationsCacheEnabled) { if (this->usmHostAllocationsCacheEnabled) {
device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner();
this->initUsmHostAllocationsCache(); this->initUsmHostAllocationsCache();
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2019-2024 Intel Corporation * Copyright (C) 2019-2025 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -150,7 +150,10 @@ class SVMAllocsManager {
struct SvmCacheAllocationInfo { struct SvmCacheAllocationInfo {
size_t allocationSize; size_t allocationSize;
void *allocation; void *allocation;
SvmCacheAllocationInfo(size_t allocationSize, void *allocation) : allocationSize(allocationSize), allocation(allocation) {} std::chrono::high_resolution_clock::time_point saveTime;
SvmCacheAllocationInfo(size_t allocationSize, void *allocation) : allocationSize(allocationSize), allocation(allocation) {
saveTime = std::chrono::high_resolution_clock::now();
}
bool operator<(SvmCacheAllocationInfo const &other) const { bool operator<(SvmCacheAllocationInfo const &other) const {
return allocationSize < other.allocationSize; return allocationSize < other.allocationSize;
} }
@ -170,6 +173,8 @@ class SVMAllocsManager {
bool isInUse(SvmAllocationData *svmData); bool isInUse(SvmAllocationData *svmData);
void *get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties); void *get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties);
void trim(); void trim();
void trimOldAllocs(std::chrono::high_resolution_clock::time_point trimTimePoint);
void cleanup();
std::vector<SvmCacheAllocationInfo> allocations; std::vector<SvmCacheAllocationInfo> allocations;
std::mutex mtx; std::mutex mtx;
@ -215,6 +220,7 @@ class SVMAllocsManager {
MOCKABLE_VIRTUAL void freeSVMAllocDeferImpl(); MOCKABLE_VIRTUAL void freeSVMAllocDeferImpl();
MOCKABLE_VIRTUAL void freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData); MOCKABLE_VIRTUAL void freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData);
bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); } bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); }
void cleanupUSMAllocCaches();
void trimUSMDeviceAllocCache(); void trimUSMDeviceAllocCache();
void trimUSMHostAllocCache(); void trimUSMHostAllocCache();
void insertSVMAlloc(const SvmAllocationData &svmData); void insertSVMAlloc(const SvmAllocationData &svmData);

View File

@ -0,0 +1,73 @@
/*
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h"
#include "shared/source/helpers/sleep.h"
#include "shared/source/os_interface/os_thread.h"
#include <thread>
namespace NEO {
// No custom construction: all members use their in-class initializers.
UnifiedMemoryReuseCleaner::UnifiedMemoryReuseCleaner() = default;
// The worker thread must already be released (see stopThread) before the
// cleaner is destroyed; a still-present thread object is treated as fatal.
UnifiedMemoryReuseCleaner::~UnifiedMemoryReuseCleaner() {
    UNRECOVERABLE_IF(nullptr != unifiedMemoryReuseCleanerThread);
}
// Signals the worker loop to exit, then joins and releases the thread object
// if one was created.
void UnifiedMemoryReuseCleaner::stopThread() {
    keepCleaning.store(false);
    runCleaning.store(false);

    if (!unifiedMemoryReuseCleanerThread) {
        return;
    }
    unifiedMemoryReuseCleanerThread->join();
    unifiedMemoryReuseCleanerThread.reset();
}
// Thread entry point. `self` is the UnifiedMemoryReuseCleaner that owns the
// thread. Always returns nullptr.
//
// Phase 1: idle (sleeping sleepTime between checks) until runCleaning is set.
// Phase 2: every sleepTime, trim old entries from all registered caches.
// Both phases exit as soon as keepCleaning is observed to be false.
void *UnifiedMemoryReuseCleaner::cleanUnifiedMemoryReuse(void *self) {
    auto *cleaner = static_cast<UnifiedMemoryReuseCleaner *>(self);

    for (;;) {
        if (cleaner->runCleaning.load()) {
            break;
        }
        if (!cleaner->keepCleaning.load()) {
            return nullptr;
        }
        NEO::sleep(sleepTime);
    }

    while (cleaner->keepCleaning.load()) {
        NEO::sleep(sleepTime);
        cleaner->trimOldInCaches();
    }
    return nullptr;
}
// Adds `cache` to the list of caches trimmed by the worker and sets the
// runCleaning flag (via startCleaning) so the worker leaves its idle phase.
void UnifiedMemoryReuseCleaner::registerSvmAllocationCache(SvmAllocationCache *cache) {
    std::lock_guard<std::mutex> cachesLock(svmAllocationCachesMutex);
    svmAllocationCaches.push_back(cache);
    startCleaning();
}
// Removes `cache` from the list of caches trimmed by the worker.
// Unregistering a cache that is not in the list (never registered, or already
// unregistered) is a no-op: passing end() to vector::erase is undefined
// behaviour, so the lookup result is checked first.
void UnifiedMemoryReuseCleaner::unregisterSvmAllocationCache(SvmAllocationCache *cache) {
    std::lock_guard<std::mutex> lockSvmAllocationCaches(this->svmAllocationCachesMutex);
    const auto cacheIter = std::find(this->svmAllocationCaches.begin(), this->svmAllocationCaches.end(), cache);
    if (cacheIter != this->svmAllocationCaches.end()) {
        this->svmAllocationCaches.erase(cacheIter);
    }
}
void UnifiedMemoryReuseCleaner::trimOldInCaches() {
const std::chrono::high_resolution_clock::time_point trimTimePoint = std::chrono::high_resolution_clock::now() - maxHoldTime;
std::lock_guard<std::mutex> lockSvmAllocationCaches(this->svmAllocationCachesMutex);
for (auto svmAllocCache : this->svmAllocationCaches) {
svmAllocCache->trimOldAllocs(trimTimePoint);
}
}
void UnifiedMemoryReuseCleaner::startThread() {
this->unifiedMemoryReuseCleanerThread = Thread::createFunc(cleanUnifiedMemoryReuse, reinterpret_cast<void *>(this));
}
} // namespace NEO

View File

@ -0,0 +1,47 @@
/*
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/memory_manager/unified_memory_manager.h"
#include <atomic>
#include <chrono>
#include <memory>
#include <mutex>
#include <vector>
namespace NEO {
class Thread;
// Keeps a list of registered SVM allocation caches and exposes the entry
// point (cleanUnifiedMemoryReuse) that a worker thread runs to trim old
// entries from them.
class UnifiedMemoryReuseCleaner {
    using SvmAllocationCache = SVMAllocsManager::SvmAllocationCache;

  public:
    // Interval between worker wake-ups.
    static constexpr auto sleepTime = std::chrono::seconds(2u);
    // Age used to compute the trim cut-off; currently equal to sleepTime.
    static constexpr auto maxHoldTime = sleepTime;

    UnifiedMemoryReuseCleaner();
    virtual ~UnifiedMemoryReuseCleaner();

    MOCKABLE_VIRTUAL void startThread();
    void stopThread();

    // Default for whether the cleaner is created; currently always false.
    static bool isSupported() { return false; }

    void registerSvmAllocationCache(SvmAllocationCache *cache);
    void unregisterSvmAllocationCache(SvmAllocationCache *cache);

  protected:
    // Sets the flag the worker polls before it begins trimming.
    void startCleaning() { runCleaning.store(true); }
    // Thread entry point; `self` is the cleaner instance.
    static void *cleanUnifiedMemoryReuse(void *self);
    void trimOldInCaches();

    std::unique_ptr<Thread> unifiedMemoryReuseCleanerThread;

    std::vector<SvmAllocationCache *> svmAllocationCaches;
    // Guards svmAllocationCaches.
    std::mutex svmAllocationCachesMutex;

    // Set once a cache is registered; cleared by stopThread.
    std::atomic_bool runCleaning{false};
    // Cleared by stopThread to make the worker exit.
    std::atomic_bool keepCleaning{true};
};
} // namespace NEO

View File

@ -1,5 +1,5 @@
# #
# Copyright (C) 2020-2024 Intel Corporation # Copyright (C) 2020-2025 Intel Corporation
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# #
@ -95,6 +95,7 @@ set(NEO_CORE_tests_mocks
${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_csr.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_csr.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_container.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_container.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_packet.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_packet.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_usm_memory_reuse_cleaner.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_usm_memory_pool.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_usm_memory_pool.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_residency_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_residency_controller.h
${CMAKE_CURRENT_SOURCE_DIR}/ult_device_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ult_device_factory.cpp

View File

@ -0,0 +1,17 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h"
namespace NEO {
// Test double: exposes the protected cache list and trimOldInCaches, and
// replaces startThread with a no-op so no worker thread is created.
struct MockUnifiedMemoryReuseCleaner : public UnifiedMemoryReuseCleaner {
    using UnifiedMemoryReuseCleaner::svmAllocationCaches;
    using UnifiedMemoryReuseCleaner::trimOldInCaches;

    void startThread() override {}
};
} // namespace NEO

View File

@ -662,4 +662,5 @@ DirectSubmissionRelaxedOrderingCounterHeuristic = -1
DirectSubmissionRelaxedOrderingCounterHeuristicTreshold = -1 DirectSubmissionRelaxedOrderingCounterHeuristicTreshold = -1
ClearStandaloneInOrderTimestampAllocation = -1 ClearStandaloneInOrderTimestampAllocation = -1
PipelinedEuThreadArbitration = -1 PipelinedEuThreadArbitration = -1
ExperimentalUSMAllocationReuseCleaner = -1
# Please don't edit below this line # Please don't edit below this line

View File

@ -18,6 +18,7 @@
#include "shared/source/helpers/driver_model_type.h" #include "shared/source/helpers/driver_model_type.h"
#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/hw_info.h"
#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h"
#include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/driver_info.h"
#include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/os_interface.h"
@ -297,6 +298,30 @@ TEST(ExecutionEnvironment, givenEnableDirectSubmissionControllerSetZeroWhenIniti
EXPECT_EQ(controller, nullptr); EXPECT_EQ(controller, nullptr);
} }
TEST(ExecutionEnvironment, givenExperimentalUSMAllocationReuseCleanerSetWhenInitializeUnifiedMemoryReuseCleanerThenNotNull) {
    DebugManagerStateRestore restorer;
    debugManager.flags.ExperimentalUSMAllocationReuseCleaner.set(1);
    // Replace thread creation with a stub returning nullptr for the duration of the test.
    VariableBackup<decltype(NEO::Thread::createFunc)> funcBackup{&NEO::Thread::createFunc, [](void *(*func)(void *), void *arg) -> std::unique_ptr<Thread> { return nullptr; }};
    MockExecutionEnvironment executionEnvironment{};

    // First call creates the cleaner.
    executionEnvironment.initializeUnifiedMemoryReuseCleaner();
    auto *const firstCleaner = executionEnvironment.unifiedMemoryReuseCleaner.get();
    EXPECT_NE(firstCleaner, nullptr);

    // Second call must keep the same instance.
    executionEnvironment.initializeUnifiedMemoryReuseCleaner();
    EXPECT_EQ(firstCleaner, executionEnvironment.unifiedMemoryReuseCleaner.get());
}
TEST(ExecutionEnvironment, givenExperimentalUSMAllocationReuseCleanerSetZeroWhenInitializeUnifiedMemoryReuseCleanerThenNull) {
    DebugManagerStateRestore restorer;
    // Flag value 0 disables the cleaner.
    debugManager.flags.ExperimentalUSMAllocationReuseCleaner.set(0);
    MockExecutionEnvironment executionEnvironment{};

    executionEnvironment.initializeUnifiedMemoryReuseCleaner();

    auto *const cleaner = executionEnvironment.unifiedMemoryReuseCleaner.get();
    EXPECT_EQ(nullptr, cleaner);
}
TEST(ExecutionEnvironment, givenNeoCalEnabledWhenCreateExecutionEnvironmentThenSetDebugVariables) { TEST(ExecutionEnvironment, givenNeoCalEnabledWhenCreateExecutionEnvironmentThenSetDebugVariables) {
const std::unordered_map<std::string, int32_t> config = { const std::unordered_map<std::string, int32_t> config = {
{"UseKmdMigration", 0}, {"UseKmdMigration", 0},
@ -364,11 +389,12 @@ TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenInitializeMemoryManagerI
static_assert(sizeof(ExecutionEnvironment) == sizeof(std::unique_ptr<MemoryManager>) + static_assert(sizeof(ExecutionEnvironment) == sizeof(std::unique_ptr<MemoryManager>) +
sizeof(std::unique_ptr<DirectSubmissionController>) + sizeof(std::unique_ptr<DirectSubmissionController>) +
sizeof(std::unique_ptr<UnifiedMemoryReuseCleaner>) +
sizeof(std::unique_ptr<OsEnvironment>) + sizeof(std::unique_ptr<OsEnvironment>) +
sizeof(std::vector<std::unique_ptr<RootDeviceEnvironment>>) + sizeof(std::vector<std::unique_ptr<RootDeviceEnvironment>>) +
sizeof(std::unordered_map<uint32_t, std::tuple<uint32_t, uint32_t, uint32_t>>) + sizeof(std::unordered_map<uint32_t, std::tuple<uint32_t, uint32_t, uint32_t>>) +
sizeof(std::unordered_map<std::thread::id, std::string>) + sizeof(std::unordered_map<std::thread::id, std::string>) +
sizeof(std::mutex) + 2 * sizeof(std::mutex) +
2 * sizeof(bool) + 2 * sizeof(bool) +
sizeof(DeviceHierarchyMode) + sizeof(DeviceHierarchyMode) +
sizeof(DebuggingMode) + sizeof(DebuggingMode) +
@ -381,12 +407,15 @@ static_assert(sizeof(ExecutionEnvironment) == sizeof(std::unique_ptr<MemoryManag
TEST(ExecutionEnvironment, givenExecutionEnvironmentWithVariousMembersWhenItIsDestroyedThenDeleteSequenceIsSpecified) { TEST(ExecutionEnvironment, givenExecutionEnvironmentWithVariousMembersWhenItIsDestroyedThenDeleteSequenceIsSpecified) {
uint32_t destructorId = 0u; uint32_t destructorId = 0u;
struct MemoryMangerMock : public DestructorCounted<MockMemoryManager, 7> { struct MemoryMangerMock : public DestructorCounted<MockMemoryManager, 8> {
MemoryMangerMock(uint32_t &destructorId, ExecutionEnvironment &executionEnvironment) : DestructorCounted(destructorId, executionEnvironment) { MemoryMangerMock(uint32_t &destructorId, ExecutionEnvironment &executionEnvironment) : DestructorCounted(destructorId, executionEnvironment) {
callBaseAllocateGraphicsMemoryForNonSvmHostPtr = false; callBaseAllocateGraphicsMemoryForNonSvmHostPtr = false;
callBasePopulateOsHandles = false; callBasePopulateOsHandles = false;
} }
}; };
struct UnifiedMemoryReuseCleanerMock : public DestructorCounted<UnifiedMemoryReuseCleaner, 7> {
UnifiedMemoryReuseCleanerMock(uint32_t &destructorId) : DestructorCounted(destructorId) {}
};
struct DirectSubmissionControllerMock : public DestructorCounted<DirectSubmissionController, 6> { struct DirectSubmissionControllerMock : public DestructorCounted<DirectSubmissionController, 6> {
DirectSubmissionControllerMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} DirectSubmissionControllerMock(uint32_t &destructorId) : DestructorCounted(destructorId) {}
}; };
@ -418,9 +447,10 @@ TEST(ExecutionEnvironment, givenExecutionEnvironmentWithVariousMembersWhenItIsDe
executionEnvironment->rootDeviceEnvironments[0]->builtins = std::make_unique<BuiltinsMock>(destructorId); executionEnvironment->rootDeviceEnvironments[0]->builtins = std::make_unique<BuiltinsMock>(destructorId);
executionEnvironment->rootDeviceEnvironments[0]->compilerInterface = std::make_unique<CompilerInterfaceMock>(destructorId); executionEnvironment->rootDeviceEnvironments[0]->compilerInterface = std::make_unique<CompilerInterfaceMock>(destructorId);
executionEnvironment->directSubmissionController = std::make_unique<DirectSubmissionControllerMock>(destructorId); executionEnvironment->directSubmissionController = std::make_unique<DirectSubmissionControllerMock>(destructorId);
executionEnvironment->unifiedMemoryReuseCleaner = std::make_unique<UnifiedMemoryReuseCleanerMock>(destructorId);
executionEnvironment.reset(nullptr); executionEnvironment.reset(nullptr);
EXPECT_EQ(8u, destructorId); EXPECT_EQ(9u, destructorId);
} }
TEST(ExecutionEnvironment, givenMultipleRootDevicesWhenTheyAreCreatedThenReuseMemoryManager) { TEST(ExecutionEnvironment, givenMultipleRootDevicesWhenTheyAreCreatedThenReuseMemoryManager) {

View File

@ -6,6 +6,7 @@
*/ */
#include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/api_specific_config.h"
#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/raii_product_helper.h" #include "shared/test/common/helpers/raii_product_helper.h"
#include "shared/test/common/mocks/mock_ail_configuration.h" #include "shared/test/common/mocks/mock_ail_configuration.h"
@ -14,6 +15,7 @@
#include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/mock_product_helper.h" #include "shared/test/common/mocks/mock_product_helper.h"
#include "shared/test/common/mocks/mock_svm_manager.h" #include "shared/test/common/mocks/mock_svm_manager.h"
#include "shared/test/common/mocks/mock_usm_memory_reuse_cleaner.h"
#include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h" #include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test.h"
@ -248,7 +250,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingDevic
} }
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size()); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size());
svmManager->trimUSMDeviceAllocCache(); svmManager->cleanupUSMAllocCaches();
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u);
} }
@ -451,7 +453,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAlloc
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size()); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), testDataset.size());
} }
svmManager->trimUSMDeviceAllocCache(); svmManager->cleanupUSMAllocCaches();
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u);
} }
@ -491,7 +493,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAlloc
svmManager->freeSVMAlloc(notReusedDueToMemoryWastage); svmManager->freeSVMAlloc(notReusedDueToMemoryWastage);
svmManager->freeSVMAlloc(reused); svmManager->freeSVMAlloc(reused);
svmManager->trimUSMDeviceAllocCache(); svmManager->cleanupUSMAllocCaches();
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u);
} }
@ -503,9 +505,9 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationOverSizeLimitWhenAllocatingA
debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1);
auto device = deviceFactory->rootDevices[0]; auto device = deviceFactory->rootDevices[0];
auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false); auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
device->maxAllocationsSavedForReuseSize = 1 * MemoryConstants::gigaByte;
svmManager->initUsmAllocationsCaches(*device); svmManager->initUsmAllocationsCaches(*device);
ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);
svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;
const auto notAcceptedAllocSize = SVMAllocsManager::SvmAllocationCache::maxServicedSize + 1; const auto notAcceptedAllocSize = SVMAllocsManager::SvmAllocationCache::maxServicedSize + 1;
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
@ -580,7 +582,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfter
svmManager->freeSVMAlloc(thirdAllocation); svmManager->freeSVMAlloc(thirdAllocation);
svmManager->freeSVMAlloc(allocationLargerThanInCache); svmManager->freeSVMAlloc(allocationLargerThanInCache);
svmManager->trimUSMDeviceAllocCache(); svmManager->cleanupUSMAllocCaches();
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u);
} }
@ -712,7 +714,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCac
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u);
svmManager->freeSVMAlloc(ptr); svmManager->freeSVMAlloc(ptr);
svmManager->trimUSMDeviceAllocCache(); svmManager->cleanupUSMAllocCaches();
ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u);
} }
@ -745,7 +747,7 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationWithIsInternalAllocationSetW
svmManager->freeSVMAlloc(testedAllocation); svmManager->freeSVMAlloc(testedAllocation);
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 1u); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 1u);
svmManager->trimUSMDeviceAllocCache(); svmManager->cleanupUSMAllocCaches();
} }
TEST_F(SvmDeviceAllocationCacheTest, givenAllocationInUsageWhenAllocatingAfterFreeThenDoNotReuseAllocation) { TEST_F(SvmDeviceAllocationCacheTest, givenAllocationInUsageWhenAllocatingAfterFreeThenDoNotReuseAllocation) {
@ -777,7 +779,91 @@ TEST_F(SvmDeviceAllocationCacheTest, givenAllocationInUsageWhenAllocatingAfterFr
svmManager->freeSVMAlloc(testedAllocation); svmManager->freeSVMAlloc(testedAllocation);
EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 2u); EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 2u);
svmManager->trimUSMDeviceAllocCache(); svmManager->cleanupUSMAllocCaches();
}
// Checks the interaction between the device USM allocation cache and the
// unified memory reuse cleaner: the cache is expected to be registered in the
// cleaner on init, trimOldInCaches() is expected to drop only the entry whose
// saveTime is at least maxHoldTime in the past, and cleanupUSMAllocCaches() is
// expected to unregister the cache again.
TEST_F(SvmDeviceAllocationCacheTest, givenUsmReuseCleanerWhenTrimOldInCachesCalledThenOldAllocationsAreRemoved) {
    std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 1));
    RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex};
    std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
    DebugManagerStateRestore restore;
    debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1);
    // Host cache is switched off so that exactly one cache is expected in the cleaner below.
    debugManager.flags.ExperimentalEnableHostAllocationCache.set(0);
    auto device = deviceFactory->rootDevices[0];
    auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);

    // Keep the typed pointer before handing ownership to the execution environment;
    // this avoids casting the base-class pointer back to the mock type afterwards.
    auto mockUnifiedMemoryReuseCleaner = new MockUnifiedMemoryReuseCleaner;
    device->executionEnvironment->unifiedMemoryReuseCleaner.reset(mockUnifiedMemoryReuseCleaner);
    EXPECT_EQ(0u, mockUnifiedMemoryReuseCleaner->svmAllocationCaches.size());

    device->maxAllocationsSavedForReuseSize = 1 * MemoryConstants::gigaByte;
    svmManager->initUsmAllocationsCaches(*device);
    ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);
    // Fatal check: the next line indexes element 0.
    ASSERT_EQ(1u, mockUnifiedMemoryReuseCleaner->svmAllocationCaches.size());
    EXPECT_EQ(&svmManager->usmDeviceAllocationsCache, mockUnifiedMemoryReuseCleaner->svmAllocationCaches[0]);

    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
    unifiedMemoryProperties.device = device;
    auto allocation = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties);
    auto allocation2 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties);
    EXPECT_NE(allocation, nullptr);
    EXPECT_NE(allocation2, nullptr);
    svmManager->freeSVMAlloc(allocation);
    svmManager->freeSVMAlloc(allocation2);
    // Fatal check: entries 0 and 1 are accessed directly below.
    ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 2u);

    const auto baseTimePoint = std::chrono::high_resolution_clock::now();
    // Already maxHoldTime old when the cleaner samples the clock, so it must be trimmed.
    const auto oldTimePoint = baseTimePoint - UnifiedMemoryReuseCleaner::maxHoldTime;
    // Placed a day ahead so the entry cannot become "old" while the test is running.
    const auto notTrimmedTimePoint = baseTimePoint + std::chrono::hours(24);
    svmManager->usmDeviceAllocationsCache.allocations[0].saveTime = oldTimePoint;
    svmManager->usmDeviceAllocationsCache.allocations[1].saveTime = notTrimmedTimePoint;

    mockUnifiedMemoryReuseCleaner->trimOldInCaches();
    // Fatal check: the surviving entry is read by index on the next line.
    ASSERT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size());
    EXPECT_EQ(notTrimmedTimePoint, svmManager->usmDeviceAllocationsCache.allocations[0].saveTime);

    svmManager->cleanupUSMAllocCaches();
    EXPECT_EQ(0u, mockUnifiedMemoryReuseCleaner->svmAllocationCaches.size());
}
// Checks trimOldAllocs() on the device USM allocation cache: with three cached
// entries stamped base, base+1us and base+2us, trimming at base+1us is expected
// to leave only the newest entry in the cache.
TEST_F(SvmDeviceAllocationCacheTest, givenAllocationsInReuseWhenTrimOldAllocsCalledThenTrimAllocationsSavedBeforeTimePoint) {
    std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 1));
    RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex};
    std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
    DebugManagerStateRestore restore;
    debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1);
    auto device = deviceFactory->rootDevices[0];
    auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
    device->maxAllocationsSavedForReuseSize = 1 * MemoryConstants::gigaByte;
    svmManager->initUsmAllocationsCaches(*device);
    ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled);

    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
    unifiedMemoryProperties.device = device;
    auto allocation = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties);
    auto allocation2 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties);
    auto allocation3 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties);
    EXPECT_NE(allocation, nullptr);
    EXPECT_NE(allocation2, nullptr);
    EXPECT_NE(allocation3, nullptr);
    svmManager->freeSVMAlloc(allocation);
    svmManager->freeSVMAlloc(allocation2);
    svmManager->freeSVMAlloc(allocation3);
    // Fatal check: entries 0..2 are accessed directly below.
    ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 3u);

    // Overwrite the recorded save times so the outcome does not depend on how
    // quickly the frees above were executed.
    const auto baseTimePoint = std::chrono::high_resolution_clock::now();
    const auto timeDiff = std::chrono::microseconds(1);
    svmManager->usmDeviceAllocationsCache.allocations[0].saveTime = baseTimePoint;
    svmManager->usmDeviceAllocationsCache.allocations[1].saveTime = baseTimePoint + timeDiff;
    svmManager->usmDeviceAllocationsCache.allocations[2].saveTime = baseTimePoint + timeDiff * 2;

    // The entry stamped exactly at the trim point is expected to be removed as well.
    svmManager->usmDeviceAllocationsCache.trimOldAllocs(baseTimePoint + timeDiff);
    // Fatal check: the surviving entry is read by index on the next line.
    ASSERT_EQ(1u, svmManager->usmDeviceAllocationsCache.allocations.size());
    EXPECT_EQ(baseTimePoint + timeDiff * 2, svmManager->usmDeviceAllocationsCache.allocations[0].saveTime);

    svmManager->cleanupUSMAllocCaches();
}
using SvmHostAllocationCacheTest = Test<SvmAllocationCacheTestFixture>; using SvmHostAllocationCacheTest = Test<SvmAllocationCacheTestFixture>;
@ -877,7 +963,7 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheEnabledWhenFreeingHostAll
} }
EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size()); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size());
svmManager->trimUSMHostAllocCache(); svmManager->cleanupUSMAllocCaches();
EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u);
} }
@ -1091,7 +1177,7 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocat
EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size()); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), testDataset.size());
} }
svmManager->trimUSMHostAllocCache(); svmManager->cleanupUSMAllocCaches();
EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u);
} }
@ -1130,7 +1216,7 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationsWithDifferentSizesWhenAllocat
svmManager->freeSVMAlloc(notReusedDueToMemoryWastage); svmManager->freeSVMAlloc(notReusedDueToMemoryWastage);
svmManager->freeSVMAlloc(reused); svmManager->freeSVMAlloc(reused);
svmManager->trimUSMHostAllocCache(); svmManager->cleanupUSMAllocCaches();
EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u);
} }
@ -1217,7 +1303,7 @@ TEST_F(SvmHostAllocationCacheTest, givenMultipleAllocationsWhenAllocatingAfterFr
svmManager->freeSVMAlloc(thirdAllocation); svmManager->freeSVMAlloc(thirdAllocation);
svmManager->freeSVMAlloc(allocationLargerThanInCache); svmManager->freeSVMAlloc(allocationLargerThanInCache);
svmManager->trimUSMHostAllocCache(); svmManager->cleanupUSMAllocCaches();
EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u);
} }
@ -1338,7 +1424,7 @@ TEST_F(SvmHostAllocationCacheTest, givenHostOutOfMemoryWhenAllocatingThenCacheIs
EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u);
svmManager->freeSVMAlloc(ptr); svmManager->freeSVMAlloc(ptr);
svmManager->trimUSMHostAllocCache(); svmManager->cleanupUSMAllocCaches();
ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u); ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 0u);
} }
@ -1370,6 +1456,45 @@ TEST_F(SvmHostAllocationCacheTest, givenAllocationInUsageWhenAllocatingAfterFree
svmManager->freeSVMAlloc(testedAllocation); svmManager->freeSVMAlloc(testedAllocation);
EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 2u); EXPECT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 2u);
svmManager->trimUSMHostAllocCache(); svmManager->cleanupUSMAllocCaches();
}
// Checks trimOldAllocs() on the host USM allocation cache: with three cached
// entries stamped base, base+1us and base+2us, trimming at base+1us is expected
// to leave only the newest entry in the cache.
TEST_F(SvmHostAllocationCacheTest, givenAllocationsInReuseWhenTrimOldAllocsCalledThenTrimAllocationsSavedBeforeTimePoint) {
    std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 1));
    RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex};
    std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
    DebugManagerStateRestore restore;
    debugManager.flags.ExperimentalEnableHostAllocationCache.set(1);
    auto device = deviceFactory->rootDevices[0];
    auto svmManager = std::make_unique<MockSVMAllocsManager>(device->getMemoryManager(), false);
    svmManager->initUsmAllocationsCaches(*device);
    ASSERT_TRUE(svmManager->usmHostAllocationsCacheEnabled);
    // Raise the cache capacity so the three frees below can all be kept for reuse.
    svmManager->usmHostAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte;

    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, 1, rootDeviceIndices, deviceBitfields);
    auto allocation = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties);
    auto allocation2 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties);
    auto allocation3 = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties);
    EXPECT_NE(allocation, nullptr);
    EXPECT_NE(allocation2, nullptr);
    EXPECT_NE(allocation3, nullptr);
    svmManager->freeSVMAlloc(allocation);
    svmManager->freeSVMAlloc(allocation2);
    svmManager->freeSVMAlloc(allocation3);
    // Fatal check: entries 0..2 are accessed directly below.
    ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 3u);

    // Overwrite the recorded save times so the outcome does not depend on how
    // quickly the frees above were executed.
    const auto baseTimePoint = std::chrono::high_resolution_clock::now();
    const auto timeDiff = std::chrono::microseconds(1);
    svmManager->usmHostAllocationsCache.allocations[0].saveTime = baseTimePoint;
    svmManager->usmHostAllocationsCache.allocations[1].saveTime = baseTimePoint + timeDiff;
    svmManager->usmHostAllocationsCache.allocations[2].saveTime = baseTimePoint + timeDiff * 2;

    // The entry stamped exactly at the trim point is expected to be removed as well.
    svmManager->usmHostAllocationsCache.trimOldAllocs(baseTimePoint + timeDiff);
    // Fatal check: the surviving entry is read by index on the next line.
    ASSERT_EQ(svmManager->usmHostAllocationsCache.allocations.size(), 1u);
    EXPECT_EQ(baseTimePoint + timeDiff * 2, svmManager->usmHostAllocationsCache.allocations[0].saveTime);

    svmManager->cleanupUSMAllocCaches();
}
} // namespace NEO } // namespace NEO