performance: debug flag for usm host alloc recycle

Set ExperimentalEnableHostAllocationCache=1 to recycle host USM
allocations (0 forces the cache off; -1 keeps the API-specific setting).

Related-To: GSD-7497

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2024-01-31 13:07:07 +00:00
committed by Compute-Runtime-Automation
parent 2237961deb
commit 2cad595a0d
10 changed files with 393 additions and 14 deletions

View File

@@ -173,6 +173,14 @@ SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager, bool multiOsCon
if (this->usmDeviceAllocationsCacheEnabled) {
this->initUsmDeviceAllocationsCache();
}
this->usmHostAllocationsCacheEnabled = NEO::ApiSpecificConfig::isHostAllocationCacheEnabled();
if (debugManager.flags.ExperimentalEnableHostAllocationCache.get() != -1) {
this->usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get();
}
if (this->usmHostAllocationsCacheEnabled) {
this->initUsmHostAllocationsCache();
}
}
// Destructor is defaulted here, out of line; no custom teardown logic is written for it.
SVMAllocsManager::~SVMAllocsManager() = default;
@@ -221,13 +229,26 @@ void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size,
unifiedMemoryProperties.flags.isUSMDeviceAllocation = false;
unifiedMemoryProperties.cacheRegion = MemoryPropertiesHelper::getCacheRegion(memoryProperties.allocationFlags);
if (this->usmHostAllocationsCacheEnabled) {
void *allocationFromCache = this->usmHostAllocationsCache.get(size, memoryProperties, this);
if (allocationFromCache) {
return allocationFromCache;
}
}
auto maxRootDeviceIndex = *std::max_element(rootDeviceIndicesVector.begin(), rootDeviceIndicesVector.end(), std::less<uint32_t const>());
SvmAllocationData allocData(maxRootDeviceIndex);
void *externalHostPointer = reinterpret_cast<void *>(memoryProperties.allocationFlags.hostptr);
void *usmPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, unifiedMemoryProperties, allocData.gpuAllocations, externalHostPointer);
if (!usmPtr) {
return nullptr;
if (this->usmHostAllocationsCacheEnabled) {
this->trimUSMHostAllocCache();
usmPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, unifiedMemoryProperties, allocData.gpuAllocations, externalHostPointer);
}
if (!usmPtr) {
return nullptr;
}
}
allocData.cpuAllocation = nullptr;
@@ -439,6 +460,11 @@ bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) {
this->usmDeviceAllocationsCache.insert(svmData->size, ptr);
return true;
}
if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType &&
this->usmHostAllocationsCacheEnabled) {
this->usmHostAllocationsCache.insert(svmData->size, ptr);
return true;
}
if (blocking) {
this->freeSVMAllocImpl(ptr, FreePolicyType::blocking, svmData);
} else {
@@ -462,6 +488,11 @@ bool SVMAllocsManager::freeSVMAllocDefer(void *ptr) {
this->usmDeviceAllocationsCache.insert(svmData->size, ptr);
return true;
}
if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType &&
this->usmHostAllocationsCacheEnabled) {
this->usmHostAllocationsCache.insert(svmData->size, ptr);
return true;
}
this->freeSVMAllocImpl(ptr, FreePolicyType::defer, svmData);
return true;
}
@@ -531,6 +562,10 @@ void SVMAllocsManager::trimUSMDeviceAllocCache() {
this->usmDeviceAllocationsCache.trim(this);
}
void SVMAllocsManager::trimUSMHostAllocCache() {
this->usmHostAllocationsCache.trim(this);
}
void *SVMAllocsManager::createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties,
const RootDeviceIndicesContainer &rootDeviceIndices,
const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields) {
@@ -653,6 +688,10 @@ void SVMAllocsManager::initUsmDeviceAllocationsCache() {
this->usmDeviceAllocationsCache.allocations.reserve(128u);
}
void SVMAllocsManager::initUsmHostAllocationsCache() {
this->usmHostAllocationsCache.allocations.reserve(128u);
}
void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData) {
auto graphicsAllocations = svmData->gpuAllocations.getGraphicsAllocations();
GraphicsAllocation *cpuAllocation = svmData->cpuAllocation;

View File

@@ -205,6 +205,7 @@ class SVMAllocsManager {
MOCKABLE_VIRTUAL void freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData);
bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); }
void trimUSMDeviceAllocCache();
void trimUSMHostAllocCache();
void insertSVMAlloc(const SvmAllocationData &svmData);
void removeSVMAlloc(const SvmAllocationData &svmData);
size_t getNumAllocs() const { return svmAllocs.getNumAllocs(); }
@@ -242,6 +243,7 @@ class SVMAllocsManager {
void freeZeroCopySvmAllocation(SvmAllocationData *svmData);
void initUsmDeviceAllocationsCache();
void initUsmHostAllocationsCache();
void freeSVMData(SvmAllocationData *svmData);
SortedVectorBasedAllocationTracker svmAllocs;
@@ -252,6 +254,8 @@ class SVMAllocsManager {
std::mutex mtxForIndirectAccess;
bool multiOsContextSupport;
SvmAllocationCache usmDeviceAllocationsCache;
SvmAllocationCache usmHostAllocationsCache;
bool usmDeviceAllocationsCacheEnabled = false;
bool usmHostAllocationsCacheEnabled = false;
};
} // namespace NEO