performance(ocl): add usm allocation pooling flags

Add the EnableDeviceUsmAllocationPool and EnableHostUsmAllocationPool flags,
which enable pooling of device and host USM allocations respectively.

The pool size is set to the flag value in megabytes (flag value * 1 MB).

Allocation size threshold to be pooled is 1MB.

Pools are created per context.

Related-To: NEO-9700

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-12-22 14:26:30 +00:00
committed by Compute-Runtime-Automation
parent 047438850d
commit 2fe3804cc2
25 changed files with 562 additions and 35 deletions

View File

@@ -61,6 +61,8 @@ Context::~Context() {
smallBufferPoolAllocator.releaseSmallBufferPool();
}
cleanupUsmAllocationPools();
delete[] properties;
for (auto rootDeviceIndex = 0u; rootDeviceIndex < specialQueues.size(); rootDeviceIndex++) {
@@ -487,6 +489,50 @@ bool Context::isSingleDeviceContext() {
return getNumDevices() == 1 && devices[0]->getNumGenericSubDevices() == 0;
}
void Context::initializeUsmAllocationPools() {
auto svmMemoryManager = getSVMAllocsManager();
if (!(svmMemoryManager && this->isSingleDeviceContext())) {
return;
}
bool enabled = false;
size_t poolSize = 2 * MemoryConstants::megaByte;
if (debugManager.flags.EnableDeviceUsmAllocationPool.get() != -1) {
enabled = debugManager.flags.EnableDeviceUsmAllocationPool.get() > 0;
poolSize = debugManager.flags.EnableDeviceUsmAllocationPool.get() * MemoryConstants::megaByte;
}
if (enabled) {
auto subDeviceBitfields = getDeviceBitfields();
auto &neoDevice = devices[0]->getDevice();
subDeviceBitfields[neoDevice.getRootDeviceIndex()] = neoDevice.getDeviceBitfield();
SVMAllocsManager::UnifiedMemoryProperties memoryProperties(InternalMemoryType::deviceUnifiedMemory, MemoryConstants::pageSize2M,
getRootDeviceIndices(), subDeviceBitfields);
memoryProperties.device = &neoDevice;
usmDeviceMemAllocPool.initialize(svmMemoryManager, memoryProperties, poolSize);
}
enabled = false;
poolSize = 2 * MemoryConstants::megaByte;
if (debugManager.flags.EnableHostUsmAllocationPool.get() != -1) {
enabled = debugManager.flags.EnableHostUsmAllocationPool.get() > 0;
poolSize = debugManager.flags.EnableDeviceUsmAllocationPool.get() * MemoryConstants::megaByte;
}
if (enabled) {
auto subDeviceBitfields = getDeviceBitfields();
auto &neoDevice = devices[0]->getDevice();
subDeviceBitfields[neoDevice.getRootDeviceIndex()] = neoDevice.getDeviceBitfield();
SVMAllocsManager::UnifiedMemoryProperties memoryProperties(InternalMemoryType::hostUnifiedMemory, MemoryConstants::pageSize2M,
getRootDeviceIndices(), subDeviceBitfields);
memoryProperties.device = &neoDevice;
usmHostMemAllocPool.initialize(svmMemoryManager, memoryProperties, poolSize);
}
}
void Context::cleanupUsmAllocationPools() {
usmDeviceMemAllocPool.cleanup();
usmHostMemAllocPool.cleanup();
}
bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *context) const {
bool isSupportedForSingleDeviceContexts = false;
bool isSupportedForAllContexts = false;