fix(ocl): track buffer pool count per device

Track the number of buffer pools created per device. Do not allocate
additional pools once the limit is reached. New contexts will have pooling
disabled if the device has already reached its limit.

Related-To: NEO-13461

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2024-12-16 10:19:12 +00:00
committed by Compute-Runtime-Automation
parent b6fc2b5861
commit e61d04a881
12 changed files with 140 additions and 23 deletions

View File

@@ -35,6 +35,7 @@ ClDevice::ClDevice(Device &device, ClDevice &rootClDevice, Platform *platform) :
driverInfo.reset(DriverInfo::create(&device.getHardwareInfo(), osInterface));
initGTPinHelper();
initializeCaps();
initializeMaxPoolCount();
OpenClCFeaturesContainer emptyOpenClCFeatures;
compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(deviceInfo.deviceExtensions, emptyOpenClCFeatures);

View File

@@ -147,6 +147,7 @@ class ClDevice : public BaseObject<_cl_device_id> {
void initializeOpenclCAllVersions();
void initializeILsWithVersion();
void initializeOsSpecificCaps();
void initializeMaxPoolCount();
void initGTPinHelper();
void setupFp64Flags();
const std::string getClDeviceName() const;

View File

@@ -18,6 +18,7 @@
#include "shared/source/os_interface/driver_info.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/gtpin/gtpin_gfx_core_helper.h"
#include "opencl/source/helpers/cl_gfx_core_helper.h"
#include "opencl/source/sharings/sharing_factory.h"
@@ -472,6 +473,14 @@ void ClDevice::initializeILsWithVersion() {
}
}
void ClDevice::initializeMaxPoolCount() {
auto &device = getDevice();
const auto bitfield = device.getDeviceBitfield();
const auto deviceMemory = device.getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong()));
const auto maxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(deviceMemory, 2);
device.updateMaxPoolCount(maxPoolCount);
}
const std::string ClDevice::getClDeviceName() const {
return this->getDevice().getDeviceInfo().name;
}

View File

@@ -62,6 +62,8 @@ Context::~Context() {
}
if (smallBufferPoolAllocator.isAggregatedSmallBuffersEnabled(this)) {
auto &device = this->getDevice(0)->getDevice();
device.recordPoolsFreed(smallBufferPoolAllocator.getPoolsCount());
smallBufferPoolAllocator.releasePools();
}
@@ -628,11 +630,10 @@ Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties,
// Binds this allocator to `context` and creates the initial buffer pool.
//
// The pool is added only when the context's first device accepts the request
// made through requestPoolCreate(1u). If the request is refused, no pool is
// created here and the allocator starts out empty.
// NOTE(review): requestPoolCreate() presumably enforces the per-device pool
// limit and accounts for the granted pool - confirm against Device.
void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
    this->context = context;

    auto &device = context->getDevice(0)->getDevice();
    if (device.requestPoolCreate(1u)) {
        this->addNewBufferPool(Context::BufferPool{this->context});
    }
}
Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryProperties &memoryProperties,
@@ -671,7 +672,8 @@ Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryPropert
return bufferFromPool;
}
if (this->bufferPools.size() < this->maxPoolCount) {
auto &device = context->getDevice(0)->getDevice();
if (device.requestPoolCreate(1u)) {
this->addNewBufferPool(BufferPool{this->context});
return this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
}

View File

@@ -79,6 +79,10 @@ class Context : public BaseObject<_cl_context> {
void *hostPtr,
cl_int &errcodeRet);
bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const;
// Returns how many pools of aggregatedSmallBuffersPoolSize bytes fit into
// `percentOfMemory` percent of `totalMemory`, truncated to a whole number.
// The result is never lower than one.
static inline uint32_t calculateMaxPoolCount(uint64_t totalMemory, size_t percentOfMemory) {
    const auto memoryFraction = percentOfMemory / 100.0;
    const auto poolCount = static_cast<uint32_t>(totalMemory * memoryFraction / BufferPoolAllocator::aggregatedSmallBuffersPoolSize);
    if (poolCount == 0u) {
        return 1u;
    }
    return poolCount;
}
protected:
Buffer *allocateFromPools(const MemoryProperties &memoryProperties,
@@ -87,13 +91,7 @@ class Context : public BaseObject<_cl_context> {
size_t requestedSize,
void *hostPtr,
cl_int &errcodeRet);
// Computes the number of aggregatedSmallBuffersPoolSize-sized pools that fit
// into `percentOfMemory` percent of `totalMemory` (fractional part dropped).
// A computed value of zero is reported as one.
static inline size_t calculateMaxPoolCount(uint64_t totalMemory, size_t percentOfMemory) {
    const double budgetShare = percentOfMemory / 100.0;
    const auto fittingPools = static_cast<size_t>(totalMemory * budgetShare / BufferPoolAllocator::aggregatedSmallBuffersPoolSize);
    if (fittingPools != 0u) {
        return fittingPools;
    }
    return 1u;
}
Context *context{nullptr};
size_t maxPoolCount{1u};
};
static const cl_ulong objectMagic = 0xA4234321DC002130LL;