Mirror of https://github.com/intel/compute-runtime.git
Optimize small buffers allocator
- Do not wait for GPU completion on pool exhaustion if allocations are in use; allocate a new pool instead
- Reuse an existing pool if its allocations are not in use

Related-To: NEO-7769
Signed-off-by: Igor Venevtsev <igor.venevtsev@intel.com>
Committed by: Compute-Runtime-Automation
Parent: e645f58b65
Commit: f57ff2913c
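The change replaces the single small-buffer pool with a list of pools. When the existing pools are exhausted, the allocator first tries to drain a pool whose allocations are no longer in use on the GPU and reuse it; only if no pool can be drained does it append a new one, so the host never blocks waiting for GPU completion. Below is a minimal, self-contained C++ sketch of that drain-or-grow strategy; the names (SimplePool, PoolSet, busyOnGpu, the 2 MB pool size) are illustrative stand-ins only and are not the driver's actual classes, which appear in the diff that follows.

    // Hypothetical, simplified model of the drain-or-grow pooling strategy
    // described in the commit message. Not the NEO implementation.
    #include <cstddef>
    #include <optional>
    #include <vector>

    struct SimplePool {
        size_t capacity;
        size_t used = 0;        // bump-pointer stand-in for the chunk allocator
        bool busyOnGpu = false; // stand-in for memoryManager->allocInUse(...)

        std::optional<size_t> allocate(size_t size) {
            if (used + size > capacity) {
                return std::nullopt; // pool exhausted
            }
            size_t offset = used;
            used += size;
            return offset;
        }

        // Reuse the pool only if the GPU no longer references its allocations.
        bool drain() {
            if (busyOnGpu) {
                return false;
            }
            used = 0;
            return true;
        }
    };

    struct PoolSet {
        std::vector<SimplePool> pools;

        std::optional<size_t> allocate(size_t size) {
            // 1. Try the existing pools first.
            if (auto offset = allocateFromPools(size)) {
                return offset;
            }
            // 2. All pools exhausted: reuse an idle pool if possible,
            //    otherwise add a new one - never wait for GPU completion.
            drainOrAddNewPool();
            return allocateFromPools(size);
        }

      private:
        std::optional<size_t> allocateFromPools(size_t size) {
            for (auto &pool : pools) {
                if (auto offset = pool.allocate(size)) {
                    return offset;
                }
            }
            return std::nullopt;
        }

        void drainOrAddNewPool() {
            for (auto &pool : pools) {
                if (pool.drain()) {
                    return;
                }
            }
            pools.push_back(SimplePool{2 * 1024 * 1024}); // assumed 2 MB pool size
        }
    };

    int main() {
        PoolSet allocator;
        allocator.allocate(4 * 1024); // creates the first pool and sub-allocates from it
        return 0;
    }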
@@ -51,9 +51,11 @@ Context::Context(

Context::~Context() {
    gtpinNotifyContextDestroy((cl_context)this);

    if (multiRootDeviceTimestampPacketAllocator.get() != nullptr) {
        multiRootDeviceTimestampPacketAllocator.reset();
    }

    if (smallBufferPoolAllocator.isAggregatedSmallBuffersEnabled(this)) {
        smallBufferPoolAllocator.releaseSmallBufferPool();
    }
@@ -502,26 +504,89 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont
           (isSupportedForSingleDeviceContexts && context->isSingleDeviceContext());
}

void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
Context::BufferPoolAllocator::BufferPool::BufferPool(Context *context) : memoryManager(context->memoryManager) {
    static constexpr cl_mem_flags flags{};
    [[maybe_unused]] cl_int errcodeRet{};
    Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
    bufferCreateArgs.doNotProvidePerformanceHints = true;
    bufferCreateArgs.makeAllocationLockable = true;
    this->mainStorage = Buffer::create(context,
                                       flags,
                                       BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
                                       nullptr,
                                       bufferCreateArgs,
                                       errcodeRet);
    if (this->mainStorage) {
        this->chunkAllocator.reset(new HeapAllocator(BufferPoolAllocator::startingOffset,
                                                     BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
                                                     BufferPoolAllocator::chunkAlignment));
    mainStorage.reset(Buffer::create(context,
                                     flags,
                                     BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
                                     nullptr,
                                     bufferCreateArgs,
                                     errcodeRet));
    if (mainStorage) {
        chunkAllocator.reset(new HeapAllocator(BufferPoolAllocator::startingOffset,
                                               BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
                                               BufferPoolAllocator::chunkAlignment));
        context->decRefInternal();
    }
}

Context::BufferPoolAllocator::BufferPool::BufferPool(BufferPool &&bufferPool) : memoryManager(bufferPool.memoryManager),
                                                                                mainStorage(std::move(bufferPool.mainStorage)),
                                                                                chunkAllocator(std::move(bufferPool.chunkAllocator)) {}

Buffer *Context::BufferPoolAllocator::BufferPool::allocate(const MemoryProperties &memoryProperties,
                                                           cl_mem_flags flags,
                                                           cl_mem_flags_intel flagsIntel,
                                                           size_t requestedSize,
                                                           void *hostPtr,
                                                           cl_int &errcodeRet) {
    cl_buffer_region bufferRegion{};
    size_t actualSize = requestedSize;
    bufferRegion.origin = static_cast<size_t>(chunkAllocator->allocate(actualSize));
    if (bufferRegion.origin == 0) {
        return nullptr;
    }
    bufferRegion.origin -= BufferPoolAllocator::startingOffset;
    bufferRegion.size = requestedSize;
    auto bufferFromPool = mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
    bufferFromPool->createFunction = mainStorage->createFunction;
    bufferFromPool->setSizeInPoolAllocator(actualSize);
    return bufferFromPool;
}

bool Context::BufferPoolAllocator::BufferPool::isPoolBuffer(const MemObj *buffer) const {
    return mainStorage.get() == buffer;
}

bool Context::BufferPoolAllocator::BufferPool::drain() {
    for (auto allocation : mainStorage->getMultiGraphicsAllocation().getGraphicsAllocations()) {
        if (memoryManager->allocInUse(*allocation)) {
            return false;
        }
    }
    chunkAllocator.reset(new HeapAllocator(BufferPoolAllocator::startingOffset,
                                           BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
                                           BufferPoolAllocator::chunkAlignment));
    return true;
}

void Context::BufferPoolAllocator::addNewBufferPool() {
    Context::BufferPoolAllocator::BufferPool bufferPool(context);
    if (bufferPool.mainStorage) {
        bufferPools.push_back(std::move(bufferPool));
    }
}

void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
    this->context = context;
    addNewBufferPool();
}

bool Context::BufferPoolAllocator::isPoolBuffer(const MemObj *buffer) const {
    for (auto &bufferPool : bufferPools) {
        if (bufferPool.isPoolBuffer(buffer)) {
            return true;
        }
    }
    return false;
}

Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryProperties &memoryProperties,
                                                             cl_mem_flags flags,
                                                             cl_mem_flags_intel flagsIntel,
@@ -529,45 +594,52 @@ Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryPropert
                                                             void *hostPtr,
                                                             cl_int &errcodeRet) {
    errcodeRet = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    if (this->mainStorage &&
        this->isSizeWithinThreshold(requestedSize) &&
        this->flagsAllowBufferFromPool(flags, flagsIntel)) {
        auto lock = std::unique_lock<std::mutex>(this->mutex);
        cl_buffer_region bufferRegion{};
        size_t actualSize = requestedSize;
        bufferRegion.origin = static_cast<size_t>(this->chunkAllocator->allocate(actualSize));
        if (bufferRegion.origin == 0) {
            return nullptr;
        }
        bufferRegion.origin -= BufferPoolAllocator::startingOffset;
        bufferRegion.size = requestedSize;
        auto bufferFromPool = this->mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
        bufferFromPool->createFunction = this->mainStorage->createFunction;
        bufferFromPool->setSizeInPoolAllocator(actualSize);
    if (bufferPools.empty() ||
        !isSizeWithinThreshold(requestedSize) ||
        !flagsAllowBufferFromPool(flags, flagsIntel)) {
        return nullptr;
    }

    auto lock = std::unique_lock<std::mutex>(mutex);
    auto bufferFromPool = allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
    if (bufferFromPool != nullptr) {
        return bufferFromPool;
    }
    return nullptr;
}

bool Context::BufferPoolAllocator::isPoolBuffer(const MemObj *buffer) const {
    return buffer != nullptr && this->mainStorage == buffer;
}

void Context::BufferPoolAllocator::tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size) {
    if (this->isPoolBuffer(possiblePoolBuffer)) {
        auto lock = std::unique_lock<std::mutex>(this->mutex);
        DEBUG_BREAK_IF(!this->mainStorage);
        DEBUG_BREAK_IF(size == 0);
        auto internalBufferAddress = offset + BufferPoolAllocator::startingOffset;
        this->chunkAllocator->free(internalBufferAddress, size);
    }
    drainOrAddNewBufferPool();
    return allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
}

void Context::BufferPoolAllocator::releaseSmallBufferPool() {
    DEBUG_BREAK_IF(!this->mainStorage);
    delete this->mainStorage;
    this->mainStorage = nullptr;
    bufferPools.clear();
}

void Context::BufferPoolAllocator::drainOrAddNewBufferPool() {
    for (auto &bufferPool : bufferPools) {
        if (bufferPool.drain()) {
            return;
        }
    }
    addNewBufferPool();
}

Buffer *Context::BufferPoolAllocator::allocateFromPools(const MemoryProperties &memoryProperties,
                                                        cl_mem_flags flags,
                                                        cl_mem_flags_intel flagsIntel,
                                                        size_t requestedSize,
                                                        void *hostPtr,
                                                        cl_int &errcodeRet) {
    for (auto &bufferPool : bufferPools) {
        auto bufferFromPool = bufferPool.allocate(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
        if (bufferFromPool != nullptr) {
            return bufferFromPool;
        }
    }
    return nullptr;
}

TagAllocatorBase *Context::getMultiRootDeviceTimestampPacketAllocator() {
    return multiRootDeviceTimestampPacketAllocator.get();
}
@@ -60,24 +60,46 @@ class Context : public BaseObject<_cl_context> {
                                  size_t size,
                                  void *hostPtr,
                                  cl_int &errcodeRet);
        void tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size);

        void releaseSmallBufferPool();

        bool isAggregatedSmallBuffersEnabled(Context *context) const;

        void initAggregatedSmallBuffers(Context *context);

        bool isPoolBuffer(const MemObj *buffer) const;

        bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const;

      protected:
        Buffer *allocateFromPools(const MemoryProperties &memoryProperties,
                                  cl_mem_flags flags,
                                  cl_mem_flags_intel flagsIntel,
                                  size_t size,
                                  void *hostPtr,
                                  cl_int &errcodeRet);

        inline bool isSizeWithinThreshold(size_t size) const {
            return BufferPoolAllocator::smallBufferThreshold >= size;
        }
        Buffer *mainStorage{nullptr};
        std::unique_ptr<HeapAllocator> chunkAllocator;

        void drainOrAddNewBufferPool();
        void addNewBufferPool();

        struct BufferPool {
            BufferPool(Context *context);
            BufferPool(BufferPool &&bufferPool);
            bool isPoolBuffer(const MemObj *buffer) const;
            Buffer *allocate(const MemoryProperties &memoryProperties,
                             cl_mem_flags flags,
                             cl_mem_flags_intel flagsIntel,
                             size_t size,
                             void *hostPtr,
                             cl_int &errcodeRet);
            bool drain();
            MemoryManager *memoryManager{nullptr};
            std::unique_ptr<Buffer> mainStorage;
            std::unique_ptr<HeapAllocator> chunkAllocator;
        };
        Context *context{nullptr};
        std::mutex mutex;
        std::vector<BufferPool> bufferPools;
    };
    static const cl_ulong objectMagic = 0xA4234321DC002130LL;

@@ -221,7 +243,7 @@ class Context : public BaseObject<_cl_context> {

    static Platform *getPlatformFromProperties(const cl_context_properties *properties, cl_int &errcode);
    BufferPoolAllocator &getBufferPoolAllocator() {
        return this->smallBufferPoolAllocator;
        return smallBufferPoolAllocator;
    }
    TagAllocatorBase *getMultiRootDeviceTimestampPacketAllocator();
    std::unique_lock<std::mutex> obtainOwnershipForMultiRootDeviceAllocator();
@@ -90,8 +90,6 @@ MemObj::~MemObj() {
            }
            destroyGraphicsAllocation(graphicsAllocation, doAsyncDestructions);
            graphicsAllocation = nullptr;
        } else if (graphicsAllocation && context->getBufferPoolAllocator().isPoolBuffer(associatedMemObject)) {
            memoryManager->waitForEnginesCompletion(*graphicsAllocation);
        }
        if (!associatedMemObject) {
            releaseMapAllocation(rootDeviceIndex, doAsyncDestructions);
@@ -102,7 +100,6 @@ MemObj::~MemObj() {
        }
        if (associatedMemObject) {
            associatedMemObject->decRefInternal();
            context->getBufferPoolAllocator().tryFreeFromPoolBuffer(associatedMemObject, this->offset, this->sizeInPoolAllocator);
        }
        if (!associatedMemObject) {
            releaseAllocatedMapPtr();