refactor: extract generic parts of small buffers allocator

Currently, all of this code resides within the opencl/ tree, but the
mechanism is meant to be reused in L0 to optimize kernel-ISA allocations
(future work).

This commit is a preparatory step: it extracts the generic mechanism
and moves it under the shared/ tree.

Related-To: NEO-7788
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski
2023-05-12 16:51:52 +00:00
committed by Compute-Runtime-Automation
parent 73d05045b0
commit 7ea8ed1757
10 changed files with 458 additions and 135 deletions

View File

@@ -18,6 +18,8 @@
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/debug_env_reader.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"
#include "shared/source/utilities/heap_allocator.h"
#include "opencl/source/accelerators/intel_motion_estimation.h"
#include "opencl/source/api/additional_extensions.h"

View File

@@ -18,6 +18,7 @@
#include "shared/source/memory_manager/deferred_deleter.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"
#include "shared/source/utilities/heap_allocator.h"
#include "shared/source/utilities/tag_allocator.h"
@@ -503,101 +504,53 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont
(isSupportedForSingleDeviceContexts && context->isSingleDeviceContext());
}
Context::BufferPoolAllocator::BufferPool::BufferPool(Context *context) : memoryManager(context->memoryManager) {
Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager) {
static constexpr cl_mem_flags flags{};
[[maybe_unused]] cl_int errcodeRet{};
Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
bufferCreateArgs.doNotProvidePerformanceHints = true;
bufferCreateArgs.makeAllocationLockable = true;
mainStorage.reset(Buffer::create(context,
flags,
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
nullptr,
bufferCreateArgs,
errcodeRet));
if (mainStorage) {
chunkAllocator.reset(new HeapAllocator(BufferPoolAllocator::startingOffset,
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
BufferPoolAllocator::chunkAlignment));
this->mainStorage.reset(Buffer::create(context,
flags,
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
nullptr,
bufferCreateArgs,
errcodeRet));
if (this->mainStorage) {
this->chunkAllocator.reset(new HeapAllocator(BufferPool::startingOffset,
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
BufferPoolAllocator::chunkAlignment));
context->decRefInternal();
}
}
Context::BufferPoolAllocator::BufferPool::BufferPool(BufferPool &&bufferPool) : memoryManager(bufferPool.memoryManager),
mainStorage(std::move(bufferPool.mainStorage)),
chunkAllocator(std::move(bufferPool.chunkAllocator)) {}
// Returns a reference to the graphics allocations vector obtained from the
// multi-graphics allocation of this pool's main storage buffer.
// NOTE(review): mainStorage is dereferenced without a null check here, while
// the constructor only sets up chunkAllocator when mainStorage is non-null —
// presumably callers only query pools whose main storage was created; confirm.
const StackVec<NEO::GraphicsAllocation *, 1> &Context::BufferPool::getAllocationsVector() {
return this->mainStorage->getMultiGraphicsAllocation().getGraphicsAllocations();
}
Buffer *Context::BufferPoolAllocator::BufferPool::allocate(const MemoryProperties &memoryProperties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,
size_t requestedSize,
void *hostPtr,
cl_int &errcodeRet) {
Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,
size_t requestedSize,
void *hostPtr,
cl_int &errcodeRet) {
cl_buffer_region bufferRegion{};
size_t actualSize = requestedSize;
bufferRegion.origin = static_cast<size_t>(chunkAllocator->allocate(actualSize));
bufferRegion.origin = static_cast<size_t>(this->chunkAllocator->allocate(actualSize));
if (bufferRegion.origin == 0) {
return nullptr;
}
bufferRegion.origin -= BufferPoolAllocator::startingOffset;
bufferRegion.origin -= BufferPool::startingOffset;
bufferRegion.size = requestedSize;
auto bufferFromPool = mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
bufferFromPool->createFunction = mainStorage->createFunction;
auto bufferFromPool = this->mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
bufferFromPool->createFunction = this->mainStorage->createFunction;
bufferFromPool->setSizeInPoolAllocator(actualSize);
return bufferFromPool;
}
bool Context::BufferPoolAllocator::BufferPool::isPoolBuffer(const MemObj *buffer) const {
return mainStorage.get() == buffer;
}
void Context::BufferPoolAllocator::BufferPool::drain() {
for (auto allocation : mainStorage->getMultiGraphicsAllocation().getGraphicsAllocations()) {
if (allocation && memoryManager->allocInUse(*allocation)) {
return;
}
}
for (auto &chunk : chunksToFree) {
chunkAllocator->free(chunk.first, chunk.second);
}
chunksToFree.clear();
}
void Context::BufferPoolAllocator::BufferPool::tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size) {
if (this->isPoolBuffer(possiblePoolBuffer)) {
chunksToFree.push_back({offset + BufferPoolAllocator::startingOffset, size});
}
}
void Context::BufferPoolAllocator::addNewBufferPool() {
Context::BufferPoolAllocator::BufferPool bufferPool(context);
if (bufferPool.mainStorage) {
bufferPools.push_back(std::move(bufferPool));
}
}
void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
this->context = context;
addNewBufferPool();
}
bool Context::BufferPoolAllocator::isPoolBuffer(const MemObj *buffer) const {
for (auto &bufferPool : bufferPools) {
if (bufferPool.isPoolBuffer(buffer)) {
return true;
}
}
return false;
}
void Context::BufferPoolAllocator::tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size) {
auto lock = std::unique_lock<std::mutex>(this->mutex);
for (auto &bufferPool : bufferPools) {
bufferPool.tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size);
}
this->addNewBufferPool(Context::BufferPool{this->context});
}
Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryProperties &memoryProperties,
@@ -607,37 +560,27 @@ Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryPropert
void *hostPtr,
cl_int &errcodeRet) {
errcodeRet = CL_MEM_OBJECT_ALLOCATION_FAILURE;
if (bufferPools.empty() ||
!isSizeWithinThreshold(requestedSize) ||
if (this->bufferPools.empty() ||
!this->isSizeWithinThreshold(requestedSize) ||
!flagsAllowBufferFromPool(flags, flagsIntel)) {
return nullptr;
}
auto lock = std::unique_lock<std::mutex>(mutex);
auto bufferFromPool = allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
auto bufferFromPool = this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
if (bufferFromPool != nullptr) {
return bufferFromPool;
}
drain();
this->drain();
bufferFromPool = allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
bufferFromPool = this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
if (bufferFromPool != nullptr) {
return bufferFromPool;
}
addNewBufferPool();
return allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
}
void Context::BufferPoolAllocator::releaseSmallBufferPool() {
bufferPools.clear();
}
void Context::BufferPoolAllocator::drain() {
for (auto &bufferPool : bufferPools) {
bufferPool.drain();
}
this->addNewBufferPool(BufferPool{this->context});
return this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
}
Buffer *Context::BufferPoolAllocator::allocateFromPools(const MemoryProperties &memoryProperties,
@@ -646,7 +589,8 @@ Buffer *Context::BufferPoolAllocator::allocateFromPools(const MemoryProperties &
size_t requestedSize,
void *hostPtr,
cl_int &errcodeRet) {
for (auto &bufferPool : bufferPools) {
for (auto &bufferPoolParent : this->bufferPools) {
auto &bufferPool = static_cast<BufferPool &>(bufferPoolParent);
auto bufferFromPool = bufferPool.allocate(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
if (bufferFromPool != nullptr) {
return bufferFromPool;

View File

@@ -9,6 +9,8 @@
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/string.h"
#include "shared/source/utilities/buffer_pool_allocator.h"
#include "shared/source/utilities/stackvec.h"
#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/cl_device/cl_device_vector.h"
@@ -45,65 +47,45 @@ struct OpenCLObjectMapper<_cl_context> {
class Context : public BaseObject<_cl_context> {
public:
class BufferPoolAllocator {
using BufferAllocationsVec = StackVec<GraphicsAllocation *, 1>;
struct BufferPool : public AbstractBuffersPool<BufferPool, Buffer, MemObj> {
using BaseType = AbstractBuffersPool<BufferPool, Buffer, MemObj>;
BufferPool(Context *context);
Buffer *allocate(const MemoryProperties &memoryProperties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,
size_t requestedSize,
void *hostPtr,
cl_int &errcodeRet);
const StackVec<NEO::GraphicsAllocation *, 1> &getAllocationsVector();
};
class BufferPoolAllocator : public AbstractBuffersAllocator<BufferPool, Buffer, MemObj> {
public:
static constexpr auto aggregatedSmallBuffersPoolSize = 64 * KB;
static constexpr auto smallBufferThreshold = 4 * KB;
static constexpr auto chunkAlignment = 512u;
static constexpr auto startingOffset = chunkAlignment;
static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");
bool isAggregatedSmallBuffersEnabled(Context *context) const;
void initAggregatedSmallBuffers(Context *context);
Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,
size_t size,
size_t requestedSize,
void *hostPtr,
cl_int &errcodeRet);
void releaseSmallBufferPool();
bool isAggregatedSmallBuffersEnabled(Context *context) const;
void initAggregatedSmallBuffers(Context *context);
bool isPoolBuffer(const MemObj *buffer) const;
bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const;
void tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size);
protected:
Buffer *allocateFromPools(const MemoryProperties &memoryProperties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,
size_t size,
size_t requestedSize,
void *hostPtr,
cl_int &errcodeRet);
inline bool isSizeWithinThreshold(size_t size) const {
return BufferPoolAllocator::smallBufferThreshold >= size;
}
void drain();
void addNewBufferPool();
struct BufferPool {
BufferPool(Context *context);
BufferPool(BufferPool &&bufferPool);
bool isPoolBuffer(const MemObj *buffer) const;
void tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size);
Buffer *allocate(const MemoryProperties &memoryProperties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,
size_t size,
void *hostPtr,
cl_int &errcodeRet);
void drain();
MemoryManager *memoryManager{nullptr};
std::unique_ptr<Buffer> mainStorage;
std::unique_ptr<HeapAllocator> chunkAllocator;
std::vector<std::pair<uint64_t, size_t>> chunksToFree;
};
Context *context{nullptr};
std::mutex mutex;
std::vector<BufferPool> bufferPools;
};
static const cl_ulong objectMagic = 0xA4234321DC002130LL;
bool createImpl(const cl_context_properties *properties,

View File

@@ -13,11 +13,14 @@
#include "shared/source/helpers/get_info.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"
#include "shared/source/utilities/heap_allocator.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/helpers/get_info_status_mapper.h"
#include "opencl/source/helpers/mipmap.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/sharings/sharing.h"
#include <algorithm>