mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 14:33:04 +08:00
refactor: extract generic parts of small buffers allocator
Currently all of the code resides within the opencl/ tree, but the mechanism is meant to be reused in L0 to optimize kernel-ISA allocations (further work). This commit is a preparatory step that extracts the generic mechanism and moves the extracted part under the shared/ tree. Related-To: NEO-7788 Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
73d05045b0
commit
7ea8ed1757
@@ -18,6 +18,8 @@
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/source/os_interface/debug_env_reader.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
#include "shared/source/utilities/buffer_pool_allocator.inl"
|
||||
#include "shared/source/utilities/heap_allocator.h"
|
||||
|
||||
#include "opencl/source/accelerators/intel_motion_estimation.h"
|
||||
#include "opencl/source/api/additional_extensions.h"
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "shared/source/memory_manager/deferred_deleter.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/source/utilities/buffer_pool_allocator.inl"
|
||||
#include "shared/source/utilities/heap_allocator.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
@@ -503,101 +504,53 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont
|
||||
(isSupportedForSingleDeviceContexts && context->isSingleDeviceContext());
|
||||
}
|
||||
|
||||
Context::BufferPoolAllocator::BufferPool::BufferPool(Context *context) : memoryManager(context->memoryManager) {
|
||||
Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager) {
|
||||
static constexpr cl_mem_flags flags{};
|
||||
[[maybe_unused]] cl_int errcodeRet{};
|
||||
Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
|
||||
bufferCreateArgs.doNotProvidePerformanceHints = true;
|
||||
bufferCreateArgs.makeAllocationLockable = true;
|
||||
mainStorage.reset(Buffer::create(context,
|
||||
flags,
|
||||
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
|
||||
nullptr,
|
||||
bufferCreateArgs,
|
||||
errcodeRet));
|
||||
if (mainStorage) {
|
||||
chunkAllocator.reset(new HeapAllocator(BufferPoolAllocator::startingOffset,
|
||||
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
|
||||
BufferPoolAllocator::chunkAlignment));
|
||||
this->mainStorage.reset(Buffer::create(context,
|
||||
flags,
|
||||
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
|
||||
nullptr,
|
||||
bufferCreateArgs,
|
||||
errcodeRet));
|
||||
if (this->mainStorage) {
|
||||
this->chunkAllocator.reset(new HeapAllocator(BufferPool::startingOffset,
|
||||
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
|
||||
BufferPoolAllocator::chunkAlignment));
|
||||
context->decRefInternal();
|
||||
}
|
||||
}
|
||||
|
||||
// Move constructor: copies the memoryManager pointer from the source pool and
// takes over its mainStorage and chunkAllocator via std::move.
// NOTE(review): chunksToFree is not named in the initializer list, so the new
// object does not receive the source's pending chunks — confirm this is intended.
Context::BufferPoolAllocator::BufferPool::BufferPool(BufferPool &&bufferPool) : memoryManager(bufferPool.memoryManager),
|
||||
mainStorage(std::move(bufferPool.mainStorage)),
|
||||
chunkAllocator(std::move(bufferPool.chunkAllocator)) {}
|
||||
// Returns a reference to the graphics allocations exposed by the main storage
// buffer's multi-graphics-allocation.
// NOTE(review): mainStorage is dereferenced without a null check here;
// presumably callers only use this on pools whose main storage exists — confirm.
const StackVec<NEO::GraphicsAllocation *, 1> &Context::BufferPool::getAllocationsVector() {
|
||||
return this->mainStorage->getMultiGraphicsAllocation().getGraphicsAllocations();
|
||||
}
|
||||
|
||||
Buffer *Context::BufferPoolAllocator::BufferPool::allocate(const MemoryProperties &memoryProperties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t requestedSize,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet) {
|
||||
Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t requestedSize,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet) {
|
||||
cl_buffer_region bufferRegion{};
|
||||
size_t actualSize = requestedSize;
|
||||
bufferRegion.origin = static_cast<size_t>(chunkAllocator->allocate(actualSize));
|
||||
bufferRegion.origin = static_cast<size_t>(this->chunkAllocator->allocate(actualSize));
|
||||
if (bufferRegion.origin == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
bufferRegion.origin -= BufferPoolAllocator::startingOffset;
|
||||
bufferRegion.origin -= BufferPool::startingOffset;
|
||||
bufferRegion.size = requestedSize;
|
||||
auto bufferFromPool = mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
|
||||
bufferFromPool->createFunction = mainStorage->createFunction;
|
||||
auto bufferFromPool = this->mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
|
||||
bufferFromPool->createFunction = this->mainStorage->createFunction;
|
||||
bufferFromPool->setSizeInPoolAllocator(actualSize);
|
||||
return bufferFromPool;
|
||||
}
|
||||
|
||||
// Returns true when `buffer` is the very object held in this pool's
// mainStorage (pointer identity comparison), false otherwise.
bool Context::BufferPoolAllocator::BufferPool::isPoolBuffer(const MemObj *buffer) const {
|
||||
return mainStorage.get() == buffer;
|
||||
}
|
||||
|
||||
// Hands every chunk queued in chunksToFree back to chunkAllocator and then
// empties the queue. Nothing is freed if memoryManager->allocInUse() reports
// any non-null allocation of mainStorage as in use: the function returns early
// and the queue is left untouched.
void Context::BufferPoolAllocator::BufferPool::drain() {
|
||||
for (auto allocation : mainStorage->getMultiGraphicsAllocation().getGraphicsAllocations()) {
|
||||
if (allocation && memoryManager->allocInUse(*allocation)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Each queued entry is passed to free() as (first, second); entries are
// pushed elsewhere in this file as {offset + startingOffset, size}.
for (auto &chunk : chunksToFree) {
|
||||
chunkAllocator->free(chunk.first, chunk.second);
|
||||
}
|
||||
|
||||
chunksToFree.clear();
|
||||
}
|
||||
|
||||
// If `possiblePoolBuffer` is this pool's main storage, records the chunk
// {offset + BufferPoolAllocator::startingOffset, size} in chunksToFree.
// The chunk is only queued here, not returned to chunkAllocator; for any other
// buffer the call does nothing.
void Context::BufferPoolAllocator::BufferPool::tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size) {
|
||||
if (this->isPoolBuffer(possiblePoolBuffer)) {
|
||||
chunksToFree.push_back({offset + BufferPoolAllocator::startingOffset, size});
|
||||
}
|
||||
}
|
||||
|
||||
// Constructs a BufferPool for the stored context and appends it to bufferPools
// only when its mainStorage is non-null; otherwise the pool is discarded and
// bufferPools is left unchanged.
void Context::BufferPoolAllocator::addNewBufferPool() {
|
||||
Context::BufferPoolAllocator::BufferPool bufferPool(context);
|
||||
if (bufferPool.mainStorage) {
|
||||
bufferPools.push_back(std::move(bufferPool));
|
||||
}
|
||||
}
|
||||
|
||||
// Stores `context` in this allocator and then calls addNewBufferPool().
void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
|
||||
this->context = context;
|
||||
addNewBufferPool();
|
||||
}
|
||||
|
||||
// Returns true if any pool in bufferPools reports `buffer` as its pool buffer;
// returns false when none does (including when bufferPools is empty).
bool Context::BufferPoolAllocator::isPoolBuffer(const MemObj *buffer) const {
|
||||
for (auto &bufferPool : bufferPools) {
|
||||
if (bufferPool.isPoolBuffer(buffer)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Context::BufferPoolAllocator::tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size) {
|
||||
auto lock = std::unique_lock<std::mutex>(this->mutex);
|
||||
for (auto &bufferPool : bufferPools) {
|
||||
bufferPool.tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size);
|
||||
}
|
||||
this->addNewBufferPool(Context::BufferPool{this->context});
|
||||
}
|
||||
|
||||
Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryProperties &memoryProperties,
|
||||
@@ -607,37 +560,27 @@ Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryPropert
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet) {
|
||||
errcodeRet = CL_MEM_OBJECT_ALLOCATION_FAILURE;
|
||||
if (bufferPools.empty() ||
|
||||
!isSizeWithinThreshold(requestedSize) ||
|
||||
if (this->bufferPools.empty() ||
|
||||
!this->isSizeWithinThreshold(requestedSize) ||
|
||||
!flagsAllowBufferFromPool(flags, flagsIntel)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto lock = std::unique_lock<std::mutex>(mutex);
|
||||
auto bufferFromPool = allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
|
||||
auto bufferFromPool = this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
|
||||
if (bufferFromPool != nullptr) {
|
||||
return bufferFromPool;
|
||||
}
|
||||
|
||||
drain();
|
||||
this->drain();
|
||||
|
||||
bufferFromPool = allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
|
||||
bufferFromPool = this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
|
||||
if (bufferFromPool != nullptr) {
|
||||
return bufferFromPool;
|
||||
}
|
||||
|
||||
addNewBufferPool();
|
||||
return allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
|
||||
}
|
||||
|
||||
// Removes every pool from bufferPools, destroying the contained BufferPool objects.
void Context::BufferPoolAllocator::releaseSmallBufferPool() {
|
||||
bufferPools.clear();
|
||||
}
|
||||
|
||||
void Context::BufferPoolAllocator::drain() {
|
||||
for (auto &bufferPool : bufferPools) {
|
||||
bufferPool.drain();
|
||||
}
|
||||
this->addNewBufferPool(BufferPool{this->context});
|
||||
return this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
|
||||
}
|
||||
|
||||
Buffer *Context::BufferPoolAllocator::allocateFromPools(const MemoryProperties &memoryProperties,
|
||||
@@ -646,7 +589,8 @@ Buffer *Context::BufferPoolAllocator::allocateFromPools(const MemoryProperties &
|
||||
size_t requestedSize,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet) {
|
||||
for (auto &bufferPool : bufferPools) {
|
||||
for (auto &bufferPoolParent : this->bufferPools) {
|
||||
auto &bufferPool = static_cast<BufferPool &>(bufferPoolParent);
|
||||
auto bufferFromPool = bufferPool.allocate(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
|
||||
if (bufferFromPool != nullptr) {
|
||||
return bufferFromPool;
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/utilities/buffer_pool_allocator.h"
|
||||
#include "shared/source/utilities/stackvec.h"
|
||||
|
||||
#include "opencl/extensions/public/cl_ext_private.h"
|
||||
#include "opencl/source/cl_device/cl_device_vector.h"
|
||||
@@ -45,65 +47,45 @@ struct OpenCLObjectMapper<_cl_context> {
|
||||
|
||||
class Context : public BaseObject<_cl_context> {
|
||||
public:
|
||||
class BufferPoolAllocator {
|
||||
using BufferAllocationsVec = StackVec<GraphicsAllocation *, 1>;
|
||||
|
||||
// Pool type declared inside Context. It derives from AbstractBuffersPool,
// passing itself, Buffer and MemObj as the template arguments, and declares a
// constructor taking a Context, an allocate() method and an accessor for the
// allocations vector.
struct BufferPool : public AbstractBuffersPool<BufferPool, Buffer, MemObj> {
|
||||
// Shorthand for the base-class instantiation.
using BaseType = AbstractBuffersPool<BufferPool, Buffer, MemObj>;
|
||||
|
||||
BufferPool(Context *context);
|
||||
// Returns a Buffer pointer for the request; errcodeRet is an output parameter.
Buffer *allocate(const MemoryProperties &memoryProperties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t requestedSize,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet);
|
||||
|
||||
// Returns a const reference to a StackVec of GraphicsAllocation pointers.
const StackVec<NEO::GraphicsAllocation *, 1> &getAllocationsVector();
|
||||
};
|
||||
|
||||
class BufferPoolAllocator : public AbstractBuffersAllocator<BufferPool, Buffer, MemObj> {
|
||||
public:
|
||||
static constexpr auto aggregatedSmallBuffersPoolSize = 64 * KB;
|
||||
static constexpr auto smallBufferThreshold = 4 * KB;
|
||||
static constexpr auto chunkAlignment = 512u;
|
||||
static constexpr auto startingOffset = chunkAlignment;
|
||||
|
||||
static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");
|
||||
|
||||
bool isAggregatedSmallBuffersEnabled(Context *context) const;
|
||||
void initAggregatedSmallBuffers(Context *context);
|
||||
Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t size,
|
||||
size_t requestedSize,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet);
|
||||
|
||||
void releaseSmallBufferPool();
|
||||
bool isAggregatedSmallBuffersEnabled(Context *context) const;
|
||||
void initAggregatedSmallBuffers(Context *context);
|
||||
bool isPoolBuffer(const MemObj *buffer) const;
|
||||
bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const;
|
||||
void tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size);
|
||||
|
||||
protected:
|
||||
Buffer *allocateFromPools(const MemoryProperties &memoryProperties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t size,
|
||||
size_t requestedSize,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet);
|
||||
|
||||
inline bool isSizeWithinThreshold(size_t size) const {
|
||||
return BufferPoolAllocator::smallBufferThreshold >= size;
|
||||
}
|
||||
|
||||
void drain();
|
||||
void addNewBufferPool();
|
||||
|
||||
struct BufferPool {
|
||||
BufferPool(Context *context);
|
||||
BufferPool(BufferPool &&bufferPool);
|
||||
bool isPoolBuffer(const MemObj *buffer) const;
|
||||
void tryFreeFromPoolBuffer(MemObj *possiblePoolBuffer, size_t offset, size_t size);
|
||||
Buffer *allocate(const MemoryProperties &memoryProperties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t size,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet);
|
||||
void drain();
|
||||
MemoryManager *memoryManager{nullptr};
|
||||
std::unique_ptr<Buffer> mainStorage;
|
||||
std::unique_ptr<HeapAllocator> chunkAllocator;
|
||||
std::vector<std::pair<uint64_t, size_t>> chunksToFree;
|
||||
};
|
||||
Context *context{nullptr};
|
||||
std::mutex mutex;
|
||||
std::vector<BufferPool> bufferPools;
|
||||
};
|
||||
|
||||
static const cl_ulong objectMagic = 0xA4234321DC002130LL;
|
||||
|
||||
bool createImpl(const cl_context_properties *properties,
|
||||
|
||||
@@ -13,11 +13,14 @@
|
||||
#include "shared/source/helpers/get_info.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/utilities/buffer_pool_allocator.inl"
|
||||
#include "shared/source/utilities/heap_allocator.h"
|
||||
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/helpers/get_info_status_mapper.h"
|
||||
#include "opencl/source/helpers/mipmap.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/sharings/sharing.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
Reference in New Issue
Block a user