feature: add optional onChunkFree callback to AbstractBuffersPool

Instances returned by `getAllocationsVector()` in some cases cannot be
freed (in the `malloc/new` sense) until the `drain()` function has
invoked `allocInUse()` on them. Moreover, the `chunksToFree` container
operates on `{offset, size}` pairs, not pointers, so such a pair cannot
be used to release allocations either.

Provide an optional callback, which can be implemented by a custom
pool derived from `AbstractBuffersPool`. This callback can be used, for
example, to perform the actual release of the allocation related to the
currently processed chunk.
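
For illustration, a minimal sketch of a derived pool wiring the new
callback, modeled on the `DummyBuffersPool` from the unit tests below.
`ExamplePool`, `ExampleBuffer` and `releaseChunkAllocation` are
hypothetical names, not part of this commit:

```cpp
// Hypothetical pool; sketches only how the optional callback is registered and used.
struct ExamplePool : public NEO::AbstractBuffersPool<ExamplePool, ExampleBuffer> {
    using BaseType = NEO::AbstractBuffersPool<ExamplePool, ExampleBuffer>;

    ExamplePool(NEO::MemoryManager *memoryManager)
        : BaseType{memoryManager, &ExamplePool::onChunkFree} {} // register the callback

    // Required by drain() to check whether the pool allocations are still in use.
    BaseType::AllocsVecCRef getAllocationsVector() { return allocations; }

    // Called by drain() for every chunk handed back to the chunkAllocator,
    // with the same {offset, size} pair that was queued via tryFreeFromPoolBuffer().
    void onChunkFree(uint64_t offset, size_t size) {
        releaseChunkAllocation(offset, size); // hypothetical helper releasing the related allocation
    }

    void releaseChunkAllocation(uint64_t offset, size_t size); // illustrative, defined elsewhere
    StackVec<NEO::GraphicsAllocation *, 1> allocations;
};
```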

Additionally, provide pool-independent versions of the `drain()` and
`tryFreeFromPoolBuffer()` functions, which take the vector of pools as
a parameter, and keep the previous versions as defaults (for allocators
operating on the single, default vector of pools). The new versions
allow the code to be reused when an allocator manages multiple pool
vectors.
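
A sketch of how an allocator could reuse the new overloads for more
than one vector of pools. `ExampleAllocator` and `secondaryPools` are
illustrative names; `IsaBuffersAllocator` itself is not part of this
commit:

```cpp
// Hypothetical allocator keeping an extra vector of pools next to the default bufferPools.
struct ExampleAllocator : public NEO::AbstractBuffersAllocator<ExamplePool, ExampleBuffer> {
    using BaseType = NEO::AbstractBuffersAllocator<ExamplePool, ExampleBuffer>;

    void addSecondaryPool(ExamplePool &&pool) {
        auto lock = std::unique_lock<std::mutex>(this->mutex);
        this->addNewBufferPool(std::move(pool), this->secondaryPools); // pool-vector overload
    }

    void drainBoth() {
        auto lock = std::unique_lock<std::mutex>(this->mutex);
        this->drain(this->bufferPools);    // default vector, same behavior as the old drain()
        this->drain(this->secondaryPools); // extra vector, reusing the shared implementation
    }

    std::vector<ExamplePool> secondaryPools; // hypothetical second set of pools
};
```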

In both cases, there was no such need so far, but it arose when working
on `IsaBuffersAllocator`. The latter is coming in future commits; the
shared-code modifications are extracted here as an independent step.

Related-To: NEO-7788
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
Author: Maciej Bielski
Committed: 2023-06-29 15:15:50 +00:00 by Compute-Runtime-Automation
Parent: 1855dffecf
Commit: c7a971a28f
5 changed files with 153 additions and 20 deletions

Changed file 1 of 5:

@ -504,7 +504,7 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont
(isSupportedForSingleDeviceContexts && context->isSingleDeviceContext());
}
Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager) {
Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager, nullptr) {
static constexpr cl_mem_flags flags{};
[[maybe_unused]] cl_int errcodeRet{};
Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};

Changed file 2 of 5:

@ -7,6 +7,7 @@
#pragma once
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/utilities/stackvec.h"
#include <functional>
@ -31,7 +32,7 @@ struct SmallBuffersParams {
};
template <typename PoolT, typename BufferType, typename BufferParentType = BufferType>
struct AbstractBuffersPool : public SmallBuffersParams<PoolT> {
struct AbstractBuffersPool : public SmallBuffersParams<PoolT>, public NonCopyableClass {
// The prototype of a function allocating the `mainStorage` is not specified.
// That would be an unnecessary limitation here - it is completely up to derived class implementation.
// Perhaps the allocating function needs to leverage `HeapAllocator::allocate()` and also
@ -43,8 +44,9 @@ struct AbstractBuffersPool : public SmallBuffersParams<PoolT> {
using Params::smallBufferThreshold;
using Params::startingOffset;
using AllocsVecCRef = const StackVec<NEO::GraphicsAllocation *, 1> &;
using OnChunkFreeCallback = void (PoolT::*)(uint64_t offset, size_t size);
AbstractBuffersPool(MemoryManager *memoryManager);
AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback);
AbstractBuffersPool(AbstractBuffersPool<PoolT, BufferType, BufferParentType> &&bufferPool);
void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size);
bool isPoolBuffer(const BufferParentType *buffer) const;
@ -60,6 +62,7 @@ struct AbstractBuffersPool : public SmallBuffersParams<PoolT> {
std::unique_ptr<BufferType> mainStorage;
std::unique_ptr<HeapAllocator> chunkAllocator;
std::vector<std::pair<uint64_t, size_t>> chunksToFree;
OnChunkFreeCallback onChunkFreeCallback = nullptr;
};
template <typename BuffersPoolType, typename BufferType, typename BufferParentType = BufferType>
@ -81,8 +84,11 @@ class AbstractBuffersAllocator : public SmallBuffersParams<BuffersPoolType> {
protected:
inline bool isSizeWithinThreshold(size_t size) const { return smallBufferThreshold >= size; }
void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector<BuffersPoolType> &bufferPoolsVec);
void drain();
void drain(std::vector<BuffersPoolType> &bufferPoolsVec);
void addNewBufferPool(BuffersPoolType &&bufferPool);
void addNewBufferPool(BuffersPoolType &&bufferPool, std::vector<BuffersPoolType> &bufferPoolsVec);
std::mutex mutex;
std::vector<BuffersPoolType> bufferPools;

Changed file 3 of 5:

@ -14,7 +14,8 @@
namespace NEO {
template <typename PoolT, typename BufferType, typename BufferParentType>
AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(MemoryManager *mm) : memoryManager{mm} {
AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCb)
: memoryManager{memoryManager}, onChunkFreeCallback{onChunkFreeCb} {
static_assert(std::is_base_of_v<BufferParentType, BufferType>);
}
@ -22,12 +23,13 @@ template <typename PoolT, typename BufferType, typename BufferParentType>
AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(AbstractBuffersPool<PoolT, BufferType, BufferParentType> &&bufferPool)
: memoryManager{bufferPool.memoryManager},
mainStorage{std::move(bufferPool.mainStorage)},
chunkAllocator{std::move(bufferPool.chunkAllocator)} {}
chunkAllocator{std::move(bufferPool.chunkAllocator)},
onChunkFreeCallback{bufferPool.onChunkFreeCallback} {}
template <typename PoolT, typename BufferType, typename BufferParentType>
void AbstractBuffersPool<PoolT, BufferType, BufferParentType>::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) {
if (this->isPoolBuffer(possiblePoolBuffer)) {
this->chunksToFree.push_back({offset + startingOffset, size});
this->chunksToFree.push_back({offset, size});
}
}
@ -47,7 +49,10 @@ void AbstractBuffersPool<PoolT, BufferType, BufferParentType>::drain() {
}
}
for (auto &chunk : this->chunksToFree) {
this->chunkAllocator->free(chunk.first, chunk.second);
this->chunkAllocator->free(chunk.first + startingOffset, chunk.second);
if (static_cast<PoolT *>(this)->onChunkFreeCallback) {
(static_cast<PoolT *>(this)->*onChunkFreeCallback)(chunk.first, chunk.second);
}
}
this->chunksToFree.clear();
}
@ -66,23 +71,38 @@ bool AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::is
template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
void AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) {
this->tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size, this->bufferPools);
}
template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
void AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector<BuffersPoolType> &bufferPoolsVec) {
auto lock = std::unique_lock<std::mutex>(this->mutex);
for (auto &bufferPool : this->bufferPools) {
for (auto &bufferPool : bufferPoolsVec) {
bufferPool.tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size); // NOLINT(clang-analyzer-cplusplus.NewDelete)
}
}
template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
void AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::drain() {
for (auto &bufferPool : this->bufferPools) {
this->drain(this->bufferPools);
}
template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
void AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::drain(std::vector<BuffersPoolType> &bufferPoolsVec) {
for (auto &bufferPool : bufferPoolsVec) {
bufferPool.drain();
}
}
template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
void AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::addNewBufferPool(BuffersPoolType &&bufferPool) {
this->addNewBufferPool(std::move(bufferPool), this->bufferPools);
}
template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
void AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::addNewBufferPool(BuffersPoolType &&bufferPool, std::vector<BuffersPoolType> &bufferPoolsVec) {
if (bufferPool.mainStorage) {
this->bufferPools.push_back(std::move(bufferPool));
bufferPoolsVec.push_back(std::move(bufferPool));
}
}
} // namespace NEO
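
The net effect of the `tryFreeFromPoolBuffer()`/`drain()` changes above
is that `chunksToFree` now stores the raw offsets, the heap allocator
receives them shifted by `startingOffset`, and the optional callback
receives the raw offset again. A self-contained mock of that flow (not
the NEO code; names mirror the originals only for readability):

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Stand-alone mock of the new drain() chunk handling.
struct MiniPool {
    static constexpr uint64_t startingOffset = 0x1000; // pool base within the heap allocator
    std::vector<std::pair<uint64_t, size_t>> chunksToFree;
    void (MiniPool::*onChunkFreeCallback)(uint64_t, size_t) = &MiniPool::onChunkFree;

    void heapAllocatorFree(uint64_t shiftedOffset, size_t size) { /* chunkAllocator->free() stand-in */ }
    void onChunkFree(uint64_t offset, size_t size) { /* release the allocation tied to this chunk */ }

    void tryFreeFromPoolBuffer(uint64_t offset, size_t size) {
        chunksToFree.push_back({offset, size}); // raw offset, startingOffset no longer added here
    }

    void drain() {
        for (auto &chunk : chunksToFree) {
            heapAllocatorFree(chunk.first + startingOffset, chunk.second); // allocator sees the shifted offset
            if (onChunkFreeCallback) {
                (this->*onChunkFreeCallback)(chunk.first, chunk.second); // callback sees the raw offset
            }
        }
        chunksToFree.clear();
    }
};
```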

Changed file 4 of 5:

@ -38,13 +38,15 @@ class HeapAllocator {
freedChunksSmall.reserve(50);
}
MOCKABLE_VIRTUAL ~HeapAllocator() = default;
uint64_t allocate(size_t &sizeToAllocate) {
return allocateWithCustomAlignment(sizeToAllocate, 0u);
}
uint64_t allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment);
void free(uint64_t ptr, size_t size);
MOCKABLE_VIRTUAL void free(uint64_t ptr, size_t size);
uint64_t getLeftSize() const {
return availableSize;

Changed file 5 of 5:

@ -13,7 +13,9 @@
#include "gtest/gtest.h"
#include <array>
#include <memory>
#include <vector>
struct DummyBufferPool;
@ -34,27 +36,38 @@ struct DummyBuffersPool : public NEO::AbstractBuffersPool<DummyBuffersPool, Dumm
using BaseType = NEO::AbstractBuffersPool<DummyBuffersPool, DummyBuffer>;
static constexpr auto dummyPtr = 0xdeadbeef0000;
DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset) : BaseType(memoryManager) {
DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset, BaseType::OnChunkFreeCallback onChunkFreeCallback)
: BaseType{memoryManager, onChunkFreeCallback} {
dummyAllocations.resize(2);
dummyAllocations[0] = reinterpret_cast<NEO::GraphicsAllocation *>(poolOffset + dummyPtr);
dummyAllocations[1] = nullptr; // makes sure nullptrs don't cause SEGFAULTs
}
DummyBuffersPool(NEO::MemoryManager *memoryManager) : DummyBuffersPool(memoryManager, 0x0) {}
DummyBuffersPool(NEO::MemoryManager *memoryManager) : DummyBuffersPool(memoryManager, 0x0, &DummyBuffersPool::onChunkFree) {}
BaseType::AllocsVecCRef getAllocationsVector() {
return dummyAllocations;
}
void onChunkFree(uint64_t offset, size_t size) {
this->freedChunks.push_back({offset, size});
this->onChunkFreeCalled = true;
}
StackVec<NEO::GraphicsAllocation *, 1> dummyAllocations;
std::vector<std::pair<uint64_t, size_t>> freedChunks{};
bool onChunkFreeCalled = false;
};
struct DummyBuffersAllocator : public NEO::AbstractBuffersAllocator<DummyBuffersPool, DummyBuffer> {
using BaseType = NEO::AbstractBuffersAllocator<DummyBuffersPool, DummyBuffer>;
using BaseType::addNewBufferPool;
using BaseType::bufferPools;
using BaseType::drain;
using BaseType::isSizeWithinThreshold;
void drainUnderLock() {
auto lock = std::unique_lock<std::mutex>(this->mutex);
this->BaseType::drain();
}
};
using NEO::MockExecutionEnvironment;
@ -158,13 +171,13 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenChunkOfMainStorageTrie
auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree;
EXPECT_EQ(chunksToFree1.size(), 0u);
EXPECT_EQ(chunksToFree2.size(), 0u);
auto chunkSize = sizeof(DummyBuffer) / 8;
auto chunkOffset = sizeof(DummyBuffer) / 2;
auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::chunkAlignment;
buffersAllocator.tryFreeFromPoolBuffer(poolStorage2, chunkOffset, chunkSize);
EXPECT_EQ(chunksToFree1.size(), 0u);
EXPECT_EQ(chunksToFree2.size(), 1u);
auto [effectiveChunkOffset, size] = chunksToFree2[0];
EXPECT_EQ(effectiveChunkOffset, chunkOffset + DummyBuffersPool::startingOffset);
EXPECT_EQ(effectiveChunkOffset, chunkOffset);
EXPECT_EQ(size, chunkSize);
buffersAllocator.releaseSmallBufferPool();
@ -193,8 +206,8 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA
buffersAllocator.addNewBufferPool(std::move(pool1));
buffersAllocator.addNewBufferPool(std::move(pool2));
auto chunkSize = sizeof(DummyBuffer) / 16;
auto chunkOffset = sizeof(DummyBuffer) / 2;
auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::chunkAlignment;
for (size_t i = 0; i < 3; i++) {
auto exampleOffset = chunkOffset + i * chunkSize * 2;
buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize);
@ -203,11 +216,103 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA
auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree;
auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree;
auto &freedChunks1 = buffersAllocator.bufferPools[0].freedChunks;
auto &freedChunks2 = buffersAllocator.bufferPools[1].freedChunks;
EXPECT_EQ(chunksToFree1.size(), 3u);
EXPECT_EQ(chunksToFree2.size(), 3u);
EXPECT_EQ(freedChunks1.size(), 0u);
EXPECT_EQ(freedChunks2.size(), 0u);
otherMemoryManager->deferAllocInUse = true;
buffersAllocator.drain();
buffersAllocator.drainUnderLock();
EXPECT_EQ(chunksToFree1.size(), 0u);
EXPECT_EQ(chunksToFree2.size(), 3u);
ASSERT_EQ(freedChunks1.size(), 3u);
EXPECT_EQ(freedChunks2.size(), 0u);
EXPECT_TRUE(buffersAllocator.bufferPools[0].onChunkFreeCalled);
EXPECT_FALSE(buffersAllocator.bufferPools[1].onChunkFreeCalled);
for (size_t i = 0; i < 3; i++) {
auto expectedOffset = chunkOffset + i * chunkSize * 2;
auto [freedOffset, freedSize] = freedChunks1[i];
EXPECT_EQ(expectedOffset, freedOffset);
EXPECT_EQ(chunkSize, freedSize);
}
}
TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnChunkFreeIgnoredIfNotDefined) {
auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr};
pool1.mainStorage.reset(new DummyBuffer(testVal));
auto buffer1 = pool1.mainStorage.get();
pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset,
DummyBuffersPool::aggregatedSmallBuffersPoolSize,
DummyBuffersPool::chunkAlignment,
DummyBuffersPool::smallBufferThreshold});
auto buffersAllocator = DummyBuffersAllocator{};
buffersAllocator.addNewBufferPool(std::move(pool1));
auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::chunkAlignment;
for (size_t i = 0; i < 3; i++) {
auto exampleOffset = chunkOffset + i * chunkSize * 2;
buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize);
}
auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree;
auto &freedChunks1 = buffersAllocator.bufferPools[0].freedChunks;
EXPECT_EQ(chunksToFree1.size(), 3u);
EXPECT_EQ(freedChunks1.size(), 0u);
buffersAllocator.drainUnderLock();
EXPECT_EQ(chunksToFree1.size(), 0u);
EXPECT_EQ(freedChunks1.size(), 0u);
EXPECT_FALSE(buffersAllocator.bufferPools[0].onChunkFreeCalled);
}
TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolThenOffsetsPassedToChunkAllocatorAreShiftedProperly) {
struct ProxyHeapAllocator : public NEO::HeapAllocator {
using BaseType = NEO::HeapAllocator;
ProxyHeapAllocator(uint64_t address, uint64_t size, size_t allocationAlignment, size_t threshold)
: BaseType{address, size, allocationAlignment, threshold} {}
~ProxyHeapAllocator() override {
this->registeredOffsets.clear();
}
void free(uint64_t offset, size_t size) override {
this->registeredOffsets.push_back(offset);
this->BaseType::free(offset, size);
}
std::vector<uint64_t> registeredOffsets;
};
auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr};
pool1.mainStorage.reset(new DummyBuffer(testVal));
auto buffer1 = pool1.mainStorage.get();
pool1.chunkAllocator.reset(new ProxyHeapAllocator{DummyBuffersPool::startingOffset,
DummyBuffersPool::aggregatedSmallBuffersPoolSize,
DummyBuffersPool::chunkAlignment,
DummyBuffersPool::smallBufferThreshold});
auto buffersAllocator = DummyBuffersAllocator{};
buffersAllocator.addNewBufferPool(std::move(pool1));
auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
auto exampleOffsets = std::array<size_t, 3>{0u, 0u, 0u};
for (size_t i = 0; i < 3; i++) {
exampleOffsets[i] = DummyBuffersPool::startingOffset + i * chunkSize * 2;
buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffsets[i], chunkSize);
}
auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree;
EXPECT_EQ(chunksToFree1.size(), 3u);
buffersAllocator.drainUnderLock();
EXPECT_EQ(chunksToFree1.size(), 0u);
auto heapAllocator = static_cast<ProxyHeapAllocator *>(buffersAllocator.bufferPools[0].chunkAllocator.get());
ASSERT_EQ(heapAllocator->registeredOffsets.size(), 3u);
for (size_t i = 0; i < 3; i++) {
EXPECT_EQ(heapAllocator->registeredOffsets[i], exampleOffsets[i] + DummyBuffersPool::startingOffset);
}
}