From b7ecf99abb441fd022d799049fda5cf64ebe6bd6 Mon Sep 17 00:00:00 2001 From: Maciej Bielski Date: Thu, 29 Jun 2023 13:15:50 +0000 Subject: [PATCH] feature: add optional onChunkFree callback to AbstractBuffersPool Instances returned by `getAllocationsVector()` in some cases cannot be freed (in the `malloc/new` sense) until the `drain()` function invokes `allocInUse()` on them. Plus, the `chunksToFree` container operates on pairs `{offset, size}`, not pointers, so such pair cannot be used to release allocations either. Provide an optional callback, which can be implemented by the custom pool derived from `AbstractBuffersPool`. This callback can be used, for example, to perform actual release of an allocation related to the currently processed chunk. Additionally, provide the `drain()` and `tryFreeFromPoolBuffer()` functions with pool-independent versions and keep the previous versions as defaults (for allocators with a single pool). The new versions allow reusing the code for cases when allocator has multiple pools. In both cases, there was no such needs so far but it arose when working on `IsaBuffersAllocator`. The latter is coming with future commits, but the shared code modifications are extracted as an independent step. Related-To: NEO-7788 Signed-off-by: Maciej Bielski --- opencl/source/context/context.cpp | 2 +- .../source/utilities/buffer_pool_allocator.h | 10 ++- .../utilities/buffer_pool_allocator.inl | 32 ++++++-- .../utilities/buffer_pool_allocator_tests.cpp | 74 ++++++++++++++++--- 4 files changed, 99 insertions(+), 19 deletions(-) diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 1c2062c647..32123e84b4 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -504,7 +504,7 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont (isSupportedForSingleDeviceContexts && context->isSingleDeviceContext()); } -Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager) { +Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager, nullptr) { static constexpr cl_mem_flags flags{}; [[maybe_unused]] cl_int errcodeRet{}; Buffer::AdditionalBufferCreateArgs bufferCreateArgs{}; diff --git a/shared/source/utilities/buffer_pool_allocator.h b/shared/source/utilities/buffer_pool_allocator.h index 4291cedfd5..f9061031ce 100644 --- a/shared/source/utilities/buffer_pool_allocator.h +++ b/shared/source/utilities/buffer_pool_allocator.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/helpers/constants.h" +#include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/utilities/stackvec.h" #include @@ -31,7 +32,7 @@ struct SmallBuffersParams { }; template -struct AbstractBuffersPool : public SmallBuffersParams { +struct AbstractBuffersPool : public SmallBuffersParams, public NonCopyableClass { // The prototype of a function allocating the `mainStorage` is not specified. // That would be an unnecessary limitation here - it is completely up to derived class implementation. // Perhaps the allocating function needs to leverage `HeapAllocator::allocate()` and also @@ -43,8 +44,9 @@ struct AbstractBuffersPool : public SmallBuffersParams { using Params::smallBufferThreshold; using Params::startingOffset; using AllocsVecCRef = const StackVec &; + using OnChunkFreeCallback = void (PoolT::*)(uint64_t offset, size_t size); - AbstractBuffersPool(MemoryManager *memoryManager); + AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback); AbstractBuffersPool(AbstractBuffersPool &&bufferPool); void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size); bool isPoolBuffer(const BufferParentType *buffer) const; @@ -60,6 +62,7 @@ struct AbstractBuffersPool : public SmallBuffersParams { std::unique_ptr mainStorage; std::unique_ptr chunkAllocator; std::vector> chunksToFree; + OnChunkFreeCallback onChunkFreeCallback = nullptr; }; template @@ -81,8 +84,11 @@ class AbstractBuffersAllocator : public SmallBuffersParams { protected: inline bool isSizeWithinThreshold(size_t size) const { return smallBufferThreshold >= size; } + void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector &bufferPoolsVec); void drain(); + void drain(std::vector &bufferPoolsVec); void addNewBufferPool(BuffersPoolType &&bufferPool); + void addNewBufferPool(BuffersPoolType &&bufferPool, std::vector &bufferPoolsVec); std::mutex mutex; std::vector bufferPools; diff --git a/shared/source/utilities/buffer_pool_allocator.inl b/shared/source/utilities/buffer_pool_allocator.inl index bc19971530..3355e65f7c 100644 --- a/shared/source/utilities/buffer_pool_allocator.inl +++ b/shared/source/utilities/buffer_pool_allocator.inl @@ -14,7 +14,8 @@ namespace NEO { template -AbstractBuffersPool::AbstractBuffersPool(MemoryManager *mm) : memoryManager{mm} { +AbstractBuffersPool::AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCb) + : memoryManager{memoryManager}, onChunkFreeCallback{onChunkFreeCb} { static_assert(std::is_base_of_v); } @@ -22,12 +23,13 @@ template AbstractBuffersPool::AbstractBuffersPool(AbstractBuffersPool &&bufferPool) : memoryManager{bufferPool.memoryManager}, mainStorage{std::move(bufferPool.mainStorage)}, - chunkAllocator{std::move(bufferPool.chunkAllocator)} {} + chunkAllocator{std::move(bufferPool.chunkAllocator)}, + onChunkFreeCallback{bufferPool.onChunkFreeCallback} {} template void AbstractBuffersPool::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) { if (this->isPoolBuffer(possiblePoolBuffer)) { - this->chunksToFree.push_back({offset + startingOffset, size}); + this->chunksToFree.push_back({offset, size}); } } @@ -48,6 +50,9 @@ void AbstractBuffersPool::drain() { } for (auto &chunk : this->chunksToFree) { this->chunkAllocator->free(chunk.first, chunk.second); + if (static_cast(this)->onChunkFreeCallback) { + (static_cast(this)->*onChunkFreeCallback)(chunk.first, chunk.second); + } } this->chunksToFree.clear(); } @@ -66,23 +71,38 @@ bool AbstractBuffersAllocator::is template void AbstractBuffersAllocator::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) { + this->tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size, this->bufferPools); +} + +template +void AbstractBuffersAllocator::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector &bufferPoolsVec) { auto lock = std::unique_lock(this->mutex); - for (auto &bufferPool : this->bufferPools) { + for (auto &bufferPool : bufferPoolsVec) { bufferPool.tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size); // NOLINT(clang-analyzer-cplusplus.NewDelete) } } template void AbstractBuffersAllocator::drain() { - for (auto &bufferPool : this->bufferPools) { + this->drain(this->bufferPools); +} + +template +void AbstractBuffersAllocator::drain(std::vector &bufferPoolsVec) { + for (auto &bufferPool : bufferPoolsVec) { bufferPool.drain(); } } template void AbstractBuffersAllocator::addNewBufferPool(BuffersPoolType &&bufferPool) { + this->addNewBufferPool(std::move(bufferPool), this->bufferPools); +} + +template +void AbstractBuffersAllocator::addNewBufferPool(BuffersPoolType &&bufferPool, std::vector &bufferPoolsVec) { if (bufferPool.mainStorage) { - this->bufferPools.push_back(std::move(bufferPool)); + bufferPoolsVec.push_back(std::move(bufferPool)); } } } // namespace NEO \ No newline at end of file diff --git a/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp b/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp index 4220d67216..c78d099c7d 100644 --- a/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp +++ b/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp @@ -34,27 +34,38 @@ struct DummyBuffersPool : public NEO::AbstractBuffersPool; static constexpr auto dummyPtr = 0xdeadbeef0000; - DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset) : BaseType(memoryManager) { + DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset, BaseType::OnChunkFreeCallback onChunkFreeCallback) + : BaseType{memoryManager, onChunkFreeCallback} { dummyAllocations.resize(2); dummyAllocations[0] = reinterpret_cast(poolOffset + dummyPtr); dummyAllocations[1] = nullptr; // makes sure nullptrs don't cause SEGFAULTs } - DummyBuffersPool(NEO::MemoryManager *memoryManager) : DummyBuffersPool(memoryManager, 0x0) {} + DummyBuffersPool(NEO::MemoryManager *memoryManager) : DummyBuffersPool(memoryManager, 0x0, &DummyBuffersPool::onChunkFree) {} BaseType::AllocsVecCRef getAllocationsVector() { return dummyAllocations; } + void onChunkFree(uint64_t offset, size_t size) { + this->freedChunks.push_back({offset, size}); + this->onChunkFreeCalled = true; + } StackVec dummyAllocations; + std::vector> freedChunks{}; + bool onChunkFreeCalled = false; }; struct DummyBuffersAllocator : public NEO::AbstractBuffersAllocator { using BaseType = NEO::AbstractBuffersAllocator; using BaseType::addNewBufferPool; using BaseType::bufferPools; - using BaseType::drain; using BaseType::isSizeWithinThreshold; + + void drainUnderLock() { + auto lock = std::unique_lock(this->mutex); + this->BaseType::drain(); + } }; using NEO::MockExecutionEnvironment; @@ -158,13 +169,13 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenChunkOfMainStorageTrie auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree; EXPECT_EQ(chunksToFree1.size(), 0u); EXPECT_EQ(chunksToFree2.size(), 0u); - auto chunkSize = sizeof(DummyBuffer) / 8; - auto chunkOffset = sizeof(DummyBuffer) / 2; + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto chunkOffset = DummyBuffersPool::chunkAlignment; buffersAllocator.tryFreeFromPoolBuffer(poolStorage2, chunkOffset, chunkSize); EXPECT_EQ(chunksToFree1.size(), 0u); EXPECT_EQ(chunksToFree2.size(), 1u); auto [effectiveChunkOffset, size] = chunksToFree2[0]; - EXPECT_EQ(effectiveChunkOffset, chunkOffset + DummyBuffersPool::startingOffset); + EXPECT_EQ(effectiveChunkOffset, chunkOffset); EXPECT_EQ(size, chunkSize); buffersAllocator.releaseSmallBufferPool(); @@ -193,8 +204,8 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA buffersAllocator.addNewBufferPool(std::move(pool1)); buffersAllocator.addNewBufferPool(std::move(pool2)); - auto chunkSize = sizeof(DummyBuffer) / 16; - auto chunkOffset = sizeof(DummyBuffer) / 2; + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto chunkOffset = DummyBuffersPool::chunkAlignment; for (size_t i = 0; i < 3; i++) { auto exampleOffset = chunkOffset + i * chunkSize * 2; buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize); @@ -203,11 +214,54 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree; auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree; + auto &freedChunks1 = buffersAllocator.bufferPools[0].freedChunks; + auto &freedChunks2 = buffersAllocator.bufferPools[1].freedChunks; EXPECT_EQ(chunksToFree1.size(), 3u); EXPECT_EQ(chunksToFree2.size(), 3u); + EXPECT_EQ(freedChunks1.size(), 0u); + EXPECT_EQ(freedChunks2.size(), 0u); otherMemoryManager->deferAllocInUse = true; - buffersAllocator.drain(); + buffersAllocator.drainUnderLock(); EXPECT_EQ(chunksToFree1.size(), 0u); EXPECT_EQ(chunksToFree2.size(), 3u); -} \ No newline at end of file + ASSERT_EQ(freedChunks1.size(), 3u); + EXPECT_EQ(freedChunks2.size(), 0u); + EXPECT_TRUE(buffersAllocator.bufferPools[0].onChunkFreeCalled); + EXPECT_FALSE(buffersAllocator.bufferPools[1].onChunkFreeCalled); + for (size_t i = 0; i < 3; i++) { + auto expectedOffset = chunkOffset + i * chunkSize * 2; + auto [freedOffset, freedSize] = freedChunks1[i]; + EXPECT_EQ(expectedOffset, freedOffset); + EXPECT_EQ(chunkSize, freedSize); + } +} + +TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnChunkFreeIgnoredIfNotDefined) { + auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr}; + pool1.mainStorage.reset(new DummyBuffer(testVal)); + auto buffer1 = pool1.mainStorage.get(); + pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset, + DummyBuffersPool::aggregatedSmallBuffersPoolSize, + DummyBuffersPool::chunkAlignment, + DummyBuffersPool::smallBufferThreshold}); + auto buffersAllocator = DummyBuffersAllocator{}; + buffersAllocator.addNewBufferPool(std::move(pool1)); + + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto chunkOffset = DummyBuffersPool::chunkAlignment; + for (size_t i = 0; i < 3; i++) { + auto exampleOffset = chunkOffset + i * chunkSize * 2; + buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize); + } + + auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree; + auto &freedChunks1 = buffersAllocator.bufferPools[0].freedChunks; + EXPECT_EQ(chunksToFree1.size(), 3u); + EXPECT_EQ(freedChunks1.size(), 0u); + + buffersAllocator.drainUnderLock(); + EXPECT_EQ(chunksToFree1.size(), 0u); + EXPECT_EQ(freedChunks1.size(), 0u); + EXPECT_FALSE(buffersAllocator.bufferPools[0].onChunkFreeCalled); +}