From c7a971a28f68fd383645b355de7106057d3025d4 Mon Sep 17 00:00:00 2001
From: Maciej Bielski
Date: Thu, 29 Jun 2023 15:15:50 +0000
Subject: [PATCH] feature: add optional onChunkFree callback to AbstractBuffersPool

Instances returned by `getAllocationsVector()` in some cases cannot be
freed (in the `malloc/new` sense) until the `drain()` function invokes
`allocInUse()` on them. Moreover, the `chunksToFree` container operates
on `{offset, size}` pairs, not pointers, so such a pair cannot be used
to release allocations either.

Provide an optional callback that can be implemented by a custom pool
derived from `AbstractBuffersPool`. This callback can be used, for
example, to perform the actual release of an allocation related to the
currently processed chunk.

Additionally, provide pool-independent versions of the `drain()` and
`tryFreeFromPoolBuffer()` functions and keep the previous versions as
defaults (for allocators with a single pool). The new versions allow
reusing the code when an allocator has multiple pools. In both cases
there was no such need so far, but it arose when working on
`IsaBuffersAllocator`. The latter is coming in future commits; the
shared code modifications are extracted here as an independent step.

Related-To: NEO-7788
Signed-off-by: Maciej Bielski
---
 opencl/source/context/context.cpp             |   2 +-
 .../source/utilities/buffer_pool_allocator.h  |  10 +-
 .../utilities/buffer_pool_allocator.inl       |  34 ++++-
 shared/source/utilities/heap_allocator.h      |   4 +-
 .../utilities/buffer_pool_allocator_tests.cpp | 123 ++++++++++++++++--
 5 files changed, 153 insertions(+), 20 deletions(-)

diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp
index 1c2062c647..32123e84b4 100644
--- a/opencl/source/context/context.cpp
+++ b/opencl/source/context/context.cpp
@@ -504,7 +504,7 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont
 (isSupportedForSingleDeviceContexts && context->isSingleDeviceContext());
 }

-Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager) {
+Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager, nullptr) {
 static constexpr cl_mem_flags flags{};
 [[maybe_unused]] cl_int errcodeRet{};
 Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
diff --git a/shared/source/utilities/buffer_pool_allocator.h b/shared/source/utilities/buffer_pool_allocator.h
index 4291cedfd5..f9061031ce 100644
--- a/shared/source/utilities/buffer_pool_allocator.h
+++ b/shared/source/utilities/buffer_pool_allocator.h
@@ -7,6 +7,7 @@
 #pragma once

 #include "shared/source/helpers/constants.h"
+#include "shared/source/helpers/non_copyable_or_moveable.h"
 #include "shared/source/utilities/stackvec.h"

 #include
@@ -31,7 +32,7 @@ struct SmallBuffersParams {
 };

 template
-struct AbstractBuffersPool : public SmallBuffersParams {
+struct AbstractBuffersPool : public SmallBuffersParams, public NonCopyableClass {
 // The prototype of a function allocating the `mainStorage` is not specified.
 // That would be an unnecessary limitation here - it is completely up to derived class implementation.
// Perhaps the allocating function needs to leverage `HeapAllocator::allocate()` and also @@ -43,8 +44,9 @@ struct AbstractBuffersPool : public SmallBuffersParams { using Params::smallBufferThreshold; using Params::startingOffset; using AllocsVecCRef = const StackVec &; + using OnChunkFreeCallback = void (PoolT::*)(uint64_t offset, size_t size); - AbstractBuffersPool(MemoryManager *memoryManager); + AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback); AbstractBuffersPool(AbstractBuffersPool &&bufferPool); void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size); bool isPoolBuffer(const BufferParentType *buffer) const; @@ -60,6 +62,7 @@ struct AbstractBuffersPool : public SmallBuffersParams { std::unique_ptr mainStorage; std::unique_ptr chunkAllocator; std::vector> chunksToFree; + OnChunkFreeCallback onChunkFreeCallback = nullptr; }; template @@ -81,8 +84,11 @@ class AbstractBuffersAllocator : public SmallBuffersParams { protected: inline bool isSizeWithinThreshold(size_t size) const { return smallBufferThreshold >= size; } + void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector &bufferPoolsVec); void drain(); + void drain(std::vector &bufferPoolsVec); void addNewBufferPool(BuffersPoolType &&bufferPool); + void addNewBufferPool(BuffersPoolType &&bufferPool, std::vector &bufferPoolsVec); std::mutex mutex; std::vector bufferPools; diff --git a/shared/source/utilities/buffer_pool_allocator.inl b/shared/source/utilities/buffer_pool_allocator.inl index bc19971530..7f0206792a 100644 --- a/shared/source/utilities/buffer_pool_allocator.inl +++ b/shared/source/utilities/buffer_pool_allocator.inl @@ -14,7 +14,8 @@ namespace NEO { template -AbstractBuffersPool::AbstractBuffersPool(MemoryManager *mm) : memoryManager{mm} { +AbstractBuffersPool::AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCb) + : memoryManager{memoryManager}, onChunkFreeCallback{onChunkFreeCb} { static_assert(std::is_base_of_v); } @@ -22,12 +23,13 @@ template AbstractBuffersPool::AbstractBuffersPool(AbstractBuffersPool &&bufferPool) : memoryManager{bufferPool.memoryManager}, mainStorage{std::move(bufferPool.mainStorage)}, - chunkAllocator{std::move(bufferPool.chunkAllocator)} {} + chunkAllocator{std::move(bufferPool.chunkAllocator)}, + onChunkFreeCallback{bufferPool.onChunkFreeCallback} {} template void AbstractBuffersPool::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) { if (this->isPoolBuffer(possiblePoolBuffer)) { - this->chunksToFree.push_back({offset + startingOffset, size}); + this->chunksToFree.push_back({offset, size}); } } @@ -47,7 +49,10 @@ void AbstractBuffersPool::drain() { } } for (auto &chunk : this->chunksToFree) { - this->chunkAllocator->free(chunk.first, chunk.second); + this->chunkAllocator->free(chunk.first + startingOffset, chunk.second); + if (static_cast(this)->onChunkFreeCallback) { + (static_cast(this)->*onChunkFreeCallback)(chunk.first, chunk.second); + } } this->chunksToFree.clear(); } @@ -66,23 +71,38 @@ bool AbstractBuffersAllocator::is template void AbstractBuffersAllocator::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) { + this->tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size, this->bufferPools); +} + +template +void AbstractBuffersAllocator::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector &bufferPoolsVec) { 
auto lock = std::unique_lock(this->mutex); - for (auto &bufferPool : this->bufferPools) { + for (auto &bufferPool : bufferPoolsVec) { bufferPool.tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size); // NOLINT(clang-analyzer-cplusplus.NewDelete) } } template void AbstractBuffersAllocator::drain() { - for (auto &bufferPool : this->bufferPools) { + this->drain(this->bufferPools); +} + +template +void AbstractBuffersAllocator::drain(std::vector &bufferPoolsVec) { + for (auto &bufferPool : bufferPoolsVec) { bufferPool.drain(); } } template void AbstractBuffersAllocator::addNewBufferPool(BuffersPoolType &&bufferPool) { + this->addNewBufferPool(std::move(bufferPool), this->bufferPools); +} + +template +void AbstractBuffersAllocator::addNewBufferPool(BuffersPoolType &&bufferPool, std::vector &bufferPoolsVec) { if (bufferPool.mainStorage) { - this->bufferPools.push_back(std::move(bufferPool)); + bufferPoolsVec.push_back(std::move(bufferPool)); } } } // namespace NEO \ No newline at end of file diff --git a/shared/source/utilities/heap_allocator.h b/shared/source/utilities/heap_allocator.h index 6ea9efb853..636411e6b5 100644 --- a/shared/source/utilities/heap_allocator.h +++ b/shared/source/utilities/heap_allocator.h @@ -38,13 +38,15 @@ class HeapAllocator { freedChunksSmall.reserve(50); } + MOCKABLE_VIRTUAL ~HeapAllocator() = default; + uint64_t allocate(size_t &sizeToAllocate) { return allocateWithCustomAlignment(sizeToAllocate, 0u); } uint64_t allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment); - void free(uint64_t ptr, size_t size); + MOCKABLE_VIRTUAL void free(uint64_t ptr, size_t size); uint64_t getLeftSize() const { return availableSize; diff --git a/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp b/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp index 4220d67216..11d8a86c44 100644 --- a/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp +++ b/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp @@ -13,7 +13,9 @@ #include "gtest/gtest.h" +#include #include +#include struct DummyBufferPool; @@ -34,27 +36,38 @@ struct DummyBuffersPool : public NEO::AbstractBuffersPool; static constexpr auto dummyPtr = 0xdeadbeef0000; - DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset) : BaseType(memoryManager) { + DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset, BaseType::OnChunkFreeCallback onChunkFreeCallback) + : BaseType{memoryManager, onChunkFreeCallback} { dummyAllocations.resize(2); dummyAllocations[0] = reinterpret_cast(poolOffset + dummyPtr); dummyAllocations[1] = nullptr; // makes sure nullptrs don't cause SEGFAULTs } - DummyBuffersPool(NEO::MemoryManager *memoryManager) : DummyBuffersPool(memoryManager, 0x0) {} + DummyBuffersPool(NEO::MemoryManager *memoryManager) : DummyBuffersPool(memoryManager, 0x0, &DummyBuffersPool::onChunkFree) {} BaseType::AllocsVecCRef getAllocationsVector() { return dummyAllocations; } + void onChunkFree(uint64_t offset, size_t size) { + this->freedChunks.push_back({offset, size}); + this->onChunkFreeCalled = true; + } StackVec dummyAllocations; + std::vector> freedChunks{}; + bool onChunkFreeCalled = false; }; struct DummyBuffersAllocator : public NEO::AbstractBuffersAllocator { using BaseType = NEO::AbstractBuffersAllocator; using BaseType::addNewBufferPool; using BaseType::bufferPools; - using BaseType::drain; using BaseType::isSizeWithinThreshold; + + void drainUnderLock() { + auto lock = std::unique_lock(this->mutex); + this->BaseType::drain(); + 
} }; using NEO::MockExecutionEnvironment; @@ -158,13 +171,13 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenChunkOfMainStorageTrie auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree; EXPECT_EQ(chunksToFree1.size(), 0u); EXPECT_EQ(chunksToFree2.size(), 0u); - auto chunkSize = sizeof(DummyBuffer) / 8; - auto chunkOffset = sizeof(DummyBuffer) / 2; + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto chunkOffset = DummyBuffersPool::chunkAlignment; buffersAllocator.tryFreeFromPoolBuffer(poolStorage2, chunkOffset, chunkSize); EXPECT_EQ(chunksToFree1.size(), 0u); EXPECT_EQ(chunksToFree2.size(), 1u); auto [effectiveChunkOffset, size] = chunksToFree2[0]; - EXPECT_EQ(effectiveChunkOffset, chunkOffset + DummyBuffersPool::startingOffset); + EXPECT_EQ(effectiveChunkOffset, chunkOffset); EXPECT_EQ(size, chunkSize); buffersAllocator.releaseSmallBufferPool(); @@ -193,8 +206,8 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA buffersAllocator.addNewBufferPool(std::move(pool1)); buffersAllocator.addNewBufferPool(std::move(pool2)); - auto chunkSize = sizeof(DummyBuffer) / 16; - auto chunkOffset = sizeof(DummyBuffer) / 2; + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto chunkOffset = DummyBuffersPool::chunkAlignment; for (size_t i = 0; i < 3; i++) { auto exampleOffset = chunkOffset + i * chunkSize * 2; buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize); @@ -203,11 +216,103 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree; auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree; + auto &freedChunks1 = buffersAllocator.bufferPools[0].freedChunks; + auto &freedChunks2 = buffersAllocator.bufferPools[1].freedChunks; EXPECT_EQ(chunksToFree1.size(), 3u); EXPECT_EQ(chunksToFree2.size(), 3u); + EXPECT_EQ(freedChunks1.size(), 0u); + EXPECT_EQ(freedChunks2.size(), 0u); otherMemoryManager->deferAllocInUse = true; - buffersAllocator.drain(); + buffersAllocator.drainUnderLock(); EXPECT_EQ(chunksToFree1.size(), 0u); EXPECT_EQ(chunksToFree2.size(), 3u); + ASSERT_EQ(freedChunks1.size(), 3u); + EXPECT_EQ(freedChunks2.size(), 0u); + EXPECT_TRUE(buffersAllocator.bufferPools[0].onChunkFreeCalled); + EXPECT_FALSE(buffersAllocator.bufferPools[1].onChunkFreeCalled); + for (size_t i = 0; i < 3; i++) { + auto expectedOffset = chunkOffset + i * chunkSize * 2; + auto [freedOffset, freedSize] = freedChunks1[i]; + EXPECT_EQ(expectedOffset, freedOffset); + EXPECT_EQ(chunkSize, freedSize); + } +} + +TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnChunkFreeIgnoredIfNotDefined) { + auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr}; + pool1.mainStorage.reset(new DummyBuffer(testVal)); + auto buffer1 = pool1.mainStorage.get(); + pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset, + DummyBuffersPool::aggregatedSmallBuffersPoolSize, + DummyBuffersPool::chunkAlignment, + DummyBuffersPool::smallBufferThreshold}); + auto buffersAllocator = DummyBuffersAllocator{}; + buffersAllocator.addNewBufferPool(std::move(pool1)); + + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto chunkOffset = DummyBuffersPool::chunkAlignment; + for (size_t i = 0; i < 3; i++) { + auto exampleOffset = chunkOffset + i * chunkSize * 2; + buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize); + } + + auto &chunksToFree1 = 
buffersAllocator.bufferPools[0].chunksToFree; + auto &freedChunks1 = buffersAllocator.bufferPools[0].freedChunks; + EXPECT_EQ(chunksToFree1.size(), 3u); + EXPECT_EQ(freedChunks1.size(), 0u); + + buffersAllocator.drainUnderLock(); + EXPECT_EQ(chunksToFree1.size(), 0u); + EXPECT_EQ(freedChunks1.size(), 0u); + EXPECT_FALSE(buffersAllocator.bufferPools[0].onChunkFreeCalled); +} + +TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolThenOffsetsPassedToChunkAllocatorAreShiftedProperly) { + + struct ProxyHeapAllocator : public NEO::HeapAllocator { + using BaseType = NEO::HeapAllocator; + + ProxyHeapAllocator(uint64_t address, uint64_t size, size_t allocationAlignment, size_t threshold) + : BaseType{address, size, allocationAlignment, threshold} {} + + ~ProxyHeapAllocator() override { + this->registeredOffsets.clear(); + } + + void free(uint64_t offset, size_t size) override { + this->registeredOffsets.push_back(offset); + this->BaseType::free(offset, size); + } + + std::vector registeredOffsets; + }; + + auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr}; + pool1.mainStorage.reset(new DummyBuffer(testVal)); + auto buffer1 = pool1.mainStorage.get(); + pool1.chunkAllocator.reset(new ProxyHeapAllocator{DummyBuffersPool::startingOffset, + DummyBuffersPool::aggregatedSmallBuffersPoolSize, + DummyBuffersPool::chunkAlignment, + DummyBuffersPool::smallBufferThreshold}); + auto buffersAllocator = DummyBuffersAllocator{}; + buffersAllocator.addNewBufferPool(std::move(pool1)); + + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto exampleOffsets = std::array{0u, 0u, 0u}; + for (size_t i = 0; i < 3; i++) { + exampleOffsets[i] = DummyBuffersPool::startingOffset + i * chunkSize * 2; + buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffsets[i], chunkSize); + } + + auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree; + EXPECT_EQ(chunksToFree1.size(), 3u); + + buffersAllocator.drainUnderLock(); + EXPECT_EQ(chunksToFree1.size(), 0u); + auto heapAllocator = static_cast(buffersAllocator.bufferPools[0].chunkAllocator.get()); + ASSERT_EQ(heapAllocator->registeredOffsets.size(), 3u); + for (size_t i = 0; i < 3; i++) { + EXPECT_EQ(heapAllocator->registeredOffsets[i], exampleOffsets[i] + DummyBuffersPool::startingOffset); + } } \ No newline at end of file
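
Usage sketch (illustration only, not part of the applied diff): a derived pool
opts into the new behaviour by passing a member-function pointer to the
`AbstractBuffersPool` constructor; `drain()` then invokes that callback with
the unshifted `{offset, size}` pair of every chunk it returns to
`chunkAllocator`, exactly as `DummyBuffersPool::onChunkFree` exercises in the
tests above. The names `IsaBuffersPool`, `IsaBuffer` and
`releaseChunkAllocation` below, as well as the exact template arguments, are
assumptions made for this example and do not come from this series.

#include "shared/source/utilities/buffer_pool_allocator.h"

#include <cstddef>
#include <cstdint>
#include <type_traits>

struct IsaBuffer {}; // hypothetical type of the pool's mainStorage

struct IsaBuffersPool : public NEO::AbstractBuffersPool<IsaBuffersPool, IsaBuffer> {
    using BaseType = NEO::AbstractBuffersPool<IsaBuffersPool, IsaBuffer>;

    // Register the optional callback; passing nullptr instead (as
    // Context::BufferPool does above) keeps the previous drain() behaviour.
    explicit IsaBuffersPool(NEO::MemoryManager *memoryManager)
        : BaseType{memoryManager, &IsaBuffersPool::releaseChunkAllocation} {}

    // Needed by drain() to check whether the pool's allocations are still in use.
    BaseType::AllocsVecCRef getAllocationsVector() { return allocations; }

    // Invoked by drain() for every chunk it hands back to chunkAllocator, with
    // the same {offset, size} pair that was stored in chunksToFree (i.e. before
    // startingOffset is added), so the pool can release whatever backs it.
    void releaseChunkAllocation(uint64_t offset, size_t size) {
        // hypothetical per-chunk cleanup for [offset, offset + size)
    }

    // Deduce the container type from AllocsVecCRef instead of guessing its
    // template arguments here.
    std::remove_const_t<std::remove_reference_t<BaseType::AllocsVecCRef>> allocations;
};

Keeping the callback as a plain member-function pointer (rather than, say, a
`std::function`) lets the move constructor simply copy it, as the patch does,
without introducing any extra ownership concerns into the pool.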