diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 1c2062c647..32123e84b4 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -504,7 +504,7 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont (isSupportedForSingleDeviceContexts && context->isSingleDeviceContext()); } -Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager) { +Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager, nullptr) { static constexpr cl_mem_flags flags{}; [[maybe_unused]] cl_int errcodeRet{}; Buffer::AdditionalBufferCreateArgs bufferCreateArgs{}; diff --git a/shared/source/utilities/buffer_pool_allocator.h b/shared/source/utilities/buffer_pool_allocator.h index 4291cedfd5..f9061031ce 100644 --- a/shared/source/utilities/buffer_pool_allocator.h +++ b/shared/source/utilities/buffer_pool_allocator.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/helpers/constants.h" +#include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/utilities/stackvec.h" #include @@ -31,7 +32,7 @@ struct SmallBuffersParams { }; template -struct AbstractBuffersPool : public SmallBuffersParams { +struct AbstractBuffersPool : public SmallBuffersParams, public NonCopyableClass { // The prototype of a function allocating the `mainStorage` is not specified. // That would be an unnecessary limitation here - it is completely up to derived class implementation. // Perhaps the allocating function needs to leverage `HeapAllocator::allocate()` and also @@ -43,8 +44,9 @@ struct AbstractBuffersPool : public SmallBuffersParams { using Params::smallBufferThreshold; using Params::startingOffset; using AllocsVecCRef = const StackVec &; + using OnChunkFreeCallback = void (PoolT::*)(uint64_t offset, size_t size); - AbstractBuffersPool(MemoryManager *memoryManager); + AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback); AbstractBuffersPool(AbstractBuffersPool &&bufferPool); void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size); bool isPoolBuffer(const BufferParentType *buffer) const; @@ -60,6 +62,7 @@ struct AbstractBuffersPool : public SmallBuffersParams { std::unique_ptr mainStorage; std::unique_ptr chunkAllocator; std::vector> chunksToFree; + OnChunkFreeCallback onChunkFreeCallback = nullptr; }; template @@ -81,8 +84,11 @@ class AbstractBuffersAllocator : public SmallBuffersParams { protected: inline bool isSizeWithinThreshold(size_t size) const { return smallBufferThreshold >= size; } + void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector &bufferPoolsVec); void drain(); + void drain(std::vector &bufferPoolsVec); void addNewBufferPool(BuffersPoolType &&bufferPool); + void addNewBufferPool(BuffersPoolType &&bufferPool, std::vector &bufferPoolsVec); std::mutex mutex; std::vector bufferPools; diff --git a/shared/source/utilities/buffer_pool_allocator.inl b/shared/source/utilities/buffer_pool_allocator.inl index bc19971530..7f0206792a 100644 --- a/shared/source/utilities/buffer_pool_allocator.inl +++ b/shared/source/utilities/buffer_pool_allocator.inl @@ -14,7 +14,8 @@ namespace NEO { template -AbstractBuffersPool::AbstractBuffersPool(MemoryManager *mm) : memoryManager{mm} { +AbstractBuffersPool::AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCb) + : memoryManager{memoryManager}, onChunkFreeCallback{onChunkFreeCb} { static_assert(std::is_base_of_v); } @@ -22,12 +23,13 @@ template AbstractBuffersPool::AbstractBuffersPool(AbstractBuffersPool &&bufferPool) : memoryManager{bufferPool.memoryManager}, mainStorage{std::move(bufferPool.mainStorage)}, - chunkAllocator{std::move(bufferPool.chunkAllocator)} {} + chunkAllocator{std::move(bufferPool.chunkAllocator)}, + onChunkFreeCallback{bufferPool.onChunkFreeCallback} {} template void AbstractBuffersPool::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) { if (this->isPoolBuffer(possiblePoolBuffer)) { - this->chunksToFree.push_back({offset + startingOffset, size}); + this->chunksToFree.push_back({offset, size}); } } @@ -47,7 +49,10 @@ void AbstractBuffersPool::drain() { } } for (auto &chunk : this->chunksToFree) { - this->chunkAllocator->free(chunk.first, chunk.second); + this->chunkAllocator->free(chunk.first + startingOffset, chunk.second); + if (static_cast(this)->onChunkFreeCallback) { + (static_cast(this)->*onChunkFreeCallback)(chunk.first, chunk.second); + } } this->chunksToFree.clear(); } @@ -66,23 +71,38 @@ bool AbstractBuffersAllocator::is template void AbstractBuffersAllocator::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) { + this->tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size, this->bufferPools); +} + +template +void AbstractBuffersAllocator::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector &bufferPoolsVec) { auto lock = std::unique_lock(this->mutex); - for (auto &bufferPool : this->bufferPools) { + for (auto &bufferPool : bufferPoolsVec) { bufferPool.tryFreeFromPoolBuffer(possiblePoolBuffer, offset, size); // NOLINT(clang-analyzer-cplusplus.NewDelete) } } template void AbstractBuffersAllocator::drain() { - for (auto &bufferPool : this->bufferPools) { + this->drain(this->bufferPools); +} + +template +void AbstractBuffersAllocator::drain(std::vector &bufferPoolsVec) { + for (auto &bufferPool : bufferPoolsVec) { bufferPool.drain(); } } template void AbstractBuffersAllocator::addNewBufferPool(BuffersPoolType &&bufferPool) { + this->addNewBufferPool(std::move(bufferPool), this->bufferPools); +} + +template +void AbstractBuffersAllocator::addNewBufferPool(BuffersPoolType &&bufferPool, std::vector &bufferPoolsVec) { if (bufferPool.mainStorage) { - this->bufferPools.push_back(std::move(bufferPool)); + bufferPoolsVec.push_back(std::move(bufferPool)); } } } // namespace NEO \ No newline at end of file diff --git a/shared/source/utilities/heap_allocator.h b/shared/source/utilities/heap_allocator.h index 6ea9efb853..636411e6b5 100644 --- a/shared/source/utilities/heap_allocator.h +++ b/shared/source/utilities/heap_allocator.h @@ -38,13 +38,15 @@ class HeapAllocator { freedChunksSmall.reserve(50); } + MOCKABLE_VIRTUAL ~HeapAllocator() = default; + uint64_t allocate(size_t &sizeToAllocate) { return allocateWithCustomAlignment(sizeToAllocate, 0u); } uint64_t allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment); - void free(uint64_t ptr, size_t size); + MOCKABLE_VIRTUAL void free(uint64_t ptr, size_t size); uint64_t getLeftSize() const { return availableSize; diff --git a/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp b/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp index 4220d67216..11d8a86c44 100644 --- a/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp +++ b/shared/test/unit_test/utilities/buffer_pool_allocator_tests.cpp @@ -13,7 +13,9 @@ #include "gtest/gtest.h" +#include #include +#include struct DummyBufferPool; @@ -34,27 +36,38 @@ struct DummyBuffersPool : public NEO::AbstractBuffersPool; static constexpr auto dummyPtr = 0xdeadbeef0000; - DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset) : BaseType(memoryManager) { + DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset, BaseType::OnChunkFreeCallback onChunkFreeCallback) + : BaseType{memoryManager, onChunkFreeCallback} { dummyAllocations.resize(2); dummyAllocations[0] = reinterpret_cast(poolOffset + dummyPtr); dummyAllocations[1] = nullptr; // makes sure nullptrs don't cause SEGFAULTs } - DummyBuffersPool(NEO::MemoryManager *memoryManager) : DummyBuffersPool(memoryManager, 0x0) {} + DummyBuffersPool(NEO::MemoryManager *memoryManager) : DummyBuffersPool(memoryManager, 0x0, &DummyBuffersPool::onChunkFree) {} BaseType::AllocsVecCRef getAllocationsVector() { return dummyAllocations; } + void onChunkFree(uint64_t offset, size_t size) { + this->freedChunks.push_back({offset, size}); + this->onChunkFreeCalled = true; + } StackVec dummyAllocations; + std::vector> freedChunks{}; + bool onChunkFreeCalled = false; }; struct DummyBuffersAllocator : public NEO::AbstractBuffersAllocator { using BaseType = NEO::AbstractBuffersAllocator; using BaseType::addNewBufferPool; using BaseType::bufferPools; - using BaseType::drain; using BaseType::isSizeWithinThreshold; + + void drainUnderLock() { + auto lock = std::unique_lock(this->mutex); + this->BaseType::drain(); + } }; using NEO::MockExecutionEnvironment; @@ -158,13 +171,13 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenChunkOfMainStorageTrie auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree; EXPECT_EQ(chunksToFree1.size(), 0u); EXPECT_EQ(chunksToFree2.size(), 0u); - auto chunkSize = sizeof(DummyBuffer) / 8; - auto chunkOffset = sizeof(DummyBuffer) / 2; + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto chunkOffset = DummyBuffersPool::chunkAlignment; buffersAllocator.tryFreeFromPoolBuffer(poolStorage2, chunkOffset, chunkSize); EXPECT_EQ(chunksToFree1.size(), 0u); EXPECT_EQ(chunksToFree2.size(), 1u); auto [effectiveChunkOffset, size] = chunksToFree2[0]; - EXPECT_EQ(effectiveChunkOffset, chunkOffset + DummyBuffersPool::startingOffset); + EXPECT_EQ(effectiveChunkOffset, chunkOffset); EXPECT_EQ(size, chunkSize); buffersAllocator.releaseSmallBufferPool(); @@ -193,8 +206,8 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA buffersAllocator.addNewBufferPool(std::move(pool1)); buffersAllocator.addNewBufferPool(std::move(pool2)); - auto chunkSize = sizeof(DummyBuffer) / 16; - auto chunkOffset = sizeof(DummyBuffer) / 2; + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto chunkOffset = DummyBuffersPool::chunkAlignment; for (size_t i = 0; i < 3; i++) { auto exampleOffset = chunkOffset + i * chunkSize * 2; buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize); @@ -203,11 +216,103 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree; auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree; + auto &freedChunks1 = buffersAllocator.bufferPools[0].freedChunks; + auto &freedChunks2 = buffersAllocator.bufferPools[1].freedChunks; EXPECT_EQ(chunksToFree1.size(), 3u); EXPECT_EQ(chunksToFree2.size(), 3u); + EXPECT_EQ(freedChunks1.size(), 0u); + EXPECT_EQ(freedChunks2.size(), 0u); otherMemoryManager->deferAllocInUse = true; - buffersAllocator.drain(); + buffersAllocator.drainUnderLock(); EXPECT_EQ(chunksToFree1.size(), 0u); EXPECT_EQ(chunksToFree2.size(), 3u); + ASSERT_EQ(freedChunks1.size(), 3u); + EXPECT_EQ(freedChunks2.size(), 0u); + EXPECT_TRUE(buffersAllocator.bufferPools[0].onChunkFreeCalled); + EXPECT_FALSE(buffersAllocator.bufferPools[1].onChunkFreeCalled); + for (size_t i = 0; i < 3; i++) { + auto expectedOffset = chunkOffset + i * chunkSize * 2; + auto [freedOffset, freedSize] = freedChunks1[i]; + EXPECT_EQ(expectedOffset, freedOffset); + EXPECT_EQ(chunkSize, freedSize); + } +} + +TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnChunkFreeIgnoredIfNotDefined) { + auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr}; + pool1.mainStorage.reset(new DummyBuffer(testVal)); + auto buffer1 = pool1.mainStorage.get(); + pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset, + DummyBuffersPool::aggregatedSmallBuffersPoolSize, + DummyBuffersPool::chunkAlignment, + DummyBuffersPool::smallBufferThreshold}); + auto buffersAllocator = DummyBuffersAllocator{}; + buffersAllocator.addNewBufferPool(std::move(pool1)); + + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto chunkOffset = DummyBuffersPool::chunkAlignment; + for (size_t i = 0; i < 3; i++) { + auto exampleOffset = chunkOffset + i * chunkSize * 2; + buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize); + } + + auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree; + auto &freedChunks1 = buffersAllocator.bufferPools[0].freedChunks; + EXPECT_EQ(chunksToFree1.size(), 3u); + EXPECT_EQ(freedChunks1.size(), 0u); + + buffersAllocator.drainUnderLock(); + EXPECT_EQ(chunksToFree1.size(), 0u); + EXPECT_EQ(freedChunks1.size(), 0u); + EXPECT_FALSE(buffersAllocator.bufferPools[0].onChunkFreeCalled); +} + +TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolThenOffsetsPassedToChunkAllocatorAreShiftedProperly) { + + struct ProxyHeapAllocator : public NEO::HeapAllocator { + using BaseType = NEO::HeapAllocator; + + ProxyHeapAllocator(uint64_t address, uint64_t size, size_t allocationAlignment, size_t threshold) + : BaseType{address, size, allocationAlignment, threshold} {} + + ~ProxyHeapAllocator() override { + this->registeredOffsets.clear(); + } + + void free(uint64_t offset, size_t size) override { + this->registeredOffsets.push_back(offset); + this->BaseType::free(offset, size); + } + + std::vector registeredOffsets; + }; + + auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr}; + pool1.mainStorage.reset(new DummyBuffer(testVal)); + auto buffer1 = pool1.mainStorage.get(); + pool1.chunkAllocator.reset(new ProxyHeapAllocator{DummyBuffersPool::startingOffset, + DummyBuffersPool::aggregatedSmallBuffersPoolSize, + DummyBuffersPool::chunkAlignment, + DummyBuffersPool::smallBufferThreshold}); + auto buffersAllocator = DummyBuffersAllocator{}; + buffersAllocator.addNewBufferPool(std::move(pool1)); + + auto chunkSize = DummyBuffersPool::chunkAlignment * 4; + auto exampleOffsets = std::array{0u, 0u, 0u}; + for (size_t i = 0; i < 3; i++) { + exampleOffsets[i] = DummyBuffersPool::startingOffset + i * chunkSize * 2; + buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffsets[i], chunkSize); + } + + auto &chunksToFree1 = buffersAllocator.bufferPools[0].chunksToFree; + EXPECT_EQ(chunksToFree1.size(), 3u); + + buffersAllocator.drainUnderLock(); + EXPECT_EQ(chunksToFree1.size(), 0u); + auto heapAllocator = static_cast(buffersAllocator.bufferPools[0].chunkAllocator.get()); + ASSERT_EQ(heapAllocator->registeredOffsets.size(), 3u); + for (size_t i = 0; i < 3; i++) { + EXPECT_EQ(heapAllocator->registeredOffsets[i], exampleOffsets[i] + DummyBuffersPool::startingOffset); + } } \ No newline at end of file