diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 043a0244de..87e8908b7a 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -595,6 +595,10 @@ Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties, void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) { this->context = context; + const auto &device = context->getDevice(0)->getDevice(); + const auto bitfield = device.getDeviceBitfield(); + const auto deviceMemory = device.getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong())); + this->maxPoolCount = this->calculateMaxPoolCount(deviceMemory, 2); this->addNewBufferPool(Context::BufferPool{this->context}); } @@ -624,7 +628,7 @@ Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryPropert return bufferFromPool; } - if (this->bufferPools.size() < BufferPoolAllocator::maxPoolCount) { + if (this->bufferPools.size() < this->maxPoolCount) { this->addNewBufferPool(BufferPool{this->context}); return this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet); } diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h index dd3172bd7e..ab13993e72 100644 --- a/opencl/source/context/context.h +++ b/opencl/source/context/context.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -86,8 +86,13 @@ class Context : public BaseObject<_cl_context> { size_t requestedSize, void *hostPtr, cl_int &errcodeRet); + static inline size_t calculateMaxPoolCount(uint64_t totalMemory, size_t percentOfMemory) { + const auto maxPoolCount = static_cast<size_t>(totalMemory * (percentOfMemory / 100.0) / BufferPoolAllocator::aggregatedSmallBuffersPoolSize); + return maxPoolCount ? 
maxPoolCount : 1u; + } Context *context{nullptr}; + size_t maxPoolCount{1u}; }; static const cl_ulong objectMagic = 0xA4234321DC002130LL; diff --git a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp index c1c753bd36..d502d82077 100644 --- a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp @@ -63,6 +63,7 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test { EXPECT_EQ(retVal, CL_SUCCESS); this->setAllocationToFail(false); this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->smallBufferPoolAllocator); + this->poolAllocator->maxPoolCount = 1u; } }; @@ -191,6 +192,13 @@ TEST_F(AggregatedSmallBuffersDisabledTest, givenAggregatedSmallBuffersDisabledWh using AggregatedSmallBuffersEnabledTest = AggregatedSmallBuffersTestTemplate<1>; +TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenCalculateMaxPoolCountCalledThenCorrectValueIsReturned) { + EXPECT_EQ(81u, MockBufferPoolAllocator::calculateMaxPoolCount(8 * MemoryConstants::gigaByte, 2)); + EXPECT_EQ(204u, MockBufferPoolAllocator::calculateMaxPoolCount(8 * MemoryConstants::gigaByte, 5)); + EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(128 * MemoryConstants::megaByte, 2)); + EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(64 * MemoryConstants::megaByte, 2)); +} + TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenMakeDeviceBufferLockable) { EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get())); EXPECT_EQ(1u, poolAllocator->bufferPools.size()); @@ -310,6 +318,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB } TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreNotInUseAndNoBuffersFreedThenNewPoolIsCreated) { + this->poolAllocator->maxPoolCount = 2u; 
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get())); EXPECT_EQ(1u, poolAllocator->bufferPools.size()); EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get()); @@ -334,6 +343,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB } TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreInUseThenNewPoolIsCreated) { + this->poolAllocator->maxPoolCount = 2u; EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get())); EXPECT_EQ(1u, poolAllocator->bufferPools.size()); EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get()); @@ -358,18 +368,19 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB } TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreInUseAndPoolLimitIsReachedThenNewPoolIsNotCreated) { + this->poolAllocator->maxPoolCount = 2u; EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get())); EXPECT_EQ(1u, poolAllocator->bufferPools.size()); EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get()); - constexpr auto buffersToCreate = (PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold) * PoolAllocator::maxPoolCount; + const std::vector<std::unique_ptr<Buffer>>::size_type buffersToCreate = (PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold) * poolAllocator->maxPoolCount; std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate); for (auto i = 0u; i < buffersToCreate; ++i) { buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); } - EXPECT_EQ(PoolAllocator::maxPoolCount, poolAllocator->bufferPools.size()); - for (auto i = 0u; i < PoolAllocator::maxPoolCount; ++i) { + EXPECT_EQ(poolAllocator->maxPoolCount, poolAllocator->bufferPools.size()); + for (auto i = 0u; i < poolAllocator->maxPoolCount; ++i) { 
EXPECT_EQ(PoolAllocator::aggregatedSmallBuffersPoolSize, poolAllocator->bufferPools[i].chunkAllocator->getUsedSize()); } EXPECT_EQ(1u, mockMemoryManager->allocInUseCalled); @@ -379,7 +390,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB std::unique_ptr<Buffer> bufferAfterExhaustMustFail(Buffer::create(context.get(), flags, size, hostPtr, retVal)); EXPECT_EQ(nullptr, bufferAfterExhaustMustFail.get()); EXPECT_NE(retVal, CL_SUCCESS); - EXPECT_EQ(PoolAllocator::maxPoolCount, poolAllocator->bufferPools.size()); + EXPECT_EQ(poolAllocator->maxPoolCount, poolAllocator->bufferPools.size()); EXPECT_EQ(3u, mockMemoryManager->allocInUseCalled); } diff --git a/opencl/test/unit_test/mocks/mock_context.h b/opencl/test/unit_test/mocks/mock_context.h index b151d5c36a..910b7093df 100644 --- a/opencl/test/unit_test/mocks/mock_context.h +++ b/opencl/test/unit_test/mocks/mock_context.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -56,7 +56,9 @@ class MockContext : public Context { class MockBufferPoolAllocator : public BufferPoolAllocator { public: using BufferPoolAllocator::bufferPools; + using BufferPoolAllocator::calculateMaxPoolCount; using BufferPoolAllocator::isAggregatedSmallBuffersEnabled; + using BufferPoolAllocator::maxPoolCount; }; private: diff --git a/shared/source/utilities/buffer_pool_allocator.h b/shared/source/utilities/buffer_pool_allocator.h index 259871b466..ecd5551b76 100644 --- a/shared/source/utilities/buffer_pool_allocator.h +++ b/shared/source/utilities/buffer_pool_allocator.h @@ -29,7 +29,6 @@ struct SmallBuffersParams { static constexpr auto smallBufferThreshold = 1 * MemoryConstants::megaByte; static constexpr auto chunkAlignment = MemoryConstants::pageSize64k; static constexpr auto startingOffset = chunkAlignment; - static constexpr auto maxPoolCount = 2u; }; template @@ -42,7 +41,6 @@ struct AbstractBuffersPool : public 
SmallBuffersParams, public NonCopyabl using Params = SmallBuffersParams; using Params::aggregatedSmallBuffersPoolSize; using Params::chunkAlignment; - using Params::maxPoolCount; using Params::smallBufferThreshold; using Params::startingOffset; using AllocsVecCRef = const StackVec &; @@ -77,7 +75,6 @@ class AbstractBuffersAllocator : public SmallBuffersParams { using Params = SmallBuffersParams; using Params::aggregatedSmallBuffersPoolSize; using Params::chunkAlignment; - using Params::maxPoolCount; using Params::smallBufferThreshold; using Params::startingOffset; static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");