fix: configure small buffers params based on productHelper

Refactor buffer pool allocator to support configurable
SmallBuffersParams based on product helper capabilities.

This patch enables setting custom pool
parameters instead of using fixed static values.

For devices with 2MB local memory alignment enabled
(is2MBLocalMemAlignmentEnabled), a larger pool configuration is used:
- Pool size: 16MB (up from 2MB)
- Threshold: 2MB (up from 1MB)
- Alignment: 64KB (unchanged)
- Starting offset: 64KB (unchanged)

This improves memory utilization for devices supporting larger memory
alignments while maintaining the original parameters for other devices.
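
As a worked example of the effect on the pool-count budget (matching the
updated unit-test expectations in the diff below): the allocator caps the
number of pools at roughly totalMemory * percent / poolSize, so for 8 GB of
device memory and a 2% budget this gives 8192 MB * 0.02 / 2 MB = 81 pools
with the default 2 MB pool, but 8192 MB * 0.02 / 16 MB = 10 pools with the
16 MB pool (204 vs 25 pools at a 5% budget).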

Key changes:
- Moved params from static template to instance member
- Added SmallBuffersParams struct with default/large configs
- Added constructor and setter methods for params configuration
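
The selection logic itself is small; a minimal, self-contained sketch follows.
The struct layout and the getDefaultParams/getLargePagesParams/
getPreferredBufferPoolParams names mirror the diff below, while megaByte,
pageSize64k and HypotheticalProductHelper are stand-ins for the NEO constants
and product helper used in the real code:

    #include <cstddef>

    constexpr size_t megaByte = 1024 * 1024;  // stand-in for MemoryConstants::megaByte
    constexpr size_t pageSize64k = 64 * 1024; // stand-in for MemoryConstants::pageSize64k

    struct SmallBuffersParams {
        size_t aggregatedSmallBuffersPoolSize{0};
        size_t smallBufferThreshold{0};
        size_t chunkAlignment{0};
        size_t startingOffset{0};

        static SmallBuffersParams getDefaultParams() {
            return {2 * megaByte, 1 * megaByte, pageSize64k, pageSize64k};
        }
        static SmallBuffersParams getLargePagesParams() {
            return {16 * megaByte, 2 * megaByte, pageSize64k, pageSize64k};
        }
        // Pick the pool configuration from the product capability flag.
        template <typename ProductHelperT>
        static SmallBuffersParams getPreferredBufferPoolParams(const ProductHelperT &productHelper) {
            return productHelper.is2MBLocalMemAlignmentEnabled() ? getLargePagesParams() : getDefaultParams();
        }
    };

    struct HypotheticalProductHelper { // illustrative stand-in, not a NEO type
        bool enable2MBAlignment{false};
        bool is2MBLocalMemAlignmentEnabled() const { return enable2MBAlignment; }
    };

    int main() {
        HypotheticalProductHelper helper{true};
        auto params = SmallBuffersParams::getPreferredBufferPoolParams(helper);
        // With the capability enabled the pool grows to 16MB and the threshold to 2MB.
        return params.aggregatedSmallBuffersPoolSize == 16 * megaByte ? 0 : 1;
    }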

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
Fabian Zwoliński authored 2025-02-06 23:56:09 +00:00; committed by Compute-Runtime-Automation
commit 1eb8e0efd9 (parent ef961df421)
9 changed files with 298 additions and 116 deletions

View File

@@ -17,6 +17,7 @@
 #include "shared/source/kernel/kernel_properties.h"
 #include "shared/source/memory_manager/memory_manager.h"
 #include "shared/source/os_interface/driver_info.h"
+#include "shared/source/utilities/buffer_pool_allocator.inl"
 #include "opencl/source/cl_device/cl_device.h"
 #include "opencl/source/context/context.h"
@@ -480,7 +481,8 @@ void ClDevice::initializeMaxPoolCount() {
     auto &device = getDevice();
     const auto bitfield = device.getDeviceBitfield();
     const auto deviceMemory = device.getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong()));
-    const auto maxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(deviceMemory, 2);
+    const auto preferredBufferPoolParams = SmallBuffersParams::getPreferredBufferPoolParams(device.getProductHelper());
+    const auto maxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(preferredBufferPoolParams, deviceMemory, 2);
     device.updateMaxPoolCount(maxPoolCount);
 }

View File

@@ -310,6 +310,8 @@ bool Context::createImpl(const cl_context_properties *properties,
         if (...) {
            auto requiresWritableStaging = device->getDefaultEngine().commandStreamReceiver->getType() != CommandStreamReceiverType::hardware;
            this->stagingBufferManager = std::make_unique<StagingBufferManager>(svmAllocsManager, rootDeviceIndices, deviceBitfields, requiresWritableStaging);
        }
+        smallBufferPoolAllocator.setParams(SmallBuffersParams::getPreferredBufferPoolParams(device->getProductHelper()));
     }
     return true;
@@ -584,7 +586,9 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont
            (isSupportedForSingleDeviceContexts && context->isSingleDeviceContext());
 }
-Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager, nullptr) {
+Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager,
+                                                             nullptr,
+                                                             SmallBuffersParams::getPreferredBufferPoolParams(context->getDevice(0)->getDevice().getProductHelper())) {
     static constexpr cl_mem_flags flags = CL_MEM_UNCOMPRESSED_HINT_INTEL;
     [[maybe_unused]] cl_int errcodeRet{};
     Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
@@ -592,14 +596,14 @@ Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryMana
     bufferCreateArgs.makeAllocationLockable = true;
     this->mainStorage.reset(Buffer::create(context,
                                            flags,
-                                           BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
+                                           context->getBufferPoolAllocator().getParams().aggregatedSmallBuffersPoolSize,
                                            nullptr,
                                            bufferCreateArgs,
                                            errcodeRet));
     if (this->mainStorage) {
-        this->chunkAllocator.reset(new HeapAllocator(BufferPool::startingOffset,
-                                                     BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
-                                                     BufferPoolAllocator::chunkAlignment));
+        this->chunkAllocator.reset(new HeapAllocator(params.startingOffset,
+                                                     context->getBufferPoolAllocator().getParams().aggregatedSmallBuffersPoolSize,
+                                                     context->getBufferPoolAllocator().getParams().chunkAlignment));
         context->decRefInternal();
     }
 }
@@ -620,7 +624,7 @@ Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties,
     if (bufferRegion.origin == 0) {
         return nullptr;
     }
-    bufferRegion.origin -= BufferPool::startingOffset;
+    bufferRegion.origin -= params.startingOffset;
     bufferRegion.size = requestedSize;
     auto bufferFromPool = this->mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
     bufferFromPool->createFunction = this->mainStorage->createFunction;

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,6 +37,7 @@ class Kernel;
 class MemoryManager;
 class SharingFunctions;
 class SVMAllocsManager;
+class ProductHelper;
 class Program;
 class Platform;
 class TagAllocatorBase;
@@ -69,7 +70,11 @@ class Context : public BaseObject<_cl_context> {
     };
     class BufferPoolAllocator : public AbstractBuffersAllocator<BufferPool, Buffer, MemObj> {
+        using BaseType = AbstractBuffersAllocator<BufferPool, Buffer, MemObj>;
       public:
+        BufferPoolAllocator() = default;
         bool isAggregatedSmallBuffersEnabled(Context *context) const;
        void initAggregatedSmallBuffers(Context *context);
        Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties,
@@ -79,8 +84,8 @@ class Context : public BaseObject<_cl_context> {
                                       void *hostPtr,
                                       cl_int &errcodeRet);
         bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const;
-        static inline uint32_t calculateMaxPoolCount(uint64_t totalMemory, size_t percentOfMemory) {
-            const auto maxPoolCount = static_cast<uint32_t>(totalMemory * (percentOfMemory / 100.0) / BufferPoolAllocator::aggregatedSmallBuffersPoolSize);
+        static inline uint32_t calculateMaxPoolCount(SmallBuffersParams smallBuffersParams, uint64_t totalMemory, size_t percentOfMemory) {
+            const auto maxPoolCount = static_cast<uint32_t>(totalMemory * (percentOfMemory / 100.0) / (smallBuffersParams.aggregatedSmallBuffersPoolSize));
             return maxPoolCount ? maxPoolCount : 1u;
         }

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022-2024 Intel Corporation
+ * Copyright (C) 2022-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -13,6 +13,7 @@
 #include "shared/test/common/helpers/raii_product_helper.h"
 #include "shared/test/common/mocks/mock_ail_configuration.h"
 #include "shared/test/common/mocks/mock_memory_manager.h"
+#include "shared/test/common/mocks/mock_product_helper.h"
 #include "shared/test/common/test_macros/hw_test.h"
 #include "shared/test/common/test_macros/test.h"
@@ -46,7 +47,7 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
     MockMemoryManager *mockMemoryManager;
     cl_mem_flags flags{};
-    size_t size = PoolAllocator::smallBufferThreshold;
+    size_t size{0u};
     void *hostPtr = nullptr;
     cl_int retVal = CL_SUCCESS;
     static const auto rootDeviceIndex = 1u;
@@ -63,7 +64,7 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
         this->mockNeoDevice = static_cast<MockDevice *>(&this->device->getDevice());
         const auto bitfield = mockNeoDevice->getDeviceBitfield();
         const auto deviceMemory = mockNeoDevice->getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong()));
-        const auto expectedMaxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(deviceMemory, 2);
+        const auto expectedMaxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(SmallBuffersParams::getPreferredBufferPoolParams(this->device->getProductHelper()), deviceMemory, 2);
         EXPECT_EQ(expectedMaxPoolCount, mockNeoDevice->maxBufferPoolCount);
         this->mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
         this->mockMemoryManager->localMemorySupported[rootDeviceIndex] = true;
@@ -74,8 +75,9 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
         this->context->initializeUsmAllocationPools();
         EXPECT_EQ(retVal, CL_SUCCESS);
         this->setAllocationToFail(false);
-        this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->smallBufferPoolAllocator);
+        this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
         this->mockNeoDevice->updateMaxPoolCount(1u);
+        size = this->poolAllocator->params.smallBufferThreshold;
     }
 };
@@ -181,10 +183,17 @@ TEST_F(AggregatedSmallBuffersDisabledTest, givenAggregatedSmallBuffersDisabledWh
 using AggregatedSmallBuffersEnabledTest = AggregatedSmallBuffersTestTemplate<1>;
 TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenCalculateMaxPoolCountCalledThenCorrectValueIsReturned) {
-    EXPECT_EQ(81u, MockBufferPoolAllocator::calculateMaxPoolCount(8 * MemoryConstants::gigaByte, 2));
-    EXPECT_EQ(204u, MockBufferPoolAllocator::calculateMaxPoolCount(8 * MemoryConstants::gigaByte, 5));
-    EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(128 * MemoryConstants::megaByte, 2));
-    EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(64 * MemoryConstants::megaByte, 2));
+    if (device->getProductHelper().is2MBLocalMemAlignmentEnabled()) {
+        EXPECT_EQ(10u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 8 * MemoryConstants::gigaByte, 2));
+        EXPECT_EQ(25u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 8 * MemoryConstants::gigaByte, 5));
+        EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 128 * MemoryConstants::megaByte, 2));
+        EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 64 * MemoryConstants::megaByte, 2));
+    } else {
+        EXPECT_EQ(81u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 8 * MemoryConstants::gigaByte, 2));
+        EXPECT_EQ(204u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 8 * MemoryConstants::gigaByte, 5));
+        EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 128 * MemoryConstants::megaByte, 2));
+        EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 64 * MemoryConstants::megaByte, 2));
+    }
 }
 TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenMakeDeviceBufferLockableAndNotCompressed) {
@@ -200,7 +209,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
     EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    size = PoolAllocator::smallBufferThreshold + 1;
+    size = poolAllocator->params.smallBufferThreshold + 1;
     std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
     EXPECT_NE(nullptr, buffer);
     EXPECT_EQ(CL_SUCCESS, retVal);
@@ -211,7 +220,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndF
     EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    size = PoolAllocator::smallBufferThreshold;
+    size = poolAllocator->params.smallBufferThreshold;
     flags |= CL_MEM_COMPRESSED_HINT_INTEL;
     std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
     EXPECT_NE(nullptr, buffer);
@@ -224,17 +233,17 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
     EXPECT_EQ(0u, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
-    size = PoolAllocator::chunkAlignment / 2;
+    size = poolAllocator->params.chunkAlignment / 2;
     std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
     EXPECT_NE(buffer, nullptr);
     EXPECT_EQ(CL_SUCCESS, retVal);
     EXPECT_EQ(size, buffer->getSize());
-    EXPECT_EQ(PoolAllocator::chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
+    EXPECT_EQ(poolAllocator->params.chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
     auto mockBuffer = static_cast<MockBuffer *>(buffer.get());
-    EXPECT_EQ(PoolAllocator::chunkAlignment, mockBuffer->sizeInPoolAllocator);
+    EXPECT_EQ(poolAllocator->params.chunkAlignment, mockBuffer->sizeInPoolAllocator);
     buffer.reset(nullptr);
-    EXPECT_EQ(PoolAllocator::chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
+    EXPECT_EQ(poolAllocator->params.chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
 }
 TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledThenUsePool) {
@@ -250,7 +259,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
     auto mockBuffer = static_cast<MockBuffer *>(buffer.get());
     EXPECT_GE(mockBuffer->getSize(), size);
     EXPECT_GE(mockBuffer->getOffset(), 0u);
-    EXPECT_LE(mockBuffer->getOffset(), PoolAllocator::aggregatedSmallBuffersPoolSize - size);
+    EXPECT_LE(mockBuffer->getOffset(), poolAllocator->params.aggregatedSmallBuffersPoolSize - size);
     EXPECT_TRUE(mockBuffer->isSubBuffer());
     EXPECT_EQ(mockBuffer->associatedMemObject, poolAllocator->bufferPools[0].mainStorage.get());
@@ -285,7 +294,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
+    auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
     std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
     for (auto i = 0u; i < buffersToCreate; i++) {
         buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@@ -312,7 +321,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
+    auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
     std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
     for (auto i = 0u; i < buffersToCreate; i++) {
         buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@@ -337,7 +346,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
+    auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
     std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
     for (auto i = 0u; i < buffersToCreate; i++) {
         buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@@ -362,7 +371,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    const std::vector<std::unique_ptr<Buffer>>::size_type buffersToCreate = (PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold) * mockNeoDevice->maxBufferPoolCount;
+    const std::vector<std::unique_ptr<Buffer>>::size_type buffersToCreate = (poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold) * mockNeoDevice->maxBufferPoolCount;
     std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
     for (auto i = 0u; i < buffersToCreate; ++i) {
         buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@@ -370,7 +379,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
     }
     EXPECT_EQ(mockNeoDevice->maxBufferPoolCount, poolAllocator->bufferPools.size());
     for (auto i = 0u; i < mockNeoDevice->maxBufferPoolCount; ++i) {
-        EXPECT_EQ(PoolAllocator::aggregatedSmallBuffersPoolSize, poolAllocator->bufferPools[i].chunkAllocator->getUsedSize());
+        EXPECT_EQ(poolAllocator->params.aggregatedSmallBuffersPoolSize, poolAllocator->bufferPools[i].chunkAllocator->getUsedSize());
     }
     EXPECT_EQ(1u, mockMemoryManager->allocInUseCalled);
     mockMemoryManager->deferAllocInUse = true;
@@ -403,7 +412,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenCopyHostPointerWhenCreatingBuffer
     context->setSpecialQueue(commandQueue, rootDeviceIndex);
     flags = CL_MEM_COPY_HOST_PTR;
-    auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[PoolAllocator::smallBufferThreshold]);
+    auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[poolAllocator->params.smallBufferThreshold]);
     hostPtr = dataToCopy.get();
     EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
@@ -427,7 +436,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
+    auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
     std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
     for (auto i = 0u; i < buffersToCreate; i++) {
         buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@@ -452,7 +461,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
     EXPECT_GE(mockBuffer->getSize(), size);
     EXPECT_GE(mockBuffer->getOffset(), 0u);
-    EXPECT_LE(mockBuffer->getOffset(), PoolAllocator::aggregatedSmallBuffersPoolSize - size);
+    EXPECT_LE(mockBuffer->getOffset(), poolAllocator->params.aggregatedSmallBuffersPoolSize - size);
     subBuffersBounds[i] = Bounds{mockBuffer->getOffset(), mockBuffer->getOffset() + mockBuffer->getSize()};
 }
@@ -479,7 +488,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndM
     this->setAllocationToFail(false);
     EXPECT_EQ(2u, mockNeoDevice->bufferPoolCount.load());
-    constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
+    auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
     std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
     for (auto i = 0u; i < buffersToCreate; i++) {
         buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@@ -493,14 +502,18 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndM
     EXPECT_FALSE(bufferAfterExhaustMustSucceed->isSubBuffer());
     mockNeoDevice->callBaseGetGlobalMemorySize = false;
-    mockNeoDevice->getGlobalMemorySizeReturn = static_cast<uint64_t>(2 * 2 * MemoryConstants::megaByte / 0.02);
+    if (mockNeoDevice->getProductHelper().is2MBLocalMemAlignmentEnabled()) {
+        mockNeoDevice->getGlobalMemorySizeReturn = static_cast<uint64_t>(16 * 2 * MemoryConstants::megaByte / 0.02);
+    } else {
+        mockNeoDevice->getGlobalMemorySizeReturn = static_cast<uint64_t>(2 * 2 * MemoryConstants::megaByte / 0.02);
+    }
     const auto bitfield = mockNeoDevice->getDeviceBitfield();
     const auto deviceMemory = mockNeoDevice->getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong()));
-    EXPECT_EQ(2u, MockBufferPoolAllocator::calculateMaxPoolCount(deviceMemory, 2));
+    EXPECT_EQ(2u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), deviceMemory, 2));
     std::unique_ptr<MockContext> thirdContext;
     thirdContext.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
     EXPECT_EQ(retVal, CL_SUCCESS);
-    MockBufferPoolAllocator *thirdPoolAllocator = static_cast<MockBufferPoolAllocator *>(&thirdContext->smallBufferPoolAllocator);
+    MockBufferPoolAllocator *thirdPoolAllocator = static_cast<MockBufferPoolAllocator *>(&thirdContext->getBufferPoolAllocator());
     EXPECT_EQ(0u, thirdPoolAllocator->bufferPools.size());
     EXPECT_EQ(2u, mockNeoDevice->bufferPoolCount.load());
@@ -560,6 +573,51 @@ TEST_F(AggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffe
     EXPECT_EQ(0u, output.size());
 }
+TEST_F(AggregatedSmallBuffersEnabledTestDoNotRunSetup, givenProductWithAndWithout2MBLocalMemAlignmentWhenCreatingContextThenBufferPoolAllocatorHasCorrectParams) {
+    auto compareSmallBuffersParams = [](const NEO::SmallBuffersParams &first, const NEO::SmallBuffersParams &second) {
+        return first.aggregatedSmallBuffersPoolSize == second.aggregatedSmallBuffersPoolSize &&
+               first.smallBufferThreshold == second.smallBufferThreshold &&
+               first.chunkAlignment == second.chunkAlignment &&
+               first.startingOffset == second.startingOffset;
+    };
+    debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(1);
+    debugManager.flags.EnableDeviceUsmAllocationPool.set(0);
+    debugManager.flags.EnableHostUsmAllocationPool.set(0);
+    debugManager.flags.RenderCompressedBuffersEnabled.set(1);
+    this->deviceFactory = std::make_unique<UltClDeviceFactory>(2, 0);
+    this->device = deviceFactory->rootDevices[rootDeviceIndex];
+    this->mockNeoDevice = static_cast<MockDevice *>(&this->device->getDevice());
+    auto mockProductHelper = new MockProductHelper;
+    mockNeoDevice->getRootDeviceEnvironmentRef().productHelper.reset(mockProductHelper);
+    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
+    auto &productHelper = mockNeoDevice->getRootDeviceEnvironment().getProductHelper();
+    EXPECT_FALSE(productHelper.is2MBLocalMemAlignmentEnabled());
+    cl_device_id devices[] = {device};
+    this->context.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
+    auto &bufferPoolAllocator = context->getBufferPoolAllocator();
+    auto bufferPoolAllocatorParams = bufferPoolAllocator.getParams();
+    auto preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(productHelper);
+    EXPECT_TRUE(compareSmallBuffersParams(bufferPoolAllocatorParams, preferredParams));
+    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
+    EXPECT_TRUE(productHelper.is2MBLocalMemAlignmentEnabled());
+    std::unique_ptr<MockContext> secondContext;
+    secondContext.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
+    auto &bufferPoolAllocator2 = secondContext->getBufferPoolAllocator();
+    auto bufferPoolAllocatorParams2 = bufferPoolAllocator2.getParams();
+    preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(productHelper);
+    EXPECT_TRUE(compareSmallBuffersParams(bufferPoolAllocatorParams2, preferredParams));
+}
 template <int32_t poolBufferFlag = -1>
 class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
     void SetUp() override {
@@ -571,13 +629,14 @@ class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
         EXPECT_EQ(retVal, CL_SUCCESS);
         context = castToObject<Context>(clContext);
         poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
+        size = poolAllocator->params.smallBufferThreshold;
     }
   public:
     std::unique_ptr<UltClDeviceFactory> deviceFactory;
     cl_mem_flags flags = CL_MEM_READ_WRITE;
-    size_t size = PoolAllocator::smallBufferThreshold;
+    size_t size{0u};
     cl_int retVal = CL_SUCCESS;
     void *hostPtr{nullptr};
     cl_context clContext{nullptr};
@@ -598,7 +657,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenNoBufferCreatedWhenReleasingCo
 }
 TEST_F(AggregatedSmallBuffersEnabledApiTest, givenNotSmallBufferWhenCreatingBufferThenDoNotUsePool) {
-    size = PoolAllocator::smallBufferThreshold + 1;
+    size = poolAllocator->params.smallBufferThreshold + 1;
     cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
     EXPECT_EQ(retVal, CL_SUCCESS);
     EXPECT_NE(buffer, nullptr);
@@ -634,7 +693,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferT
 TEST_F(AggregatedSmallBuffersEnabledApiTest, givenUseHostPointerWhenCreatingBufferThenDoNotUsePool) {
     flags |= CL_MEM_USE_HOST_PTR;
-    auto hostData = std::unique_ptr<unsigned char[]>(new unsigned char[PoolAllocator::smallBufferThreshold]);
+    auto hostData = std::unique_ptr<unsigned char[]>(new unsigned char[poolAllocator->params.smallBufferThreshold]);
     hostPtr = hostData.get();
     cl_mem smallBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
     EXPECT_EQ(retVal, CL_SUCCESS);
@@ -697,7 +756,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenBufferFromPoolWhenGetMemObjInf
 TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSubBufferNotFromPoolAndAggregatedSmallBuffersEnabledWhenReleaseMemObjectCalledThenItSucceeds) {
     DebugManagerStateRestore restore;
     debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0);
-    size_t size = PoolAllocator::smallBufferThreshold + 1;
+    size_t size = poolAllocator->params.smallBufferThreshold + 1;
     cl_mem largeBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
     EXPECT_EQ(retVal, CL_SUCCESS);
@@ -721,7 +780,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSubBufferNotFromPoolAndAggrega
 TEST_F(AggregatedSmallBuffersEnabledApiTest, givenCopyHostPointerWhenCreatingBufferThenUsePoolAndCopyHostPointer) {
     flags |= CL_MEM_COPY_HOST_PTR;
-    auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[PoolAllocator::smallBufferThreshold]);
+    auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[poolAllocator->params.smallBufferThreshold]);
     dataToCopy[0] = 123;
     hostPtr = dataToCopy.get();
     auto contextRefCountBefore = context->getRefInternalCount();
@@ -849,7 +908,7 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB
     EXPECT_EQ(subBuffer, nullptr);
     region.size = 1;
-    region.origin = PoolAllocator::smallBufferThreshold;
+    region.origin = poolAllocator->params.smallBufferThreshold;
     subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &retVal);
     EXPECT_EQ(retVal, CL_INVALID_VALUE);
     EXPECT_EQ(subBuffer, nullptr);
@@ -885,4 +944,4 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenSubBufferFromBufferFromPoolW
     EXPECT_EQ(clReleaseContext(context), CL_SUCCESS);
 }
 } // namespace Ult

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,7 @@ class MockContext : public Context {
         using BufferPoolAllocator::bufferPools;
         using BufferPoolAllocator::calculateMaxPoolCount;
         using BufferPoolAllocator::isAggregatedSmallBuffersEnabled;
+        using BufferPoolAllocator::params;
     };
   private:

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2023-2024 Intel Corporation
+ * Copyright (C) 2023-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,32 +21,47 @@ namespace NEO {
 class GraphicsAllocation;
 class HeapAllocator;
 class MemoryManager;
+class ProductHelper;
-template <typename PoolT>
 struct SmallBuffersParams {
-  protected:
-    static constexpr auto aggregatedSmallBuffersPoolSize = 2 * MemoryConstants::megaByte;
-    static constexpr auto smallBufferThreshold = 1 * MemoryConstants::megaByte;
-    static constexpr auto chunkAlignment = MemoryConstants::pageSize64k;
-    static constexpr auto startingOffset = chunkAlignment;
+    size_t aggregatedSmallBuffersPoolSize{0};
+    size_t smallBufferThreshold{0};
+    size_t chunkAlignment{0};
+    size_t startingOffset{0};
+    static SmallBuffersParams getDefaultParams() {
+        return {
+            2 * MemoryConstants::megaByte, // aggregatedSmallBuffersPoolSize
+            1 * MemoryConstants::megaByte, // smallBufferThreshold
+            MemoryConstants::pageSize64k,  // chunkAlignment
+            MemoryConstants::pageSize64k   // startingOffset
+        };
+    }
+    static SmallBuffersParams getLargePagesParams() {
+        return {
+            16 * MemoryConstants::megaByte, // aggregatedSmallBuffersPoolSize
+            2 * MemoryConstants::megaByte,  // smallBufferThreshold
+            MemoryConstants::pageSize64k,   // chunkAlignment
+            MemoryConstants::pageSize64k    // startingOffset
+        };
+    }
+    static inline SmallBuffersParams getPreferredBufferPoolParams(const ProductHelper &productHelper);
 };
 template <typename PoolT, typename BufferType, typename BufferParentType = BufferType>
-struct AbstractBuffersPool : public SmallBuffersParams<PoolT>, public NonCopyableClass {
+struct AbstractBuffersPool : public NonCopyableClass {
     // The prototype of a function allocating the `mainStorage` is not specified.
     // That would be an unnecessary limitation here - it is completely up to derived class implementation.
     // Perhaps the allocating function needs to leverage `HeapAllocator::allocate()` and also
     // a BufferType-dependent function reserving chunks within `mainStorage`.
     // Example: see `NEO::Context::BufferPool::allocate()`
-    using Params = SmallBuffersParams<PoolT>;
-    using Params::aggregatedSmallBuffersPoolSize;
-    using Params::chunkAlignment;
-    using Params::smallBufferThreshold;
-    using Params::startingOffset;
     using AllocsVecCRef = const StackVec<NEO::GraphicsAllocation *, 1> &;
     using OnChunkFreeCallback = void (PoolT::*)(uint64_t offset, size_t size);
     AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback);
+    AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback, const SmallBuffersParams &params);
     AbstractBuffersPool(AbstractBuffersPool<PoolT, BufferType, BufferParentType> &&bufferPool);
     AbstractBuffersPool &operator=(AbstractBuffersPool &&) = delete;
     virtual ~AbstractBuffersPool() = default;
@@ -66,28 +81,32 @@ struct AbstractBuffersPool : public SmallBuffersParams<PoolT>, public NonCopyabl
     std::unique_ptr<HeapAllocator> chunkAllocator;
     std::vector<std::pair<uint64_t, size_t>> chunksToFree;
     OnChunkFreeCallback onChunkFreeCallback = nullptr;
+    SmallBuffersParams params;
 };
 template <typename BuffersPoolType, typename BufferType, typename BufferParentType = BufferType>
-class AbstractBuffersAllocator : public SmallBuffersParams<BuffersPoolType> {
+class AbstractBuffersAllocator {
     // The prototype of a function allocating buffers from the pool is not specified (see similar comment in `AbstractBufersPool`).
     // By common sense, in order to allocate buffers from the pool the function should leverage a call provided by `BuffersPoolType`.
     // Example: see `NEO::Context::BufferPoolAllocator::allocateBufferFromPool()`.
   public:
-    using Params = SmallBuffersParams<BuffersPoolType>;
-    using Params::aggregatedSmallBuffersPoolSize;
-    using Params::chunkAlignment;
-    using Params::smallBufferThreshold;
-    using Params::startingOffset;
-    static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");
+    AbstractBuffersAllocator(const SmallBuffersParams &params);
+    AbstractBuffersAllocator();
     void releasePools() { this->bufferPools.clear(); }
     bool isPoolBuffer(const BufferParentType *buffer) const;
     void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size);
     uint32_t getPoolsCount() { return static_cast<uint32_t>(this->bufferPools.size()); }
+    void setParams(const SmallBuffersParams &newParams) {
+        params = newParams;
+    }
+    SmallBuffersParams getParams() const {
+        return params;
+    };
  protected:
-    inline bool isSizeWithinThreshold(size_t size) const { return smallBufferThreshold >= size; }
+    inline bool isSizeWithinThreshold(size_t size) const { return params.smallBufferThreshold >= size; }
     void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector<BuffersPoolType> &bufferPoolsVec);
     void drain();
     void drain(std::vector<BuffersPoolType> &bufferPoolsVec);
@@ -96,5 +115,6 @@ class AbstractBuffersAllocator : public SmallBuffersParams<BuffersPoolType> {
     std::mutex mutex;
     std::vector<BuffersPoolType> bufferPools;
+    SmallBuffersParams params;
 };
 } // namespace NEO

View File

@@ -1,11 +1,12 @@
 /*
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
  */
 #include "shared/source/memory_manager/memory_manager.h"
+#include "shared/source/os_interface/product_helper.h"
 #include "shared/source/utilities/buffer_pool_allocator.h"
 #include "shared/source/utilities/heap_allocator.h"
@@ -13,9 +14,17 @@
 namespace NEO {
+inline SmallBuffersParams SmallBuffersParams::getPreferredBufferPoolParams(const ProductHelper &productHelper) {
+    return productHelper.is2MBLocalMemAlignmentEnabled() ? SmallBuffersParams::getLargePagesParams() : SmallBuffersParams::getDefaultParams();
+}
 template <typename PoolT, typename BufferType, typename BufferParentType>
 AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCb)
-    : memoryManager{memoryManager}, onChunkFreeCallback{onChunkFreeCb} {
+    : AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(memoryManager, onChunkFreeCb, SmallBuffersParams::getDefaultParams()) {}
+template <typename PoolT, typename BufferType, typename BufferParentType>
+AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCb, const SmallBuffersParams &params)
+    : memoryManager{memoryManager}, onChunkFreeCallback{onChunkFreeCb}, params{params} {
     static_assert(std::is_base_of_v<BufferParentType, BufferType>);
 }
@@ -24,7 +33,8 @@ AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(Ab
     : memoryManager{bufferPool.memoryManager},
      mainStorage{std::move(bufferPool.mainStorage)},
      chunkAllocator{std::move(bufferPool.chunkAllocator)},
-     onChunkFreeCallback{bufferPool.onChunkFreeCallback} {}
+     onChunkFreeCallback{bufferPool.onChunkFreeCallback},
+     params{bufferPool.params} {}
 template <typename PoolT, typename BufferType, typename BufferParentType>
 void AbstractBuffersPool<PoolT, BufferType, BufferParentType>::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) {
@@ -49,7 +59,7 @@ void AbstractBuffersPool<PoolT, BufferType, BufferParentType>::drain() {
         }
     }
     for (auto &chunk : this->chunksToFree) {
-        this->chunkAllocator->free(chunk.first + startingOffset, chunk.second);
+        this->chunkAllocator->free(chunk.first + params.startingOffset, chunk.second);
         if (static_cast<PoolT *>(this)->onChunkFreeCallback) {
             (static_cast<PoolT *>(this)->*onChunkFreeCallback)(chunk.first, chunk.second);
         }
@@ -57,6 +67,16 @@ void AbstractBuffersPool<PoolT, BufferType, BufferParentType>::drain() {
     this->chunksToFree.clear();
 }
+template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
+AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::AbstractBuffersAllocator(const SmallBuffersParams &params)
+    : params{params} {
+    DEBUG_BREAK_IF(params.aggregatedSmallBuffersPoolSize < params.smallBufferThreshold);
+}
+template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
+AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::AbstractBuffersAllocator()
+    : AbstractBuffersAllocator(SmallBuffersParams::getDefaultParams()) {}
 template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
 bool AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::isPoolBuffer(const BufferParentType *buffer) const {
     static_assert(std::is_base_of_v<BufferParentType, BufferType>);

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2024 Intel Corporation
+ * Copyright (C) 2024-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -16,7 +16,7 @@ namespace NEO {
 ISAPool::ISAPool(Device *device, bool isBuiltin, size_t storageSize)
     : BaseType(device->getMemoryManager(), nullptr), device(device), isBuiltin(isBuiltin) {
-    this->chunkAllocator.reset(new NEO::HeapAllocator(startingOffset, storageSize, MemoryConstants::pageSize, 0u));
+    this->chunkAllocator.reset(new NEO::HeapAllocator(params.startingOffset, storageSize, MemoryConstants::pageSize, 0u));
     auto allocationType = isBuiltin ? NEO::AllocationType::kernelIsaInternal : NEO::AllocationType::kernelIsa;
     auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(),
@@ -47,7 +47,7 @@ SharedIsaAllocation *ISAPool::allocateISA(size_t requestedSize) const {
     if (offset == 0) {
        return nullptr;
    }
-    return new SharedIsaAllocation{this->mainStorage.get(), offset - startingOffset, requestedSize, mtx.get()};
+    return new SharedIsaAllocation{this->mainStorage.get(), offset - params.startingOffset, requestedSize, mtx.get()};
 }
 const StackVec<NEO::GraphicsAllocation *, 1> &ISAPool::getAllocationsVector() {

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2023-2024 Intel Corporation
+ * Copyright (C) 2023-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -10,6 +10,7 @@
 #include "shared/source/utilities/stackvec.h"
 #include "shared/test/common/mocks/mock_execution_environment.h"
 #include "shared/test/common/mocks/mock_memory_manager.h"
+#include "shared/test/common/mocks/mock_product_helper.h"
 #include "gtest/gtest.h"
@@ -24,20 +25,19 @@ struct DummyBuffer {
     int val;
 };
-template <>
-struct NEO::SmallBuffersParams<DummyBufferPool> {
-    static constexpr auto aggregatedSmallBuffersPoolSize = 32 * MemoryConstants::kiloByte;
-    static constexpr auto smallBufferThreshold = 2 * MemoryConstants::kiloByte;
-    static constexpr auto chunkAlignment = 1024u;
-    static constexpr auto startingOffset = chunkAlignment;
-};
 struct DummyBuffersPool : public NEO::AbstractBuffersPool<DummyBuffersPool, DummyBuffer> {
     using BaseType = NEO::AbstractBuffersPool<DummyBuffersPool, DummyBuffer>;
     static constexpr auto dummyPtr = 0xdeadbeef0000;
+    static constexpr NEO::SmallBuffersParams defaultParams{
+        32 * MemoryConstants::kiloByte, // aggregatedSmallBuffersPoolSize
+        2 * MemoryConstants::kiloByte,  // smallBufferThreshold
+        1024u,                          // chunkAlignment
+        1024u                           // startingOffset
+    };
     DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset, BaseType::OnChunkFreeCallback onChunkFreeCallback)
-        : BaseType{memoryManager, onChunkFreeCallback} {
+        : BaseType{memoryManager, onChunkFreeCallback, defaultParams} {
         dummyAllocations.resize(2);
         dummyAllocations[0] = reinterpret_cast<NEO::GraphicsAllocation *>(poolOffset + dummyPtr);
         dummyAllocations[1] = nullptr; // makes sure nullptrs don't cause SEGFAULTs
@@ -64,6 +64,9 @@ struct DummyBuffersAllocator : public NEO::AbstractBuffersAllocator<DummyBuffers
     using BaseType::bufferPools;
     using BaseType::isSizeWithinThreshold;
+    DummyBuffersAllocator() : BaseType() {}
+    DummyBuffersAllocator(const NEO::SmallBuffersParams &params) : BaseType(params) {}
     void drainUnderLock() {
         auto lock = std::unique_lock<std::mutex>(this->mutex);
         this->BaseType::drain();
@@ -86,10 +89,10 @@ struct AbstractSmallBuffersTest : public ::testing::Test {
 TEST_F(AbstractSmallBuffersTest, givenBuffersPoolWhenCreatedAndMovedThenCtorsWorkCorrectly) {
     auto pool1 = DummyBuffersPool{this->memoryManager.get()};
     pool1.mainStorage.reset(new DummyBuffer(testVal));
-    pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset,
-                                                      DummyBuffersPool::aggregatedSmallBuffersPoolSize,
-                                                      DummyBuffersPool::chunkAlignment,
-                                                      DummyBuffersPool::smallBufferThreshold});
+    pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
+                                                      DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
+                                                      DummyBuffersPool::defaultParams.chunkAlignment,
+                                                      DummyBuffersPool::defaultParams.smallBufferThreshold});
     EXPECT_EQ(pool1.memoryManager, this->memoryManager.get());
@@ -98,7 +101,7 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersPoolWhenCreatedAndMovedThenCtorsWor
     EXPECT_EQ(pool2.mainStorage->val, testVal);
     EXPECT_EQ(static_cast<DummyBuffersPool::BaseType &>(pool2).getAllocationsVector()[0], reinterpret_cast<NEO::GraphicsAllocation *>(DummyBuffersPool::dummyPtr));
     EXPECT_EQ(pool2.chunkAllocator->getUsedSize(), 0ul);
-    EXPECT_EQ(pool2.chunkAllocator->getLeftSize(), DummyBuffersPool::aggregatedSmallBuffersPoolSize);
+    EXPECT_EQ(pool2.chunkAllocator->getLeftSize(), DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize);
 }
 TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenPoolWithoutMainStorageAddedThenItIsIgnored) {
@@ -114,11 +117,11 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenPoolWithoutMainStorage
 TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenNullptrTriedToBeFreedThenItIsNotConsideredValidBuffer) {
     auto pool = DummyBuffersPool{this->memoryManager.get()};
     pool.mainStorage.reset(new DummyBuffer(testVal));
-    auto buffersAllocator = DummyBuffersAllocator{};
+    DummyBuffersAllocator buffersAllocator{pool.params};
     buffersAllocator.addNewBufferPool(std::move(pool));
-    EXPECT_TRUE(buffersAllocator.isSizeWithinThreshold(DummyBuffersPool::smallBufferThreshold));
-    EXPECT_FALSE(buffersAllocator.isSizeWithinThreshold(DummyBuffersPool::smallBufferThreshold + 1));
+    EXPECT_TRUE(buffersAllocator.isSizeWithinThreshold(pool.params.smallBufferThreshold));
+    EXPECT_FALSE(buffersAllocator.isSizeWithinThreshold(pool.params.smallBufferThreshold + 1));
     auto &chunksToFree = buffersAllocator.bufferPools[0].chunksToFree;
     EXPECT_EQ(chunksToFree.size(), 0u);
@@ -175,8 +178,8 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenChunkOfMainStorageTrie
     auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree;
     EXPECT_EQ(chunksToFree1.size(), 0u);
     EXPECT_EQ(chunksToFree2.size(), 0u);
-    auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
-    auto chunkOffset = DummyBuffersPool::chunkAlignment;
+    auto chunkSize = DummyBuffersPool::defaultParams.chunkAlignment * 4;
+    auto chunkOffset = DummyBuffersPool::defaultParams.chunkAlignment;
     buffersAllocator.tryFreeFromPoolBuffer(poolStorage2, chunkOffset, chunkSize);
     EXPECT_EQ(chunksToFree1.size(), 0u);
     EXPECT_EQ(chunksToFree2.size(), 1u);
@@ -197,21 +200,21 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA
     pool2.mainStorage.reset(new DummyBuffer(testVal + 2));
     auto buffer1 = pool1.mainStorage.get();
     auto buffer2 = pool2.mainStorage.get();
-    pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset,
-                                                      DummyBuffersPool::aggregatedSmallBuffersPoolSize,
-                                                      DummyBuffersPool::chunkAlignment,
-                                                      DummyBuffersPool::smallBufferThreshold});
-    pool2.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset,
-                                                      DummyBuffersPool::aggregatedSmallBuffersPoolSize,
-                                                      DummyBuffersPool::chunkAlignment,
-                                                      DummyBuffersPool::smallBufferThreshold});
+    pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
+                                                      DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
+                                                      DummyBuffersPool::defaultParams.chunkAlignment,
+                                                      DummyBuffersPool::defaultParams.smallBufferThreshold});
+    pool2.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
+                                                      DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
+                                                      DummyBuffersPool::defaultParams.chunkAlignment,
+                                                      DummyBuffersPool::defaultParams.smallBufferThreshold});
     auto buffersAllocator = DummyBuffersAllocator{};
     buffersAllocator.addNewBufferPool(std::move(pool1));
     buffersAllocator.addNewBufferPool(std::move(pool2));
-    auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
-    auto chunkOffset = DummyBuffersPool::chunkAlignment;
+    auto chunkSize = DummyBuffersPool::defaultParams.chunkAlignment * 4;
+    auto chunkOffset = DummyBuffersPool::defaultParams.chunkAlignment;
     for (size_t i = 0; i < 3; i++) {
         auto exampleOffset = chunkOffset + i * chunkSize * 2;
buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize); buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize);
@ -247,15 +250,15 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnChu
auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr}; auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr};
pool1.mainStorage.reset(new DummyBuffer(testVal)); pool1.mainStorage.reset(new DummyBuffer(testVal));
auto buffer1 = pool1.mainStorage.get(); auto buffer1 = pool1.mainStorage.get();
pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset, pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
DummyBuffersPool::aggregatedSmallBuffersPoolSize, DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
DummyBuffersPool::chunkAlignment, DummyBuffersPool::defaultParams.chunkAlignment,
DummyBuffersPool::smallBufferThreshold}); DummyBuffersPool::defaultParams.smallBufferThreshold});
auto buffersAllocator = DummyBuffersAllocator{}; auto buffersAllocator = DummyBuffersAllocator{};
buffersAllocator.addNewBufferPool(std::move(pool1)); buffersAllocator.addNewBufferPool(std::move(pool1));
auto chunkSize = DummyBuffersPool::chunkAlignment * 4; auto chunkSize = DummyBuffersPool::defaultParams.chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::chunkAlignment; auto chunkOffset = DummyBuffersPool::defaultParams.chunkAlignment;
for (size_t i = 0; i < 3; i++) { for (size_t i = 0; i < 3; i++) {
auto exampleOffset = chunkOffset + i * chunkSize * 2; auto exampleOffset = chunkOffset + i * chunkSize * 2;
buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize); buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize);
@ -295,17 +298,17 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolThenOffset
auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr}; auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr};
pool1.mainStorage.reset(new DummyBuffer(testVal)); pool1.mainStorage.reset(new DummyBuffer(testVal));
auto buffer1 = pool1.mainStorage.get(); auto buffer1 = pool1.mainStorage.get();
pool1.chunkAllocator.reset(new ProxyHeapAllocator{DummyBuffersPool::startingOffset, pool1.chunkAllocator.reset(new ProxyHeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
DummyBuffersPool::aggregatedSmallBuffersPoolSize, DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
DummyBuffersPool::chunkAlignment, DummyBuffersPool::defaultParams.chunkAlignment,
DummyBuffersPool::smallBufferThreshold}); DummyBuffersPool::defaultParams.smallBufferThreshold});
auto buffersAllocator = DummyBuffersAllocator{}; auto buffersAllocator = DummyBuffersAllocator{};
buffersAllocator.addNewBufferPool(std::move(pool1)); buffersAllocator.addNewBufferPool(std::move(pool1));
auto chunkSize = DummyBuffersPool::chunkAlignment * 4; auto chunkSize = DummyBuffersPool::defaultParams.chunkAlignment * 4;
auto exampleOffsets = std::array<size_t, 3>{0u, 0u, 0u}; auto exampleOffsets = std::array<size_t, 3>{0u, 0u, 0u};
for (size_t i = 0; i < 3; i++) { for (size_t i = 0; i < 3; i++) {
exampleOffsets[i] = DummyBuffersPool::startingOffset + i * chunkSize * 2; exampleOffsets[i] = DummyBuffersPool::defaultParams.startingOffset + i * chunkSize * 2;
buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffsets[i], chunkSize); buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffsets[i], chunkSize);
} }
@ -317,6 +320,74 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolThenOffset
auto heapAllocator = static_cast<ProxyHeapAllocator *>(buffersAllocator.bufferPools[0].chunkAllocator.get()); auto heapAllocator = static_cast<ProxyHeapAllocator *>(buffersAllocator.bufferPools[0].chunkAllocator.get());
ASSERT_EQ(heapAllocator->registeredOffsets.size(), 3u); ASSERT_EQ(heapAllocator->registeredOffsets.size(), 3u);
for (size_t i = 0; i < 3; i++) { for (size_t i = 0; i < 3; i++) {
EXPECT_EQ(heapAllocator->registeredOffsets[i], exampleOffsets[i] + DummyBuffersPool::startingOffset); EXPECT_EQ(heapAllocator->registeredOffsets[i], exampleOffsets[i] + DummyBuffersPool::defaultParams.startingOffset);
} }
} }
struct SmallBuffersParamsTest : public ::testing::Test {
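// Helper comparing all four SmallBuffersParams fields; used by the assertions below.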
bool compareSmallBuffersParams(const NEO::SmallBuffersParams &first, const NEO::SmallBuffersParams &second) {
return first.aggregatedSmallBuffersPoolSize == second.aggregatedSmallBuffersPoolSize &&
first.smallBufferThreshold == second.smallBufferThreshold &&
first.chunkAlignment == second.chunkAlignment &&
first.startingOffset == second.startingOffset;
}
};
TEST_F(SmallBuffersParamsTest, WhenGettingDefaultParamsThenReturnCorrectValues) {
auto defaultParams = NEO::SmallBuffersParams::getDefaultParams();
EXPECT_EQ(2 * MemoryConstants::megaByte, defaultParams.aggregatedSmallBuffersPoolSize);
EXPECT_EQ(1 * MemoryConstants::megaByte, defaultParams.smallBufferThreshold);
EXPECT_EQ(MemoryConstants::pageSize64k, defaultParams.chunkAlignment);
EXPECT_EQ(MemoryConstants::pageSize64k, defaultParams.startingOffset);
}
TEST_F(SmallBuffersParamsTest, WhenGettingLargePagesParamsThenReturnCorrectValues) {
auto largePagesParams = NEO::SmallBuffersParams::getLargePagesParams();
EXPECT_EQ(16 * MemoryConstants::megaByte, largePagesParams.aggregatedSmallBuffersPoolSize);
EXPECT_EQ(2 * MemoryConstants::megaByte, largePagesParams.smallBufferThreshold);
EXPECT_EQ(MemoryConstants::pageSize64k, largePagesParams.chunkAlignment);
EXPECT_EQ(MemoryConstants::pageSize64k, largePagesParams.startingOffset);
}
TEST_F(SmallBuffersParamsTest, GivenProductHelperWhenGettingPreferredBufferPoolParamsThenReturnsCorrectValues) {
auto mockProductHelper = std::make_unique<NEO::MockProductHelper>();
{
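// With 2MB local memory alignment reported as disabled, the default parameters are expected.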
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
auto preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(*mockProductHelper);
auto expectedParams = NEO::SmallBuffersParams::getDefaultParams();
EXPECT_TRUE(compareSmallBuffersParams(expectedParams, preferredParams));
}
{
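// With 2MB local memory alignment reported as enabled, the large-pages parameters are expected.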
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
auto preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(*mockProductHelper);
auto expectedParams = NEO::SmallBuffersParams::getLargePagesParams();
EXPECT_TRUE(compareSmallBuffersParams(expectedParams, preferredParams));
}
}
TEST_F(SmallBuffersParamsTest, GivenBuffersAllocatorWhenSettingDifferentParamsThenGetParamsReturnsExpectedValues) {
auto buffersAllocator = DummyBuffersAllocator{};
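// Two arbitrary, distinct parameter sets used only to verify the setParams()/getParams() round-trip.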
const NEO::SmallBuffersParams params1{
16 * MemoryConstants::kiloByte, // aggregatedSmallBuffersPoolSize
1 * MemoryConstants::kiloByte, // smallBufferThreshold
1024u, // chunkAlignment
1024u // startingOffset
};
const NEO::SmallBuffersParams params2{
32 * MemoryConstants::megaByte, // aggregatedSmallBuffersPoolSize
2 * MemoryConstants::megaByte, // smallBufferThreshold
MemoryConstants::pageSize64k, // chunkAlignment
MemoryConstants::pageSize64k // startingOffset
};
buffersAllocator.setParams(params1);
EXPECT_TRUE(compareSmallBuffersParams(params1, buffersAllocator.getParams()));
buffersAllocator.setParams(params2);
EXPECT_TRUE(compareSmallBuffersParams(params2, buffersAllocator.getParams()));
}
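For reference, a minimal, self-contained sketch of the parameter set these tests exercise. It is not part of the commit: the field names, preset values, and the alignment-driven selection are inferred solely from the assertions above, and the sketch/SmallBuffersParamsSketch names are hypothetical stand-ins for the real NEO types.

#include <cstddef>

namespace sketch {
constexpr std::size_t megaByte = 1024u * 1024u;   // stands in for MemoryConstants::megaByte
constexpr std::size_t pageSize64k = 64u * 1024u;  // stands in for MemoryConstants::pageSize64k

struct SmallBuffersParamsSketch {
    std::size_t aggregatedSmallBuffersPoolSize;
    std::size_t smallBufferThreshold;
    std::size_t chunkAlignment;
    std::size_t startingOffset;

    // Matches the values asserted in WhenGettingDefaultParamsThenReturnCorrectValues.
    static SmallBuffersParamsSketch getDefaultParams() {
        return {2 * megaByte, 1 * megaByte, pageSize64k, pageSize64k};
    }

    // Matches the values asserted in WhenGettingLargePagesParamsThenReturnCorrectValues.
    static SmallBuffersParamsSketch getLargePagesParams() {
        return {16 * megaByte, 2 * megaByte, pageSize64k, pageSize64k};
    }

    // Mirrors the MockProductHelper expectations above: the large-pages preset is chosen
    // only when 2MB local memory alignment is enabled.
    static SmallBuffersParamsSketch getPreferredParams(bool is2MBLocalMemAlignmentEnabled) {
        return is2MBLocalMemAlignmentEnabled ? getLargePagesParams() : getDefaultParams();
    }
};
} // namespace sketch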