fix: configure small buffers params based on productHelper

Refactor buffer pool allocator to support configurable
SmallBuffersParams based on product helper capabilities.

This patch enables setting custom pool
parameters instead of using fixed static values.

For devices with 2MB local memory alignment enabled
(is2MBLocalMemAlignmentEnabled),
use a larger pool configuration:
- Pool size: 16MB (up from 2MB)
- Threshold: 2MB (up from 1MB)
- Alignment: 64KB (unchanged)
- Starting offset: 64KB (unchanged)

This improves memory utilization for devices that support larger memory
alignments, while maintaining the original parameters for other devices.

Key changes:
- Moved params from static template to instance member
- Added SmallBuffersParams struct with default/large configs
- Added constructor and setter methods for params configuration

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński 2025-02-06 23:56:09 +00:00 committed by Compute-Runtime-Automation
parent ef961df421
commit 1eb8e0efd9
9 changed files with 298 additions and 116 deletions

View File

@ -17,6 +17,7 @@
#include "shared/source/kernel/kernel_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/driver_info.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
@ -480,7 +481,8 @@ void ClDevice::initializeMaxPoolCount() {
auto &device = getDevice();
const auto bitfield = device.getDeviceBitfield();
const auto deviceMemory = device.getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong()));
const auto maxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(deviceMemory, 2);
const auto preferredBufferPoolParams = SmallBuffersParams::getPreferredBufferPoolParams(device.getProductHelper());
const auto maxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(preferredBufferPoolParams, deviceMemory, 2);
device.updateMaxPoolCount(maxPoolCount);
}

View File

@ -310,6 +310,8 @@ bool Context::createImpl(const cl_context_properties *properties,
auto requiresWritableStaging = device->getDefaultEngine().commandStreamReceiver->getType() != CommandStreamReceiverType::hardware;
this->stagingBufferManager = std::make_unique<StagingBufferManager>(svmAllocsManager, rootDeviceIndices, deviceBitfields, requiresWritableStaging);
}
smallBufferPoolAllocator.setParams(SmallBuffersParams::getPreferredBufferPoolParams(device->getProductHelper()));
}
return true;
@ -584,7 +586,9 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont
(isSupportedForSingleDeviceContexts && context->isSingleDeviceContext());
}
Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager, nullptr) {
Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager,
nullptr,
SmallBuffersParams::getPreferredBufferPoolParams(context->getDevice(0)->getDevice().getProductHelper())) {
static constexpr cl_mem_flags flags = CL_MEM_UNCOMPRESSED_HINT_INTEL;
[[maybe_unused]] cl_int errcodeRet{};
Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
@ -592,14 +596,14 @@ Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryMana
bufferCreateArgs.makeAllocationLockable = true;
this->mainStorage.reset(Buffer::create(context,
flags,
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
context->getBufferPoolAllocator().getParams().aggregatedSmallBuffersPoolSize,
nullptr,
bufferCreateArgs,
errcodeRet));
if (this->mainStorage) {
this->chunkAllocator.reset(new HeapAllocator(BufferPool::startingOffset,
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
BufferPoolAllocator::chunkAlignment));
this->chunkAllocator.reset(new HeapAllocator(params.startingOffset,
context->getBufferPoolAllocator().getParams().aggregatedSmallBuffersPoolSize,
context->getBufferPoolAllocator().getParams().chunkAlignment));
context->decRefInternal();
}
}
@ -620,7 +624,7 @@ Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties,
if (bufferRegion.origin == 0) {
return nullptr;
}
bufferRegion.origin -= BufferPool::startingOffset;
bufferRegion.origin -= params.startingOffset;
bufferRegion.size = requestedSize;
auto bufferFromPool = this->mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
bufferFromPool->createFunction = this->mainStorage->createFunction;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -37,6 +37,7 @@ class Kernel;
class MemoryManager;
class SharingFunctions;
class SVMAllocsManager;
class ProductHelper;
class Program;
class Platform;
class TagAllocatorBase;
@ -69,7 +70,11 @@ class Context : public BaseObject<_cl_context> {
};
class BufferPoolAllocator : public AbstractBuffersAllocator<BufferPool, Buffer, MemObj> {
using BaseType = AbstractBuffersAllocator<BufferPool, Buffer, MemObj>;
public:
BufferPoolAllocator() = default;
bool isAggregatedSmallBuffersEnabled(Context *context) const;
void initAggregatedSmallBuffers(Context *context);
Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties,
@ -79,8 +84,8 @@ class Context : public BaseObject<_cl_context> {
void *hostPtr,
cl_int &errcodeRet);
bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const;
static inline uint32_t calculateMaxPoolCount(uint64_t totalMemory, size_t percentOfMemory) {
const auto maxPoolCount = static_cast<uint32_t>(totalMemory * (percentOfMemory / 100.0) / BufferPoolAllocator::aggregatedSmallBuffersPoolSize);
static inline uint32_t calculateMaxPoolCount(SmallBuffersParams smallBuffersParams, uint64_t totalMemory, size_t percentOfMemory) {
const auto maxPoolCount = static_cast<uint32_t>(totalMemory * (percentOfMemory / 100.0) / (smallBuffersParams.aggregatedSmallBuffersPoolSize));
return maxPoolCount ? maxPoolCount : 1u;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -13,6 +13,7 @@
#include "shared/test/common/helpers/raii_product_helper.h"
#include "shared/test/common/mocks/mock_ail_configuration.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/mock_product_helper.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"
@ -46,7 +47,7 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
MockMemoryManager *mockMemoryManager;
cl_mem_flags flags{};
size_t size = PoolAllocator::smallBufferThreshold;
size_t size{0u};
void *hostPtr = nullptr;
cl_int retVal = CL_SUCCESS;
static const auto rootDeviceIndex = 1u;
@ -63,7 +64,7 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
this->mockNeoDevice = static_cast<MockDevice *>(&this->device->getDevice());
const auto bitfield = mockNeoDevice->getDeviceBitfield();
const auto deviceMemory = mockNeoDevice->getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong()));
const auto expectedMaxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(deviceMemory, 2);
const auto expectedMaxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(SmallBuffersParams::getPreferredBufferPoolParams(this->device->getProductHelper()), deviceMemory, 2);
EXPECT_EQ(expectedMaxPoolCount, mockNeoDevice->maxBufferPoolCount);
this->mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
this->mockMemoryManager->localMemorySupported[rootDeviceIndex] = true;
@ -74,8 +75,9 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
this->context->initializeUsmAllocationPools();
EXPECT_EQ(retVal, CL_SUCCESS);
this->setAllocationToFail(false);
this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->smallBufferPoolAllocator);
this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
this->mockNeoDevice->updateMaxPoolCount(1u);
size = this->poolAllocator->params.smallBufferThreshold;
}
};
@ -181,10 +183,17 @@ TEST_F(AggregatedSmallBuffersDisabledTest, givenAggregatedSmallBuffersDisabledWh
using AggregatedSmallBuffersEnabledTest = AggregatedSmallBuffersTestTemplate<1>;
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenCalculateMaxPoolCountCalledThenCorrectValueIsReturned) {
EXPECT_EQ(81u, MockBufferPoolAllocator::calculateMaxPoolCount(8 * MemoryConstants::gigaByte, 2));
EXPECT_EQ(204u, MockBufferPoolAllocator::calculateMaxPoolCount(8 * MemoryConstants::gigaByte, 5));
EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(128 * MemoryConstants::megaByte, 2));
EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(64 * MemoryConstants::megaByte, 2));
if (device->getProductHelper().is2MBLocalMemAlignmentEnabled()) {
EXPECT_EQ(10u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 8 * MemoryConstants::gigaByte, 2));
EXPECT_EQ(25u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 8 * MemoryConstants::gigaByte, 5));
EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 128 * MemoryConstants::megaByte, 2));
EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 64 * MemoryConstants::megaByte, 2));
} else {
EXPECT_EQ(81u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 8 * MemoryConstants::gigaByte, 2));
EXPECT_EQ(204u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 8 * MemoryConstants::gigaByte, 5));
EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 128 * MemoryConstants::megaByte, 2));
EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), 64 * MemoryConstants::megaByte, 2));
}
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenMakeDeviceBufferLockableAndNotCompressed) {
@ -200,7 +209,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
size = PoolAllocator::smallBufferThreshold + 1;
size = poolAllocator->params.smallBufferThreshold + 1;
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_NE(nullptr, buffer);
EXPECT_EQ(CL_SUCCESS, retVal);
@ -211,7 +220,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndF
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
size = PoolAllocator::smallBufferThreshold;
size = poolAllocator->params.smallBufferThreshold;
flags |= CL_MEM_COMPRESSED_HINT_INTEL;
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_NE(nullptr, buffer);
@ -224,17 +233,17 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
EXPECT_EQ(0u, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
size = PoolAllocator::chunkAlignment / 2;
size = poolAllocator->params.chunkAlignment / 2;
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_NE(buffer, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(size, buffer->getSize());
EXPECT_EQ(PoolAllocator::chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
EXPECT_EQ(poolAllocator->params.chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
auto mockBuffer = static_cast<MockBuffer *>(buffer.get());
EXPECT_EQ(PoolAllocator::chunkAlignment, mockBuffer->sizeInPoolAllocator);
EXPECT_EQ(poolAllocator->params.chunkAlignment, mockBuffer->sizeInPoolAllocator);
buffer.reset(nullptr);
EXPECT_EQ(PoolAllocator::chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
EXPECT_EQ(poolAllocator->params.chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledThenUsePool) {
@ -250,7 +259,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
auto mockBuffer = static_cast<MockBuffer *>(buffer.get());
EXPECT_GE(mockBuffer->getSize(), size);
EXPECT_GE(mockBuffer->getOffset(), 0u);
EXPECT_LE(mockBuffer->getOffset(), PoolAllocator::aggregatedSmallBuffersPoolSize - size);
EXPECT_LE(mockBuffer->getOffset(), poolAllocator->params.aggregatedSmallBuffersPoolSize - size);
EXPECT_TRUE(mockBuffer->isSubBuffer());
EXPECT_EQ(mockBuffer->associatedMemObject, poolAllocator->bufferPools[0].mainStorage.get());
@ -285,7 +294,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
for (auto i = 0u; i < buffersToCreate; i++) {
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@ -312,7 +321,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
for (auto i = 0u; i < buffersToCreate; i++) {
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@ -337,7 +346,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
for (auto i = 0u; i < buffersToCreate; i++) {
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@ -362,7 +371,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
const std::vector<std::unique_ptr<Buffer>>::size_type buffersToCreate = (PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold) * mockNeoDevice->maxBufferPoolCount;
const std::vector<std::unique_ptr<Buffer>>::size_type buffersToCreate = (poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold) * mockNeoDevice->maxBufferPoolCount;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
for (auto i = 0u; i < buffersToCreate; ++i) {
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@ -370,7 +379,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
}
EXPECT_EQ(mockNeoDevice->maxBufferPoolCount, poolAllocator->bufferPools.size());
for (auto i = 0u; i < mockNeoDevice->maxBufferPoolCount; ++i) {
EXPECT_EQ(PoolAllocator::aggregatedSmallBuffersPoolSize, poolAllocator->bufferPools[i].chunkAllocator->getUsedSize());
EXPECT_EQ(poolAllocator->params.aggregatedSmallBuffersPoolSize, poolAllocator->bufferPools[i].chunkAllocator->getUsedSize());
}
EXPECT_EQ(1u, mockMemoryManager->allocInUseCalled);
mockMemoryManager->deferAllocInUse = true;
@ -403,7 +412,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenCopyHostPointerWhenCreatingBuffer
context->setSpecialQueue(commandQueue, rootDeviceIndex);
flags = CL_MEM_COPY_HOST_PTR;
auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[PoolAllocator::smallBufferThreshold]);
auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[poolAllocator->params.smallBufferThreshold]);
hostPtr = dataToCopy.get();
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
@ -427,7 +436,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
for (auto i = 0u; i < buffersToCreate; i++) {
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@ -452,7 +461,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
EXPECT_GE(mockBuffer->getSize(), size);
EXPECT_GE(mockBuffer->getOffset(), 0u);
EXPECT_LE(mockBuffer->getOffset(), PoolAllocator::aggregatedSmallBuffersPoolSize - size);
EXPECT_LE(mockBuffer->getOffset(), poolAllocator->params.aggregatedSmallBuffersPoolSize - size);
subBuffersBounds[i] = Bounds{mockBuffer->getOffset(), mockBuffer->getOffset() + mockBuffer->getSize()};
}
@ -479,7 +488,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndM
this->setAllocationToFail(false);
EXPECT_EQ(2u, mockNeoDevice->bufferPoolCount.load());
constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
for (auto i = 0u; i < buffersToCreate; i++) {
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@ -493,14 +502,18 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndM
EXPECT_FALSE(bufferAfterExhaustMustSucceed->isSubBuffer());
mockNeoDevice->callBaseGetGlobalMemorySize = false;
mockNeoDevice->getGlobalMemorySizeReturn = static_cast<uint64_t>(2 * 2 * MemoryConstants::megaByte / 0.02);
if (mockNeoDevice->getProductHelper().is2MBLocalMemAlignmentEnabled()) {
mockNeoDevice->getGlobalMemorySizeReturn = static_cast<uint64_t>(16 * 2 * MemoryConstants::megaByte / 0.02);
} else {
mockNeoDevice->getGlobalMemorySizeReturn = static_cast<uint64_t>(2 * 2 * MemoryConstants::megaByte / 0.02);
}
const auto bitfield = mockNeoDevice->getDeviceBitfield();
const auto deviceMemory = mockNeoDevice->getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong()));
EXPECT_EQ(2u, MockBufferPoolAllocator::calculateMaxPoolCount(deviceMemory, 2));
EXPECT_EQ(2u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), deviceMemory, 2));
std::unique_ptr<MockContext> thirdContext;
thirdContext.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
MockBufferPoolAllocator *thirdPoolAllocator = static_cast<MockBufferPoolAllocator *>(&thirdContext->smallBufferPoolAllocator);
MockBufferPoolAllocator *thirdPoolAllocator = static_cast<MockBufferPoolAllocator *>(&thirdContext->getBufferPoolAllocator());
EXPECT_EQ(0u, thirdPoolAllocator->bufferPools.size());
EXPECT_EQ(2u, mockNeoDevice->bufferPoolCount.load());
@ -560,6 +573,51 @@ TEST_F(AggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffe
EXPECT_EQ(0u, output.size());
}
// Verifies that a freshly created context configures its BufferPoolAllocator
// with the params preferred for the product: toggling
// is2MBLocalMemAlignmentEnabled between two context creations must yield the
// matching (default vs. large-pages) configuration each time.
TEST_F(AggregatedSmallBuffersEnabledTestDoNotRunSetup, givenProductWithAndWithout2MBLocalMemAlignmentWhenCreatingContextThenBufferPoolAllocatorHasCorrectParams) {
// Field-wise comparison helper covering all four SmallBuffersParams members.
auto compareSmallBuffersParams = [](const NEO::SmallBuffersParams &first, const NEO::SmallBuffersParams &second) {
return first.aggregatedSmallBuffersPoolSize == second.aggregatedSmallBuffersPoolSize &&
first.smallBufferThreshold == second.smallBufferThreshold &&
first.chunkAlignment == second.chunkAlignment &&
first.startingOffset == second.startingOffset;
};
debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(1);
debugManager.flags.EnableDeviceUsmAllocationPool.set(0);
debugManager.flags.EnableHostUsmAllocationPool.set(0);
debugManager.flags.RenderCompressedBuffersEnabled.set(1);
this->deviceFactory = std::make_unique<UltClDeviceFactory>(2, 0);
this->device = deviceFactory->rootDevices[rootDeviceIndex];
this->mockNeoDevice = static_cast<MockDevice *>(&this->device->getDevice());
// Swap in a mock product helper so the 2MB-alignment capability can be toggled.
auto mockProductHelper = new MockProductHelper;
mockNeoDevice->getRootDeviceEnvironmentRef().productHelper.reset(mockProductHelper);
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
auto &productHelper = mockNeoDevice->getRootDeviceEnvironment().getProductHelper();
EXPECT_FALSE(productHelper.is2MBLocalMemAlignmentEnabled());
cl_device_id devices[] = {device};
// Capability disabled: context's allocator should carry the preferred (default) params.
this->context.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
auto &bufferPoolAllocator = context->getBufferPoolAllocator();
auto bufferPoolAllocatorParams = bufferPoolAllocator.getParams();
auto preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(productHelper);
EXPECT_TRUE(compareSmallBuffersParams(bufferPoolAllocatorParams, preferredParams));
// Capability enabled: a second context must pick up the large-pages params.
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
EXPECT_TRUE(productHelper.is2MBLocalMemAlignmentEnabled());
std::unique_ptr<MockContext> secondContext;
secondContext.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
auto &bufferPoolAllocator2 = secondContext->getBufferPoolAllocator();
auto bufferPoolAllocatorParams2 = bufferPoolAllocator2.getParams();
preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(productHelper);
EXPECT_TRUE(compareSmallBuffersParams(bufferPoolAllocatorParams2, preferredParams));
}
template <int32_t poolBufferFlag = -1>
class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
void SetUp() override {
@ -571,13 +629,14 @@ class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
EXPECT_EQ(retVal, CL_SUCCESS);
context = castToObject<Context>(clContext);
poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
size = poolAllocator->params.smallBufferThreshold;
}
public:
std::unique_ptr<UltClDeviceFactory> deviceFactory;
cl_mem_flags flags = CL_MEM_READ_WRITE;
size_t size = PoolAllocator::smallBufferThreshold;
size_t size{0u};
cl_int retVal = CL_SUCCESS;
void *hostPtr{nullptr};
cl_context clContext{nullptr};
@ -598,7 +657,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenNoBufferCreatedWhenReleasingCo
}
TEST_F(AggregatedSmallBuffersEnabledApiTest, givenNotSmallBufferWhenCreatingBufferThenDoNotUsePool) {
size = PoolAllocator::smallBufferThreshold + 1;
size = poolAllocator->params.smallBufferThreshold + 1;
cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(buffer, nullptr);
@ -634,7 +693,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferT
TEST_F(AggregatedSmallBuffersEnabledApiTest, givenUseHostPointerWhenCreatingBufferThenDoNotUsePool) {
flags |= CL_MEM_USE_HOST_PTR;
auto hostData = std::unique_ptr<unsigned char[]>(new unsigned char[PoolAllocator::smallBufferThreshold]);
auto hostData = std::unique_ptr<unsigned char[]>(new unsigned char[poolAllocator->params.smallBufferThreshold]);
hostPtr = hostData.get();
cl_mem smallBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
@ -697,7 +756,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenBufferFromPoolWhenGetMemObjInf
TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSubBufferNotFromPoolAndAggregatedSmallBuffersEnabledWhenReleaseMemObjectCalledThenItSucceeds) {
DebugManagerStateRestore restore;
debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0);
size_t size = PoolAllocator::smallBufferThreshold + 1;
size_t size = poolAllocator->params.smallBufferThreshold + 1;
cl_mem largeBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
@ -721,7 +780,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSubBufferNotFromPoolAndAggrega
TEST_F(AggregatedSmallBuffersEnabledApiTest, givenCopyHostPointerWhenCreatingBufferThenUsePoolAndCopyHostPointer) {
flags |= CL_MEM_COPY_HOST_PTR;
auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[PoolAllocator::smallBufferThreshold]);
auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[poolAllocator->params.smallBufferThreshold]);
dataToCopy[0] = 123;
hostPtr = dataToCopy.get();
auto contextRefCountBefore = context->getRefInternalCount();
@ -849,7 +908,7 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB
EXPECT_EQ(subBuffer, nullptr);
region.size = 1;
region.origin = PoolAllocator::smallBufferThreshold;
region.origin = poolAllocator->params.smallBufferThreshold;
subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &retVal);
EXPECT_EQ(retVal, CL_INVALID_VALUE);
EXPECT_EQ(subBuffer, nullptr);
@ -885,4 +944,4 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenSubBufferFromBufferFromPoolW
EXPECT_EQ(clReleaseContext(context), CL_SUCCESS);
}
} // namespace Ult
} // namespace Ult

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -59,6 +59,7 @@ class MockContext : public Context {
using BufferPoolAllocator::bufferPools;
using BufferPoolAllocator::calculateMaxPoolCount;
using BufferPoolAllocator::isAggregatedSmallBuffersEnabled;
using BufferPoolAllocator::params;
};
private:

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -21,32 +21,47 @@ namespace NEO {
class GraphicsAllocation;
class HeapAllocator;
class MemoryManager;
class ProductHelper;
template <typename PoolT>
struct SmallBuffersParams {
protected:
static constexpr auto aggregatedSmallBuffersPoolSize = 2 * MemoryConstants::megaByte;
static constexpr auto smallBufferThreshold = 1 * MemoryConstants::megaByte;
static constexpr auto chunkAlignment = MemoryConstants::pageSize64k;
static constexpr auto startingOffset = chunkAlignment;
size_t aggregatedSmallBuffersPoolSize{0};
size_t smallBufferThreshold{0};
size_t chunkAlignment{0};
size_t startingOffset{0};
// Baseline pool configuration: 2MB backing pool, 1MB small-buffer threshold,
// 64KB chunk alignment and starting offset. Used when no larger config applies.
static SmallBuffersParams getDefaultParams() {
return {
2 * MemoryConstants::megaByte, // aggregatedSmallBuffersPoolSize
1 * MemoryConstants::megaByte, // smallBufferThreshold
MemoryConstants::pageSize64k, // chunkAlignment
MemoryConstants::pageSize64k // startingOffset
};
}
// Enlarged pool configuration (16MB pool, 2MB threshold; alignment/offset
// unchanged at 64KB). Selected by getPreferredBufferPoolParams() for products
// reporting is2MBLocalMemAlignmentEnabled().
static SmallBuffersParams getLargePagesParams() {
return {
16 * MemoryConstants::megaByte, // aggregatedSmallBuffersPoolSize
2 * MemoryConstants::megaByte, // smallBufferThreshold
MemoryConstants::pageSize64k, // chunkAlignment
MemoryConstants::pageSize64k // startingOffset
};
}
static inline SmallBuffersParams getPreferredBufferPoolParams(const ProductHelper &productHelper);
};
template <typename PoolT, typename BufferType, typename BufferParentType = BufferType>
struct AbstractBuffersPool : public SmallBuffersParams<PoolT>, public NonCopyableClass {
struct AbstractBuffersPool : public NonCopyableClass {
// The prototype of a function allocating the `mainStorage` is not specified.
// That would be an unnecessary limitation here - it is completely up to derived class implementation.
// Perhaps the allocating function needs to leverage `HeapAllocator::allocate()` and also
// a BufferType-dependent function reserving chunks within `mainStorage`.
// Example: see `NEO::Context::BufferPool::allocate()`
using Params = SmallBuffersParams<PoolT>;
using Params::aggregatedSmallBuffersPoolSize;
using Params::chunkAlignment;
using Params::smallBufferThreshold;
using Params::startingOffset;
using AllocsVecCRef = const StackVec<NEO::GraphicsAllocation *, 1> &;
using OnChunkFreeCallback = void (PoolT::*)(uint64_t offset, size_t size);
AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback);
AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCallback, const SmallBuffersParams &params);
AbstractBuffersPool(AbstractBuffersPool<PoolT, BufferType, BufferParentType> &&bufferPool);
AbstractBuffersPool &operator=(AbstractBuffersPool &&) = delete;
virtual ~AbstractBuffersPool() = default;
@ -66,28 +81,32 @@ struct AbstractBuffersPool : public SmallBuffersParams<PoolT>, public NonCopyabl
std::unique_ptr<HeapAllocator> chunkAllocator;
std::vector<std::pair<uint64_t, size_t>> chunksToFree;
OnChunkFreeCallback onChunkFreeCallback = nullptr;
SmallBuffersParams params;
};
template <typename BuffersPoolType, typename BufferType, typename BufferParentType = BufferType>
class AbstractBuffersAllocator : public SmallBuffersParams<BuffersPoolType> {
class AbstractBuffersAllocator {
// The prototype of a function allocating buffers from the pool is not specified (see similar comment in `AbstractBufersPool`).
// By common sense, in order to allocate buffers from the pool the function should leverage a call provided by `BuffersPoolType`.
// Example: see `NEO::Context::BufferPoolAllocator::allocateBufferFromPool()`.
public:
using Params = SmallBuffersParams<BuffersPoolType>;
using Params::aggregatedSmallBuffersPoolSize;
using Params::chunkAlignment;
using Params::smallBufferThreshold;
using Params::startingOffset;
static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");
AbstractBuffersAllocator(const SmallBuffersParams &params);
AbstractBuffersAllocator();
void releasePools() { this->bufferPools.clear(); }
bool isPoolBuffer(const BufferParentType *buffer) const;
void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size);
uint32_t getPoolsCount() { return static_cast<uint32_t>(this->bufferPools.size()); }
// Replaces the allocator's pool parameters, e.g. with the product-preferred
// values from SmallBuffersParams::getPreferredBufferPoolParams().
// NOTE(review): existing pools keep the params they were built with —
// presumably this is called before any pool is created; confirm with callers.
void setParams(const SmallBuffersParams &newParams) {
params = newParams;
}
// Returns a copy of the currently configured pool parameters.
SmallBuffersParams getParams() const {
return params;
};
protected:
inline bool isSizeWithinThreshold(size_t size) const { return smallBufferThreshold >= size; }
inline bool isSizeWithinThreshold(size_t size) const { return params.smallBufferThreshold >= size; }
void tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size, std::vector<BuffersPoolType> &bufferPoolsVec);
void drain();
void drain(std::vector<BuffersPoolType> &bufferPoolsVec);
@ -96,5 +115,6 @@ class AbstractBuffersAllocator : public SmallBuffersParams<BuffersPoolType> {
std::mutex mutex;
std::vector<BuffersPoolType> bufferPools;
SmallBuffersParams params;
};
} // namespace NEO

View File

@ -1,11 +1,12 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/utilities/buffer_pool_allocator.h"
#include "shared/source/utilities/heap_allocator.h"
@ -13,9 +14,17 @@
namespace NEO {
// Selects the pool configuration best suited to the device: products with
// 2MB local-memory alignment enabled get the larger pool layout, every
// other product keeps the default configuration.
inline SmallBuffersParams SmallBuffersParams::getPreferredBufferPoolParams(const ProductHelper &productHelper) {
    if (productHelper.is2MBLocalMemAlignmentEnabled()) {
        return SmallBuffersParams::getLargePagesParams();
    }
    return SmallBuffersParams::getDefaultParams();
}
// Backward-compatible constructor: pools created without an explicit
// configuration delegate to the params-taking overload with the default
// small-buffers parameters.
template <typename PoolT, typename BufferType, typename BufferParentType>
AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCb)
    : AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(memoryManager, onChunkFreeCb, SmallBuffersParams::getDefaultParams()) {}

// Constructs a pool bound to the given memory manager, chunk-free callback
// and pool configuration. BufferType must derive from BufferParentType so
// pool buffers can be handed out through the parent interface.
// NOTE: the stale pre-refactor initializer list
// (": memoryManager{...}, onChunkFreeCallback{...} {") that was left
// interleaved with the delegating constructor above has been removed - a
// constructor cannot carry two member-initializer lists.
template <typename PoolT, typename BufferType, typename BufferParentType>
AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCb, const SmallBuffersParams &params)
    : memoryManager{memoryManager}, onChunkFreeCallback{onChunkFreeCb}, params{params} {
    static_assert(std::is_base_of_v<BufferParentType, BufferType>);
}
@ -24,7 +33,8 @@ AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(Ab
: memoryManager{bufferPool.memoryManager},
mainStorage{std::move(bufferPool.mainStorage)},
chunkAllocator{std::move(bufferPool.chunkAllocator)},
onChunkFreeCallback{bufferPool.onChunkFreeCallback} {}
onChunkFreeCallback{bufferPool.onChunkFreeCallback},
params{bufferPool.params} {}
template <typename PoolT, typename BufferType, typename BufferParentType>
void AbstractBuffersPool<PoolT, BufferType, BufferParentType>::tryFreeFromPoolBuffer(BufferParentType *possiblePoolBuffer, size_t offset, size_t size) {
@ -49,7 +59,7 @@ void AbstractBuffersPool<PoolT, BufferType, BufferParentType>::drain() {
}
}
for (auto &chunk : this->chunksToFree) {
this->chunkAllocator->free(chunk.first + startingOffset, chunk.second);
this->chunkAllocator->free(chunk.first + params.startingOffset, chunk.second);
if (static_cast<PoolT *>(this)->onChunkFreeCallback) {
(static_cast<PoolT *>(this)->*onChunkFreeCallback)(chunk.first, chunk.second);
}
@ -57,6 +67,16 @@ void AbstractBuffersPool<PoolT, BufferType, BufferParentType>::drain() {
this->chunksToFree.clear();
}
// Constructs an allocator with the given small-buffers pool configuration.
// Emits a debug break when the configured pool size cannot accommodate the
// largest buffer allowed into the pool (threshold > pool size would make
// every "poolable" allocation fail).
template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::AbstractBuffersAllocator(const SmallBuffersParams &params)
    : params{params} {
    DEBUG_BREAK_IF(params.aggregatedSmallBuffersPoolSize < params.smallBufferThreshold);
}
// Default constructor: delegates to the params-taking overload with the
// default small-buffers configuration.
template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::AbstractBuffersAllocator()
    : AbstractBuffersAllocator(SmallBuffersParams::getDefaultParams()) {}
template <typename BuffersPoolType, typename BufferType, typename BufferParentType>
bool AbstractBuffersAllocator<BuffersPoolType, BufferType, BufferParentType>::isPoolBuffer(const BufferParentType *buffer) const {
static_assert(std::is_base_of_v<BufferParentType, BufferType>);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -16,7 +16,7 @@ namespace NEO {
ISAPool::ISAPool(Device *device, bool isBuiltin, size_t storageSize)
: BaseType(device->getMemoryManager(), nullptr), device(device), isBuiltin(isBuiltin) {
this->chunkAllocator.reset(new NEO::HeapAllocator(startingOffset, storageSize, MemoryConstants::pageSize, 0u));
this->chunkAllocator.reset(new NEO::HeapAllocator(params.startingOffset, storageSize, MemoryConstants::pageSize, 0u));
auto allocationType = isBuiltin ? NEO::AllocationType::kernelIsaInternal : NEO::AllocationType::kernelIsa;
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(),
@ -47,7 +47,7 @@ SharedIsaAllocation *ISAPool::allocateISA(size_t requestedSize) const {
if (offset == 0) {
return nullptr;
}
return new SharedIsaAllocation{this->mainStorage.get(), offset - startingOffset, requestedSize, mtx.get()};
return new SharedIsaAllocation{this->mainStorage.get(), offset - params.startingOffset, requestedSize, mtx.get()};
}
const StackVec<NEO::GraphicsAllocation *, 1> &ISAPool::getAllocationsVector() {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -10,6 +10,7 @@
#include "shared/source/utilities/stackvec.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/mock_product_helper.h"
#include "gtest/gtest.h"
@ -24,20 +25,19 @@ struct DummyBuffer {
int val;
};
template <>
struct NEO::SmallBuffersParams<DummyBufferPool> {
static constexpr auto aggregatedSmallBuffersPoolSize = 32 * MemoryConstants::kiloByte;
static constexpr auto smallBufferThreshold = 2 * MemoryConstants::kiloByte;
static constexpr auto chunkAlignment = 1024u;
static constexpr auto startingOffset = chunkAlignment;
};
struct DummyBuffersPool : public NEO::AbstractBuffersPool<DummyBuffersPool, DummyBuffer> {
using BaseType = NEO::AbstractBuffersPool<DummyBuffersPool, DummyBuffer>;
static constexpr auto dummyPtr = 0xdeadbeef0000;
static constexpr NEO::SmallBuffersParams defaultParams{
32 * MemoryConstants::kiloByte, // aggregatedSmallBuffersPoolSize
2 * MemoryConstants::kiloByte, // smallBufferThreshold
1024u, // chunkAlignment
1024u // startingOffset
};
DummyBuffersPool(NEO::MemoryManager *memoryManager, uint32_t poolOffset, BaseType::OnChunkFreeCallback onChunkFreeCallback)
: BaseType{memoryManager, onChunkFreeCallback} {
: BaseType{memoryManager, onChunkFreeCallback, defaultParams} {
dummyAllocations.resize(2);
dummyAllocations[0] = reinterpret_cast<NEO::GraphicsAllocation *>(poolOffset + dummyPtr);
dummyAllocations[1] = nullptr; // makes sure nullptrs don't cause SEGFAULTs
@ -64,6 +64,9 @@ struct DummyBuffersAllocator : public NEO::AbstractBuffersAllocator<DummyBuffers
using BaseType::bufferPools;
using BaseType::isSizeWithinThreshold;
DummyBuffersAllocator() : BaseType() {}
DummyBuffersAllocator(const NEO::SmallBuffersParams &params) : BaseType(params) {}
void drainUnderLock() {
auto lock = std::unique_lock<std::mutex>(this->mutex);
this->BaseType::drain();
@ -86,10 +89,10 @@ struct AbstractSmallBuffersTest : public ::testing::Test {
TEST_F(AbstractSmallBuffersTest, givenBuffersPoolWhenCreatedAndMovedThenCtorsWorkCorrectly) {
auto pool1 = DummyBuffersPool{this->memoryManager.get()};
pool1.mainStorage.reset(new DummyBuffer(testVal));
pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset,
DummyBuffersPool::aggregatedSmallBuffersPoolSize,
DummyBuffersPool::chunkAlignment,
DummyBuffersPool::smallBufferThreshold});
pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
DummyBuffersPool::defaultParams.chunkAlignment,
DummyBuffersPool::defaultParams.smallBufferThreshold});
EXPECT_EQ(pool1.memoryManager, this->memoryManager.get());
@ -98,7 +101,7 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersPoolWhenCreatedAndMovedThenCtorsWor
EXPECT_EQ(pool2.mainStorage->val, testVal);
EXPECT_EQ(static_cast<DummyBuffersPool::BaseType &>(pool2).getAllocationsVector()[0], reinterpret_cast<NEO::GraphicsAllocation *>(DummyBuffersPool::dummyPtr));
EXPECT_EQ(pool2.chunkAllocator->getUsedSize(), 0ul);
EXPECT_EQ(pool2.chunkAllocator->getLeftSize(), DummyBuffersPool::aggregatedSmallBuffersPoolSize);
EXPECT_EQ(pool2.chunkAllocator->getLeftSize(), DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize);
}
TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenPoolWithoutMainStorageAddedThenItIsIgnored) {
@ -114,11 +117,11 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenPoolWithoutMainStorage
TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenNullptrTriedToBeFreedThenItIsNotConsideredValidBuffer) {
auto pool = DummyBuffersPool{this->memoryManager.get()};
pool.mainStorage.reset(new DummyBuffer(testVal));
auto buffersAllocator = DummyBuffersAllocator{};
DummyBuffersAllocator buffersAllocator{pool.params};
buffersAllocator.addNewBufferPool(std::move(pool));
EXPECT_TRUE(buffersAllocator.isSizeWithinThreshold(DummyBuffersPool::smallBufferThreshold));
EXPECT_FALSE(buffersAllocator.isSizeWithinThreshold(DummyBuffersPool::smallBufferThreshold + 1));
EXPECT_TRUE(buffersAllocator.isSizeWithinThreshold(pool.params.smallBufferThreshold));
EXPECT_FALSE(buffersAllocator.isSizeWithinThreshold(pool.params.smallBufferThreshold + 1));
auto &chunksToFree = buffersAllocator.bufferPools[0].chunksToFree;
EXPECT_EQ(chunksToFree.size(), 0u);
@ -175,8 +178,8 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenChunkOfMainStorageTrie
auto &chunksToFree2 = buffersAllocator.bufferPools[1].chunksToFree;
EXPECT_EQ(chunksToFree1.size(), 0u);
EXPECT_EQ(chunksToFree2.size(), 0u);
auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::chunkAlignment;
auto chunkSize = DummyBuffersPool::defaultParams.chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::defaultParams.chunkAlignment;
buffersAllocator.tryFreeFromPoolBuffer(poolStorage2, chunkOffset, chunkSize);
EXPECT_EQ(chunksToFree1.size(), 0u);
EXPECT_EQ(chunksToFree2.size(), 1u);
@ -197,21 +200,21 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnlyA
pool2.mainStorage.reset(new DummyBuffer(testVal + 2));
auto buffer1 = pool1.mainStorage.get();
auto buffer2 = pool2.mainStorage.get();
pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset,
DummyBuffersPool::aggregatedSmallBuffersPoolSize,
DummyBuffersPool::chunkAlignment,
DummyBuffersPool::smallBufferThreshold});
pool2.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset,
DummyBuffersPool::aggregatedSmallBuffersPoolSize,
DummyBuffersPool::chunkAlignment,
DummyBuffersPool::smallBufferThreshold});
pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
DummyBuffersPool::defaultParams.chunkAlignment,
DummyBuffersPool::defaultParams.smallBufferThreshold});
pool2.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
DummyBuffersPool::defaultParams.chunkAlignment,
DummyBuffersPool::defaultParams.smallBufferThreshold});
auto buffersAllocator = DummyBuffersAllocator{};
buffersAllocator.addNewBufferPool(std::move(pool1));
buffersAllocator.addNewBufferPool(std::move(pool2));
auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::chunkAlignment;
auto chunkSize = DummyBuffersPool::defaultParams.chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::defaultParams.chunkAlignment;
for (size_t i = 0; i < 3; i++) {
auto exampleOffset = chunkOffset + i * chunkSize * 2;
buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize);
@ -247,15 +250,15 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolsThenOnChu
auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr};
pool1.mainStorage.reset(new DummyBuffer(testVal));
auto buffer1 = pool1.mainStorage.get();
pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::startingOffset,
DummyBuffersPool::aggregatedSmallBuffersPoolSize,
DummyBuffersPool::chunkAlignment,
DummyBuffersPool::smallBufferThreshold});
pool1.chunkAllocator.reset(new NEO::HeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
DummyBuffersPool::defaultParams.chunkAlignment,
DummyBuffersPool::defaultParams.smallBufferThreshold});
auto buffersAllocator = DummyBuffersAllocator{};
buffersAllocator.addNewBufferPool(std::move(pool1));
auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::chunkAlignment;
auto chunkSize = DummyBuffersPool::defaultParams.chunkAlignment * 4;
auto chunkOffset = DummyBuffersPool::defaultParams.chunkAlignment;
for (size_t i = 0; i < 3; i++) {
auto exampleOffset = chunkOffset + i * chunkSize * 2;
buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffset, chunkSize);
@ -295,17 +298,17 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolThenOffset
auto pool1 = DummyBuffersPool{this->memoryManager.get(), 0x0, nullptr};
pool1.mainStorage.reset(new DummyBuffer(testVal));
auto buffer1 = pool1.mainStorage.get();
pool1.chunkAllocator.reset(new ProxyHeapAllocator{DummyBuffersPool::startingOffset,
DummyBuffersPool::aggregatedSmallBuffersPoolSize,
DummyBuffersPool::chunkAlignment,
DummyBuffersPool::smallBufferThreshold});
pool1.chunkAllocator.reset(new ProxyHeapAllocator{DummyBuffersPool::defaultParams.startingOffset,
DummyBuffersPool::defaultParams.aggregatedSmallBuffersPoolSize,
DummyBuffersPool::defaultParams.chunkAlignment,
DummyBuffersPool::defaultParams.smallBufferThreshold});
auto buffersAllocator = DummyBuffersAllocator{};
buffersAllocator.addNewBufferPool(std::move(pool1));
auto chunkSize = DummyBuffersPool::chunkAlignment * 4;
auto chunkSize = DummyBuffersPool::defaultParams.chunkAlignment * 4;
auto exampleOffsets = std::array<size_t, 3>{0u, 0u, 0u};
for (size_t i = 0; i < 3; i++) {
exampleOffsets[i] = DummyBuffersPool::startingOffset + i * chunkSize * 2;
exampleOffsets[i] = DummyBuffersPool::defaultParams.startingOffset + i * chunkSize * 2;
buffersAllocator.tryFreeFromPoolBuffer(buffer1, exampleOffsets[i], chunkSize);
}
@ -317,6 +320,74 @@ TEST_F(AbstractSmallBuffersTest, givenBuffersAllocatorWhenDrainingPoolThenOffset
auto heapAllocator = static_cast<ProxyHeapAllocator *>(buffersAllocator.bufferPools[0].chunkAllocator.get());
ASSERT_EQ(heapAllocator->registeredOffsets.size(), 3u);
for (size_t i = 0; i < 3; i++) {
EXPECT_EQ(heapAllocator->registeredOffsets[i], exampleOffsets[i] + DummyBuffersPool::startingOffset);
EXPECT_EQ(heapAllocator->registeredOffsets[i], exampleOffsets[i] + DummyBuffersPool::defaultParams.startingOffset);
}
}
// Fixture for SmallBuffersParams factory methods and allocator param plumbing.
struct SmallBuffersParamsTest : public ::testing::Test {
    // Field-by-field equality check; bails out on the first mismatch
    // (same short-circuit order as comparing with a chained &&).
    bool compareSmallBuffersParams(const NEO::SmallBuffersParams &first, const NEO::SmallBuffersParams &second) {
        if (first.aggregatedSmallBuffersPoolSize != second.aggregatedSmallBuffersPoolSize) {
            return false;
        }
        if (first.smallBufferThreshold != second.smallBufferThreshold) {
            return false;
        }
        if (first.chunkAlignment != second.chunkAlignment) {
            return false;
        }
        return first.startingOffset == second.startingOffset;
    }
};
// Default configuration: 2MB pool, 1MB threshold, 64KB alignment and offset.
TEST_F(SmallBuffersParamsTest, WhenGettingDefaultParamsThenReturnCorrectValues) {
    const auto actual = NEO::SmallBuffersParams::getDefaultParams();

    const auto expectedPoolSize = 2 * MemoryConstants::megaByte;
    const auto expectedThreshold = 1 * MemoryConstants::megaByte;

    EXPECT_EQ(expectedPoolSize, actual.aggregatedSmallBuffersPoolSize);
    EXPECT_EQ(expectedThreshold, actual.smallBufferThreshold);
    EXPECT_EQ(MemoryConstants::pageSize64k, actual.chunkAlignment);
    EXPECT_EQ(MemoryConstants::pageSize64k, actual.startingOffset);
}
// Large-pages configuration: 16MB pool, 2MB threshold, 64KB alignment and offset.
TEST_F(SmallBuffersParamsTest, WhenGettingLargePagesParamsThenReturnCorrectValues) {
    const auto actual = NEO::SmallBuffersParams::getLargePagesParams();

    const auto expectedPoolSize = 16 * MemoryConstants::megaByte;
    const auto expectedThreshold = 2 * MemoryConstants::megaByte;

    EXPECT_EQ(expectedPoolSize, actual.aggregatedSmallBuffersPoolSize);
    EXPECT_EQ(expectedThreshold, actual.smallBufferThreshold);
    EXPECT_EQ(MemoryConstants::pageSize64k, actual.chunkAlignment);
    EXPECT_EQ(MemoryConstants::pageSize64k, actual.startingOffset);
}
// Preferred params must track the 2MB local-memory-alignment capability:
// disabled -> default configuration, enabled -> large-pages configuration.
TEST_F(SmallBuffersParamsTest, GivenProductHelperWhenGettingPreferredBufferPoolParamsThenReturnsCorrectValues) {
    auto productHelper = std::make_unique<NEO::MockProductHelper>();

    productHelper->is2MBLocalMemAlignmentEnabledResult = false;
    EXPECT_TRUE(compareSmallBuffersParams(NEO::SmallBuffersParams::getDefaultParams(),
                                          NEO::SmallBuffersParams::getPreferredBufferPoolParams(*productHelper)));

    productHelper->is2MBLocalMemAlignmentEnabledResult = true;
    EXPECT_TRUE(compareSmallBuffersParams(NEO::SmallBuffersParams::getLargePagesParams(),
                                          NEO::SmallBuffersParams::getPreferredBufferPoolParams(*productHelper)));
}
// setParams() must overwrite the allocator's current configuration so that
// getParams() always reflects the most recent assignment.
TEST_F(SmallBuffersParamsTest, GivenBuffersAllocatorWhenSettingDifferentParamsThenGetParamsReturnsExpectedValues) {
    DummyBuffersAllocator allocator{};

    const NEO::SmallBuffersParams smallConfig{
        16 * MemoryConstants::kiloByte, // aggregatedSmallBuffersPoolSize
        1 * MemoryConstants::kiloByte,  // smallBufferThreshold
        1024u,                          // chunkAlignment
        1024u                           // startingOffset
    };
    const NEO::SmallBuffersParams largeConfig{
        32 * MemoryConstants::megaByte, // aggregatedSmallBuffersPoolSize
        2 * MemoryConstants::megaByte,  // smallBufferThreshold
        MemoryConstants::pageSize64k,   // chunkAlignment
        MemoryConstants::pageSize64k    // startingOffset
    };

    for (const auto *config : {&smallConfig, &largeConfig}) {
        allocator.setParams(*config);
        EXPECT_TRUE(compareSmallBuffersParams(*config, allocator.getParams()));
    }
}