Revert "performance: add compressed pool for cl_buffer"

This reverts commit 561385cda1.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
Author: Compute-Runtime-Validation
Date: 2025-12-11 00:32:44 +01:00
Committed by: Compute-Runtime-Automation
parent a9e397e8b4
commit 7610e889fa
15 changed files with 167 additions and 153 deletions

View File

@@ -823,7 +823,7 @@ cl_mem CL_API_CALL clCreateSubBuffer(cl_mem buffer,
     }
     if (parentBuffer->isSubBuffer() == true) {
-        if (!parentBuffer->getContext()->isPoolBuffer(parentBuffer->getAssociatedMemObject())) {
+        if (!parentBuffer->getContext()->getBufferPoolAllocator().isPoolBuffer(parentBuffer->getAssociatedMemObject())) {
             retVal = CL_INVALID_MEM_OBJECT;
             break;
         }

View File

@@ -618,10 +618,9 @@ void ClDevice::initializeMaxPoolCount() {
     auto &device = getDevice();
     const auto bitfield = device.getDeviceBitfield();
     const auto deviceMemory = device.getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong()));
-    auto maxSmallPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(SmallBuffersParams::getDefaultParams(), deviceMemory, 2);
-    auto maxLargePoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(SmallBuffersParams::getLargePagesParams(), deviceMemory, 2);
-    device.updateMaxPoolCount(Context::BufferPoolType::SmallBuffersPool, maxSmallPoolCount);
-    device.updateMaxPoolCount(Context::BufferPoolType::LargeBuffersPool, maxLargePoolCount);
+    const auto preferredBufferPoolParams = SmallBuffersParams::getPreferredBufferPoolParams(device.getProductHelper());
+    const auto maxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(preferredBufferPoolParams, deviceMemory, 2);
+    device.updateMaxPoolCount(maxPoolCount);
 }
 const std::string ClDevice::getClDeviceName() const {

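Note: the restored initializeMaxPoolCount derives a single pool budget from total device memory. Below is a minimal standalone sketch of that sizing math, mirroring the calculateMaxPoolCount body visible in the context.h hunk further down; the 16 GB device size and 2 MB pool size are illustrative assumptions, not values from this commit.

    #include <cstdint>
    #include <cstdio>

    // Mirrors Context::BufferPoolAllocator::calculateMaxPoolCount: budget a
    // percentage of device memory, divide by one pool's size, floor at one pool.
    static uint32_t calculateMaxPoolCount(uint64_t totalMemory, double percentOfMemory, uint64_t poolSize) {
        const auto maxPoolCount = static_cast<uint32_t>(totalMemory * (percentOfMemory / 100.0) / poolSize);
        return maxPoolCount ? maxPoolCount : 1u;
    }

    int main() {
        const uint64_t deviceMemory = 16ull << 30; // assumed: 16 GB of device memory
        const uint64_t poolSize = 2ull << 20;      // assumed: 2 MB per aggregated pool
        // 2% of 16 GB is ~327 MB, so about 163 pools of 2 MB fit the budget.
        std::printf("max pools: %u\n", calculateMaxPoolCount(deviceMemory, 2.0, poolSize));
        return 0;
    }
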
View File

@@ -56,13 +56,12 @@ Context::~Context() {
     if (multiRootDeviceTimestampPacketAllocator.get() != nullptr) {
         multiRootDeviceTimestampPacketAllocator.reset();
     }
-    this->forEachBufferPoolAllocator([this](BufferPoolAllocator &allocator) {
-        if (allocator.isAggregatedSmallBuffersEnabled(this)) {
-            auto &device = this->getDevice(0)->getDevice();
-            device.recordPoolsFreed(allocator.getPoolType(), allocator.getPoolsCount());
-            allocator.releasePools();
-        }
-    });
+    if (smallBufferPoolAllocator.isAggregatedSmallBuffersEnabled(this)) {
+        auto &device = this->getDevice(0)->getDevice();
+        device.recordPoolsFreed(smallBufferPoolAllocator.getPoolsCount());
+        smallBufferPoolAllocator.releasePools();
+    }
     usmDeviceMemAllocPool.cleanup();
@@ -312,8 +311,8 @@ bool Context::createImpl(const cl_context_properties *properties,
         setupContextType();
         initializeManagers();
-        this->bufferPoolAllocators[BufferPoolType::SmallBuffersPool].setParams(SmallBuffersParams::getDefaultParams(), BufferPoolType::SmallBuffersPool);
-        this->bufferPoolAllocators[BufferPoolType::LargeBuffersPool].setParams(SmallBuffersParams::getLargePagesParams(), BufferPoolType::LargeBuffersPool);
+        smallBufferPoolAllocator.setParams(SmallBuffersParams::getPreferredBufferPoolParams(device->getProductHelper()));
     }
     return true;
@@ -563,24 +562,24 @@ bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *cont
            (isSupportedForSingleDeviceContexts && context->isSingleDeviceContext());
 }
-Context::BufferPool::BufferPool(Context *context, const SmallBuffersParams &params, bool isCpuAccessRequired) : BaseType(context->memoryManager,
-                                                                                                                         nullptr,
-                                                                                                                         params) {
-    const cl_mem_flags flags = isCpuAccessRequired ? CL_MEM_UNCOMPRESSED_HINT_INTEL : 0;
+Context::BufferPool::BufferPool(Context *context) : BaseType(context->memoryManager,
+                                                             nullptr,
+                                                             SmallBuffersParams::getPreferredBufferPoolParams(context->getDevice(0)->getDevice().getProductHelper())) {
+    static constexpr cl_mem_flags flags = CL_MEM_UNCOMPRESSED_HINT_INTEL;
     [[maybe_unused]] cl_int errcodeRet{};
     Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
     bufferCreateArgs.doNotProvidePerformanceHints = true;
-    bufferCreateArgs.makeAllocationLockable = isCpuAccessRequired;
+    bufferCreateArgs.makeAllocationLockable = true;
     this->mainStorage.reset(Buffer::create(context,
                                            flags,
-                                           params.aggregatedSmallBuffersPoolSize,
+                                           context->getBufferPoolAllocator().getParams().aggregatedSmallBuffersPoolSize,
                                            nullptr,
                                            bufferCreateArgs,
                                            errcodeRet));
     if (this->mainStorage) {
         this->chunkAllocator.reset(new HeapAllocator(params.startingOffset,
-                                                     params.aggregatedSmallBuffersPoolSize,
-                                                     params.chunkAlignment));
+                                                     context->getBufferPoolAllocator().getParams().aggregatedSmallBuffersPoolSize,
+                                                     context->getBufferPoolAllocator().getParams().chunkAlignment));
         context->decRefInternal();
     }
 }
@@ -612,8 +611,8 @@ Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties,
 void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
     this->context = context;
     auto &device = context->getDevice(0)->getDevice();
-    if (device.requestPoolCreate(this->poolType, 1u)) {
-        this->addNewBufferPool(Context::BufferPool{this->context, this->params, this->poolType == BufferPoolType::SmallBuffersPool});
+    if (device.requestPoolCreate(1u)) {
+        this->addNewBufferPool(Context::BufferPool{this->context});
     }
 }
@@ -644,8 +643,8 @@ Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryPropert
     }
     auto &device = context->getDevice(0)->getDevice();
-    if (device.requestPoolCreate(this->poolType, 1u)) {
-        this->addNewBufferPool(BufferPool{this->context, this->params, this->poolType == BufferPoolType::SmallBuffersPool});
+    if (device.requestPoolCreate(1u)) {
+        this->addNewBufferPool(BufferPool{this->context});
         return this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
     }
     return nullptr;
@@ -687,9 +686,4 @@ std::unique_lock<std::mutex> Context::obtainOwnershipForMultiRootDeviceAllocator
     return std::unique_lock<std::mutex>(multiRootDeviceAllocatorMtx);
 }
-bool Context::isPoolBuffer(const MemObj *buffer) {
-    return this->getBufferPoolAllocator(BufferPoolType::SmallBuffersPool).isPoolBuffer(buffer) ||
-           this->getBufferPoolAllocator(BufferPoolType::LargeBuffersPool).isPoolBuffer(buffer);
-}
 } // namespace NEO

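Note: after the revert, each BufferPool again fronts one uncompressed backing Buffer whose chunks are handed out by a HeapAllocator; small buffers become sub-buffers at those offsets. The sketch below is a simplified, hypothetical stand-in for that chunking (bump-pointer only, assuming power-of-two alignment, whereas NEO's HeapAllocator also frees and reuses ranges):

    #include <cstddef>
    #include <optional>

    // Sub-buffers are carved out of one backing storage as aligned offsets.
    class ChunkAllocator {
      public:
        ChunkAllocator(size_t startingOffset, size_t poolSize, size_t chunkAlignment)
            : current(startingOffset), end(startingOffset + poolSize), alignment(chunkAlignment) {}

        std::optional<size_t> allocate(size_t size) {
            const size_t aligned = (size + alignment - 1) & ~(alignment - 1);
            if (current + aligned > end) {
                return std::nullopt; // pool exhausted; the caller may request a new pool
            }
            const size_t offset = current;
            current += aligned;
            return offset;
        }

      private:
        size_t current;
        size_t end;
        size_t alignment;
    };

    int main() {
        ChunkAllocator allocator(4096, 1 << 16, 4096); // startingOffset, poolSize, chunkAlignment
        const auto first = allocator.allocate(100);    // offset 4096, rounded up to one 4 KB chunk
        const auto second = allocator.allocate(100);   // offset 8192
        return (first && second) ? 0 : 1;
    }
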
View File

@@ -52,15 +52,10 @@ class Context : public BaseObject<_cl_context> {
   public:
     using BufferAllocationsVec = StackVec<GraphicsAllocation *, 1>;
-    enum BufferPoolType : uint32_t {
-        SmallBuffersPool = 0,
-        LargeBuffersPool = 1,
-        NumBufferPoolTypes = 2
-    };
     struct BufferPool : public AbstractBuffersPool<BufferPool, Buffer, MemObj> {
         using BaseType = AbstractBuffersPool<BufferPool, Buffer, MemObj>;
-        BufferPool(Context *context, const SmallBuffersParams &params, bool isCpuAccessRequired);
+        BufferPool(Context *context);
         Buffer *allocate(const MemoryProperties &memoryProperties,
                          cl_mem_flags flags,
                          cl_mem_flags_intel flagsIntel,
@@ -77,10 +72,7 @@ class Context : public BaseObject<_cl_context> {
       public:
         BufferPoolAllocator() = default;
-        void setParams(const SmallBuffersParams &newParams, BufferPoolType type) {
-            BaseType::setParams(newParams);
-            this->poolType = type;
-        }
         bool isAggregatedSmallBuffersEnabled(Context *context) const;
         void initAggregatedSmallBuffers(Context *context);
         Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties,
@@ -94,10 +86,6 @@ class Context : public BaseObject<_cl_context> {
             const auto maxPoolCount = static_cast<uint32_t>(totalMemory * (percentOfMemory / 100.0) / (smallBuffersParams.aggregatedSmallBuffersPoolSize));
             return maxPoolCount ? maxPoolCount : 1u;
         }
-        static inline BufferPoolType getBufferPoolTypeBySize(size_t size) {
-            return (size <= SmallBuffersParams::getDefaultParams().smallBufferThreshold) ? BufferPoolType::SmallBuffersPool : BufferPoolType::LargeBuffersPool;
-        }
-        BufferPoolType getPoolType() const { return poolType; }
       protected:
         Buffer *allocateFromPools(const MemoryProperties &memoryProperties,
@@ -107,7 +95,6 @@ class Context : public BaseObject<_cl_context> {
                                   void *hostPtr,
                                   cl_int &errcodeRet);
         Context *context{nullptr};
-        BufferPoolType poolType{BufferPoolType::SmallBuffersPool};
     };
     static const cl_ulong objectMagic = 0xA4234321DC002130LL;
@@ -129,11 +116,10 @@ class Context : public BaseObject<_cl_context> {
             delete pContext;
             pContext = nullptr;
         } else {
-            pContext->forEachBufferPoolAllocator([pContext](auto &bufferPoolAllocator) {
-                if (bufferPoolAllocator.isAggregatedSmallBuffersEnabled(pContext)) {
-                    bufferPoolAllocator.initAggregatedSmallBuffers(pContext);
-                }
-            });
+            auto &bufferPoolAllocator = pContext->getBufferPoolAllocator();
+            if (bufferPoolAllocator.isAggregatedSmallBuffersEnabled(pContext)) {
+                bufferPoolAllocator.initAggregatedSmallBuffers(pContext);
+            }
         }
         gtpinNotifyContextCreate(pContext);
         return pContext;
@@ -251,8 +237,8 @@ class Context : public BaseObject<_cl_context> {
     const std::map<uint32_t, DeviceBitfield> &getDeviceBitfields() const { return deviceBitfields; };
     static Platform *getPlatformFromProperties(const cl_context_properties *properties, cl_int &errcode);
-    BufferPoolAllocator &getBufferPoolAllocator(BufferPoolType type) {
-        return bufferPoolAllocators[type];
+    BufferPoolAllocator &getBufferPoolAllocator() {
+        return smallBufferPoolAllocator;
     }
     UsmMemAllocPool &getDeviceMemAllocPool() {
         return usmDeviceMemAllocPool;
@@ -263,12 +249,6 @@ class Context : public BaseObject<_cl_context> {
     void setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr<TagAllocatorBase> &allocator);
     void initializeDeviceUsmAllocationPool();
-    bool isPoolBuffer(const MemObj *buffer);
-    template <typename Func>
-    void forEachBufferPoolAllocator(Func func) {
-        std::for_each(bufferPoolAllocators.begin(), bufferPoolAllocators.end(), func);
-    }
   protected:
     struct BuiltInKernel {
@@ -306,7 +286,7 @@ class Context : public BaseObject<_cl_context> {
     MapOperationsStorage mapOperationsStorage = {};
     StackVec<CommandQueue *, 1> specialQueues;
     DriverDiagnostics *driverDiagnostics = nullptr;
-    std::array<BufferPoolAllocator, BufferPoolType::NumBufferPoolTypes> bufferPoolAllocators;
+    BufferPoolAllocator smallBufferPoolAllocator;
     UsmDeviceMemAllocPool usmDeviceMemAllocPool;
     uint32_t maxRootDeviceIndex = std::numeric_limits<uint32_t>::max();

View File

@@ -9,9 +9,8 @@
 namespace NEO {
 bool Context::BufferPoolAllocator::flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const {
-    auto forbiddenFlag = this->poolType == BufferPoolType::SmallBuffersPool ? CL_MEM_COMPRESSED_HINT_INTEL : CL_MEM_UNCOMPRESSED_HINT_INTEL;
-    return (flagsIntel & forbiddenFlag) == false &&
-           (flags & forbiddenFlag) == false;
+    return (flagsIntel & CL_MEM_COMPRESSED_HINT_INTEL) == false &&
+           (flags & CL_MEM_COMPRESSED_HINT_INTEL) == false;
 }
 } // namespace NEO

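Note: pool storage is created with CL_MEM_UNCOMPRESSED_HINT_INTEL, so the restored check simply rejects any request that carries the compression hint in either flag field. A self-contained sketch of the mask test follows; the hint's bit value here is a placeholder assumption, not the real CL constant.

    #include <cassert>
    #include <cstdint>

    using cl_mem_flags = uint64_t;
    constexpr cl_mem_flags compressedHint = 1ull << 20; // placeholder bit, not the real value

    // Pooled storage is uncompressed, so a buffer asking for compression must
    // bypass the pool; masking against zero mirrors flagsAllowBufferFromPool.
    bool flagsAllowBufferFromPool(cl_mem_flags flags, cl_mem_flags flagsIntel) {
        return (flagsIntel & compressedHint) == 0 && (flags & compressedHint) == 0;
    }

    int main() {
        assert(flagsAllowBufferFromPool(0, 0));
        assert(!flagsAllowBufferFromPool(compressedHint, 0));
        assert(!flagsAllowBufferFromPool(0, compressedHint));
        return 0;
    }
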
View File

@@ -250,7 +250,7 @@ bool inline copyHostPointer(Buffer *buffer,
     auto context = buffer->getContext();
     auto cmdQ = context->getSpecialQueue(rootDeviceIndex);
     auto mapAllocation = buffer->getMapAllocation(rootDeviceIndex);
-    if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(buffer, CL_TRUE, 0, size, hostPtr, mapAllocation, 0, nullptr, nullptr)) {
+    if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(buffer, CL_TRUE, buffer->getOffset(), size, hostPtr, mapAllocation, 0, nullptr, nullptr)) {
         errcodeRet = CL_OUT_OF_RESOURCES;
         return false;
     }
@@ -303,8 +303,8 @@ Buffer *Buffer::create(Context *context,
         defaultRootDeviceIndex = rootDeviceIndices[0];
         pRootDeviceIndices = &rootDeviceIndices;
     }
-    auto poolType = Context::BufferPoolAllocator::getBufferPoolTypeBySize(size);
-    Context::BufferPoolAllocator &bufferPoolAllocator = context->getBufferPoolAllocator(poolType);
+    Context::BufferPoolAllocator &bufferPoolAllocator = context->getBufferPoolAllocator();
     const bool implicitScalingEnabled = ImplicitScalingHelper::isImplicitScalingEnabled(defaultDevice->getDeviceBitfield(), true);
     const bool useHostPtr = memoryProperties.flags.useHostPtr;
     const bool copyHostPtr = memoryProperties.flags.copyHostPtr;

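Note: the copyHostPointer change matters because a pooled buffer is a sub-allocation: its data lives at buffer->getOffset() inside the pool's shared storage, so a blocking write to offset 0 would land in a neighboring chunk. A host-side model of that layout, with hypothetical types rather than the NEO API:

    #include <cstddef>
    #include <cstring>
    #include <vector>

    // A pooled buffer occupies [offset, offset + size) of the pool's storage.
    struct PooledBuffer {
        std::vector<char> *storage; // backing storage shared by all chunks
        size_t offset;              // this buffer's chunk offset in the pool
        size_t size;
    };

    // Equivalent of enqueueWriteBuffer(buffer, CL_TRUE, buffer->getOffset(), ...):
    // the host data must be copied to the chunk's offset, never to offset 0.
    void copyHostPointer(PooledBuffer &buffer, const void *hostPtr) {
        std::memcpy(buffer.storage->data() + buffer.offset, hostPtr, buffer.size);
    }

    int main() {
        std::vector<char> pool(1 << 16); // 64 KB pool storage
        PooledBuffer buf{&pool, 4096, 16};
        const char data[16] = "fifteen chars!!";
        copyHostPointer(buf, data);
        return 0;
    }
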
View File

@@ -104,10 +104,8 @@ MemObj::~MemObj() {
         }
     }
     if (associatedMemObject) {
-        context->forEachBufferPoolAllocator([this](auto &bufferPoolAllocator) {
-            bufferPoolAllocator.tryFreeFromPoolBuffer(this->associatedMemObject, this->offset, this->sizeInPoolAllocator);
-        });
+        context->getBufferPoolAllocator().tryFreeFromPoolBuffer(associatedMemObject, this->offset, this->sizeInPoolAllocator);
         associatedMemObject->decRefInternal();
     }
     if (!associatedMemObject) {
         releaseAllocatedMapPtr();
@@ -116,7 +114,7 @@ MemObj::~MemObj() {
     destructorCallbacks.invoke(this);
-    const bool needDecrementContextRefCount = !context->isPoolBuffer(this);
+    const bool needDecrementContextRefCount = !context->getBufferPoolAllocator().isPoolBuffer(this);
     if (needDecrementContextRefCount) {
         context->decRefInternal();
     }
@@ -176,7 +174,7 @@ cl_int MemObj::getMemObjectInfo(cl_mem_info paramName,
     case CL_MEM_OFFSET:
         clOffset = this->getOffset();
         if (nullptr != this->associatedMemObject) {
-            if (this->getContext()->isPoolBuffer(this->associatedMemObject)) {
+            if (this->getContext()->getBufferPoolAllocator().isPoolBuffer(this->associatedMemObject)) {
                 clOffset = 0;
             } else {
                 clOffset -= this->associatedMemObject->getOffset();
@@ -187,7 +185,7 @@ cl_int MemObj::getMemObjectInfo(cl_mem_info paramName,
         break;
     case CL_MEM_ASSOCIATED_MEMOBJECT:
-        if (this->getContext()->isPoolBuffer(this->associatedMemObject)) {
+        if (this->getContext()->getBufferPoolAllocator().isPoolBuffer(this->associatedMemObject)) {
             clAssociatedMemObject = nullptr;
         }
         srcParamSize = sizeof(clAssociatedMemObject);

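Note: the two getMemObjectInfo hunks restore the rule that pooling stays invisible to the application: a buffer carved from the internal pool reports CL_MEM_OFFSET as 0 and CL_MEM_ASSOCIATED_MEMOBJECT as nullptr, while a genuine user sub-buffer reports its offset relative to its parent. A condensed sketch of that decision, using simplified hypothetical types:

    #include <cassert>
    #include <cstddef>

    struct QueryResult {
        size_t offset;
        const void *associatedMemObject;
    };

    // parentIsPoolBuffer plays the role of getBufferPoolAllocator().isPoolBuffer(parent).
    QueryResult queryInfo(size_t rawOffset, const void *parent, bool parentIsPoolBuffer, size_t parentOffset) {
        if (parent && parentIsPoolBuffer) {
            return {0u, nullptr}; // hide the pool: looks like a standalone buffer
        }
        if (parent) {
            return {rawOffset - parentOffset, parent}; // real sub-buffer: offset relative to parent
        }
        return {rawOffset, nullptr};
    }

    int main() {
        int parent = 0;
        assert(queryInfo(64, &parent, true, 0).offset == 0);    // pooled: offset hidden
        assert(queryInfo(64, &parent, false, 16).offset == 48); // user sub-buffer
        return 0;
    }
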
View File

@@ -28,8 +28,7 @@ namespace NEO {
 void MulticontextOclAubFixture::setUp(uint32_t numberOfTiles, EnabledCommandStreamers enabledCommandStreamers, bool enableCompression) {
     MulticontextAubFixture::setUp(numberOfTiles, enabledCommandStreamers, enableCompression);
-    debugManager.flags.RenderCompressedBuffersEnabled.set(-1);
-    debugManager.flags.RenderCompressedImagesEnabled.set(-1);
+
     cl_int retVal = CL_SUCCESS;
     auto createCommandQueueForEngine = [&](uint32_t tileNumber, size_t engineFamily, size_t engineIndex) {
@@ -91,8 +90,6 @@ void MulticontextOclAubFixture::setUp(uint32_t numberOfTiles, EnabledCommandStre
         multiTileDefaultContext.reset(MockContext::create<MockContext>(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal));
         EXPECT_EQ(CL_SUCCESS, retVal);
     }
-    debugManager.flags.RenderCompressedBuffersEnabled.set(enableCompression);
-    debugManager.flags.RenderCompressedImagesEnabled.set(enableCompression);
 }
 CommandStreamReceiver *MulticontextOclAubFixture::getGpgpuCsr(uint32_t tile, uint32_t engine) {

View File

@@ -62,14 +62,13 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
         debugManager.flags.EnableDeviceUsmAllocationPool.set(0);
         debugManager.flags.EnableHostUsmAllocationPool.set(0);
         debugManager.flags.RenderCompressedBuffersEnabled.set(1);
-        debugManager.flags.OverrideBufferSuitableForRenderCompression.set(1);
         this->deviceFactory = std::make_unique<UltClDeviceFactoryWithPlatform>(2, 0);
         this->device = deviceFactory->rootDevices[rootDeviceIndex];
         this->mockNeoDevice = static_cast<MockDevice *>(&this->device->getDevice());
         const auto bitfield = mockNeoDevice->getDeviceBitfield();
         const auto deviceMemory = mockNeoDevice->getGlobalMemorySize(static_cast<uint32_t>(bitfield.to_ulong()));
-        const auto expectedMaxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(SmallBuffersParams::getDefaultParams(), deviceMemory, 2);
-        EXPECT_EQ(expectedMaxPoolCount, mockNeoDevice->maxBufferPoolCount[0]);
+        const auto expectedMaxPoolCount = Context::BufferPoolAllocator::calculateMaxPoolCount(SmallBuffersParams::getPreferredBufferPoolParams(this->device->getProductHelper()), deviceMemory, 2);
+        EXPECT_EQ(expectedMaxPoolCount, mockNeoDevice->maxBufferPoolCount);
         this->mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
         this->mockMemoryManager->localMemorySupported[rootDeviceIndex] = true;
         this->setAllocationToFail(failMainStorageAllocation);
@@ -78,8 +77,8 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
         this->context->initializeDeviceUsmAllocationPool();
         EXPECT_EQ(retVal, CL_SUCCESS);
         this->setAllocationToFail(false);
-        this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool));
-        this->mockNeoDevice->updateMaxPoolCount(0u, 1u);
+        this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
+        this->mockNeoDevice->updateMaxPoolCount(1u);
         size = this->poolAllocator->params.smallBufferThreshold;
     }
 };
@@ -121,29 +120,29 @@ HWTEST_F(AggregatedSmallBuffersDefaultTest, givenDifferentFlagValuesAndSingleOrM
     // Single device context
     {
         debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0);
-        EXPECT_FALSE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+        EXPECT_FALSE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
     }
     {
         debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(1);
-        EXPECT_TRUE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+        EXPECT_TRUE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
     }
     {
         debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(2);
-        EXPECT_TRUE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+        EXPECT_TRUE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
     }
     // Multi device context
     context->devices.push_back(nullptr);
     {
         debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0);
-        EXPECT_FALSE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+        EXPECT_FALSE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
     }
     {
         debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(1);
-        EXPECT_FALSE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+        EXPECT_FALSE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
     }
     {
         debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(2);
-        EXPECT_TRUE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+        EXPECT_TRUE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
     }
     context->devices.pop_back();
 }
@@ -157,19 +156,19 @@ HWTEST2_F(AggregatedSmallBuffersDefaultTest, givenSupportsOclBufferPoolCapabilit
     raii.mockProductHelper->isBufferPoolAllocatorSupportedValue = true;
     mockAIL->isBufferPoolEnabledReturn = true;
-    EXPECT_TRUE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+    EXPECT_TRUE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
     raii.mockProductHelper->isBufferPoolAllocatorSupportedValue = true;
     mockAIL->isBufferPoolEnabledReturn = false;
-    EXPECT_FALSE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+    EXPECT_FALSE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
     raii.mockProductHelper->isBufferPoolAllocatorSupportedValue = false;
     mockAIL->isBufferPoolEnabledReturn = true;
-    EXPECT_FALSE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+    EXPECT_FALSE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
     raii.mockProductHelper->isBufferPoolAllocatorSupportedValue = false;
     mockAIL->isBufferPoolEnabledReturn = false;
-    EXPECT_FALSE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isAggregatedSmallBuffersEnabled(context.get()));
+    EXPECT_FALSE(context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get()));
 }
 using AggregatedSmallBuffersDisabledTest = AggregatedSmallBuffersTestTemplate<0>;
using AggregatedSmallBuffersDisabledTest = AggregatedSmallBuffersTestTemplate<0>;
@@ -193,17 +192,13 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhen
     EXPECT_EQ(1u, MockBufferPoolAllocator::calculateMaxPoolCount(this->poolAllocator->getParams(), MemoryConstants::pageSize64k, 2));
 }
-TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenAllocationIsNotCompressed) {
+TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenMakeDeviceBufferLockableAndNotCompressed) {
     EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    EXPECT_FALSE(poolAllocator->bufferPools[0].mainStorage->isCompressed(rootDeviceIndex));
-    auto largePoolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator(Context::BufferPoolType::LargeBuffersPool));
-    EXPECT_EQ(1u, largePoolAllocator->bufferPools.size());
-    EXPECT_NE(nullptr, largePoolAllocator->bufferPools[0].mainStorage.get());
-    EXPECT_TRUE(largePoolAllocator->bufferPools[0].mainStorage->isCompressed(rootDeviceIndex));
+    EXPECT_NE(nullptr, mockMemoryManager->lastAllocationProperties);
+    EXPECT_TRUE(mockMemoryManager->lastAllocationProperties->makeDeviceBufferLockable);
+    EXPECT_FALSE(mockMemoryManager->lastAllocationProperties->flags.preferCompressed);
 }
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeLargerThanThresholdWhenBufferCreateCalledThenDoNotUsePool) {
@@ -317,7 +312,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
 }
 TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreNotInUseAndNoBuffersFreedThenNewPoolIsCreated) {
-    mockNeoDevice->updateMaxPoolCount(0u, 2u);
+    mockNeoDevice->updateMaxPoolCount(2u);
     EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
@@ -342,7 +337,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
 }
 TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreInUseThenNewPoolIsCreated) {
-    mockNeoDevice->updateMaxPoolCount(0u, 2u);
+    mockNeoDevice->updateMaxPoolCount(2u);
     EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
@@ -367,19 +362,19 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
 }
 TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreInUseAndPoolLimitIsReachedThenNewPoolIsNotCreated) {
-    mockNeoDevice->updateMaxPoolCount(0u, 2u);
+    mockNeoDevice->updateMaxPoolCount(2u);
     EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    const std::vector<std::unique_ptr<Buffer>>::size_type buffersToCreate = (poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold) * mockNeoDevice->maxBufferPoolCount[0];
+    const std::vector<std::unique_ptr<Buffer>>::size_type buffersToCreate = (poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold) * mockNeoDevice->maxBufferPoolCount;
     std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
     for (auto i = 0u; i < buffersToCreate; ++i) {
         buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
         EXPECT_EQ(retVal, CL_SUCCESS);
     }
-    EXPECT_EQ(mockNeoDevice->maxBufferPoolCount[0], poolAllocator->bufferPools.size());
-    for (auto i = 0u; i < mockNeoDevice->maxBufferPoolCount[0]; ++i) {
+    EXPECT_EQ(mockNeoDevice->maxBufferPoolCount, poolAllocator->bufferPools.size());
+    for (auto i = 0u; i < mockNeoDevice->maxBufferPoolCount; ++i) {
         EXPECT_EQ(poolAllocator->params.aggregatedSmallBuffersPoolSize, poolAllocator->bufferPools[i].chunkAllocator->getUsedSize());
     }
     EXPECT_EQ(1u, mockMemoryManager->allocInUseCalled);
@@ -389,7 +384,7 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
     std::unique_ptr<Buffer> bufferAfterExhaustMustFail(Buffer::create(context.get(), flags, size, hostPtr, retVal));
     EXPECT_EQ(nullptr, bufferAfterExhaustMustFail.get());
     EXPECT_NE(retVal, CL_SUCCESS);
-    EXPECT_EQ(mockNeoDevice->maxBufferPoolCount[0], poolAllocator->bufferPools.size());
+    EXPECT_EQ(mockNeoDevice->maxBufferPoolCount, poolAllocator->bufferPools.size());
     EXPECT_EQ(3u, mockMemoryManager->allocInUseCalled);
 }
@@ -477,17 +472,17 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
 }
 TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndMultipleContextsThenPoolLimitIsTrackedAcrossContexts) {
-    mockNeoDevice->updateMaxPoolCount(0u, 2u);
+    mockNeoDevice->updateMaxPoolCount(2u);
     EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
     EXPECT_EQ(1u, poolAllocator->bufferPools.size());
     EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
-    EXPECT_EQ(1u, mockNeoDevice->bufferPoolCount[0].load());
+    EXPECT_EQ(1u, mockNeoDevice->bufferPoolCount.load());
     std::unique_ptr<MockContext> secondContext;
     cl_device_id devices[] = {device};
     secondContext.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
     EXPECT_EQ(retVal, CL_SUCCESS);
     this->setAllocationToFail(false);
-    EXPECT_EQ(2u, mockNeoDevice->bufferPoolCount[0].load());
+    EXPECT_EQ(2u, mockNeoDevice->bufferPoolCount.load());
     auto buffersToCreate = poolAllocator->params.aggregatedSmallBuffersPoolSize / poolAllocator->params.smallBufferThreshold;
     std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
@@ -511,17 +506,17 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndM
     std::unique_ptr<MockContext> thirdContext;
     thirdContext.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
     EXPECT_EQ(retVal, CL_SUCCESS);
-    MockBufferPoolAllocator *thirdPoolAllocator = static_cast<MockBufferPoolAllocator *>(&thirdContext->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool));
+    MockBufferPoolAllocator *thirdPoolAllocator = static_cast<MockBufferPoolAllocator *>(&thirdContext->getBufferPoolAllocator());
     EXPECT_EQ(0u, thirdPoolAllocator->bufferPools.size());
-    EXPECT_EQ(2u, mockNeoDevice->bufferPoolCount[0].load());
+    EXPECT_EQ(2u, mockNeoDevice->bufferPoolCount.load());
     secondContext.reset(nullptr);
-    EXPECT_EQ(1u, mockNeoDevice->bufferPoolCount[0].load());
+    EXPECT_EQ(1u, mockNeoDevice->bufferPoolCount.load());
     buffers.clear();
     bufferAfterExhaustMustSucceed.reset(nullptr);
     context.reset(nullptr);
-    EXPECT_EQ(0u, mockNeoDevice->bufferPoolCount[0].load());
+    EXPECT_EQ(0u, mockNeoDevice->bufferPoolCount.load());
 }
TEST_F(AggregatedSmallBuffersKernelTest, givenBufferFromPoolWhenOffsetSubbufferIsPassedToSetKernelArgThenCorrectGpuVAIsPatched) {
@@ -572,6 +567,51 @@ TEST_F(AggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffe
     EXPECT_EQ(0u, output.size());
 }
+TEST_F(AggregatedSmallBuffersEnabledTestDoNotRunSetup, givenProductWithAndWithout2MBLocalMemAlignmentWhenCreatingContextThenBufferPoolAllocatorHasCorrectParams) {
+    auto compareSmallBuffersParams = [](const NEO::SmallBuffersParams &first, const NEO::SmallBuffersParams &second) {
+        return first.aggregatedSmallBuffersPoolSize == second.aggregatedSmallBuffersPoolSize &&
+               first.smallBufferThreshold == second.smallBufferThreshold &&
+               first.chunkAlignment == second.chunkAlignment &&
+               first.startingOffset == second.startingOffset;
+    };
+
+    debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(1);
+    debugManager.flags.EnableDeviceUsmAllocationPool.set(0);
+    debugManager.flags.EnableHostUsmAllocationPool.set(0);
+    debugManager.flags.RenderCompressedBuffersEnabled.set(1);
+
+    this->deviceFactory = std::make_unique<UltClDeviceFactoryWithPlatform>(2, 0);
+    this->device = deviceFactory->rootDevices[rootDeviceIndex];
+    this->mockNeoDevice = static_cast<MockDevice *>(&this->device->getDevice());
+    auto mockProductHelper = new MockProductHelper;
+    mockNeoDevice->getRootDeviceEnvironmentRef().productHelper.reset(mockProductHelper);
+    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
+    auto &productHelper = mockNeoDevice->getRootDeviceEnvironment().getProductHelper();
+    EXPECT_FALSE(productHelper.is2MBLocalMemAlignmentEnabled());
+
+    cl_device_id devices[] = {device};
+    this->context.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
+    auto &bufferPoolAllocator = context->getBufferPoolAllocator();
+    auto bufferPoolAllocatorParams = bufferPoolAllocator.getParams();
+    auto preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(productHelper);
+    EXPECT_TRUE(compareSmallBuffersParams(bufferPoolAllocatorParams, preferredParams));
+
+    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
+    EXPECT_TRUE(productHelper.is2MBLocalMemAlignmentEnabled());
+    std::unique_ptr<MockContext> secondContext;
+    secondContext.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
+    auto &bufferPoolAllocator2 = secondContext->getBufferPoolAllocator();
+    auto bufferPoolAllocatorParams2 = bufferPoolAllocator2.getParams();
+    preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(productHelper);
+    EXPECT_TRUE(compareSmallBuffersParams(bufferPoolAllocatorParams2, preferredParams));
+}
+
 template <int32_t poolBufferFlag = -1>
 class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
     void SetUp() override {
@@ -582,7 +622,7 @@ class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
         clContext = clCreateContext(nullptr, 1, devices, nullptr, nullptr, &retVal);
         EXPECT_EQ(retVal, CL_SUCCESS);
         context = castToObject<Context>(clContext);
-        poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool));
+        poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
         size = poolAllocator->params.smallBufferThreshold;
     }
}
@@ -610,27 +650,12 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenNoBufferCreatedWhenReleasingCo
     EXPECT_EQ(clReleaseContext(context), CL_SUCCESS);
 }
-TEST_F(AggregatedSmallBuffersEnabledApiTest, givenCorrectSizeWhenCreatingBufferThenUseLargePool) {
+TEST_F(AggregatedSmallBuffersEnabledApiTest, givenNotSmallBufferWhenCreatingBufferThenDoNotUsePool) {
     size = poolAllocator->params.smallBufferThreshold + 1;
     cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
     EXPECT_EQ(retVal, CL_SUCCESS);
     EXPECT_NE(buffer, nullptr);
     MockBuffer *asBuffer = static_cast<MockBuffer *>(buffer);
-    EXPECT_TRUE(asBuffer->isSubBuffer());
-    retVal = clReleaseMemObject(buffer);
-    EXPECT_EQ(retVal, CL_SUCCESS);
-    EXPECT_EQ(clReleaseContext(context), CL_SUCCESS);
-}
-TEST_F(AggregatedSmallBuffersEnabledApiTest, givenCorrectSizeWhenCreatingBufferThenDontUseAnyPool) {
-    size = context->getBufferPoolAllocator(Context::BufferPoolType::LargeBuffersPool).getParams().smallBufferThreshold + 1;
-    cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
-    EXPECT_EQ(retVal, CL_SUCCESS);
-    EXPECT_NE(buffer, nullptr);
-    MockBuffer *asBuffer = static_cast<MockBuffer *>(buffer);
     EXPECT_FALSE(asBuffer->isSubBuffer());
@@ -817,7 +842,7 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenSubBufferFromBufferPoolWhenG
     EXPECT_EQ(CL_SUCCESS, retVal);
     EXPECT_NE(nullptr, buffer1);
     auto mockBuffer1 = static_cast<MockBuffer *>(buffer1);
-    EXPECT_TRUE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isPoolBuffer(mockBuffer1->associatedMemObject));
+    EXPECT_TRUE(context->getBufferPoolAllocator().isPoolBuffer(mockBuffer1->associatedMemObject));
     // need buffer to have non-zero offset, to verify offset calculations in clGetMemObjectInfo
     // so if we get first pool buffer with offset 0, use a second buffer
@@ -828,7 +853,7 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenSubBufferFromBufferPoolWhenG
     EXPECT_EQ(CL_SUCCESS, retVal);
     EXPECT_NE(nullptr, buffer2);
     auto mockBuffer2 = static_cast<MockBuffer *>(buffer2);
-    EXPECT_TRUE(context->getBufferPoolAllocator(Context::BufferPoolType::SmallBuffersPool).isPoolBuffer(mockBuffer2->associatedMemObject));
+    EXPECT_TRUE(context->getBufferPoolAllocator().isPoolBuffer(mockBuffer2->associatedMemObject));
     EXPECT_NE(0u, mockBuffer2->getOffset());
     buffer = buffer2;
     retVal = clReleaseMemObject(buffer1);

View File

@@ -40,6 +40,7 @@ class MockContext : public Context {
     using Context::rootDeviceIndices;
     using Context::setupContextType;
     using Context::sharingFunctions;
+    using Context::smallBufferPoolAllocator;
     using Context::specialQueues;
     using Context::svmAllocsManager;
     using Context::usmPoolInitialized;

View File

@@ -256,21 +256,21 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
         return microsecondResolution;
     }
-    void updateMaxPoolCount(uint32_t type, uint32_t maxPoolCount) {
-        maxBufferPoolCount[type] = maxPoolCount;
+    void updateMaxPoolCount(uint32_t maxPoolCount) {
+        maxBufferPoolCount = maxPoolCount;
     }
-    bool requestPoolCreate(uint32_t type, uint32_t count) {
-        if (maxBufferPoolCount[type] >= count + bufferPoolCount[type].fetch_add(count)) {
+    bool requestPoolCreate(uint32_t count) {
+        if (maxBufferPoolCount >= count + bufferPoolCount.fetch_add(count)) {
             return true;
         } else {
-            bufferPoolCount[type] -= count;
+            bufferPoolCount -= count;
             return false;
         }
     }
-    void recordPoolsFreed(uint32_t type, uint32_t size) {
-        bufferPoolCount[type] -= size;
+    void recordPoolsFreed(uint32_t size) {
+        bufferPoolCount -= size;
     }
     UsmReuseInfo usmReuseInfo;
@@ -379,8 +379,8 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
     std::unique_ptr<UsmMemAllocPool> usmConstantSurfaceAllocPool;
     std::unique_ptr<UsmMemAllocPool> usmGlobalSurfaceAllocPool;
-    std::array<std::atomic_uint32_t, 2> bufferPoolCount = {0u, 0u};
-    std::array<uint32_t, 2> maxBufferPoolCount = {0u, 0u};
+    std::atomic_uint32_t bufferPoolCount = 0u;
+    uint32_t maxBufferPoolCount = 0u;
     uint32_t microsecondResolution = 1000u;
     std::optional<bool> hasPeerAccess = std::nullopt;

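Note: the restored Device helpers implement a lock-free budget: requestPoolCreate reserves optimistically with fetch_add and rolls the counter back when the cap would be exceeded, which keeps concurrent contexts on one device within maxBufferPoolCount. A self-contained sketch of the same pattern:

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Standalone version of the per-device pool budget: reserve optimistically,
    // undo the reservation if it would exceed the cap.
    struct PoolBudget {
        std::atomic_uint32_t bufferPoolCount{0u};
        uint32_t maxBufferPoolCount{0u};

        bool requestPoolCreate(uint32_t count) {
            if (maxBufferPoolCount >= count + bufferPoolCount.fetch_add(count)) {
                return true;
            }
            bufferPoolCount -= count; // roll back the optimistic reservation
            return false;
        }

        void recordPoolsFreed(uint32_t count) {
            bufferPoolCount -= count;
        }
    };

    int main() {
        PoolBudget budget;
        budget.maxBufferPoolCount = 2u;
        assert(budget.requestPoolCreate(1u));  // 1 of 2 used
        assert(budget.requestPoolCreate(1u));  // 2 of 2 used
        assert(!budget.requestPoolCreate(1u)); // over budget, rolled back
        budget.recordPoolsFreed(1u);
        assert(budget.requestPoolCreate(1u));  // freed slot can be reused
        return 0;
    }
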
View File

@@ -15,6 +15,10 @@
 namespace NEO {
+inline SmallBuffersParams SmallBuffersParams::getPreferredBufferPoolParams(const ProductHelper &productHelper) {
+    return productHelper.is2MBLocalMemAlignmentEnabled() ? SmallBuffersParams::getLargePagesParams() : SmallBuffersParams::getDefaultParams();
+}
+
 template <typename PoolT, typename BufferType, typename BufferParentType>
 AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(MemoryManager *memoryManager, OnChunkFreeCallback onChunkFreeCb)
     : AbstractBuffersPool<PoolT, BufferType, BufferParentType>::AbstractBuffersPool(memoryManager, std::move(onChunkFreeCb), SmallBuffersParams::getDefaultParams()) {}

View File

@@ -18,8 +18,8 @@ SmallBuffersParams SmallBuffersParams::getDefaultParams() {
 SmallBuffersParams SmallBuffersParams::getLargePagesParams() {
     return {
-        .aggregatedSmallBuffersPoolSize = 2 * MemoryConstants::pageSize64k,
-        .smallBufferThreshold = 8 * MemoryConstants::pageSize,
+        .aggregatedSmallBuffersPoolSize = MemoryConstants::pageSize64k,
+        .smallBufferThreshold = 4 * MemoryConstants::pageSize,
         .chunkAlignment = MemoryConstants::pageSize,
         .startingOffset = MemoryConstants::pageSize};
 }

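Note: with the restored values, one pool holds aggregatedSmallBuffersPoolSize / smallBufferThreshold worst-case buffers, which is exactly the ratio the ULTs above use to exhaust a pool. Worked arithmetic, assuming the usual 4 KB / 64 KB MemoryConstants page sizes:

    #include <cstddef>
    #include <cstdio>

    int main() {
        constexpr size_t pageSize = 4096;     // assumed MemoryConstants::pageSize
        constexpr size_t pageSize64k = 65536; // assumed MemoryConstants::pageSize64k

        constexpr size_t poolSize = pageSize64k;   // restored aggregatedSmallBuffersPoolSize
        constexpr size_t threshold = 4 * pageSize; // restored smallBufferThreshold (16 KB)

        // A pool can hold at most poolSize / threshold buffers of maximal size.
        std::printf("max-size buffers per pool: %zu\n", poolSize / threshold); // prints 4
        return 0;
    }
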
View File

@@ -362,21 +362,21 @@ TEST_F(DeviceTest, GivenDeviceWhenGenerateUuidFromPciBusInfoThenValidValuesAreSe
 }
 TEST_F(DeviceTest, givenDeviceWhenUsingBufferPoolsTrackingThenCountIsUpdated) {
-    pDevice->updateMaxPoolCount(0u, 3u);
-    EXPECT_EQ(3u, pDevice->maxBufferPoolCount[0]);
-    EXPECT_EQ(0u, pDevice->bufferPoolCount[0].load());
+    pDevice->updateMaxPoolCount(3u);
+    EXPECT_EQ(3u, pDevice->maxBufferPoolCount);
+    EXPECT_EQ(0u, pDevice->bufferPoolCount.load());
 
-    EXPECT_FALSE(pDevice->requestPoolCreate(0u, 4u));
-    EXPECT_EQ(0u, pDevice->bufferPoolCount[0].load());
+    EXPECT_FALSE(pDevice->requestPoolCreate(4u));
+    EXPECT_EQ(0u, pDevice->bufferPoolCount.load());
 
-    EXPECT_TRUE(pDevice->requestPoolCreate(0u, 3u));
-    EXPECT_EQ(3u, pDevice->bufferPoolCount[0].load());
+    EXPECT_TRUE(pDevice->requestPoolCreate(3u));
+    EXPECT_EQ(3u, pDevice->bufferPoolCount.load());
 
-    EXPECT_FALSE(pDevice->requestPoolCreate(0u, 1u));
-    EXPECT_EQ(3u, pDevice->bufferPoolCount[0].load());
+    EXPECT_FALSE(pDevice->requestPoolCreate(1u));
+    EXPECT_EQ(3u, pDevice->bufferPoolCount.load());
 
-    pDevice->recordPoolsFreed(0u, 2u);
-    EXPECT_EQ(1u, pDevice->bufferPoolCount[0].load());
+    pDevice->recordPoolsFreed(2u);
+    EXPECT_EQ(1u, pDevice->bufferPoolCount.load());
 }
using DeviceGetCapsTest = Test<DeviceFixture>;

View File

@@ -332,6 +332,23 @@ struct SmallBuffersParamsTest : public ::testing::Test {
     }
 };
+TEST_F(SmallBuffersParamsTest, GivenProductHelperWhenGettingPreferredBufferPoolParamsThenReturnsCorrectValues) {
+    auto mockProductHelper = std::make_unique<NEO::MockProductHelper>();
+    {
+        mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
+        auto preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(*mockProductHelper);
+        auto expectedParams = NEO::SmallBuffersParams::getDefaultParams();
+        EXPECT_TRUE(compareSmallBuffersParams(expectedParams, preferredParams));
+    }
+    {
+        mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
+        auto preferredParams = NEO::SmallBuffersParams::getPreferredBufferPoolParams(*mockProductHelper);
+        auto expectedParams = NEO::SmallBuffersParams::getLargePagesParams();
+        EXPECT_TRUE(compareSmallBuffersParams(expectedParams, preferredParams));
+    }
+}
+
 TEST_F(SmallBuffersParamsTest, GivenBuffersAllocatorWhenSettingDifferentParamsThenGetParamsReturnsExpectedValues) {
     auto buffersAllocator = DummyBuffersAllocator{};