Optimize small buffers allocator

- Do not wait for GPU completion on pool exhaustion if allocations are in use;
allocate a new pool instead
- Free the small-buffer address range if allocations are not in use and the
buffer pool is exhausted

Resolves: NEO-7769, NEO-7836

Signed-off-by: Igor Venevtsev <igor.venevtsev@intel.com>
This commit is contained in:
Igor Venevtsev
2023-03-24 12:37:05 +00:00
committed by Compute-Runtime-Automation
parent 63470ed698
commit 3e5101424d
5 changed files with 336 additions and 175 deletions

View File

@@ -30,12 +30,6 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
}
}
void TearDown() override {
if (this->context->getBufferPoolAllocator().isAggregatedSmallBuffersEnabled(context.get())) {
this->context->getBufferPoolAllocator().releaseSmallBufferPool();
}
}
void setAllocationToFail(bool shouldFail) {
this->mockMemoryManager->failInDevicePoolWithError = shouldFail;
}
@@ -62,7 +56,7 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
this->setAllocationToFail(failMainStorageAllocation);
cl_device_id devices[] = {device};
this->context.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
ASSERT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(retVal, CL_SUCCESS);
this->setAllocationToFail(false);
this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->smallBufferPoolAllocator);
}
@@ -83,8 +77,8 @@ class AggregatedSmallBuffersKernelTest : public AggregatedSmallBuffersTestTempla
retVal = CL_INVALID_VALUE;
pMultiDeviceKernel.reset(MultiDeviceKernel::create<MockKernel>(pProgram.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, device->getRootDeviceIndex()), retVal));
pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(device->getRootDeviceIndex()));
ASSERT_NE(pKernel, nullptr);
ASSERT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(pKernel, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData));
pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless);
@@ -143,87 +137,167 @@ HWTEST_F(AggregatedSmallBuffersDefaultTest, givenDifferentFlagValuesAndSingleOrM
using AggregatedSmallBuffersDisabledTest = AggregatedSmallBuffersTestTemplate<0>;
TEST_F(AggregatedSmallBuffersDisabledTest, givenAggregatedSmallBuffersDisabledWhenBufferCreateCalledThenDoNotUsePool) {
ASSERT_FALSE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_EQ(poolAllocator->mainStorage, nullptr);
EXPECT_FALSE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(0u, poolAllocator->bufferPools.size());
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_NE(buffer, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(poolAllocator->mainStorage, nullptr);
EXPECT_NE(nullptr, buffer);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, poolAllocator->bufferPools.size());
}
using AggregatedSmallBuffersEnabledTest = AggregatedSmallBuffersTestTemplate<1>;
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenMakeDeviceBufferLockable) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_NE(poolAllocator->mainStorage, nullptr);
ASSERT_NE(mockMemoryManager->lastAllocationProperties, nullptr);
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
EXPECT_NE(nullptr, mockMemoryManager->lastAllocationProperties);
EXPECT_TRUE(mockMemoryManager->lastAllocationProperties->makeDeviceBufferLockable);
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeLargerThanThresholdWhenBufferCreateCalledThenDoNotUsePool) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_NE(poolAllocator->mainStorage, nullptr);
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
size = PoolAllocator::smallBufferThreshold + 1;
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_NE(buffer, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(nullptr, buffer);
EXPECT_EQ(CL_SUCCESS, retVal);
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeLowerThenChunkAlignmentWhenBufferCreatedAndDestroyedThenSizeIsAsRequestedAndCorrectSizeIsFreed) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_NE(poolAllocator->mainStorage, nullptr);
ASSERT_EQ(poolAllocator->chunkAllocator->getUsedSize(), 0u);
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeLowerThenChunkAlignmentWhenBufferCreatedAndDestroyedThenSizeIsAsRequestedAndCorrectSizeIsNotFreed) {
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
EXPECT_EQ(0u, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
size = PoolAllocator::chunkAlignment / 2;
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_NE(buffer, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(buffer->getSize(), size);
EXPECT_EQ(poolAllocator->chunkAllocator->getUsedSize(), PoolAllocator::chunkAlignment);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(size, buffer->getSize());
EXPECT_EQ(PoolAllocator::chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
auto mockBuffer = static_cast<MockBuffer *>(buffer.get());
EXPECT_EQ(mockBuffer->sizeInPoolAllocator, PoolAllocator::chunkAlignment);
EXPECT_EQ(PoolAllocator::chunkAlignment, mockBuffer->sizeInPoolAllocator);
buffer.reset(nullptr);
EXPECT_EQ(poolAllocator->chunkAllocator->getUsedSize(), 0u);
EXPECT_EQ(PoolAllocator::chunkAlignment, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledThenUsePool) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_NE(poolAllocator->mainStorage, nullptr);
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_NE(buffer, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(poolAllocator->mainStorage, nullptr);
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
auto mockBuffer = static_cast<MockBuffer *>(buffer.get());
EXPECT_GE(mockBuffer->getSize(), size);
EXPECT_GE(mockBuffer->getOffset(), 0u);
EXPECT_LE(mockBuffer->getOffset(), PoolAllocator::aggregatedSmallBuffersPoolSize - size);
EXPECT_TRUE(mockBuffer->isSubBuffer());
EXPECT_EQ(poolAllocator->mainStorage, mockBuffer->associatedMemObject);
EXPECT_EQ(mockBuffer->associatedMemObject, poolAllocator->bufferPools[0].mainStorage.get());
retVal = clReleaseMemObject(buffer.release());
EXPECT_EQ(retVal, CL_SUCCESS);
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenClReleaseMemObjectCalledThenWaitForEnginesCompletionCalled) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_NE(poolAllocator->mainStorage, nullptr);
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenClReleaseMemObjectCalledThenWaitForEnginesCompletionNotCalledAndMemoryRegionIsNotFreed) {
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
EXPECT_EQ(0u, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
ASSERT_NE(buffer, nullptr);
ASSERT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(buffer, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(poolAllocator->mainStorage, nullptr);
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
auto mockBuffer = static_cast<MockBuffer *>(buffer.get());
ASSERT_TRUE(mockBuffer->isSubBuffer());
ASSERT_EQ(poolAllocator->mainStorage, mockBuffer->associatedMemObject);
EXPECT_TRUE(mockBuffer->isSubBuffer());
EXPECT_EQ(mockBuffer->associatedMemObject, poolAllocator->bufferPools[0].mainStorage.get());
ASSERT_EQ(mockMemoryManager->waitForEnginesCompletionCalled, 0u);
EXPECT_EQ(mockMemoryManager->waitForEnginesCompletionCalled, 0u);
retVal = clReleaseMemObject(buffer.release());
ASSERT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(mockMemoryManager->waitForEnginesCompletionCalled, 1u);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(mockMemoryManager->waitForEnginesCompletionCalled, 0u);
EXPECT_EQ(size, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreNotInUseAndBufferWasFreedThenPoolIsReused) {
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
for (auto i = 0u; i < buffersToCreate; i++) {
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
}
EXPECT_EQ(size * buffersToCreate, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
EXPECT_EQ(0u, mockMemoryManager->allocInUseCalled);
mockMemoryManager->deferAllocInUse = false;
buffers[0] = nullptr;
std::unique_ptr<Buffer> bufferAfterFreeMustSucceed(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_EQ(poolAllocator->bufferPools[0].mainStorage->getMultiGraphicsAllocation().getGraphicsAllocations().size(), mockMemoryManager->allocInUseCalled);
EXPECT_EQ(size * buffersToCreate, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreNotInUseAndNoBuffersFreedThenNewPoolIsCreated) {
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
for (auto i = 0u; i < buffersToCreate; i++) {
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
}
EXPECT_EQ(size * buffersToCreate, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
EXPECT_EQ(0u, mockMemoryManager->allocInUseCalled);
mockMemoryManager->deferAllocInUse = false;
std::unique_ptr<Buffer> bufferAfterExhaustMustSucceed(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(2u, poolAllocator->bufferPools.size());
EXPECT_EQ(poolAllocator->bufferPools[0].mainStorage->getMultiGraphicsAllocation().getGraphicsAllocations().size(), mockMemoryManager->allocInUseCalled);
EXPECT_EQ(size * buffersToCreate, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
EXPECT_EQ(size, poolAllocator->bufferPools[1].chunkAllocator->getUsedSize());
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreInUseThenNewPoolIsCreated) {
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
for (auto i = 0u; i < buffersToCreate; i++) {
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
}
EXPECT_EQ(size * buffersToCreate, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
EXPECT_EQ(0u, mockMemoryManager->allocInUseCalled);
mockMemoryManager->deferAllocInUse = true;
std::unique_ptr<Buffer> bufferAfterExhaustMustSucceed(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(2u, poolAllocator->bufferPools.size());
EXPECT_EQ(poolAllocator->bufferPools[0].mainStorage->getMultiGraphicsAllocation().getGraphicsAllocations().size(), mockMemoryManager->allocInUseCalled);
EXPECT_EQ(size * buffersToCreate, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
EXPECT_EQ(size, poolAllocator->bufferPools[1].chunkAllocator->getUsedSize());
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenCopyHostPointerWhenCreatingBufferButCopyFailedThenDoNotUsePool) {
@@ -249,14 +323,15 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenCopyHostPointerWhenCreatingBuffer
unsigned char dataToCopy[PoolAllocator::smallBufferThreshold];
hostPtr = dataToCopy;
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_NE(poolAllocator->mainStorage, nullptr);
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
if (commandQueue->writeBufferCounter == 0) {
GTEST_SKIP();
}
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(buffer, nullptr);
EXPECT_NE(buffer, nullptr);
auto mockBuffer = static_cast<MockBuffer *>(buffer.get());
EXPECT_FALSE(mockBuffer->isSubBuffer());
@@ -265,8 +340,9 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenCopyHostPointerWhenCreatingBuffer
}
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledMultipleTimesThenUsePool) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_NE(poolAllocator->mainStorage, nullptr);
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
constexpr auto buffersToCreate = PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold;
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
@@ -274,11 +350,8 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
}
EXPECT_NE(poolAllocator->mainStorage, nullptr);
std::unique_ptr<Buffer> bufferAfterPoolIsFull(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(bufferAfterPoolIsFull, nullptr);
EXPECT_FALSE(bufferAfterPoolIsFull->isSubBuffer());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
using Bounds = struct {
size_t left;
@@ -292,7 +365,8 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
EXPECT_NE(buffers[i], nullptr);
EXPECT_TRUE(buffers[i]->isSubBuffer());
auto mockBuffer = static_cast<MockBuffer *>(buffers[i].get());
EXPECT_EQ(poolAllocator->mainStorage, mockBuffer->associatedMemObject);
EXPECT_EQ(mockBuffer->associatedMemObject, poolAllocator->bufferPools[0].mainStorage.get());
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
EXPECT_GE(mockBuffer->getSize(), size);
EXPECT_GE(mockBuffer->getOffset(), 0u);
EXPECT_LE(mockBuffer->getOffset(), PoolAllocator::aggregatedSmallBuffersPoolSize - size);
@@ -307,42 +381,24 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
subBuffersBounds[j].right <= subBuffersBounds[i].left);
}
}
// freeing subbuffer frees space in pool
ASSERT_LT(poolAllocator->chunkAllocator->getLeftSize(), size);
clReleaseMemObject(buffers[0].release());
EXPECT_GE(poolAllocator->chunkAllocator->getLeftSize(), size);
std::unique_ptr<Buffer> bufferAfterPoolHasSpaceAgain(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(bufferAfterPoolHasSpaceAgain, nullptr);
EXPECT_TRUE(bufferAfterPoolHasSpaceAgain->isSubBuffer());
// subbuffer after free does not overlap
subBuffersBounds[0] = Bounds{bufferAfterPoolHasSpaceAgain->getOffset(), bufferAfterPoolHasSpaceAgain->getOffset() + bufferAfterPoolHasSpaceAgain->getSize()};
for (auto i = 0u; i < buffersToCreate; i++) {
for (auto j = i + 1; j < buffersToCreate; j++) {
EXPECT_TRUE(subBuffersBounds[i].right <= subBuffersBounds[j].left ||
subBuffersBounds[j].right <= subBuffersBounds[i].left);
}
}
}
TEST_F(AggregatedSmallBuffersKernelTest, givenBufferFromPoolWhenOffsetSubbufferIsPassedToSetKernelArgThenCorrectGpuVAIsPatched) {
std::unique_ptr<Buffer> unusedBuffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
ASSERT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(buffer, nullptr);
ASSERT_GT(buffer->getOffset(), 0u);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(buffer, nullptr);
EXPECT_GT(buffer->getOffset(), 0u);
cl_buffer_region region;
region.origin = 0xc0;
region.size = 32;
cl_int error = 0;
std::unique_ptr<Buffer> subBuffer(buffer->createSubBuffer(buffer->getFlags(), buffer->getFlagsIntel(), &region, error));
ASSERT_NE(subBuffer, nullptr);
EXPECT_NE(subBuffer, nullptr);
EXPECT_EQ(ptrOffset(buffer->getCpuAddress(), region.origin), subBuffer->getCpuAddress());
const auto graphicsAllocation = subBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(graphicsAllocation, nullptr);
EXPECT_NE(graphicsAllocation, nullptr);
const auto gpuAddress = graphicsAllocation->getGpuAddress();
EXPECT_EQ(ptrOffset(gpuAddress, buffer->getOffset() + region.origin), subBuffer->getBufferAddress(device->getRootDeviceIndex()));
@@ -353,13 +409,13 @@ TEST_F(AggregatedSmallBuffersKernelTest, givenBufferFromPoolWhenOffsetSubbufferI
using AggregatedSmallBuffersEnabledTestFailPoolInit = AggregatedSmallBuffersTestTemplate<1, true>;
TEST_F(AggregatedSmallBuffersEnabledTestFailPoolInit, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_EQ(poolAllocator->mainStorage, nullptr);
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_TRUE(poolAllocator->bufferPools.empty());
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(buffer.get(), nullptr);
EXPECT_EQ(poolAllocator->mainStorage, nullptr);
EXPECT_TRUE(poolAllocator->bufferPools.empty());
}
using AggregatedSmallBuffersEnabledTestDoNotRunSetup = AggregatedSmallBuffersTestTemplate<1, false, false>;
@@ -368,9 +424,9 @@ TEST_F(AggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffe
testing::internal::CaptureStdout();
DebugManager.flags.PrintDriverDiagnostics.set(1);
setUpImpl();
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
ASSERT_NE(poolAllocator->mainStorage, nullptr);
ASSERT_NE(context->driverDiagnostics, nullptr);
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
EXPECT_FALSE(poolAllocator->bufferPools.empty());
EXPECT_NE(context->driverDiagnostics, nullptr);
std::string output = testing::internal::GetCapturedStdout();
EXPECT_EQ(0u, output.size());
}
@@ -383,7 +439,7 @@ class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
auto device = deviceFactory->rootDevices[0];
cl_device_id devices[] = {device};
clContext = clCreateContext(nullptr, 1, devices, nullptr, nullptr, &retVal);
ASSERT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(retVal, CL_SUCCESS);
context = castToObject<Context>(clContext);
poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
}
@@ -416,7 +472,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenNotSmallBufferWhenCreatingBuff
size = PoolAllocator::smallBufferThreshold + 1;
cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(buffer, nullptr);
EXPECT_NE(buffer, nullptr);
MockBuffer *asBuffer = static_cast<MockBuffer *>(buffer);
EXPECT_FALSE(asBuffer->isSubBuffer());
@@ -431,14 +487,13 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferT
auto contextRefCountBefore = context->getRefInternalCount();
cl_mem smallBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(smallBuffer, nullptr);
EXPECT_NE(smallBuffer, nullptr);
MockBuffer *asBuffer = static_cast<MockBuffer *>(smallBuffer);
EXPECT_TRUE(asBuffer->isSubBuffer());
Buffer *parentBuffer = static_cast<Buffer *>(asBuffer->associatedMemObject);
EXPECT_EQ(2, parentBuffer->getRefInternalCount());
MockBufferPoolAllocator *mockBufferPoolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
EXPECT_EQ(parentBuffer, mockBufferPoolAllocator->mainStorage);
EXPECT_EQ(parentBuffer, poolAllocator->bufferPools[0].mainStorage.get());
retVal = clReleaseMemObject(smallBuffer);
EXPECT_EQ(retVal, CL_SUCCESS);
@@ -448,24 +503,20 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferT
EXPECT_EQ(clReleaseContext(context), CL_SUCCESS);
}
TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferWithNullPropertiesThenUsePool) {
auto contextRefCountBefore = context->getRefInternalCount();
cl_mem smallBuffer = clCreateBufferWithProperties(clContext, nullptr, flags, size, hostPtr, &retVal);
TEST_F(AggregatedSmallBuffersEnabledApiTest, givenUseHostPointerWhenCreatingBufferThenDoNotUsePool) {
flags |= CL_MEM_USE_HOST_PTR;
unsigned char hostData[PoolAllocator::smallBufferThreshold];
hostPtr = hostData;
cl_mem smallBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(smallBuffer, nullptr);
EXPECT_NE(smallBuffer, nullptr);
MockBuffer *asBuffer = static_cast<MockBuffer *>(smallBuffer);
EXPECT_TRUE(asBuffer->isSubBuffer());
Buffer *parentBuffer = static_cast<Buffer *>(asBuffer->associatedMemObject);
EXPECT_EQ(2, parentBuffer->getRefInternalCount());
MockBufferPoolAllocator *mockBufferPoolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
EXPECT_EQ(parentBuffer, mockBufferPoolAllocator->mainStorage);
EXPECT_FALSE(asBuffer->isSubBuffer());
retVal = clReleaseMemObject(smallBuffer);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(context->getRefInternalCount(), contextRefCountBefore);
EXPECT_EQ(clReleaseContext(context), CL_SUCCESS);
}
@@ -474,14 +525,13 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferW
cl_mem_properties memProperties{};
cl_mem smallBuffer = clCreateBufferWithProperties(clContext, &memProperties, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(smallBuffer, nullptr);
EXPECT_NE(smallBuffer, nullptr);
MockBuffer *asBuffer = static_cast<MockBuffer *>(smallBuffer);
EXPECT_TRUE(asBuffer->isSubBuffer());
Buffer *parentBuffer = static_cast<Buffer *>(asBuffer->associatedMemObject);
EXPECT_EQ(2, parentBuffer->getRefInternalCount());
MockBufferPoolAllocator *mockBufferPoolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
EXPECT_EQ(parentBuffer, mockBufferPoolAllocator->mainStorage);
EXPECT_EQ(parentBuffer, poolAllocator->bufferPools[0].mainStorage.get());
retVal = clReleaseMemObject(smallBuffer);
EXPECT_EQ(retVal, CL_SUCCESS);
@@ -494,7 +544,7 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferW
TEST_F(AggregatedSmallBuffersEnabledApiTest, givenBufferFromPoolWhenGetMemObjInfoCalledThenReturnValuesLikeForNormalBuffer) {
cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(buffer, nullptr);
EXPECT_NE(buffer, nullptr);
MockBuffer *asBuffer = static_cast<MockBuffer *>(buffer);
EXPECT_TRUE(asBuffer->isSubBuffer());
@@ -521,14 +571,14 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSubBufferNotFromPoolAndAggrega
size_t size = PoolAllocator::smallBufferThreshold + 1;
cl_mem largeBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
ASSERT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(largeBuffer, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(largeBuffer, nullptr);
cl_buffer_region region{};
region.size = 1;
cl_mem subBuffer = clCreateSubBuffer(largeBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &retVal);
ASSERT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(subBuffer, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_NE(subBuffer, nullptr);
DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(1);
retVal = clReleaseMemObject(subBuffer);
@@ -549,14 +599,13 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenCopyHostPointerWhenCreatingBuf
cl_mem smallBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(context->getRefInternalCount(), contextRefCountBefore + 1);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(smallBuffer, nullptr);
EXPECT_NE(smallBuffer, nullptr);
MockBuffer *asBuffer = static_cast<MockBuffer *>(smallBuffer);
EXPECT_TRUE(asBuffer->isSubBuffer());
Buffer *parentBuffer = static_cast<Buffer *>(asBuffer->associatedMemObject);
EXPECT_EQ(2, parentBuffer->getRefInternalCount());
MockBufferPoolAllocator *mockBufferPoolAllocator = static_cast<MockBufferPoolAllocator *>(&context->getBufferPoolAllocator());
EXPECT_EQ(parentBuffer, mockBufferPoolAllocator->mainStorage);
EXPECT_EQ(parentBuffer, poolAllocator->bufferPools[0].mainStorage.get());
// check that data has been copied
auto address = asBuffer->getCpuAddress();
@@ -579,17 +628,17 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB
cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(buffer, nullptr);
EXPECT_NE(buffer, nullptr);
MockBuffer *mockBuffer = static_cast<MockBuffer *>(buffer);
EXPECT_GT(mockBuffer->offset, 0u);
EXPECT_EQ(ptrOffset(poolAllocator->mainStorage->getCpuAddress(), mockBuffer->getOffset()), mockBuffer->getCpuAddress());
EXPECT_EQ(ptrOffset(poolAllocator->bufferPools[0].mainStorage->getCpuAddress(), mockBuffer->getOffset()), mockBuffer->getCpuAddress());
cl_buffer_region region{};
region.size = 1;
region.origin = size / 2;
cl_mem subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(subBuffer, nullptr);
EXPECT_NE(subBuffer, nullptr);
MockBuffer *mockSubBuffer = static_cast<MockBuffer *>(subBuffer);
EXPECT_EQ(mockSubBuffer->associatedMemObject, buffer);
EXPECT_EQ(ptrOffset(mockBuffer->getCpuAddress(), region.origin), mockSubBuffer->getCpuAddress());
@@ -609,16 +658,16 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB
TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenSubBufferFromBufferPoolWhenGetMemObjInfoCalledThenReturnValuesLikeForNormalSubBuffer) {
cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(buffer, nullptr);
EXPECT_NE(buffer, nullptr);
MockBuffer *mockBuffer = static_cast<MockBuffer *>(buffer);
ASSERT_TRUE(context->getBufferPoolAllocator().isPoolBuffer(mockBuffer->associatedMemObject));
EXPECT_TRUE(context->getBufferPoolAllocator().isPoolBuffer(mockBuffer->associatedMemObject));
cl_buffer_region region{};
region.size = 1;
region.origin = size / 2;
cl_mem subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(subBuffer, nullptr);
EXPECT_NE(subBuffer, nullptr);
cl_mem associatedMemObj = nullptr;
retVal = clGetMemObjectInfo(subBuffer, CL_MEM_ASSOCIATED_MEMOBJECT, sizeof(cl_mem), &associatedMemObj, nullptr);
@@ -642,7 +691,7 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenSubBufferFromBufferPoolWhenG
TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubBufferCalledWithRegionOutsideBufferThenItFails) {
cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(buffer, nullptr);
EXPECT_NE(buffer, nullptr);
cl_buffer_region region{};
region.size = size + 1;
@@ -666,14 +715,14 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB
TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenSubBufferFromBufferFromPoolWhenCreateSubBufferCalledThenItFails) {
cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(buffer, nullptr);
EXPECT_NE(buffer, nullptr);
cl_buffer_region region{};
region.size = 1;
region.origin = size / 2;
cl_mem subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &retVal);
EXPECT_EQ(retVal, CL_SUCCESS);
ASSERT_NE(subBuffer, nullptr);
EXPECT_NE(subBuffer, nullptr);
region.origin = 0;
cl_mem subSubBuffer = clCreateSubBuffer(subBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &retVal);

View File

@@ -55,9 +55,9 @@ class MockContext : public Context {
class MockBufferPoolAllocator : public BufferPoolAllocator {
public:
using BufferPoolAllocator::chunkAllocator;
using BufferPoolAllocator::BufferPool;
using BufferPoolAllocator::bufferPools;
using BufferPoolAllocator::isAggregatedSmallBuffersEnabled;
using BufferPoolAllocator::mainStorage;
};
private: