mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
performance(ocl): increase buffer pool size
Increase pool size to 2MB and threshold to 1MB. Add a limit to the number of pools, set to 2. Related-To: NEO-9690 Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
2d15517fbe
commit
6e434e0424
@@ -101,7 +101,7 @@ struct CopyEngineXeHPAndLater : public MulticontextAubFixture, public ::testing:
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
if (compressed) {
|
||||
EXPECT_TRUE(graphicsAllocation->getDefaultGmm()->isCompressionEnabled);
|
||||
EXPECT_TRUE(graphicsAllocation->isCompressionEnabled());
|
||||
}
|
||||
EXPECT_EQ(!inLocalMemory, MemoryPoolHelper::isSystemMemoryPool(graphicsAllocation->getMemoryPool()));
|
||||
|
||||
@@ -113,7 +113,7 @@ struct CopyEngineXeHPAndLater : public MulticontextAubFixture, public ::testing:
|
||||
}
|
||||
|
||||
uint64_t getGpuVA(Buffer &buffer) {
|
||||
return buffer.getGraphicsAllocation(this->rootDeviceIndex)->getGpuAddress();
|
||||
return ptrOffset(buffer.getGraphicsAllocation(this->rootDeviceIndex)->getGpuAddress(), buffer.getOffset());
|
||||
}
|
||||
|
||||
void executeBlitCommand(const BlitProperties &blitProperties, bool blocking) {
|
||||
@@ -160,8 +160,8 @@ struct CopyEngineXeHPAndLater : public MulticontextAubFixture, public ::testing:
|
||||
CommandStreamReceiver *bcsCsr = nullptr;
|
||||
TimestampPacketContainer timestampPacketContainer;
|
||||
CsrDependencies csrDependencies;
|
||||
const size_t bufferSize = MemoryConstants::pageSize64k + BlitterConstants::maxBlitWidth + 3;
|
||||
size_t offset = (bufferSize / 4) - 3;
|
||||
static constexpr size_t bufferSize = MemoryConstants::pageSize64k + BlitterConstants::maxBlitWidth + 3;
|
||||
static constexpr size_t offset = (bufferSize / 4) - 3;
|
||||
aub_stream::EngineType bcsEngineType = aub_stream::EngineType::ENGINE_BCS;
|
||||
|
||||
std::unique_ptr<uint8_t[]> compressiblePattern;
|
||||
@@ -185,17 +185,17 @@ void CopyEngineXeHPAndLater<numTiles, testLocalMemory>::givenNotCompressedBuffer
|
||||
// Buffer to Buffer - uncompressed HBM -> compressed HBM
|
||||
auto blitProperties = BlitProperties::constructPropertiesForCopy(dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex),
|
||||
srcNotCompressedBuffer->getGraphicsAllocation(rootDeviceIndex),
|
||||
0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation());
|
||||
{dstCompressedBuffer->getOffset(), 0, 0}, {srcNotCompressedBuffer->getOffset(), 0, 0}, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation());
|
||||
executeBlitCommand(blitProperties, true);
|
||||
// Buffer to Buffer - uncompressed HBM -> uncompressed HBM
|
||||
blitProperties = BlitProperties::constructPropertiesForCopy(dstNotCompressedBuffer->getGraphicsAllocation(rootDeviceIndex),
|
||||
srcNotCompressedBuffer->getGraphicsAllocation(rootDeviceIndex),
|
||||
0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation());
|
||||
{dstNotCompressedBuffer->getOffset(), 0, 0}, {srcNotCompressedBuffer->getOffset(), 0, 0}, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation());
|
||||
executeBlitCommand(blitProperties, true);
|
||||
// Buffer to Buffer - compressed HBM -> uncompressed HBM
|
||||
blitProperties = BlitProperties::constructPropertiesForCopy(dstResolvedBuffer->getGraphicsAllocation(rootDeviceIndex),
|
||||
dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex),
|
||||
0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation());
|
||||
{dstResolvedBuffer->getOffset(), 0, 0}, {dstCompressedBuffer->getOffset(), 0, 0}, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation());
|
||||
executeBlitCommand(blitProperties, true);
|
||||
|
||||
blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::bufferToHostPtr, *bcsCsr,
|
||||
@@ -354,7 +354,7 @@ void CopyEngineXeHPAndLater<numTiles, testLocalMemory>::givenOffsetsWhenBltExecu
|
||||
|
||||
auto blitProperties = BlitProperties::constructPropertiesForCopy(dstBuffer->getGraphicsAllocation(rootDeviceIndex),
|
||||
srcBuffer->getGraphicsAllocation(rootDeviceIndex),
|
||||
{offset, 0, 0}, 0, {copiedSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation());
|
||||
{offset + dstBuffer->getOffset(), 0, 0}, {srcBuffer->getOffset(), 0, 0}, {copiedSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation());
|
||||
|
||||
executeBlitCommand(blitProperties, true);
|
||||
|
||||
@@ -584,11 +584,11 @@ void CopyEngineXeHPAndLater<numTiles, testLocalMemory>::givenCopyBufferRectWithO
|
||||
auto srcBuffer = createBuffer(false, testLocalMemory, srcMemory.get());
|
||||
auto dstBuffer = createBuffer(false, testLocalMemory, destMemory.get());
|
||||
auto pSrcMemory = &srcMemory[0];
|
||||
auto pDestMemory = reinterpret_cast<uint8_t *>(getGpuAddress(*dstBuffer));
|
||||
auto pDestMemory = reinterpret_cast<uint8_t *>(getGpuAddress((*dstBuffer)));
|
||||
auto clearColorAllocation = bcsCsr->getClearColorAllocation();
|
||||
|
||||
size_t srcOrigin[] = {0, 0, 0};
|
||||
size_t dstOrigin[] = {1 * sizeof(uint8_t), 0, 0};
|
||||
size_t srcOrigin[] = {srcBuffer->getOffset(), 0, 0};
|
||||
size_t dstOrigin[] = {1 * sizeof(uint8_t) + dstBuffer->getOffset(), 0, 0};
|
||||
size_t region[] = {2 * sizeof(uint8_t), 2, 2};
|
||||
size_t srcRowPitch = region[0];
|
||||
size_t srcSlicePitch = srcRowPitch * region[1];
|
||||
@@ -613,8 +613,8 @@ void CopyEngineXeHPAndLater<numTiles, testLocalMemory>::givenCopyBufferRectWithO
|
||||
|
||||
pSrcMemory = ptrOffset(pSrcMemory, 0);
|
||||
|
||||
expectMemoryNotEqual<FamilyType>(ptrOffset(pDestMemory, dstOrigin[0]), pSrcMemory, copySize + 1, 0, 0);
|
||||
expectMemory<FamilyType>(ptrOffset(pDestMemory, dstOrigin[0]), pSrcMemory, copySize, 0, 0);
|
||||
expectMemoryNotEqual<FamilyType>(ptrOffset(pDestMemory, sizeof(uint8_t)), pSrcMemory, copySize + 1, 0, 0);
|
||||
expectMemory<FamilyType>(ptrOffset(pDestMemory, sizeof(uint8_t)), pSrcMemory, copySize, 0, 0);
|
||||
}
|
||||
|
||||
template <uint32_t numTiles, bool testLocalMemory>
|
||||
@@ -638,8 +638,8 @@ void CopyEngineXeHPAndLater<numTiles, testLocalMemory>::givenCopyBufferRectWithB
|
||||
auto pDestMemory = reinterpret_cast<uint8_t *>(getGpuAddress(*dstBuffer));
|
||||
auto clearColorAllocation = bcsCsr->getClearColorAllocation();
|
||||
|
||||
size_t srcOrigin[] = {0, 0, 0};
|
||||
size_t dstOrigin[] = {1, 1, 1};
|
||||
size_t srcOrigin[] = {srcBuffer->getOffset(), 0, 0};
|
||||
size_t dstOrigin[] = {1 + dstBuffer->getOffset(), 1, 1};
|
||||
size_t region[] = {20, 16, 2};
|
||||
size_t srcRowPitch = region[0];
|
||||
size_t srcSlicePitch = srcRowPitch * region[1];
|
||||
@@ -660,7 +660,7 @@ void CopyEngineXeHPAndLater<numTiles, testLocalMemory>::givenCopyBufferRectWithB
|
||||
executeBlitCommand(blitProperties, false);
|
||||
bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM);
|
||||
|
||||
size_t dstOffset = dstOrigin[0] + dstOrigin[1] * dstRowPitch + dstOrigin[2] * dstSlicePitch;
|
||||
size_t dstOffset = 1 + dstOrigin[1] * dstRowPitch + dstOrigin[2] * dstSlicePitch;
|
||||
|
||||
expectMemoryNotEqual<FamilyType>(ptrOffset(pDestMemory, dstOffset), pSrcMemory, copySize + 1, 0, 0);
|
||||
expectMemory<FamilyType>(ptrOffset(pDestMemory, dstOffset), pSrcMemory, copySize, 0, 0);
|
||||
|
||||
@@ -207,6 +207,19 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
|
||||
EXPECT_NE(nullptr, buffer);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(0u, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
|
||||
}
|
||||
|
||||
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndFlagCompressedPreferredWhenBufferCreateCalledThenDoNotUsePool) {
|
||||
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
|
||||
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
|
||||
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
|
||||
size = PoolAllocator::smallBufferThreshold;
|
||||
flags |= CL_MEM_COMPRESSED_HINT_INTEL;
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
|
||||
EXPECT_NE(nullptr, buffer);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(0u, poolAllocator->bufferPools[0].chunkAllocator->getUsedSize());
|
||||
}
|
||||
|
||||
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeLowerThenChunkAlignmentWhenBufferCreatedAndDestroyedThenSizeIsAsRequestedAndCorrectSizeIsNotFreed) {
|
||||
@@ -344,6 +357,32 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndB
|
||||
EXPECT_EQ(size, poolAllocator->bufferPools[1].chunkAllocator->getUsedSize());
|
||||
}
|
||||
|
||||
// Fills pools until PoolAllocator::maxPoolCount of them exist and are fully used,
// then checks that one more Buffer::create fails and does not add another pool.
TEST_F(AggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndBufferPoolIsExhaustedAndAllocationsAreInUseAndPoolLimitIsReachedThenNewPoolIsNotCreated) {
|
||||
// Preconditions: pooling is enabled and exactly one pool with main storage exists.
EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
|
||||
EXPECT_EQ(1u, poolAllocator->bufferPools.size());
|
||||
EXPECT_NE(nullptr, poolAllocator->bufferPools[0].mainStorage.get());
|
||||
|
||||
// Buffer count: (pool size / small-buffer threshold) per pool, times the pool limit.
// NOTE(review): the "pool fully used" expectation below presumably relies on the
// fixture's `size` being equal to smallBufferThreshold — confirm against the fixture.
constexpr auto buffersToCreate = (PoolAllocator::aggregatedSmallBuffersPoolSize / PoolAllocator::smallBufferThreshold) * PoolAllocator::maxPoolCount;
|
||||
std::vector<std::unique_ptr<Buffer>> buffers(buffersToCreate);
|
||||
for (auto i = 0u; i < buffersToCreate; ++i) {
|
||||
buffers[i].reset(Buffer::create(context.get(), flags, size, hostPtr, retVal));
|
||||
EXPECT_EQ(retVal, CL_SUCCESS);
|
||||
}
|
||||
// The pool count has reached the limit and every pool reports its whole size as used.
EXPECT_EQ(PoolAllocator::maxPoolCount, poolAllocator->bufferPools.size());
|
||||
for (auto i = 0u; i < PoolAllocator::maxPoolCount; ++i) {
|
||||
EXPECT_EQ(PoolAllocator::aggregatedSmallBuffersPoolSize, poolAllocator->bufferPools[i].chunkAllocator->getUsedSize());
|
||||
}
|
||||
EXPECT_EQ(1u, mockMemoryManager->allocInUseCalled);
|
||||
// Mock flags set before the extra allocation.
// NOTE(review): presumably these make allocations report as still in use and make
// the device-pool allocation fail — confirm in the mock memory manager.
mockMemoryManager->deferAllocInUse = true;
|
||||
mockMemoryManager->failInDevicePoolWithError = true;
|
||||
|
||||
// One more buffer: creation must fail and the pool count must stay at the limit.
std::unique_ptr<Buffer> bufferAfterExhaustMustFail(Buffer::create(context.get(), flags, size, hostPtr, retVal));
|
||||
EXPECT_EQ(nullptr, bufferAfterExhaustMustFail.get());
|
||||
EXPECT_NE(retVal, CL_SUCCESS);
|
||||
EXPECT_EQ(PoolAllocator::maxPoolCount, poolAllocator->bufferPools.size());
|
||||
// The in-use counter is expected to go from 1 to 3 across the failed create.
EXPECT_EQ(3u, mockMemoryManager->allocInUseCalled);
|
||||
}
|
||||
|
||||
TEST_F(AggregatedSmallBuffersEnabledTest, givenCopyHostPointerWhenCreatingBufferButCopyFailedThenDoNotUsePool) {
|
||||
class MockCommandQueueFailFirstEnqueueWrite : public MockCommandQueue {
|
||||
public:
|
||||
|
||||
Reference in New Issue
Block a user