Optimize Fill buffer calls.

- reuse pattern allocations for subsequent calls.
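
In outline, enqueueFillBuffer now first asks the command stream receiver's internal allocation storage for a previously used fill-pattern allocation and only falls back to a fresh allocation when none is available; the pattern allocation is then stored back as REUSABLE_ALLOCATION (rather than TEMPORARY_ALLOCATION) so a later call can pick it up. A condensed sketch of that flow, assembled from the two runtime hunks below (not the complete function; local names follow the diff):

    // Try to recycle a fill-pattern allocation from the CSR's reuse list first.
    auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
    auto storageWithAllocations = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
    auto allocationType = GraphicsAllocation::AllocationType::FILL_PATTERN;
    auto patternAllocation = storageWithAllocations->obtainReusableAllocation(patternSize, allocationType).release();
    commandStreamReceieverOwnership.unlock();

    if (!patternAllocation) {
        // Nothing suitable on the reuse list yet - fall back to a fresh allocation.
        patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties(
            {getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize),
             GraphicsAllocation::AllocationType::FILL_PATTERN, getDevice().getDeviceBitfield()});
    }

    // ... copy the pattern in and enqueue the builtin fill kernel ...

    // Storing the allocation as REUSABLE_ALLOCATION keeps it on the reuse list,
    // so a subsequent enqueueFillBuffer can obtain it once the tagged task completes.
    storageWithAllocations->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION, taskCount);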

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
Authored by Michal Mrozek, 2021-03-05 13:51:16 +00:00; committed by Compute-Runtime-Automation
parent 7d808bd560
commit 0cd03220df
2 changed files with 39 additions and 17 deletions

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2020 Intel Corporation
+ * Copyright (C) 2017-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
     buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
-    auto patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize), GraphicsAllocation::AllocationType::FILL_PATTERN, getDevice().getDeviceBitfield()});
+    auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
+    auto storageWithAllocations = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
+    auto allocationType = GraphicsAllocation::AllocationType::FILL_PATTERN;
+    auto patternAllocation = storageWithAllocations->obtainReusableAllocation(patternSize, allocationType).release();
+    commandStreamReceieverOwnership.unlock();
+
+    if (!patternAllocation) {
+        patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize), GraphicsAllocation::AllocationType::FILL_PATTERN, getDevice().getDeviceBitfield()});
+    }

     if (patternSize == 1) {
         int patternInt = (uint32_t)((*(uint8_t *)pattern << 24) | (*(uint8_t *)pattern << 16) | (*(uint8_t *)pattern << 8) | *(uint8_t *)pattern);
@@ -86,7 +94,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
         event);

     auto storageForAllocation = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
-    storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), TEMPORARY_ALLOCATION, taskCount);
+    storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION, taskCount);

     return CL_SUCCESS;
 }

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2020 Intel Corporation
+ * Copyright (C) 2017-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -76,7 +76,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenGpgp
     // Compute the SIMD lane mask
     size_t simd =
-        cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8;
+        cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16
+                                                                                                                          : 8;
     uint64_t simdMask = maxNBitValue(simd);

     // Mask off lanes based on the execution masks
@@ -373,8 +374,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeCopied)
     auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
     ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
     EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
     while (allocation != nullptr) {
         if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) &&
@@ -394,8 +395,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeAligned)
     auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
     ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
     EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
     while (allocation != nullptr) {
         if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) &&
@@ -412,6 +413,19 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeAligned)
     EXPECT_EQ(alignUp(allocation->getUnderlyingBufferSize(), MemoryConstants::cacheLineSize), allocation->getUnderlyingBufferSize());
 }

+HWTEST_F(EnqueueFillBufferCmdTests, WhenFillBufferIsCalledTwiceThenPatternAllocationIsReused) {
+    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
+    ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
+    EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
+    EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    EXPECT_NE(csr.getAllocationsForReuse().peekHead(), nullptr);
+    EXPECT_EQ(allocation, csr.getAllocationsForReuse().peekHead());
+    EXPECT_EQ(csr.getAllocationsForReuse().peekTail(), allocation);
+}
+
 HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteShouldGetPreparedForMiddleKernel) {
     auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
     ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
@@ -436,10 +450,10 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteSho
         nullptr);
     ASSERT_EQ(CL_SUCCESS, retVal);

-    ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
     ASSERT_NE(nullptr, allocation);

     EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size));
@@ -469,10 +483,10 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeTwoBytesSh
         nullptr);
     ASSERT_EQ(CL_SUCCESS, retVal);

-    ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
     ASSERT_NE(nullptr, allocation);

     EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size));
@@ -500,9 +514,9 @@ HWTEST_F(EnqueueFillBufferCmdTests, givenEnqueueFillBufferWhenPatternAllocationI
         nullptr);
     ASSERT_EQ(CL_SUCCESS, retVal);

-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *patternAllocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    GraphicsAllocation *patternAllocation = csr.getAllocationsForReuse().peekHead();
     ASSERT_NE(nullptr, patternAllocation);

     EXPECT_EQ(GraphicsAllocation::AllocationType::FILL_PATTERN, patternAllocation->getAllocationType());