Optimize Fill buffer calls.
- Reuse pattern allocations for subsequent calls.

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
parent
7d808bd560
commit
0cd03220df
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2017-2020 Intel Corporation
|
* Copyright (C) 2017-2021 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -37,7 +37,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
|
||||||
|
|
||||||
buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
|
buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
|
||||||
|
|
||||||
auto patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize), GraphicsAllocation::AllocationType::FILL_PATTERN, getDevice().getDeviceBitfield()});
|
auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
|
||||||
|
auto storageWithAllocations = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
|
||||||
|
auto allocationType = GraphicsAllocation::AllocationType::FILL_PATTERN;
|
||||||
|
auto patternAllocation = storageWithAllocations->obtainReusableAllocation(patternSize, allocationType).release();
|
||||||
|
commandStreamReceieverOwnership.unlock();
|
||||||
|
|
||||||
|
if (!patternAllocation) {
|
||||||
|
patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize), GraphicsAllocation::AllocationType::FILL_PATTERN, getDevice().getDeviceBitfield()});
|
||||||
|
}
|
||||||
|
|
||||||
if (patternSize == 1) {
|
if (patternSize == 1) {
|
||||||
int patternInt = (uint32_t)((*(uint8_t *)pattern << 24) | (*(uint8_t *)pattern << 16) | (*(uint8_t *)pattern << 8) | *(uint8_t *)pattern);
|
int patternInt = (uint32_t)((*(uint8_t *)pattern << 24) | (*(uint8_t *)pattern << 16) | (*(uint8_t *)pattern << 8) | *(uint8_t *)pattern);
|
||||||
|
@ -86,7 +94,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
|
||||||
event);
|
event);
|
||||||
|
|
||||||
auto storageForAllocation = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
|
auto storageForAllocation = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
|
||||||
storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), TEMPORARY_ALLOCATION, taskCount);
|
storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION, taskCount);
|
||||||
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2017-2020 Intel Corporation
|
* Copyright (C) 2017-2021 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -76,7 +76,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenGpgp
|
||||||
|
|
||||||
// Compute the SIMD lane mask
|
// Compute the SIMD lane mask
|
||||||
size_t simd =
|
size_t simd =
|
||||||
cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8;
|
cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16
|
||||||
|
: 8;
|
||||||
uint64_t simdMask = maxNBitValue(simd);
|
uint64_t simdMask = maxNBitValue(simd);
|
||||||
|
|
||||||
// Mask off lanes based on the execution masks
|
// Mask off lanes based on the execution masks
|
||||||
|
@ -373,8 +374,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeCopied)
|
||||||
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
||||||
ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
||||||
EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
|
EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
|
||||||
ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
|
ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
|
||||||
GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
|
GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
|
||||||
|
|
||||||
while (allocation != nullptr) {
|
while (allocation != nullptr) {
|
||||||
if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) &&
|
if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) &&
|
||||||
|
@ -394,8 +395,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeAligned)
|
||||||
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
||||||
ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
||||||
EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
|
EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
|
||||||
ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
|
ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
||||||
GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
|
GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
|
||||||
|
|
||||||
while (allocation != nullptr) {
|
while (allocation != nullptr) {
|
||||||
if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) &&
|
if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) &&
|
||||||
|
@ -412,6 +413,19 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeAligned)
|
||||||
EXPECT_EQ(alignUp(allocation->getUnderlyingBufferSize(), MemoryConstants::cacheLineSize), allocation->getUnderlyingBufferSize());
|
EXPECT_EQ(alignUp(allocation->getUnderlyingBufferSize(), MemoryConstants::cacheLineSize), allocation->getUnderlyingBufferSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(EnqueueFillBufferCmdTests, WhenFillBufferIsCalledTwiceThenPatternAllocationIsReused) {
|
||||||
|
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
||||||
|
ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
|
||||||
|
EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
|
||||||
|
ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
|
||||||
|
GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
|
||||||
|
EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
|
||||||
|
ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
|
||||||
|
EXPECT_NE(csr.getAllocationsForReuse().peekHead(), nullptr);
|
||||||
|
EXPECT_EQ(allocation, csr.getAllocationsForReuse().peekHead());
|
||||||
|
EXPECT_EQ(csr.getAllocationsForReuse().peekTail(), allocation);
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteShouldGetPreparedForMiddleKernel) {
|
HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteShouldGetPreparedForMiddleKernel) {
|
||||||
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
||||||
ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
|
ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
|
||||||
|
@ -436,10 +450,10 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteSho
|
||||||
nullptr);
|
nullptr);
|
||||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||||
|
|
||||||
ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
|
ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
|
||||||
ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
|
ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
||||||
|
|
||||||
GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
|
GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
|
||||||
ASSERT_NE(nullptr, allocation);
|
ASSERT_NE(nullptr, allocation);
|
||||||
|
|
||||||
EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size));
|
EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size));
|
||||||
|
@ -469,10 +483,10 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeTwoBytesSh
|
||||||
nullptr);
|
nullptr);
|
||||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||||
|
|
||||||
ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
|
ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
|
||||||
ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
|
ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
|
||||||
|
|
||||||
GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
|
GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
|
||||||
ASSERT_NE(nullptr, allocation);
|
ASSERT_NE(nullptr, allocation);
|
||||||
|
|
||||||
EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size));
|
EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size));
|
||||||
|
@ -500,9 +514,9 @@ HWTEST_F(EnqueueFillBufferCmdTests, givenEnqueueFillBufferWhenPatternAllocationI
|
||||||
nullptr);
|
nullptr);
|
||||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||||
|
|
||||||
ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
|
ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
|
||||||
|
|
||||||
GraphicsAllocation *patternAllocation = csr.getTemporaryAllocations().peekHead();
|
GraphicsAllocation *patternAllocation = csr.getAllocationsForReuse().peekHead();
|
||||||
ASSERT_NE(nullptr, patternAllocation);
|
ASSERT_NE(nullptr, patternAllocation);
|
||||||
|
|
||||||
EXPECT_EQ(GraphicsAllocation::AllocationType::FILL_PATTERN, patternAllocation->getAllocationType());
|
EXPECT_EQ(GraphicsAllocation::AllocationType::FILL_PATTERN, patternAllocation->getAllocationType());
|
||||||
|
|
Loading…
Reference in New Issue