Optimize Fill buffer calls.

- reuse pattern allocations for subsequent calls.
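
In outline, enqueueFillBuffer now first asks the command stream receiver's internal allocation storage for a previously used fill-pattern allocation and only falls back to a fresh allocation when none is available; the pattern allocation is then stored back as REUSABLE_ALLOCATION (rather than TEMPORARY_ALLOCATION) so a later call can pick it up. A condensed sketch of that flow, assembled from the two runtime hunks below (not the complete function; local names follow the diff):

    // Try to recycle a fill-pattern allocation from the CSR's reuse list first.
    auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
    auto storageWithAllocations = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
    auto allocationType = GraphicsAllocation::AllocationType::FILL_PATTERN;
    auto patternAllocation = storageWithAllocations->obtainReusableAllocation(patternSize, allocationType).release();
    commandStreamReceieverOwnership.unlock();

    if (!patternAllocation) {
        // Nothing suitable on the reuse list yet - fall back to a fresh allocation.
        patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties(
            {getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize),
             GraphicsAllocation::AllocationType::FILL_PATTERN, getDevice().getDeviceBitfield()});
    }

    // ... copy the pattern in and enqueue the builtin fill kernel ...

    // Storing the allocation as REUSABLE_ALLOCATION keeps it on the reuse list,
    // so a subsequent enqueueFillBuffer can obtain it once the tagged task completes.
    storageWithAllocations->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION, taskCount);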

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
Authored by Michal Mrozek, 2021-03-05 13:51:16 +00:00; committed by Compute-Runtime-Automation
parent 7d808bd560
commit 0cd03220df
2 changed files with 39 additions and 17 deletions

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2020 Intel Corporation
+ * Copyright (C) 2017-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
     buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
-    auto patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize), GraphicsAllocation::AllocationType::FILL_PATTERN, getDevice().getDeviceBitfield()});
+    auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
+    auto storageWithAllocations = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
+    auto allocationType = GraphicsAllocation::AllocationType::FILL_PATTERN;
+    auto patternAllocation = storageWithAllocations->obtainReusableAllocation(patternSize, allocationType).release();
+    commandStreamReceieverOwnership.unlock();
+
+    if (!patternAllocation) {
+        patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize), GraphicsAllocation::AllocationType::FILL_PATTERN, getDevice().getDeviceBitfield()});
+    }

     if (patternSize == 1) {
         int patternInt = (uint32_t)((*(uint8_t *)pattern << 24) | (*(uint8_t *)pattern << 16) | (*(uint8_t *)pattern << 8) | *(uint8_t *)pattern);
@@ -86,7 +94,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueFillBuffer(
         event);

     auto storageForAllocation = getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
-    storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), TEMPORARY_ALLOCATION, taskCount);
+    storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(patternAllocation), REUSABLE_ALLOCATION, taskCount);

     return CL_SUCCESS;
 }

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017-2020 Intel Corporation
+ * Copyright (C) 2017-2021 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -76,7 +76,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenGpgp
     // Compute the SIMD lane mask
     size_t simd =
-        cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8;
+        cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16
+                                                                                                                          : 8;
     uint64_t simdMask = maxNBitValue(simd);

     // Mask off lanes based on the execution masks
@@ -373,8 +374,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeCopied)
     auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
     ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
     EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
     while (allocation != nullptr) {
         if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) &&
@@ -394,8 +395,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeAligned)
     auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
     ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
     EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
     while (allocation != nullptr) {
         if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) &&
@@ -412,6 +413,19 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeAligned)
     EXPECT_EQ(alignUp(allocation->getUnderlyingBufferSize(), MemoryConstants::cacheLineSize), allocation->getUnderlyingBufferSize());
 }

+HWTEST_F(EnqueueFillBufferCmdTests, WhenFillBufferIsCalledTwiceThenPatternAllocationIsReused) {
+    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
+    ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
+    EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
+    EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer);
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    EXPECT_NE(csr.getAllocationsForReuse().peekHead(), nullptr);
+    EXPECT_EQ(allocation, csr.getAllocationsForReuse().peekHead());
+    EXPECT_EQ(csr.getAllocationsForReuse().peekTail(), allocation);
+}
+
 HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteShouldGetPreparedForMiddleKernel) {
     auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
     ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
@@ -436,10 +450,10 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteSho
         nullptr);
     ASSERT_EQ(CL_SUCCESS, retVal);

-    ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
     ASSERT_NE(nullptr, allocation);

     EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size));
@@ -469,10 +483,10 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeTwoBytesSh
         nullptr);
     ASSERT_EQ(CL_SUCCESS, retVal);

-    ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty());
-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
+    GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead();
     ASSERT_NE(nullptr, allocation);

     EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size));
@@ -500,9 +514,9 @@ HWTEST_F(EnqueueFillBufferCmdTests, givenEnqueueFillBufferWhenPatternAllocationI
         nullptr);
     ASSERT_EQ(CL_SUCCESS, retVal);

-    ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
-    GraphicsAllocation *patternAllocation = csr.getTemporaryAllocations().peekHead();
+    ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty());
+    GraphicsAllocation *patternAllocation = csr.getAllocationsForReuse().peekHead();
     ASSERT_NE(nullptr, patternAllocation);

     EXPECT_EQ(GraphicsAllocation::AllocationType::FILL_PATTERN, patternAllocation->getAllocationType());