From 37c7e272767b6134d599823f68c228dc6dd24803 Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Wed, 31 Jan 2018 14:45:42 +0100 Subject: [PATCH] Fix heap size programming. - In various scenarios code was not programming the max heap size correctly - It was possible for SSH to overcome the limit - Size was programmed smaller then it really was, which resulted in smaller reuse, which led to SBA reprogramming which led to lower performance in ooq scenarios - This change fixes the heap size programming by always utilizing full allocation size and always limiting SSH at proper value Change-Id: Ib703d2b0709ed8227a293def3a454bf1bb516dfd --- Jenkinsfile | 2 +- runtime/command_queue/command_queue.cpp | 28 +++++------ runtime/indirect_heap/indirect_heap.h | 4 ++ .../command_queue/command_queue_tests.cpp | 16 +++++-- .../command_stream_receiver_hw_tests.cpp | 48 +++++++++++++++++++ unit_tests/event/event_tests.cpp | 4 +- 6 files changed, 82 insertions(+), 20 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 85dee8fc5d..29c17dbde2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,4 +2,4 @@ neoDependenciesRev='733920-765' strategy='EQUAL' allowedF=47 -allowedCD=365 +allowedCD=364 diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index 15858b389f..8d16757b36 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -237,34 +237,34 @@ IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, } if (!heapMemory) { - // Heap should be at least minHeapSize unless we're requesting an empty heap - size_t minHeapSize = 64 * KB; - if (IndirectHeap::SURFACE_STATE == heapType) { - minHeapSize -= MemoryConstants::pageSize; - } - size_t reservedSize = 0; if (heapType == IndirectHeap::INSTRUCTION) { reservedSize = getInstructionHeapReservedBlockSize(); } - + auto finalHeapSize = defaultHeapSize; minRequiredSize += reservedSize; - minRequiredSize = minRequiredSize ? std::max(minRequiredSize, minHeapSize) : 0; - minRequiredSize = minRequiredSize > 0 ? alignUp(minRequiredSize, MemoryConstants::cacheLineSize) : 0; - const size_t heapAlignment = MemoryConstants::pageSize; - heapMemory = memoryManager->obtainReusableAllocation(minRequiredSize).release(); + finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize); + + heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize).release(); if (!heapMemory) { - heapMemory = memoryManager->allocateGraphicsMemory(minRequiredSize, heapAlignment); + heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize); + } else { + finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize); + } + + if (IndirectHeap::SURFACE_STATE == heapType) { + DEBUG_BREAK_IF(minRequiredSize > maxSshSize); + finalHeapSize = maxSshSize; } if (heap) { - heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), minRequiredSize); + heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize); heap->replaceGraphicsAllocation(heapMemory); } else { heap = new IndirectHeap(heapMemory); - heap->overrideMaxSize(minRequiredSize); + heap->overrideMaxSize(finalHeapSize); } if (heapType == IndirectHeap::INSTRUCTION) { diff --git a/runtime/indirect_heap/indirect_heap.h b/runtime/indirect_heap/indirect_heap.h index 1f42878bde..0a42f97b4f 100644 --- a/runtime/indirect_heap/indirect_heap.h +++ b/runtime/indirect_heap/indirect_heap.h @@ -24,10 +24,14 @@ #include "runtime/command_stream/linear_stream.h" #include "runtime/helpers/aligned_memory.h" #include "runtime/helpers/ptr_math.h" +#include "runtime/helpers/basic_math.h" namespace OCLRT { class GraphicsAllocation; +constexpr size_t defaultHeapSize = 64 * KB; +constexpr size_t maxSshSize = defaultHeapSize - MemoryConstants::pageSize; + class IndirectHeap : public LinearStream { typedef LinearStream BaseClass; diff --git a/unit_tests/command_queue/command_queue_tests.cpp b/unit_tests/command_queue/command_queue_tests.cpp index 964dcfa77f..dbef7fcdd1 100644 --- a/unit_tests/command_queue/command_queue_tests.cpp +++ b/unit_tests/command_queue/command_queue_tests.cpp @@ -434,7 +434,12 @@ TEST_P(CommandQueueIndirectHeapTest, getIndirectHeapCanRecycle) { const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), requiredSize); ASSERT_NE(nullptr, &indirectHeap); - EXPECT_GE(indirectHeap.getMaxAvailableSpace(), requiredSize); + if (this->GetParam() == IndirectHeap::SURFACE_STATE) { + //no matter what SSH is always capped + EXPECT_EQ(indirectHeap.getMaxAvailableSpace(), maxSshSize); + } else { + EXPECT_GE(indirectHeap.getMaxAvailableSpace(), requiredSize); + } } TEST_P(CommandQueueIndirectHeapTest, alignSizeToCacheLine) { @@ -469,8 +474,13 @@ TEST_P(CommandQueueIndirectHeapTest, MemoryManagerWithReusableAllocationsWhenAsk EXPECT_EQ(indirectHeap.getGraphicsAllocation(), allocation); - //make sure we are below 64 KB even though we reuse 128KB. - EXPECT_LE(indirectHeap.getMaxAvailableSpace(), 64 * KB); + //if we obtain heap from reusable pool, we need to keep the size of allocation + //surface state heap is an exception, it is capped at ~60KB + if (this->GetParam() == IndirectHeap::SURFACE_STATE) { + EXPECT_EQ(indirectHeap.getMaxAvailableSpace(), 64 * KB - MemoryConstants::pageSize); + } else { + EXPECT_EQ(indirectHeap.getMaxAvailableSpace(), 128 * KB); + } EXPECT_TRUE(memoryManager->allocationsForReuse.peekIsEmpty()); } diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index e18d704e33..576f090a70 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -531,6 +531,54 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfSize EXPECT_NE(cmdList.end(), stateBaseAddressItor); } +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + configureCSRtoNonDirtyState(); + + dsh.replaceBuffer(nullptr, 0); + flushTask(commandStreamReceiver); + + parseCommands(commandStreamReceiver.commandStream, 0); + auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), stateBaseAddressItor); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenSshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + configureCSRtoNonDirtyState(); + + ssh.replaceBuffer(nullptr, 0); + flushTask(commandStreamReceiver); + + parseCommands(commandStreamReceiver.commandStream, 0); + auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), stateBaseAddressItor); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenIohHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + configureCSRtoNonDirtyState(); + + ioh.replaceBuffer(nullptr, 0); + flushTask(commandStreamReceiver); + + parseCommands(commandStreamReceiver.commandStream, 0); + auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), stateBaseAddressItor); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenIshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + configureCSRtoNonDirtyState(); + + ih.replaceBuffer(nullptr, 0); + flushTask(commandStreamReceiver); + + parseCommands(commandStreamReceiver.commandStream, 0); + auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), stateBaseAddressItor); +} + HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldNotBeSentIfTheSame) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; diff --git a/unit_tests/event/event_tests.cpp b/unit_tests/event/event_tests.cpp index 712844093f..17fddf3bee 100644 --- a/unit_tests/event/event_tests.cpp +++ b/unit_tests/event/event_tests.cpp @@ -467,7 +467,7 @@ TEST_F(InternalsEventTest, resizeCmdQueueHeapsWhenKernelOparationHeapsAreBigger) auto dsh = createFullHeap(requestedSize); auto ish = createFullHeap(requestedSize); auto ioh = createFullHeap(requestedSize); - auto ssh = createFullHeap(requestedSize); + auto ssh = createFullHeap(maxSshSize); using UniqueIH = std::unique_ptr; auto kernelOperation = new KernelOperation(std::unique_ptr(cmdStream), UniqueIH(dsh), @@ -482,7 +482,7 @@ TEST_F(InternalsEventTest, resizeCmdQueueHeapsWhenKernelOparationHeapsAreBigger) EXPECT_LT(cmdQueueDsh.getMaxAvailableSpace(), dsh->getMaxAvailableSpace()); EXPECT_LT(cmdQueueIsh.getMaxAvailableSpace(), ish->getMaxAvailableSpace()); EXPECT_LT(cmdQueueIoh.getMaxAvailableSpace(), ioh->getMaxAvailableSpace()); - EXPECT_LT(cmdQueueSsh.getMaxAvailableSpace(), ssh->getMaxAvailableSpace()); + EXPECT_EQ(cmdQueueSsh.getMaxAvailableSpace(), ssh->getMaxAvailableSpace()); cmdComputeKernel->submit(0, false);