Fix heap size programming.

- In various scenarios code was not programming the max heap size correctly
- It was possible for the SSH size to exceed its limit
- The size was programmed smaller than it really was, which resulted in less
reuse, which led to SBA reprogramming, which in turn lowered performance in
out-of-order queue (OOQ) scenarios
- This change fixes the heap size programming by always utilizing the full
allocation size and always limiting the SSH to the proper value

Change-Id: Ib703d2b0709ed8227a293def3a454bf1bb516dfd
This commit is contained in:
Mrozek, Michal
2018-01-31 14:45:42 +01:00
committed by sys_ocldev
parent 285ecbd9cb
commit 37c7e27276
6 changed files with 82 additions and 20 deletions

2
Jenkinsfile vendored
View File

@ -2,4 +2,4 @@
neoDependenciesRev='733920-765' neoDependenciesRev='733920-765'
strategy='EQUAL' strategy='EQUAL'
allowedF=47 allowedF=47
allowedCD=365 allowedCD=364

View File

@ -237,34 +237,34 @@ IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType,
} }
if (!heapMemory) { if (!heapMemory) {
// Heap should be at least minHeapSize unless we're requesting an empty heap
size_t minHeapSize = 64 * KB;
if (IndirectHeap::SURFACE_STATE == heapType) {
minHeapSize -= MemoryConstants::pageSize;
}
size_t reservedSize = 0; size_t reservedSize = 0;
if (heapType == IndirectHeap::INSTRUCTION) { if (heapType == IndirectHeap::INSTRUCTION) {
reservedSize = getInstructionHeapReservedBlockSize(); reservedSize = getInstructionHeapReservedBlockSize();
} }
auto finalHeapSize = defaultHeapSize;
minRequiredSize += reservedSize; minRequiredSize += reservedSize;
minRequiredSize = minRequiredSize ? std::max(minRequiredSize, minHeapSize) : 0;
minRequiredSize = minRequiredSize > 0 ? alignUp(minRequiredSize, MemoryConstants::cacheLineSize) : 0;
const size_t heapAlignment = MemoryConstants::pageSize; finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize);
heapMemory = memoryManager->obtainReusableAllocation(minRequiredSize).release();
heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize).release();
if (!heapMemory) { if (!heapMemory) {
heapMemory = memoryManager->allocateGraphicsMemory(minRequiredSize, heapAlignment); heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
} else {
finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
}
if (IndirectHeap::SURFACE_STATE == heapType) {
DEBUG_BREAK_IF(minRequiredSize > maxSshSize);
finalHeapSize = maxSshSize;
} }
if (heap) { if (heap) {
heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), minRequiredSize); heap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize);
heap->replaceGraphicsAllocation(heapMemory); heap->replaceGraphicsAllocation(heapMemory);
} else { } else {
heap = new IndirectHeap(heapMemory); heap = new IndirectHeap(heapMemory);
heap->overrideMaxSize(minRequiredSize); heap->overrideMaxSize(finalHeapSize);
} }
if (heapType == IndirectHeap::INSTRUCTION) { if (heapType == IndirectHeap::INSTRUCTION) {

View File

@ -24,10 +24,14 @@
#include "runtime/command_stream/linear_stream.h" #include "runtime/command_stream/linear_stream.h"
#include "runtime/helpers/aligned_memory.h" #include "runtime/helpers/aligned_memory.h"
#include "runtime/helpers/ptr_math.h" #include "runtime/helpers/ptr_math.h"
#include "runtime/helpers/basic_math.h"
namespace OCLRT { namespace OCLRT {
class GraphicsAllocation; class GraphicsAllocation;
constexpr size_t defaultHeapSize = 64 * KB;
constexpr size_t maxSshSize = defaultHeapSize - MemoryConstants::pageSize;
class IndirectHeap : public LinearStream { class IndirectHeap : public LinearStream {
typedef LinearStream BaseClass; typedef LinearStream BaseClass;

View File

@ -434,7 +434,12 @@ TEST_P(CommandQueueIndirectHeapTest, getIndirectHeapCanRecycle) {
const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), requiredSize); const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), requiredSize);
ASSERT_NE(nullptr, &indirectHeap); ASSERT_NE(nullptr, &indirectHeap);
if (this->GetParam() == IndirectHeap::SURFACE_STATE) {
//no matter what SSH is always capped
EXPECT_EQ(indirectHeap.getMaxAvailableSpace(), maxSshSize);
} else {
EXPECT_GE(indirectHeap.getMaxAvailableSpace(), requiredSize); EXPECT_GE(indirectHeap.getMaxAvailableSpace(), requiredSize);
}
} }
TEST_P(CommandQueueIndirectHeapTest, alignSizeToCacheLine) { TEST_P(CommandQueueIndirectHeapTest, alignSizeToCacheLine) {
@ -469,8 +474,13 @@ TEST_P(CommandQueueIndirectHeapTest, MemoryManagerWithReusableAllocationsWhenAsk
EXPECT_EQ(indirectHeap.getGraphicsAllocation(), allocation); EXPECT_EQ(indirectHeap.getGraphicsAllocation(), allocation);
//make sure we are below 64 KB even though we reuse 128KB. //if we obtain heap from reusable pool, we need to keep the size of allocation
EXPECT_LE(indirectHeap.getMaxAvailableSpace(), 64 * KB); //surface state heap is an exception, it is capped at ~60KB
if (this->GetParam() == IndirectHeap::SURFACE_STATE) {
EXPECT_EQ(indirectHeap.getMaxAvailableSpace(), 64 * KB - MemoryConstants::pageSize);
} else {
EXPECT_EQ(indirectHeap.getMaxAvailableSpace(), 128 * KB);
}
EXPECT_TRUE(memoryManager->allocationsForReuse.peekIsEmpty()); EXPECT_TRUE(memoryManager->allocationsForReuse.peekIsEmpty());
} }

View File

@ -531,6 +531,54 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfSize
EXPECT_NE(cmdList.end(), stateBaseAddressItor); EXPECT_NE(cmdList.end(), stateBaseAddressItor);
} }
// Checks that a STATE_BASE_ADDRESS command shows up in the command stream
// when the `dsh` heap buffer is replaced before flushTask is called.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // NOTE(review): presumably leaves the CSR with no pending state updates - confirm against the helper.
    configureCSRtoNonDirtyState<FamilyType>();

    // Swap the heap's backing buffer for a null, zero-sized one.
    dsh.replaceBuffer(nullptr, 0);
    flushTask(csr);

    // Parse what was written to the CSR command stream and look for an SBA command.
    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
// Checks that a STATE_BASE_ADDRESS command shows up in the command stream
// when the `ssh` heap buffer is replaced before flushTask is called.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenSshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // NOTE(review): presumably leaves the CSR with no pending state updates - confirm against the helper.
    configureCSRtoNonDirtyState<FamilyType>();

    // Swap the heap's backing buffer for a null, zero-sized one.
    ssh.replaceBuffer(nullptr, 0);
    flushTask(csr);

    // Parse what was written to the CSR command stream and look for an SBA command.
    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
// Checks that a STATE_BASE_ADDRESS command shows up in the command stream
// when the `ioh` heap buffer is replaced before flushTask is called.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenIohHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // NOTE(review): presumably leaves the CSR with no pending state updates - confirm against the helper.
    configureCSRtoNonDirtyState<FamilyType>();

    // Swap the heap's backing buffer for a null, zero-sized one.
    ioh.replaceBuffer(nullptr, 0);
    flushTask(csr);

    // Parse what was written to the CSR command stream and look for an SBA command.
    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
// Checks that a STATE_BASE_ADDRESS command shows up in the command stream
// when the `ih` heap buffer is replaced before flushTask is called.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenIshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // NOTE(review): presumably leaves the CSR with no pending state updates - confirm against the helper.
    configureCSRtoNonDirtyState<FamilyType>();

    // Swap the heap's backing buffer for a null, zero-sized one.
    ih.replaceBuffer(nullptr, 0);
    flushTask(csr);

    // Parse what was written to the CSR command stream and look for an SBA command.
    parseCommands<FamilyType>(csr.commandStream, 0);
    auto sbaIt = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldNotBeSentIfTheSame) { HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldNotBeSentIfTheSame) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>(); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.isPreambleSent = true;

View File

@ -467,7 +467,7 @@ TEST_F(InternalsEventTest, resizeCmdQueueHeapsWhenKernelOparationHeapsAreBigger)
auto dsh = createFullHeap(requestedSize); auto dsh = createFullHeap(requestedSize);
auto ish = createFullHeap(requestedSize); auto ish = createFullHeap(requestedSize);
auto ioh = createFullHeap(requestedSize); auto ioh = createFullHeap(requestedSize);
auto ssh = createFullHeap(requestedSize); auto ssh = createFullHeap(maxSshSize);
using UniqueIH = std::unique_ptr<IndirectHeap>; using UniqueIH = std::unique_ptr<IndirectHeap>;
auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh), auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
@ -482,7 +482,7 @@ TEST_F(InternalsEventTest, resizeCmdQueueHeapsWhenKernelOparationHeapsAreBigger)
EXPECT_LT(cmdQueueDsh.getMaxAvailableSpace(), dsh->getMaxAvailableSpace()); EXPECT_LT(cmdQueueDsh.getMaxAvailableSpace(), dsh->getMaxAvailableSpace());
EXPECT_LT(cmdQueueIsh.getMaxAvailableSpace(), ish->getMaxAvailableSpace()); EXPECT_LT(cmdQueueIsh.getMaxAvailableSpace(), ish->getMaxAvailableSpace());
EXPECT_LT(cmdQueueIoh.getMaxAvailableSpace(), ioh->getMaxAvailableSpace()); EXPECT_LT(cmdQueueIoh.getMaxAvailableSpace(), ioh->getMaxAvailableSpace());
EXPECT_LT(cmdQueueSsh.getMaxAvailableSpace(), ssh->getMaxAvailableSpace()); EXPECT_EQ(cmdQueueSsh.getMaxAvailableSpace(), ssh->getMaxAvailableSpace());
cmdComputeKernel->submit(0, false); cmdComputeKernel->submit(0, false);