From 82a2afb611ff079214720c09275044e22c32e60c Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Tue, 13 Feb 2018 10:01:20 +0100 Subject: [PATCH] Improve DC flush handling. -Do not flush dc for every command in batched mode -Do that only in immediate mode -For commands that need a DC flush do not noop pipe controls -Ensure that each command buffer in batching mode ends with dc flush. Change-Id: I3cd9d1831c19b69c66092687922f20df7e330245 --- .../command_stream_receiver_hw.inl | 27 +++--- .../command_stream_receiver_hw_tests.cpp | 87 +++++++++++++++++++ 2 files changed, 103 insertions(+), 11 deletions(-) diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl index b806e4300c..c8cfb4fbba 100644 --- a/runtime/command_stream/command_stream_receiver_hw.inl +++ b/runtime/command_stream/command_stream_receiver_hw.inl @@ -125,14 +125,13 @@ CompletionStamp CommandStreamReceiverHw::flushTask( //for ImmediateDispatch we will send this right away, therefore this pipe control will close the level //for BatchedSubmissions it will be nooped and only last ppc in batch will be emitted. 
levelClosed = true; + //if we guard with ppc, flush dc as well to speed up completion latency + if (dispatchFlags.guardCommandBufferWithPipeControl) { + dispatchFlags.dcFlush = true; + } } - //if we guard with ppc, flush dc as well to speed up completion latency - if (dispatchFlags.guardCommandBufferWithPipeControl) { - dispatchFlags.dcFlush = true; - } - - if (dispatchFlags.outOfOrderExecutionAllowed) { + if (dispatchFlags.outOfOrderExecutionAllowed && !dispatchFlags.dcFlush) { currentPipeControlForNooping = ptrOffset(commandStreamTask.getBase(), commandStreamTask.getUsed()); } @@ -383,6 +382,7 @@ inline void CommandStreamReceiverHw::flushBatchedSubmissions() { return; } typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; + typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; Device *device = this->getMemoryManager()->device; TakeOwnershipWrapper deviceOwnership(*device); EngineType engineType = device->getEngineType(); @@ -392,7 +392,7 @@ inline void CommandStreamReceiverHw::flushBatchedSubmissions() { ResidencyContainer surfacesForSubmit; ResourcePackage resourcePackage; auto pipeControlLocationSize = getRequiredPipeControlSize(); - void *currentPipeControlForNooping = nullptr; + void *currentPipeControl = nullptr; while (!commandBufferList.peekIsEmpty()) { size_t totalUsedSize = 0u; @@ -405,15 +405,15 @@ inline void CommandStreamReceiverHw::flushBatchedSubmissions() { FlushStampUpdateHelper flushStampUpdateHelper; flushStampUpdateHelper.insert(primaryCmdBuffer->flushStamp->getStampReference()); - currentPipeControlForNooping = primaryCmdBuffer->pipeControlLocation; + currentPipeControl = primaryCmdBuffer->pipeControlLocation; while (nextCommandBuffer && nextCommandBuffer->inspectionId == primaryCmdBuffer->inspectionId) { //noop pipe control - if (currentPipeControlForNooping) { - memset(currentPipeControlForNooping, 0, pipeControlLocationSize); + if (currentPipeControl) { + memset(currentPipeControl, 0, pipeControlLocationSize); } 
//obtain next candidate for nooping - currentPipeControlForNooping = nextCommandBuffer->pipeControlLocation; + currentPipeControl = nextCommandBuffer->pipeControlLocation; flushStampUpdateHelper.insert(nextCommandBuffer->flushStamp->getStampReference()); auto nextCommandBufferAddress = nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBuffer(); @@ -429,6 +429,11 @@ inline void CommandStreamReceiverHw::flushBatchedSubmissions() { surfacesForSubmit.push_back(surface); } + //make sure we flush DC + if (currentPipeControl) { + ((PIPE_CONTROL *)currentPipeControl)->setDcFlushEnable(true); + } + auto flushStamp = this->flush(primaryCmdBuffer->batchBuffer, engineType, &surfacesForSubmit); //after flush task level is closed diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index f941dfbac3..00c61a2cd4 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -2895,6 +2895,93 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTwoTasks EXPECT_NE(nullptr, ppcAfterChange); } +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenDcFlushIsNotRequiredThenItIsNotSet) { + CommandQueueHw commandQueue(nullptr, pDevice, 0); + auto &commandStream = commandQueue.getCS(4096u); + + auto mockCsr = new MockCsrHw2(*platformDevices[0]); + pDevice->resetCommandStreamReceiver(mockCsr); + + mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); + + DispatchFlags dispatchFlags; + dispatchFlags.guardCommandBufferWithPipeControl = true; + + mockCsr->flushTask(commandStream, + 0, + dsh, + ih, + ioh, + ssh, + taskLevel, + dispatchFlags); + + parseCommands(commandStream); + auto itorPipeControl = find(cmdList.begin(), cmdList.end()); + auto pipeControl = genCmdCast(*itorPipeControl); + EXPECT_FALSE(pipeControl->getDcFlushEnable()); 
+} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenCommandAreSubmittedThenDcFlushIsAdded) { + CommandQueueHw commandQueue(nullptr, pDevice, 0); + auto &commandStream = commandQueue.getCS(4096u); + + auto mockCsr = new MockCsrHw2(*platformDevices[0]); + pDevice->resetCommandStreamReceiver(mockCsr); + + mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); + + DispatchFlags dispatchFlags; + dispatchFlags.guardCommandBufferWithPipeControl = true; + dispatchFlags.outOfOrderExecutionAllowed = true; + + mockCsr->flushTask(commandStream, + 0, + dsh, + ih, + ioh, + ssh, + taskLevel, + dispatchFlags); + + parseCommands(commandStream); + auto itorPipeControl = find(cmdList.begin(), cmdList.end()); + auto pipeControl = genCmdCast(*itorPipeControl); + + mockCsr->flushBatchedSubmissions(); + EXPECT_TRUE(pipeControl->getDcFlushEnable()); +} + +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenDcFlushIsRequiredThenPipeControlIsNotRegistredForNooping) { + CommandQueueHw commandQueue(nullptr, pDevice, 0); + auto &commandStream = commandQueue.getCS(4096u); + + auto mockCsr = new MockCsrHw2(*platformDevices[0]); + pDevice->resetCommandStreamReceiver(mockCsr); + + mockCsr->overrideDispatchPolicy(CommandStreamReceiver::DispatchMode::BatchedDispatch); + + auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); + mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); + + DispatchFlags dispatchFlags; + dispatchFlags.dcFlush = true; + dispatchFlags.outOfOrderExecutionAllowed = true; + + mockCsr->flushTask(commandStream, + 0, + dsh, + ih, + ioh, + ssh, + taskLevel, + dispatchFlags); + + //validate if we recorded ppc positions + auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); + EXPECT_EQ(nullptr, cmdBuffer->pipeControlLocation); +} + HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenCsrInBatchingModeAndOoqFlagSetToFalseWhenTwoTasksArePassedWithTheSameLevelThenThereIsPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pDevice, 0); auto &commandStream = commandQueue.getCS(4096u);