From 6d610983f16d342365d52b5f4edce1615b10aade Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Mon, 15 Oct 2018 10:35:45 +0200 Subject: [PATCH] Deferred Pipe Control programming and CSR flush on Barrier request Change-Id: Iabae0f9159bb455518cedf7da068c7d3da72b840 Signed-off-by: Dunajski, Bartosz --- runtime/command_queue/command_queue.cpp | 5 - runtime/command_queue/command_queue.h | 1 - runtime/command_queue/enqueue_common.h | 21 +-- .../command_stream/command_stream_receiver.h | 2 + .../command_stream_receiver_hw.inl | 12 +- runtime/command_stream/csr_definitions.h | 4 - runtime/helpers/task_information.cpp | 8 - runtime/helpers/task_information.h | 2 - unit_tests/helpers/timestamp_packet_tests.cpp | 147 ++++++++---------- .../libult/ult_command_stream_receiver.h | 1 + 10 files changed, 77 insertions(+), 126 deletions(-) diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index 8168463368..7f62785a97 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -586,9 +586,4 @@ void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, Timestamp timestampPacketContainer->add(allocator->getTag()); } } - -bool CommandQueue::allowTimestampPacketPipeControlWrite(uint32_t commandType, EventsRequest &eventsRequest) { - return this->timestampPacketContainer && - ((CL_COMMAND_MARKER == commandType && eventsRequest.outEvent && eventsRequest.numEventsInWaitList == 0) || (CL_COMMAND_BARRIER == commandType)); -} } // namespace OCLRT diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index ebb17daea0..9bc2a4ac5a 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -417,7 +417,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> { AuxTranslationDirection auxTranslationDirection); void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes); - 
bool allowTimestampPacketPipeControlWrite(uint32_t commandType, EventsRequest &eventsRequest); Context *context; Device *device; diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 0cf3b75c1d..5fcf099ff5 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -198,7 +198,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, TimestampPacketContainer previousTimestampPacketNodes(device->getMemoryManager()); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); - bool emitPipeControlWithTimestampWrite = allowTimestampPacketPipeControlWrite(commandType, eventsRequest); if (multiDispatchInfo.empty() == false) { HwPerfCounter *hwPerfCounter = nullptr; @@ -271,10 +270,9 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, slmUsed = multiDispatchInfo.usesSlm(); } else if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { - if (emitPipeControlWithTimestampWrite) { - obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes); + if (CL_COMMAND_BARRIER == commandType) { + commandStreamReceiver.requestStallingPipeControlOnNextFlush(); } - if (eventBuilder.getEvent()) { // Event from non-kernel enqueue inherits TimestampPackets from waitlist and command queue eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer); @@ -330,7 +328,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } } - auto submissionRequired = !isCommandWithoutKernel(commandType) || emitPipeControlWithTimestampWrite; + auto submissionRequired = !isCommandWithoutKernel(commandType); if (submissionRequired) { completionStamp = enqueueNonBlocked( @@ -502,7 +500,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( bool slmUsed, PrintfHandler *printfHandler) { - UNRECOVERABLE_IF(multiDispatchInfo.empty() && !timestampPacketContainer); + UNRECOVERABLE_IF(multiDispatchInfo.empty()); auto &commandStreamReceiver = 
device->getCommandStreamReceiver(); auto implicitFlush = false; @@ -567,9 +565,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); } - if (multiDispatchInfo.peekMainKernel()) { - commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy()); - } + commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy()); DispatchFlags dispatchFlags; dispatchFlags.blocking = blocking; @@ -586,9 +582,6 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || commandStreamReceiver.isNTo1SubmissionModelEnabled(); if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { dispatchFlags.outOfDeviceDependencies = &eventsRequest; - if (multiDispatchInfo.empty()) { - dispatchFlags.timestampPacketForPipeControlWrite = timestampPacketContainer->peekNodes().at(0); - } } dispatchFlags.numGrfRequired = numGrfRequired; DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady); @@ -661,10 +654,6 @@ void CommandQueueHw::enqueueBlocked( auto cmd = std::make_unique(*this, commandStreamReceiver, commandType, cmdSize); - if (allowTimestampPacketPipeControlWrite(commandType, eventsRequest)) { - cmd->setTimestampPacketsForPipeControlWrite(*timestampPacketContainer); - } - eventBuilder->getEvent()->setCommand(std::move(cmd)); } else { //store task data in event diff --git a/runtime/command_stream/command_stream_receiver.h b/runtime/command_stream/command_stream_receiver.h index 6980584860..5982a2926c 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -118,6 +118,7 @@ class CommandStreamReceiver { void cleanupResources(); void requestThreadArbitrationPolicy(uint32_t requiredPolicy) { this->requiredThreadArbitrationPolicy = requiredPolicy; } + void requestStallingPipeControlOnNextFlush() { 
stallingPipeControlOnNextFlushRequired = true; } virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, OsContext &osContext) = 0; MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait); @@ -186,6 +187,7 @@ class CommandStreamReceiver { LinearStream commandStream; + bool stallingPipeControlOnNextFlushRequired = false; uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; diff --git a/runtime/command_stream/command_stream_receiver_hw.inl b/runtime/command_stream/command_stream_receiver_hw.inl index d3963530c8..aedb5a7db1 100644 --- a/runtime/command_stream/command_stream_receiver_hw.inl +++ b/runtime/command_stream/command_stream_receiver_hw.inl @@ -254,10 +254,11 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (dispatchFlags.outOfDeviceDependencies) { handleEventsTimestampPacketTags(commandStreamCSR, dispatchFlags, device); } - if (dispatchFlags.timestampPacketForPipeControlWrite) { - uint64_t address = dispatchFlags.timestampPacketForPipeControlWrite->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd); - KernelCommandsHelper::programPipeControlDataWriteWithCsStall(commandStreamCSR, address, 0); - makeResident(*dispatchFlags.timestampPacketForPipeControlWrite->getGraphicsAllocation()); + if (stallingPipeControlOnNextFlushRequired) { + stallingPipeControlOnNextFlushRequired = false; + auto stallingPipeControlCmd = commandStream.getSpaceForCmd(); + *stallingPipeControlCmd = PIPE_CONTROL::sInit(); + stallingPipeControlCmd->setCommandStreamerStallEnable(true); } initPageTableManagerRegisters(commandStreamCSR); programPreemption(commandStreamCSR, device, dispatchFlags); @@ -650,6 +651,9 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat if 
(dispatchFlags.outOfDeviceDependencies) { size += dispatchFlags.outOfDeviceDependencies->numEventsInWaitList * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); } + if (stallingPipeControlOnNextFlushRequired) { + size += sizeof(typename GfxFamily::PIPE_CONTROL); + } return size; } diff --git a/runtime/command_stream/csr_definitions.h b/runtime/command_stream/csr_definitions.h index 485643f386..ccb291db82 100644 --- a/runtime/command_stream/csr_definitions.h +++ b/runtime/command_stream/csr_definitions.h @@ -14,9 +14,6 @@ namespace OCLRT { struct FlushStampTrackingObj; -class TimestampPacket; -template -struct TagNode; namespace CSRequirements { //cleanup section usually contains 1-2 pipeControls BB end and place for BB start @@ -44,7 +41,6 @@ struct DispatchFlags { QueueThrottle throttle = QueueThrottle::MEDIUM; bool implicitFlush = false; bool outOfOrderExecutionAllowed = false; - TagNode *timestampPacketForPipeControlWrite = nullptr; FlushStampTrackingObj *flushStampReference = nullptr; PreemptionMode preemptionMode = PreemptionMode::Disabled; EventsRequest *outOfDeviceDependencies = nullptr; diff --git a/runtime/helpers/task_information.cpp b/runtime/helpers/task_information.cpp index ce0341c3a9..9e8397d941 100644 --- a/runtime/helpers/task_information.cpp +++ b/runtime/helpers/task_information.cpp @@ -252,9 +252,6 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) { dispatchFlags.lowPriority = cmdQ.getPriority() == QueuePriority::LOW; dispatchFlags.throttle = cmdQ.getThrottle(); dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(cmdQ.getDevice(), nullptr); - if (timestampPacketsForPipeControlWrite) { - dispatchFlags.timestampPacketForPipeControlWrite = timestampPacketsForPipeControlWrite->peekNodes().at(0); - } DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady); @@ -273,9 +270,4 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) { return completionStamp; } - -void 
CommandMarker::setTimestampPacketsForPipeControlWrite(TimestampPacketContainer &inputNodes) { - timestampPacketsForPipeControlWrite = std::make_unique(cmdQ.getDevice().getMemoryManager()); - timestampPacketsForPipeControlWrite->assignAndIncrementNodesRefCounts(inputNodes); -} } // namespace OCLRT diff --git a/runtime/helpers/task_information.h b/runtime/helpers/task_information.h index b34e3230f1..98864740cb 100644 --- a/runtime/helpers/task_information.h +++ b/runtime/helpers/task_information.h @@ -120,11 +120,9 @@ class CommandMarker : public Command { : cmdQ(cmdQ), csr(csr), clCommandType(clCommandType), commandSize(commandSize) { } - void setTimestampPacketsForPipeControlWrite(TimestampPacketContainer &inputNodes); CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; private: - std::unique_ptr timestampPacketsForPipeControlWrite; CommandQueue &cmdQ; CommandStreamReceiver &csr; uint32_t clCommandType; diff --git a/unit_tests/helpers/timestamp_packet_tests.cpp b/unit_tests/helpers/timestamp_packet_tests.cpp index 370dc51864..de9b842985 100644 --- a/unit_tests/helpers/timestamp_packet_tests.cpp +++ b/unit_tests/helpers/timestamp_packet_tests.cpp @@ -830,60 +830,22 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingWithoutK clReleaseEvent(clOutEvent); } -HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndOutputEventWhenEnqueueingMarkerThenObtainNewPacketAndEmitPipeControlWithWrite) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; - - MockCommandQueueHw cmdQ(context.get(), device.get(), nullptr); - - MockKernelWithInternals mockKernel(*device, context.get()); - cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); // obtain first TimestmapPacket - - TimestampPacketContainer cmdQNodes(device->getMemoryManager()); - cmdQNodes.assignAndIncrementNodesRefCounts(*cmdQ.timestampPacketContainer); - - cl_event 
clOutEvent; - cmdQ.enqueueMarkerWithWaitList(0, nullptr, &clOutEvent); - - EXPECT_NE(cmdQ.timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // new node obtained - EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size()); - - HardwareParse hwParser; - hwParser.parseCommands(device->getUltCommandStreamReceiver().commandStream, 0); - - bool pipeControlFound = false; - uint64_t expectedAddress = cmdQ.timestampPacketContainer->peekNodes().at(0)->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd); - uint32_t expectedAddressLow = static_cast(expectedAddress & 0x0000FFFFFFFFULL); - uint32_t expectedAddressHigh = static_cast(expectedAddress >> 32); - for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { - auto pipeControl = genCmdCast(*it); - if (pipeControl && - pipeControl->getAddress() == expectedAddressLow && - pipeControl->getAddressHigh() == expectedAddressHigh && - pipeControl->getImmediateData() == 0) { - pipeControlFound = true; - break; - } - } - EXPECT_TRUE(pipeControlFound); - - clReleaseEvent(clOutEvent); -} - HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingMarkerThenDoNothing) { - device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; + auto &csr = device->getUltCommandStreamReceiver(); + csr.timestampPacketWriteEnabled = true; MockCommandQueueHw cmdQ(context.get(), device.get(), nullptr); cmdQ.enqueueMarkerWithWaitList(0, nullptr, nullptr); EXPECT_EQ(0u, cmdQ.timestampPacketContainer->peekNodes().size()); + EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); } -HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenObtainNewPacketAndEmitPipeControlWithDataWrite) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; +HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; - 
csr.storeMakeResidentAllocations = true; + + EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); MockCommandQueueHw cmdQ(context.get(), device.get(), nullptr); @@ -895,62 +857,75 @@ HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenObtainNewPacketAndEmitPi cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr); - EXPECT_NE(cmdQ.timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // new node obtained + EXPECT_EQ(cmdQ.timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // don't obtain a new node EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size()); - EXPECT_TRUE(csr.isMadeResident(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGraphicsAllocation())); - - HardwareParse hwParser; - hwParser.parseCommands(csr.commandStream, 0); - - bool pipeControlFound = false; - uint64_t expectedAddress = cmdQ.timestampPacketContainer->peekNodes().at(0)->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd); - uint32_t expectedAddressLow = static_cast(expectedAddress & 0x0000FFFFFFFFULL); - uint32_t expectedAddressHigh = static_cast(expectedAddress >> 32); - for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { - auto pipeControl = genCmdCast(*it); - if (pipeControl && - pipeControl->getAddress() == expectedAddressLow && - pipeControl->getAddressHigh() == expectedAddressHigh && - pipeControl->getImmediateData() == 0) { - pipeControlFound = true; - break; - } - } - EXPECT_TRUE(pipeControlFound); + EXPECT_TRUE(csr.stallingPipeControlOnNextFlushRequired); } -HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenObtainNewPacketAndEmitPipeControlWithWrite) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; +HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteDisabledWhenEnqueueingBarrierThenDontRequestPipeControlOnCsrFlush) { + auto &csr = device->getUltCommandStreamReceiver(); + csr.timestampPacketWriteEnabled = false; + + 
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); + + MockCommandQueueHw cmdQ(context.get(), device.get(), nullptr); + + cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr); + + EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); +} + +HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; - csr.storeMakeResidentAllocations = true; + EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); MockCommandQueueHw cmdQ(context.get(), device.get(), nullptr); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; cmdQ.enqueueBarrierWithWaitList(1, waitlist, nullptr); + EXPECT_TRUE(csr.stallingPipeControlOnNextFlushRequired); +} - userEvent.setStatus(CL_COMPLETE); - EXPECT_TRUE(csr.isMadeResident(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGraphicsAllocation())); +HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenEstimatingCsrStreamSizeThenAddSizeForPipeControl) { + auto &csr = device->getUltCommandStreamReceiver(); + DispatchFlags flags; + + csr.stallingPipeControlOnNextFlushRequired = false; + auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, *device.get()); + + csr.stallingPipeControlOnNextFlushRequired = true; + auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, *device.get()); + + size_t extendedSize = sizeWithoutPcRequest + sizeof(typename FamilyType::PIPE_CONTROL); + + EXPECT_EQ(sizeWithPcRequest, extendedSize); +} + +HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPipeControlAndResetRequestFlag) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + auto &csr = device->getUltCommandStreamReceiver(); + csr.stallingPipeControlOnNextFlushRequired = true; + csr.timestampPacketWriteEnabled = true; + + MockCommandQueueHw cmdQ(context.get(), device.get(), nullptr); + + 
MockKernelWithInternals mockKernel(*device, context.get()); + cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + + EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); HardwareParse hwParser; - hwParser.parseCommands(device->getUltCommandStreamReceiver().commandStream, 0); + hwParser.parseCommands(csr.commandStream, 0); + auto secondEnqueueOffset = csr.commandStream.getUsed(); - bool pipeControlFound = false; - uint64_t expectedAddress = cmdQ.timestampPacketContainer->peekNodes().at(0)->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd); - uint32_t expectedAddressLow = static_cast(expectedAddress & 0x0000FFFFFFFFULL); - uint32_t expectedAddressHigh = static_cast(expectedAddress >> 32); - for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { - auto pipeControl = genCmdCast(*it); - if (pipeControl && - pipeControl->getAddress() == expectedAddressLow && - pipeControl->getAddressHigh() == expectedAddressHigh && - pipeControl->getImmediateData() == 0) { - pipeControlFound = true; - break; - } - } - EXPECT_TRUE(pipeControlFound); + auto pipeControl = genCmdCast(*hwParser.cmdList.begin()); + EXPECT_NE(nullptr, pipeControl); + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation()); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); + + cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(secondEnqueueOffset, csr.commandStream.getUsed()); // nothing programmed when flag is not set } diff --git a/unit_tests/libult/ult_command_stream_receiver.h b/unit_tests/libult/ult_command_stream_receiver.h index bebe33d8cb..ddf8ea2c5e 100644 --- a/unit_tests/libult/ult_command_stream_receiver.h +++ b/unit_tests/libult/ult_command_stream_receiver.h @@ -47,6 +47,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { using 
BaseClass::CommandStreamReceiver::requiredScratchSize; using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy; using BaseClass::CommandStreamReceiver::scratchAllocation; + using BaseClass::CommandStreamReceiver::stallingPipeControlOnNextFlushRequired; using BaseClass::CommandStreamReceiver::submissionAggregator; using BaseClass::CommandStreamReceiver::taskCount; using BaseClass::CommandStreamReceiver::taskLevel;