Deferred Pipe Control programming and CSR flush on Barrier request

Change-Id: Iabae0f9159bb455518cedf7da068c7d3da72b840
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2018-10-15 10:35:45 +02:00
committed by sys_ocldev
parent 7a476b7bc1
commit 6d610983f1
10 changed files with 77 additions and 126 deletions

View File

@ -586,9 +586,4 @@ void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, Timestamp
timestampPacketContainer->add(allocator->getTag());
}
}
// Tells the enqueue path whether a timestamp-packet pipe control write is allowed for this command.
// Returns false when the queue has no timestamp packet container. Otherwise returns true for every
// barrier, and for a marker only when it has an output event and an empty wait list.
bool CommandQueue::allowTimestampPacketPipeControlWrite(uint32_t commandType, EventsRequest &eventsRequest) {
    if (!this->timestampPacketContainer) {
        return false;
    }
    if (CL_COMMAND_BARRIER == commandType) {
        return true;
    }
    const bool markerWithOutputEventOnly = (CL_COMMAND_MARKER == commandType) &&
                                           eventsRequest.outEvent &&
                                           (eventsRequest.numEventsInWaitList == 0);
    return markerWithOutputEventOnly;
}
} // namespace OCLRT

View File

@ -417,7 +417,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
AuxTranslationDirection auxTranslationDirection);
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes);
bool allowTimestampPacketPipeControlWrite(uint32_t commandType, EventsRequest &eventsRequest);
Context *context;
Device *device;

View File

@ -198,7 +198,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
TimestampPacketContainer previousTimestampPacketNodes(device->getMemoryManager());
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
bool emitPipeControlWithTimestampWrite = allowTimestampPacketPipeControlWrite(commandType, eventsRequest);
if (multiDispatchInfo.empty() == false) {
HwPerfCounter *hwPerfCounter = nullptr;
@ -271,10 +270,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
slmUsed = multiDispatchInfo.usesSlm();
} else if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
if (emitPipeControlWithTimestampWrite) {
obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes);
if (CL_COMMAND_BARRIER == commandType) {
commandStreamReceiver.requestStallingPipeControlOnNextFlush();
}
if (eventBuilder.getEvent()) {
// Event from non-kernel enqueue inherits TimestampPackets from waitlist and command queue
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
@ -330,7 +328,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
}
auto submissionRequired = !isCommandWithoutKernel(commandType) || emitPipeControlWithTimestampWrite;
auto submissionRequired = !isCommandWithoutKernel(commandType);
if (submissionRequired) {
completionStamp = enqueueNonBlocked<commandType>(
@ -502,7 +500,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
bool slmUsed,
PrintfHandler *printfHandler) {
UNRECOVERABLE_IF(multiDispatchInfo.empty() && !timestampPacketContainer);
UNRECOVERABLE_IF(multiDispatchInfo.empty());
auto &commandStreamReceiver = device->getCommandStreamReceiver();
auto implicitFlush = false;
@ -567,9 +565,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
}
if (multiDispatchInfo.peekMainKernel()) {
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());
}
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());
DispatchFlags dispatchFlags;
dispatchFlags.blocking = blocking;
@ -586,9 +582,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || commandStreamReceiver.isNTo1SubmissionModelEnabled();
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
if (multiDispatchInfo.empty()) {
dispatchFlags.timestampPacketForPipeControlWrite = timestampPacketContainer->peekNodes().at(0);
}
}
dispatchFlags.numGrfRequired = numGrfRequired;
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
@ -661,10 +654,6 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
auto cmd = std::make_unique<CommandMarker>(*this, commandStreamReceiver, commandType, cmdSize);
if (allowTimestampPacketPipeControlWrite(commandType, eventsRequest)) {
cmd->setTimestampPacketsForPipeControlWrite(*timestampPacketContainer);
}
eventBuilder->getEvent()->setCommand(std::move(cmd));
} else {
//store task data in event

View File

@ -118,6 +118,7 @@ class CommandStreamReceiver {
void cleanupResources();
// Stores the requested policy in requiredThreadArbitrationPolicy; nothing is programmed here.
void requestThreadArbitrationPolicy(uint32_t requiredPolicy) { this->requiredThreadArbitrationPolicy = requiredPolicy; }
// Sets stallingPipeControlOnNextFlushRequired. CommandStreamReceiverHw::flushTask checks this flag,
// clears it and programs a PIPE_CONTROL with command streamer stall enabled.
void requestStallingPipeControlOnNextFlush() { stallingPipeControlOnNextFlushRequired = true; }
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, OsContext &osContext) = 0;
MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
@ -186,6 +187,7 @@ class CommandStreamReceiver {
LinearStream commandStream;
bool stallingPipeControlOnNextFlushRequired = false;
uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;

View File

@ -254,10 +254,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
if (dispatchFlags.outOfDeviceDependencies) {
handleEventsTimestampPacketTags(commandStreamCSR, dispatchFlags, device);
}
if (dispatchFlags.timestampPacketForPipeControlWrite) {
uint64_t address = dispatchFlags.timestampPacketForPipeControlWrite->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
KernelCommandsHelper<GfxFamily>::programPipeControlDataWriteWithCsStall(commandStreamCSR, address, 0);
makeResident(*dispatchFlags.timestampPacketForPipeControlWrite->getGraphicsAllocation());
if (stallingPipeControlOnNextFlushRequired) {
stallingPipeControlOnNextFlushRequired = false;
auto stallingPipeControlCmd = commandStream.getSpaceForCmd<PIPE_CONTROL>();
*stallingPipeControlCmd = PIPE_CONTROL::sInit();
stallingPipeControlCmd->setCommandStreamerStallEnable(true);
}
initPageTableManagerRegisters(commandStreamCSR);
programPreemption(commandStreamCSR, device, dispatchFlags);
@ -650,6 +651,9 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
if (dispatchFlags.outOfDeviceDependencies) {
size += dispatchFlags.outOfDeviceDependencies->numEventsInWaitList * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
}
if (stallingPipeControlOnNextFlushRequired) {
size += sizeof(typename GfxFamily::PIPE_CONTROL);
}
return size;
}

View File

@ -14,9 +14,6 @@
namespace OCLRT {
struct FlushStampTrackingObj;
class TimestampPacket;
template <typename TagType>
struct TagNode;
namespace CSRequirements {
//cleanup section usually contains 1-2 pipeControls BB end and place for BB start
@ -44,7 +41,6 @@ struct DispatchFlags {
QueueThrottle throttle = QueueThrottle::MEDIUM;
bool implicitFlush = false;
bool outOfOrderExecutionAllowed = false;
TagNode<TimestampPacket> *timestampPacketForPipeControlWrite = nullptr;
FlushStampTrackingObj *flushStampReference = nullptr;
PreemptionMode preemptionMode = PreemptionMode::Disabled;
EventsRequest *outOfDeviceDependencies = nullptr;

View File

@ -252,9 +252,6 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
dispatchFlags.lowPriority = cmdQ.getPriority() == QueuePriority::LOW;
dispatchFlags.throttle = cmdQ.getThrottle();
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(cmdQ.getDevice(), nullptr);
if (timestampPacketsForPipeControlWrite) {
dispatchFlags.timestampPacketForPipeControlWrite = timestampPacketsForPipeControlWrite->peekNodes().at(0);
}
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
@ -273,9 +270,4 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
return completionStamp;
}
// Gives this marker command its own TimestampPacketContainer, constructed with the memory manager
// of the command queue's device, and fills it from inputNodes.
void CommandMarker::setTimestampPacketsForPipeControlWrite(TimestampPacketContainer &inputNodes) {
// Any container previously held by the member is released by this assignment.
timestampPacketsForPipeControlWrite = std::make_unique<TimestampPacketContainer>(cmdQ.getDevice().getMemoryManager());
// NOTE(review): presumably the ref-count increment keeps the nodes alive until submit() reads them - confirm.
timestampPacketsForPipeControlWrite->assignAndIncrementNodesRefCounts(inputNodes);
}
} // namespace OCLRT

View File

@ -120,11 +120,9 @@ class CommandMarker : public Command {
: cmdQ(cmdQ), csr(csr), clCommandType(clCommandType), commandSize(commandSize) {
}
void setTimestampPacketsForPipeControlWrite(TimestampPacketContainer &inputNodes);
CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
private:
std::unique_ptr<TimestampPacketContainer> timestampPacketsForPipeControlWrite;
CommandQueue &cmdQ;
CommandStreamReceiver &csr;
uint32_t clCommandType;

View File

@ -830,60 +830,22 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingWithoutK
clReleaseEvent(clOutEvent);
}
// With timestamp packet writes enabled, a marker that has an output event and an empty wait list is
// expected to obtain a new timestamp packet node and to program a PIPE_CONTROL whose address matches
// that node's ContextEnd data-write address and whose immediate data is 0.
HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndOutputEventWhenEnqueueingMarkerThenObtainNewPacketAndEmitPipeControlWithWrite) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
    MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);

    // The kernel enqueue makes the queue obtain its first TimestampPacket node.
    MockKernelWithInternals mockKernel(*device, context.get());
    cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    // Remember the node the queue holds before the marker so it can be compared afterwards.
    TimestampPacketContainer nodesBeforeMarker(device->getMemoryManager());
    nodesBeforeMarker.assignAndIncrementNodesRefCounts(*cmdQ.timestampPacketContainer);

    cl_event clOutEvent;
    cmdQ.enqueueMarkerWithWaitList(0, nullptr, &clOutEvent);

    EXPECT_NE(cmdQ.timestampPacketContainer->peekNodes().at(0), nodesBeforeMarker.peekNodes().at(0)); // new node obtained
    EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size());

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(device->getUltCommandStreamReceiver<FamilyType>().commandStream, 0);

    // Split the expected 64-bit write address into the two 32-bit halves exposed by PIPE_CONTROL.
    const uint64_t writeAddress = cmdQ.timestampPacketContainer->peekNodes().at(0)->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
    const uint32_t writeAddressLow = static_cast<uint32_t>(writeAddress & 0x0000FFFFFFFFULL);
    const uint32_t writeAddressHigh = static_cast<uint32_t>(writeAddress >> 32);

    bool matchingPipeControlSeen = false;
    for (auto &&parsedCmd : hwParser.cmdList) {
        auto candidate = genCmdCast<PIPE_CONTROL *>(parsedCmd);
        if (candidate &&
            candidate->getAddress() == writeAddressLow &&
            candidate->getAddressHigh() == writeAddressHigh &&
            candidate->getImmediateData() == 0) {
            matchingPipeControlSeen = true;
            break;
        }
    }
    EXPECT_TRUE(matchingPipeControlSeen);

    clReleaseEvent(clOutEvent);
}
// Marker with an empty wait list and no output event: the test expects that the queue holds no
// timestamp packet node afterwards and that no stalling pipe control is requested on the CSR.
HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingMarkerThenDoNothing) {
// NOTE(review): timestampPacketWriteEnabled is set here and again through `csr` two lines below;
// one of the two assignments looks redundant - confirm which one is meant to stay.
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);
cmdQ.enqueueMarkerWithWaitList(0, nullptr, nullptr);
// No node obtained and no pipe control request raised.
EXPECT_EQ(0u, cmdQ.timestampPacketContainer->peekNodes().size());
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
}
HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenObtainNewPacketAndEmitPipeControlWithDataWrite) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) {
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
csr.storeMakeResidentAllocations = true;
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);
@ -895,62 +857,75 @@ HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenObtainNewPacketAndEmitPi
cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr);
EXPECT_NE(cmdQ.timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // new node obtained
EXPECT_EQ(cmdQ.timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // dont obtain new node
EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size());
EXPECT_TRUE(csr.isMadeResident(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGraphicsAllocation()));
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(csr.commandStream, 0);
bool pipeControlFound = false;
uint64_t expectedAddress = cmdQ.timestampPacketContainer->peekNodes().at(0)->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
uint32_t expectedAddressLow = static_cast<uint32_t>(expectedAddress & 0x0000FFFFFFFFULL);
uint32_t expectedAddressHigh = static_cast<uint32_t>(expectedAddress >> 32);
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
auto pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
if (pipeControl &&
pipeControl->getAddress() == expectedAddressLow &&
pipeControl->getAddressHigh() == expectedAddressHigh &&
pipeControl->getImmediateData() == 0) {
pipeControlFound = true;
break;
}
}
EXPECT_TRUE(pipeControlFound);
EXPECT_TRUE(csr.stallingPipeControlOnNextFlushRequired);
}
HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenObtainNewPacketAndEmitPipeControlWithWrite) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
// With timestamp packet writes disabled, enqueueing a barrier must leave the CSR's stalling pipe
// control request flag unset.
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteDisabledWhenEnqueueingBarrierThenDontRequestPipeControlOnCsrFlush) {
    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.timestampPacketWriteEnabled = false;

    // Precondition: nothing has requested a stalling pipe control yet.
    EXPECT_FALSE(ultCsr.stallingPipeControlOnNextFlushRequired);

    MockCommandQueueHw<FamilyType> barrierQueue(context.get(), device.get(), nullptr);
    barrierQueue.enqueueBarrierWithWaitList(0, nullptr, nullptr);

    EXPECT_FALSE(ultCsr.stallingPipeControlOnNextFlushRequired);
}
// A barrier enqueued with a user event in its wait list must still raise the CSR's stalling pipe
// control request when timestamp packet writes are enabled.
HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) {
    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.timestampPacketWriteEnabled = true;

    // Precondition: the request flag starts cleared.
    EXPECT_FALSE(ultCsr.stallingPipeControlOnNextFlushRequired);

    MockCommandQueueHw<FamilyType> barrierQueue(context.get(), device.get(), nullptr);

    // The barrier waits on a user event whose status is never set in this test.
    UserEvent blockingEvent;
    cl_event waitlist[] = {&blockingEvent};
    barrierQueue.enqueueBarrierWithWaitList(1, waitlist, nullptr);

    EXPECT_TRUE(ultCsr.stallingPipeControlOnNextFlushRequired);
}
userEvent.setStatus(CL_COMPLETE);
EXPECT_TRUE(csr.isMadeResident(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGraphicsAllocation()));
// getRequiredCmdStreamSize must grow by exactly one PIPE_CONTROL when a stalling pipe control has
// been requested for the next flush.
HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenEstimatingCsrStreamSizeThenAddSizeForPipeControl) {
    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    DispatchFlags defaultFlags;

    // Baseline estimate with the request flag cleared.
    ultCsr.stallingPipeControlOnNextFlushRequired = false;
    auto sizeWithoutPcRequest = ultCsr.getRequiredCmdStreamSize(defaultFlags, *device);

    // Estimate again with the request flag raised.
    ultCsr.stallingPipeControlOnNextFlushRequired = true;
    auto sizeWithPcRequest = ultCsr.getRequiredCmdStreamSize(defaultFlags, *device);

    size_t extendedSize = sizeWithoutPcRequest + sizeof(typename FamilyType::PIPE_CONTROL);
    EXPECT_EQ(sizeWithPcRequest, extendedSize);
}
// Pre-sets the stalling pipe control request on the CSR, enqueues a kernel, then checks that the
// flag was cleared and that the first parsed CSR command is a PIPE_CONTROL with command streamer
// stall enabled and no post-sync write. A second enqueue is expected to leave the CSR stream size
// unchanged because the flag is no longer set.
HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPipeControlAndResetRequestFlag) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.stallingPipeControlOnNextFlushRequired = true;
csr.timestampPacketWriteEnabled = true;
MockCommandQueueHw<FamilyType> cmdQ(context.get(), device.get(), nullptr);
MockKernelWithInternals mockKernel(*device, context.get());
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
// The flush triggered by the enqueue is expected to consume the request.
EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired);
HardwareParse hwParser;
// NOTE(review): the CSR command stream is parsed twice below (once via device->..., once via csr);
// this looks like a leftover duplicate - confirm that only one call is intended.
hwParser.parseCommands<FamilyType>(device->getUltCommandStreamReceiver<FamilyType>().commandStream, 0);
hwParser.parseCommands<FamilyType>(csr.commandStream, 0);
// Stream usage after the first enqueue; compared again after the second enqueue.
auto secondEnqueueOffset = csr.commandStream.getUsed();
// NOTE(review): the search below looks for a PIPE_CONTROL matching the ContextEnd data-write address,
// which appears inconsistent with the NO_WRITE expectation further down - confirm whether it is stale.
bool pipeControlFound = false;
uint64_t expectedAddress = cmdQ.timestampPacketContainer->peekNodes().at(0)->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
uint32_t expectedAddressLow = static_cast<uint32_t>(expectedAddress & 0x0000FFFFFFFFULL);
uint32_t expectedAddressHigh = static_cast<uint32_t>(expectedAddress >> 32);
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
auto pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
if (pipeControl &&
pipeControl->getAddress() == expectedAddressLow &&
pipeControl->getAddressHigh() == expectedAddressHigh &&
pipeControl->getImmediateData() == 0) {
pipeControlFound = true;
break;
}
}
EXPECT_TRUE(pipeControlFound);
// The very first parsed command must be the stalling PIPE_CONTROL without a post-sync write.
auto pipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*hwParser.cmdList.begin());
EXPECT_NE(nullptr, pipeControl);
EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation());
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(secondEnqueueOffset, csr.commandStream.getUsed()); // nothing programmed when flag is not set
}

View File

@ -47,6 +47,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
using BaseClass::CommandStreamReceiver::requiredScratchSize;
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
using BaseClass::CommandStreamReceiver::scratchAllocation;
using BaseClass::CommandStreamReceiver::stallingPipeControlOnNextFlushRequired;
using BaseClass::CommandStreamReceiver::submissionAggregator;
using BaseClass::CommandStreamReceiver::taskCount;
using BaseClass::CommandStreamReceiver::taskLevel;