mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Deferred Pipe Control programming and CSR flush on Barrier request
Change-Id: Iabae0f9159bb455518cedf7da068c7d3da72b840 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
7a476b7bc1
commit
6d610983f1
@@ -586,9 +586,4 @@ void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, Timestamp
|
||||
timestampPacketContainer->add(allocator->getTag());
|
||||
}
|
||||
}
|
||||
|
||||
// Decides whether a Pipe Control with a timestamp packet write may be emitted for the given command.
// Returns true only when this queue's timestampPacketContainer is set AND one of the following holds:
//  - commandType is CL_COMMAND_MARKER, eventsRequest.outEvent is set and the wait list is empty
//    (eventsRequest.numEventsInWaitList == 0), or
//  - commandType is CL_COMMAND_BARRIER (no further conditions on eventsRequest).
bool CommandQueue::allowTimestampPacketPipeControlWrite(uint32_t commandType, EventsRequest &eventsRequest) {
|
||||
return this->timestampPacketContainer &&
|
||||
((CL_COMMAND_MARKER == commandType && eventsRequest.outEvent && eventsRequest.numEventsInWaitList == 0) || (CL_COMMAND_BARRIER == commandType));
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -417,7 +417,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
AuxTranslationDirection auxTranslationDirection);
|
||||
|
||||
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes);
|
||||
bool allowTimestampPacketPipeControlWrite(uint32_t commandType, EventsRequest &eventsRequest);
|
||||
|
||||
Context *context;
|
||||
Device *device;
|
||||
|
||||
@@ -198,7 +198,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
TimestampPacketContainer previousTimestampPacketNodes(device->getMemoryManager());
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
bool emitPipeControlWithTimestampWrite = allowTimestampPacketPipeControlWrite(commandType, eventsRequest);
|
||||
|
||||
if (multiDispatchInfo.empty() == false) {
|
||||
HwPerfCounter *hwPerfCounter = nullptr;
|
||||
@@ -271,10 +270,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
slmUsed = multiDispatchInfo.usesSlm();
|
||||
} else if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
if (emitPipeControlWithTimestampWrite) {
|
||||
obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes);
|
||||
if (CL_COMMAND_BARRIER == commandType) {
|
||||
commandStreamReceiver.requestStallingPipeControlOnNextFlush();
|
||||
}
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
// Event from non-kernel enqueue inherits TimestampPackets from waitlist and command queue
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
@@ -330,7 +328,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
}
|
||||
|
||||
auto submissionRequired = !isCommandWithoutKernel(commandType) || emitPipeControlWithTimestampWrite;
|
||||
auto submissionRequired = !isCommandWithoutKernel(commandType);
|
||||
|
||||
if (submissionRequired) {
|
||||
completionStamp = enqueueNonBlocked<commandType>(
|
||||
@@ -502,7 +500,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
bool slmUsed,
|
||||
PrintfHandler *printfHandler) {
|
||||
|
||||
UNRECOVERABLE_IF(multiDispatchInfo.empty() && !timestampPacketContainer);
|
||||
UNRECOVERABLE_IF(multiDispatchInfo.empty());
|
||||
|
||||
auto &commandStreamReceiver = device->getCommandStreamReceiver();
|
||||
auto implicitFlush = false;
|
||||
@@ -567,9 +565,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
}
|
||||
|
||||
if (multiDispatchInfo.peekMainKernel()) {
|
||||
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());
|
||||
}
|
||||
commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());
|
||||
|
||||
DispatchFlags dispatchFlags;
|
||||
dispatchFlags.blocking = blocking;
|
||||
@@ -586,9 +582,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || commandStreamReceiver.isNTo1SubmissionModelEnabled();
|
||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
dispatchFlags.outOfDeviceDependencies = &eventsRequest;
|
||||
if (multiDispatchInfo.empty()) {
|
||||
dispatchFlags.timestampPacketForPipeControlWrite = timestampPacketContainer->peekNodes().at(0);
|
||||
}
|
||||
}
|
||||
dispatchFlags.numGrfRequired = numGrfRequired;
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
@@ -661,10 +654,6 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
|
||||
auto cmd = std::make_unique<CommandMarker>(*this, commandStreamReceiver, commandType, cmdSize);
|
||||
|
||||
if (allowTimestampPacketPipeControlWrite(commandType, eventsRequest)) {
|
||||
cmd->setTimestampPacketsForPipeControlWrite(*timestampPacketContainer);
|
||||
}
|
||||
|
||||
eventBuilder->getEvent()->setCommand(std::move(cmd));
|
||||
} else {
|
||||
//store task data in event
|
||||
|
||||
@@ -118,6 +118,7 @@ class CommandStreamReceiver {
|
||||
void cleanupResources();
|
||||
|
||||
// Records requiredPolicy in requiredThreadArbitrationPolicy; nothing else is done at the time of the call.
void requestThreadArbitrationPolicy(uint32_t requiredPolicy) { this->requiredThreadArbitrationPolicy = requiredPolicy; }
|
||||
// Sets the stallingPipeControlOnNextFlushRequired flag. The flag is only latched here;
// flushTask() checks it, clears it and programs a PIPE_CONTROL with CS stall enabled.
void requestStallingPipeControlOnNextFlush() { stallingPipeControlOnNextFlushRequired = true; }
|
||||
|
||||
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, OsContext &osContext) = 0;
|
||||
MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
|
||||
@@ -186,6 +187,7 @@ class CommandStreamReceiver {
|
||||
|
||||
LinearStream commandStream;
|
||||
|
||||
bool stallingPipeControlOnNextFlushRequired = false;
|
||||
uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
|
||||
uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
|
||||
|
||||
|
||||
@@ -254,10 +254,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
if (dispatchFlags.outOfDeviceDependencies) {
|
||||
handleEventsTimestampPacketTags(commandStreamCSR, dispatchFlags, device);
|
||||
}
|
||||
if (dispatchFlags.timestampPacketForPipeControlWrite) {
|
||||
uint64_t address = dispatchFlags.timestampPacketForPipeControlWrite->tag->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd);
|
||||
KernelCommandsHelper<GfxFamily>::programPipeControlDataWriteWithCsStall(commandStreamCSR, address, 0);
|
||||
makeResident(*dispatchFlags.timestampPacketForPipeControlWrite->getGraphicsAllocation());
|
||||
if (stallingPipeControlOnNextFlushRequired) {
|
||||
stallingPipeControlOnNextFlushRequired = false;
|
||||
auto stallingPipeControlCmd = commandStream.getSpaceForCmd<PIPE_CONTROL>();
|
||||
*stallingPipeControlCmd = PIPE_CONTROL::sInit();
|
||||
stallingPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
}
|
||||
initPageTableManagerRegisters(commandStreamCSR);
|
||||
programPreemption(commandStreamCSR, device, dispatchFlags);
|
||||
@@ -650,6 +651,9 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
|
||||
if (dispatchFlags.outOfDeviceDependencies) {
|
||||
size += dispatchFlags.outOfDeviceDependencies->numEventsInWaitList * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
}
|
||||
if (stallingPipeControlOnNextFlushRequired) {
|
||||
size += sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
@@ -14,9 +14,6 @@
|
||||
|
||||
namespace OCLRT {
|
||||
struct FlushStampTrackingObj;
|
||||
class TimestampPacket;
|
||||
template <typename TagType>
|
||||
struct TagNode;
|
||||
|
||||
namespace CSRequirements {
|
||||
//cleanup section usually contains 1-2 pipeControls BB end and place for BB start
|
||||
@@ -44,7 +41,6 @@ struct DispatchFlags {
|
||||
QueueThrottle throttle = QueueThrottle::MEDIUM;
|
||||
bool implicitFlush = false;
|
||||
bool outOfOrderExecutionAllowed = false;
|
||||
TagNode<TimestampPacket> *timestampPacketForPipeControlWrite = nullptr;
|
||||
FlushStampTrackingObj *flushStampReference = nullptr;
|
||||
PreemptionMode preemptionMode = PreemptionMode::Disabled;
|
||||
EventsRequest *outOfDeviceDependencies = nullptr;
|
||||
|
||||
@@ -252,9 +252,6 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
|
||||
dispatchFlags.lowPriority = cmdQ.getPriority() == QueuePriority::LOW;
|
||||
dispatchFlags.throttle = cmdQ.getThrottle();
|
||||
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(cmdQ.getDevice(), nullptr);
|
||||
if (timestampPacketsForPipeControlWrite) {
|
||||
dispatchFlags.timestampPacketForPipeControlWrite = timestampPacketsForPipeControlWrite->peekNodes().at(0);
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);
|
||||
|
||||
@@ -273,9 +270,4 @@ CompletionStamp &CommandMarker::submit(uint32_t taskLevel, bool terminated) {
|
||||
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
// Replaces timestampPacketsForPipeControlWrite with a newly created TimestampPacketContainer
// (constructed with the memory manager of the command queue's device) and hands inputNodes to
// its assignAndIncrementNodesRefCounts().
// NOTE(review): presumably this keeps the nodes alive until the marker is submitted - confirm
// against TimestampPacketContainer::assignAndIncrementNodesRefCounts.
void CommandMarker::setTimestampPacketsForPipeControlWrite(TimestampPacketContainer &inputNodes) {
|
||||
timestampPacketsForPipeControlWrite = std::make_unique<TimestampPacketContainer>(cmdQ.getDevice().getMemoryManager());
|
||||
timestampPacketsForPipeControlWrite->assignAndIncrementNodesRefCounts(inputNodes);
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -120,11 +120,9 @@ class CommandMarker : public Command {
|
||||
: cmdQ(cmdQ), csr(csr), clCommandType(clCommandType), commandSize(commandSize) {
|
||||
}
|
||||
|
||||
void setTimestampPacketsForPipeControlWrite(TimestampPacketContainer &inputNodes);
|
||||
CompletionStamp &submit(uint32_t taskLevel, bool terminated) override;
|
||||
|
||||
private:
|
||||
std::unique_ptr<TimestampPacketContainer> timestampPacketsForPipeControlWrite;
|
||||
CommandQueue &cmdQ;
|
||||
CommandStreamReceiver &csr;
|
||||
uint32_t clCommandType;
|
||||
|
||||
Reference in New Issue
Block a user