performance: Signal inOrder counter with pipe control

On DC flush platforms, signal the inOrder counter directly with a pipe
control. Skip the unneeded inOrder timestamp, along with its reset and
semaphore. Currently applies only to the non-profiling immediate command
list case.

Related-To: NEO-13441

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2024-12-18 15:07:04 +00:00
committed by Compute-Runtime-Automation
parent 9a14fe2478
commit 085988c5e9
8 changed files with 106 additions and 110 deletions

View File

@@ -185,7 +185,7 @@ struct CommandListCoreFamily : public CommandListImp {
void appendWaitOnInOrderDependency(std::shared_ptr<NEO::InOrderExecInfo> &inOrderExecInfo, CommandToPatchContainer *outListCommands,
uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency,
bool skipAddingWaitEventsToResidency, bool noopDispatch, bool copyOffloadOperation);
void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation);
void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall);
void handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining, bool copyOffloadOperation);
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,

View File

@@ -186,7 +186,7 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
inOrderExecInfo->setAllocationOffset(newOffset);
inOrderExecInfo->initializeAllocationsFromHost();
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation); // signal counter on new offset
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation, false); // signal counter on new offset
}
inOrderExecInfo->addCounterValue(getInOrderIncrementValue());
@@ -587,7 +587,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
}
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(event, false);
appendSignalInOrderDependencyCounter(event, false, false);
}
handleInOrderDependencyCounter(event, false, false);
event->unsetInOrderExecInfo();
@@ -635,7 +635,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
addToMappedEventList(signalEvent);
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -1416,7 +1416,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, true);
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -1677,7 +1677,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter(signalEvent, isCopyOnlyEnabled);
appendSignalInOrderDependencyCounter(signalEvent, isCopyOnlyEnabled, false);
}
if (!isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed) {
@@ -1782,7 +1782,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
if (this->isInOrderExecutionEnabled()) {
if (inOrderCopyOnlySignalingAllowed) {
appendSignalInOrderDependencyCounter(signalEvent, isCopyOnlyEnabled);
appendSignalInOrderDependencyCounter(signalEvent, isCopyOnlyEnabled, false);
handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled);
}
} else {
@@ -2223,7 +2223,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter(signalEvent, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false);
} else {
nonWalkerInOrderCmdChaining = isInOrderNonWalkerSignalingRequired(signalEvent);
}
@@ -2295,7 +2295,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, true);
if (isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
}
@@ -2536,7 +2536,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
}
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(event, false);
appendSignalInOrderDependencyCounter(event, false, false);
}
handleInOrderDependencyCounter(event, false, false);
@@ -2746,7 +2746,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
if (apiRequest) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation);
appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation, false);
}
handleInOrderDependencyCounter(nullptr, false, copyOffloadOperation);
}
@@ -2789,13 +2789,26 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSdiInOrderCounterSignalling(uin
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation) {
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall) {
uint64_t deviceAllocGpuVa = inOrderExecInfo->getBaseDeviceAddress();
uint64_t signalValue = inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
auto cmdStream = commandContainer.getCommandStream();
if (this->inOrderAtomicSignalingEnabled) {
if (stall) {
NEO::PipeControlArgs args;
args.dcFlushEnable = true;
args.workloadPartitionOffset = partitionCount > 1;
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
*cmdStream,
NEO::PostSyncMode::immediateData,
deviceAllocGpuVa + inOrderExecInfo->getAllocationOffset(),
signalValue,
device->getNEODevice()->getRootDeviceEnvironment(),
args);
} else if (this->inOrderAtomicSignalingEnabled) {
using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES;
using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE;
@@ -3016,7 +3029,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -3575,7 +3588,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, this->isInOrderExecutionEnabled(), false, isCopyOnly(false));
if (isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -3742,7 +3755,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -3789,7 +3802,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
}
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(nullptr, false);
appendSignalInOrderDependencyCounter(nullptr, false, false);
}
handleInOrderDependencyCounter(nullptr, false, false);

View File

@@ -562,7 +562,7 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignal
}
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(event, nullptr, nonWalkerSignalingHasRelaxedOrdering, false, CommandToPatch::Invalid);
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(event, false);
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(event, false, false);
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -301,7 +301,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter(event, false);
appendSignalInOrderDependencyCounter(event, false, false);
}
return ZE_RESULT_SUCCESS;

View File

@@ -299,16 +299,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
inOrderNonWalkerSignalling = isInOrderNonWalkerSignalingRequired(eventForInOrderExec);
if (inOrderExecSignalRequired) {
if (inOrderNonWalkerSignalling) {
if (!eventForInOrderExec->getAllocation(this->device) && Event::standaloneInOrderTimestampAllocationEnabled()) {
eventForInOrderExec->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag());
}
dispatchEventPostSyncOperation(eventForInOrderExec, nullptr, launchParams.outListCommands, Event::STATE_CLEARED, false, false, false, false, false);
} else {
inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
inOrderExecInfo = this->inOrderExecInfo.get();
if (eventForInOrderExec && eventForInOrderExec->isCounterBased() && !isTimestampEvent) {
eventAddress = 0;
if (!compactEvent || !this->isImmediateType() || (!compactEvent->isCounterBased() || compactEvent->isUsingContextEndOffset())) {
if (inOrderNonWalkerSignalling) {
if (!eventForInOrderExec->getAllocation(this->device) && Event::standaloneInOrderTimestampAllocationEnabled()) {
eventForInOrderExec->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag());
}
dispatchEventPostSyncOperation(eventForInOrderExec, nullptr, launchParams.outListCommands, Event::STATE_CLEARED, false, false, false, false, false);
} else {
inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
inOrderExecInfo = this->inOrderExecInfo.get();
if (eventForInOrderExec && eventForInOrderExec->isCounterBased() && !isTimestampEvent) {
eventAddress = 0;
}
}
}
}
@@ -382,7 +384,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
if (!launchParams.makeKernelCommandView) {
if (compactEvent) {
if ((compactEvent && (!compactEvent->isCounterBased() || compactEvent->isUsingContextEndOffset() || !this->isImmediateType()))) {
void **syncCmdBuffer = nullptr;
if (launchParams.outSyncCommand != nullptr) {
launchParams.outSyncCommand->type = CommandToPatch::SignalEventPostSyncPipeControl;
@@ -406,8 +408,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
if (inOrderExecSignalRequired) {
if (inOrderNonWalkerSignalling) {
if (!launchParams.skipInOrderNonWalkerSignaling) {
appendWaitOnSingleEvent(eventForInOrderExec, launchParams.outListCommands, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
appendSignalInOrderDependencyCounter(eventForInOrderExec, false);
if ((compactEvent && (compactEvent->isCounterBased() && !compactEvent->isUsingContextEndOffset() && this->isImmediateType()))) {
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, true);
} else {
appendWaitOnSingleEvent(eventForInOrderExec, launchParams.outListCommands, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, false);
}
}
} else {
launchParams.skipInOrderNonWalkerSignaling = false;

View File

@@ -143,7 +143,7 @@ struct BcsSplit {
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], nullptr, nullptr, false, true, false, true);
if (cmdList->isInOrderExecutionEnabled()) {
cmdList->appendSignalInOrderDependencyCounter(signalEvent, false);
cmdList->appendSignalInOrderDependencyCounter(signalEvent, false, false);
}
cmdList->handleInOrderDependencyCounter(signalEvent, false, false);