performance: Signal inOrder counter with pipe control, part 7

On platforms without DC flush support that do not use heapless mode, use pipe
control to handle in-order implicit dependencies only when the latest append
has an optimized counter-based event.

Resolves: HSD-18041394395, HSD-18041399091
Related-To: NEO-13441

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2025-02-25 07:20:00 +00:00
committed by Compute-Runtime-Automation
parent 276c606329
commit beb859a53a
4 changed files with 69 additions and 2 deletions

View File

@@ -2539,6 +2539,7 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
}
inOrderDependenciesSent = handleInOrderImplicitDependencies(relaxedOrderingAllowed, copyOffloadOperation);
this->latestOperationHasOptimizedCbEvent = false;
}
if (relaxedOrderingAllowed && numWaitEvents > 0 && !inOrderDependenciesSent) {
@@ -2639,7 +2640,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly(copyOffloadOperation));
} else {
auto resolveDependenciesViaPipeControls = !this->isCopyOnly(copyOffloadOperation) && !this->asMutable() && implicitDependency && (this->dcFlushSupport || !this->heaplessModeEnabled);
auto resolveDependenciesViaPipeControls = !this->isCopyOnly(copyOffloadOperation) && !this->asMutable() && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent));
if (NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get() != -1) {
resolveDependenciesViaPipeControls = NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get();

View File

@@ -1252,7 +1252,6 @@ template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushInOrderCounterSignal(bool waitOnInOrderCounterRequired) {
ze_result_t ret = ZE_RESULT_SUCCESS;
if (waitOnInOrderCounterRequired && !this->isHeaplessModeEnabled() && this->latestOperationHasOptimizedCbEvent) {
this->latestOperationHasOptimizedCbEvent = false;
this->appendSignalInOrderDependencyCounter(nullptr, false, true);
this->inOrderExecInfo->addCounterValue(this->getInOrderIncrementValue());
this->handleInOrderCounterOverflow(false);

View File

@@ -224,6 +224,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::isSyncModeQueue;
using BaseClass::isTbxMode;
using BaseClass::latestFlushIsHostVisible;
using BaseClass::latestOperationHasOptimizedCbEvent;
using BaseClass::latestOperationRequiredNonWalkerInOrderCmdsChaining;
using BaseClass::partitionCount;
using BaseClass::pipeControlMultiKernelEventSync;

View File

@@ -1004,6 +1004,72 @@ HWTEST2_F(InOrderCmdListTests, givenResolveDependenciesViaPipeControlsForInOrder
ASSERT_NE(cmdList.end(), itor);
}
// Checks which command is programmed between two kernel launches on an in-order immediate
// command list when the previous operation is flagged as having an optimized counter-based
// event: PIPE_CONTROL when DC flush is supported or heapless mode is disabled,
// MI_SEMAPHORE_WAIT otherwise.
HWTEST2_F(InOrderCmdListTests, givenOptimizedCbEventWhenSubmittingThenProgramPipeControlOrSemaphoreInBetweenDispatches, IsAtLeastXeHpCore) {
    using PipeControl = typename FamilyType::PIPE_CONTROL;
    using SemaphoreWait = typename FamilyType::MI_SEMAPHORE_WAIT;

    DebugManagerStateRestore restorer;
    // -1 means the debug flag does not override the command list's own decision.
    NEO::debugManager.flags.ResolveDependenciesViaPipeControls.set(-1);

    const uint32_t counterOffset = 64;
    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset);

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    // First dispatch; only the commands emitted by the second one are inspected.
    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    const auto secondDispatchOffset = cmdStream->getUsed();

    immCmdList->latestOperationHasOptimizedCbEvent = true;
    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);

    GenCmdList cmdList;
    auto secondDispatchStart = ptrOffset(cmdStream->getCpuBase(), secondDispatchOffset);
    const auto secondDispatchSize = cmdStream->getUsed() - secondDispatchOffset;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, secondDispatchStart, secondDispatchSize));

    const bool expectPipeControl = immCmdList->dcFlushSupport || !immCmdList->isHeaplessModeEnabled();
    auto syncCmdIt = expectPipeControl ? find<PipeControl *>(cmdList.begin(), cmdList.end())
                                       : find<SemaphoreWait *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), syncCmdIt);
}
// Checks which command is programmed between two kernel launches on an in-order immediate
// command list when the previous operation is flagged as NOT having an optimized
// counter-based event: PIPE_CONTROL when DC flush is supported, MI_SEMAPHORE_WAIT otherwise.
HWTEST2_F(InOrderCmdListTests, givenInOrderCmdListWhenSubmittingThenProgramPipeControlOrSemaphoreInBetweenDispatches, IsAtLeastXeHpCore) {
    using PipeControl = typename FamilyType::PIPE_CONTROL;
    using SemaphoreWait = typename FamilyType::MI_SEMAPHORE_WAIT;

    DebugManagerStateRestore restorer;
    // -1 means the debug flag does not override the command list's own decision.
    NEO::debugManager.flags.ResolveDependenciesViaPipeControls.set(-1);

    const uint32_t counterOffset = 64;
    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset);

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    // First dispatch; only the commands emitted by the second one are inspected.
    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    const auto secondDispatchOffset = cmdStream->getUsed();

    immCmdList->latestOperationHasOptimizedCbEvent = false;
    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);

    GenCmdList cmdList;
    auto secondDispatchStart = ptrOffset(cmdStream->getCpuBase(), secondDispatchOffset);
    const auto secondDispatchSize = cmdStream->getUsed() - secondDispatchOffset;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, secondDispatchStart, secondDispatchSize));

    const bool expectPipeControl = immCmdList->dcFlushSupport;
    auto syncCmdIt = expectPipeControl ? find<PipeControl *>(cmdList.begin(), cmdList.end())
                                       : find<SemaphoreWait *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), syncCmdIt);
}
HWTEST2_F(InOrderCmdListTests, givenDependencyFromDifferentRootDeviceWhenAppendCalledThenCreatePeerAllocation, MatchAny) {
NEO::UltDeviceFactory deviceFactory{2, 0};