diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 9a68083e5c..cbe57a3c23 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2539,6 +2539,7 @@ inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint } inOrderDependenciesSent = handleInOrderImplicitDependencies(relaxedOrderingAllowed, copyOffloadOperation); + this->latestOperationHasOptimizedCbEvent = false; } if (relaxedOrderingAllowed && numWaitEvents > 0 && !inOrderDependenciesSent) { @@ -2639,7 +2640,7 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(std::sh NEO::EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly(copyOffloadOperation)); } else { - auto resolveDependenciesViaPipeControls = !this->isCopyOnly(copyOffloadOperation) && !this->asMutable() && implicitDependency && (this->dcFlushSupport || !this->heaplessModeEnabled); + auto resolveDependenciesViaPipeControls = !this->isCopyOnly(copyOffloadOperation) && !this->asMutable() && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent)); if (NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get() != -1) { resolveDependenciesViaPipeControls = NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 1fb4cbae9b..257bb883b8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -1252,7 +1252,6 @@ template ze_result_t CommandListCoreFamilyImmediate::flushInOrderCounterSignal(bool waitOnInOrderCounterRequired) { ze_result_t ret = ZE_RESULT_SUCCESS; if 
(waitOnInOrderCounterRequired && !this->isHeaplessModeEnabled() && this->latestOperationHasOptimizedCbEvent) { - this->latestOperationHasOptimizedCbEvent = false; this->appendSignalInOrderDependencyCounter(nullptr, false, true); this->inOrderExecInfo->addCounterValue(this->getInOrderIncrementValue()); this->handleInOrderCounterOverflow(false); diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 609906f08d..2c662a2a1d 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -224,6 +224,7 @@ struct WhiteBox> using BaseClass::isSyncModeQueue; using BaseClass::isTbxMode; using BaseClass::latestFlushIsHostVisible; + using BaseClass::latestOperationHasOptimizedCbEvent; using BaseClass::latestOperationRequiredNonWalkerInOrderCmdsChaining; using BaseClass::partitionCount; using BaseClass::pipeControlMultiKernelEventSync; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp index da0d0dd6a2..f31a3407ab 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp @@ -1004,6 +1004,72 @@ HWTEST2_F(InOrderCmdListTests, givenResolveDependenciesViaPipeControlsForInOrder ASSERT_NE(cmdList.end(), itor); } +HWTEST2_F(InOrderCmdListTests, givenOptimizedCbEventWhenSubmittingThenProgramPipeControlOrSemaphoreInBetweenDispatches, IsAtLeastXeHpCore) { + DebugManagerStateRestore restorer; + NEO::debugManager.flags.ResolveDependenciesViaPipeControls.set(-1); + + uint32_t counterOffset = 64; + + auto immCmdList = createImmCmdList(); + immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), 
groupCount, nullptr, 0, nullptr, launchParams, false); + + auto offset = cmdStream->getUsed(); + immCmdList->latestOperationHasOptimizedCbEvent = true; + + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( + cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + cmdStream->getUsed() - offset)); + + if (immCmdList->dcFlushSupport || !immCmdList->isHeaplessModeEnabled()) { + auto itor = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + } else { + auto itor = find<typename FamilyType::MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + } +} + +HWTEST2_F(InOrderCmdListTests, givenInOrderCmdListWhenSubmittingThenProgramPipeControlOrSemaphoreInBetweenDispatches, IsAtLeastXeHpCore) { + DebugManagerStateRestore restorer; + NEO::debugManager.flags.ResolveDependenciesViaPipeControls.set(-1); + + uint32_t counterOffset = 64; + + auto immCmdList = createImmCmdList(); + immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); + + auto offset = cmdStream->getUsed(); + immCmdList->latestOperationHasOptimizedCbEvent = false; + + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( + cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + cmdStream->getUsed() - offset)); + + if (immCmdList->dcFlushSupport) { + auto itor = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + } else { + auto itor = find<typename FamilyType::MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + } +} + HWTEST2_F(InOrderCmdListTests, givenDependencyFromDifferentRootDeviceWhenAppendCalledThenCreatePeerAllocation, MatchAny) 
{ NEO::UltDeviceFactory deviceFactory{2, 0};