diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index cf0048bb62..239e359193 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -334,8 +334,9 @@ struct CommandListCoreFamily : CommandListImp { virtual void handleInOrderDependencyCounter(Event *signalEvent); bool isQwordInOrderCounter() const { return GfxFamily::isQwordInOrderCounter; } bool isInOrderNonWalkerSignalingRequired(const Event *event) const; + bool hasInOrderDependencies() const; - void addCmdForPatching(void *cmd, uint64_t counterValue, InOrderPatchCommandTypes::CmdType cmdType); + void addCmdForPatching(void *cmd, uint64_t counterValue, InOrderPatchCommandHelpers::PatchCmdType patchCmdType); InOrderPatchCommandsContainer inOrderPatchCmds; }; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 4eb0016661..09515ec034 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -139,10 +139,11 @@ ze_result_t CommandListCoreFamily::reset() { mappedTsEventList.clear(); - inOrderDependencyCounter = 0; inOrderAllocationOffset = 0; if (inOrderExecInfo) { + inOrderExecInfo->inOrderDependencyCounter = 0; + auto &inOrderDependencyCounterAllocation = inOrderExecInfo->inOrderDependencyCounterAllocation; memset(inOrderDependencyCounterAllocation.getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation.getUnderlyingBufferSize()); } @@ -152,10 +153,10 @@ ze_result_t CommandListCoreFamily::reset() { template void CommandListCoreFamily::handleInOrderDependencyCounter(Event *signalEvent) { - if (!isQwordInOrderCounter() && ((inOrderDependencyCounter + 1) == std::numeric_limits::max())) { - CommandListCoreFamily::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false, true); + if (!isQwordInOrderCounter() 
&& ((inOrderExecInfo->inOrderDependencyCounter + 1) == std::numeric_limits::max())) { + CommandListCoreFamily::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, inOrderExecInfo->inOrderDependencyCounter + 1, inOrderAllocationOffset, false, true); - inOrderDependencyCounter = 0; + inOrderExecInfo->inOrderDependencyCounter = 0; // multitile immediate writes are uint64_t aligned uint32_t offset = this->partitionCount * static_cast(sizeof(uint64_t)); @@ -167,12 +168,12 @@ void CommandListCoreFamily::handleInOrderDependencyCounter(Event CommandListCoreFamily::appendSignalInOrderDependencyCounter(); // write 1 on new offset } - inOrderDependencyCounter++; + inOrderExecInfo->inOrderDependencyCounter++; this->commandContainer.addToResidencyContainer(&inOrderExecInfo->inOrderDependencyCounterAllocation); if (signalEvent && signalEvent->isInOrderExecEvent()) { - signalEvent->updateInOrderExecState(inOrderExecInfo, this->inOrderDependencyCounter, this->inOrderAllocationOffset); + signalEvent->updateInOrderExecState(inOrderExecInfo, inOrderExecInfo->inOrderDependencyCounter, this->inOrderAllocationOffset); } } @@ -2224,24 +2225,20 @@ inline uint32_t CommandListCoreFamily::getRegionOffsetForAppendMe template void CommandListCoreFamily::handleInOrderImplicitDependencies(bool relaxedOrderingAllowed) { - auto hasInOrderDependencies = (inOrderDependencyCounter > 0); - - if (hasInOrderDependencies) { + if (hasInOrderDependencies()) { if (relaxedOrderingAllowed) { NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers(*commandContainer.getCommandStream()); } - CommandListCoreFamily::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed, true); + CommandListCoreFamily::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, inOrderExecInfo->inOrderDependencyCounter, this->inOrderAllocationOffset, 
relaxedOrderingAllowed, true); } } template inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) { - auto hasInOrderDependencies = (inOrderDependencyCounter > 0); - handleInOrderImplicitDependencies(relaxedOrderingAllowed); - if (relaxedOrderingAllowed && numWaitEvents > 0 && !hasInOrderDependencies) { + if (relaxedOrderingAllowed && numWaitEvents > 0 && !hasInOrderDependencies()) { NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers(*commandContainer.getCommandStream()); } @@ -2320,7 +2317,7 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::Gr false, true, isQwordInOrderCounter(), false); if (implicitDependency) { - addCmdForPatching(semaphoreCommand, waitValue, InOrderPatchCommandTypes::CmdType::Semaphore); + addCmdForPatching(semaphoreCommand, waitValue, InOrderPatchCommandHelpers::PatchCmdType::Semaphore); } } @@ -2426,7 +2423,7 @@ template void CommandListCoreFamily::appendSignalInOrderDependencyCounter() { using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM; - uint64_t signalValue = this->inOrderDependencyCounter + 1; + uint64_t signalValue = inOrderExecInfo->inOrderDependencyCounter + 1; uint64_t gpuVa = inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + this->inOrderAllocationOffset; @@ -2435,7 +2432,7 @@ void CommandListCoreFamily::appendSignalInOrderDependencyCounter( NEO::EncodeStoreMemory::programStoreDataImm(miStoreCmd, gpuVa, getLowPart(signalValue), getHighPart(signalValue), isQwordInOrderCounter(), (this->partitionCount > 1)); - addCmdForPatching(miStoreCmd, signalValue, InOrderPatchCommandTypes::CmdType::Sdi); + addCmdForPatching(miStoreCmd, signalValue, InOrderPatchCommandHelpers::PatchCmdType::Sdi); if (NEO::EncodeUserInterruptHelper::isOperationAllowed(NEO::EncodeUserInterruptHelper::onSignalingFenceMask)) { 
NEO::EnodeUserInterrupt::encode(*commandContainer.getCommandStream()); @@ -3458,20 +3455,25 @@ void CommandListCoreFamily::appendWaitOnSingleEvent(Event *event, } template -void CommandListCoreFamily::addCmdForPatching(void *cmd, uint64_t counterValue, InOrderPatchCommandTypes::CmdType cmdType) { +void CommandListCoreFamily::addCmdForPatching(void *cmd, uint64_t counterValue, InOrderPatchCommandHelpers::PatchCmdType patchCmdType) { if ((NEO::DebugManager.flags.EnableInOrderRegularCmdListPatching.get() != 0) && (this->cmdListType == TYPE_REGULAR)) { - this->inOrderPatchCmds.emplace_back(cmd, counterValue, cmdType); + this->inOrderPatchCmds.emplace_back(cmd, counterValue, patchCmdType); } } template void CommandListCoreFamily::patchInOrderCmds() { - if (isInOrderExecutionEnabled() && inOrderExecInfo->regularCmdListSubmissionCounter > 0) { - auto appendCounter = inOrderExecInfo->regularCmdListSubmissionCounter * inOrderDependencyCounter; + if (isInOrderExecutionEnabled() && inOrderExecInfo->regularCmdListSubmissionCounter > 1) { + auto appendCounter = InOrderPatchCommandHelpers::getAppendCounterValue(*inOrderExecInfo); for (auto &cmd : inOrderPatchCmds) { cmd.patch(appendCounter); } } } +template +bool CommandListCoreFamily::hasInOrderDependencies() const { + return (inOrderExecInfo.get() && inOrderExecInfo->inOrderDependencyCounter > 0); +} + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 8ade76632d..230954d038 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -182,7 +182,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::waitForEventsFromHost() { template bool CommandListCoreFamilyImmediate::hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const { - return (!relaxedOrderingDispatch && (numWaitEvents > 0 || 
this->inOrderDependencyCounter > 0)); + return (!relaxedOrderingDispatch && (numWaitEvents > 0 || this->hasInOrderDependencies())); } template @@ -517,7 +517,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier(ze_even if (isInOrderExecutionEnabled()) { if (isSkippingInOrderBarrierAllowed(hSignalEvent, numWaitEvents, phWaitEvents)) { if (hSignalEvent) { - Event::fromHandle(hSignalEvent)->updateInOrderExecState(inOrderExecInfo, this->inOrderDependencyCounter, this->inOrderAllocationOffset); + Event::fromHandle(hSignalEvent)->updateInOrderExecState(inOrderExecInfo, inOrderExecInfo->inOrderDependencyCounter, this->inOrderAllocationOffset); } return ZE_RESULT_SUCCESS; @@ -1253,7 +1253,7 @@ void CommandListCoreFamilyImmediate::checkAssert() { template bool CommandListCoreFamilyImmediate::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { - auto numEvents = numWaitEvents + ((inOrderDependencyCounter > 0) ? 1 : 0); + auto numEvents = numWaitEvents + (this->hasInOrderDependencies() ? 
1 : 0); return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numEvents); } @@ -1265,7 +1265,7 @@ ze_result_t CommandListCoreFamilyImmediate::synchronizeInOrderExe ze_result_t status = ZE_RESULT_NOT_READY; - auto waitValue = this->inOrderDependencyCounter; + auto waitValue = inOrderExecInfo->inOrderDependencyCounter; lastHangCheckTime = std::chrono::high_resolution_clock::now(); waitStartTime = lastHangCheckTime; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index d73c62a95f..68ef6627d9 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -304,7 +304,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K dispatchEventPostSyncOperation(event, Event::STATE_CLEARED, false, false, false, false); } else { dispatchKernelArgs.eventAddress = inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + this->inOrderAllocationOffset; - dispatchKernelArgs.postSyncImmValue = this->inOrderDependencyCounter + 1; + dispatchKernelArgs.postSyncImmValue = inOrderExecInfo->inOrderDependencyCounter + 1; } } @@ -334,7 +334,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } } else { UNRECOVERABLE_IF(!dispatchKernelArgs.outWalkerPtr); - addCmdForPatching(dispatchKernelArgs.outWalkerPtr, dispatchKernelArgs.postSyncImmValue, InOrderPatchCommandTypes::CmdType::Walker); + addCmdForPatching(dispatchKernelArgs.outWalkerPtr, dispatchKernelArgs.postSyncImmValue, InOrderPatchCommandHelpers::PatchCmdType::Walker); } } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index 9022dca805..e933b4215d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -40,7 +40,6 @@ struct CommandListImp : CommandList { protected: std::shared_ptr 
inOrderExecInfo; - uint64_t inOrderDependencyCounter = 0; uint32_t inOrderAllocationOffset = 0; ~CommandListImp() override = default; diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 520cb67cfa..e536f31795 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -537,8 +537,8 @@ void CommandQueueHw::setupCmdListsAndContextParams( auto commandList = static_cast(CommandList::fromHandle(phCommandLists[i])); commandList->setCsr(this->csr); commandList->storeReferenceTsToMappedEvents(false); - commandList->patchInOrderCmds(); commandList->incRegularCmdListSubmissionCounter(); + commandList->patchInOrderCmds(); auto &commandContainer = commandList->getCmdContainer(); diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 06607546f0..b84844779f 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -404,6 +404,10 @@ void Event::updateInOrderExecState(std::shared_ptr &newInOrderE inOrderAllocationOffset = allocationOffset; } +uint64_t Event::getInOrderExecSignalValue() const { + return (inOrderExecSignalValue + InOrderPatchCommandHelpers::getAppendCounterValue(*inOrderExecInfo)); +} + void Event::setLatestUsedCmdQueue(CommandQueue *newCmdQ) { this->latestUsedCmdQueue = newCmdQ; } diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 778f65af24..78e00e7062 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -220,7 +220,7 @@ struct Event : _ze_event_handle_t { bool isInOrderExecEvent() const { return inOrderExecEvent; } void enableInOrderMode() { this->inOrderExecEvent = true; } NEO::GraphicsAllocation *getInOrderExecDataAllocation() const; - uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } + uint64_t getInOrderExecSignalValue() const; uint32_t 
getInOrderAllocationOffset() const { return inOrderAllocationOffset; } void setLatestUsedCmdQueue(CommandQueue *newCmdQ); NEO::TimeStampData *peekReferenceTs() { diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index ae0612911b..5f9786cd7a 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -157,10 +157,11 @@ ze_result_t EventImp::queryInOrderEventStatus() { } auto hostAddress = static_cast(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset)); + auto waitValue = getInOrderExecSignalValue(); bool signaled = true; for (uint32_t i = 0; i < this->getPacketsInUse(); i++) { - if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, this->inOrderExecSignalValue, std::greater_equal())) { + if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal())) { signaled = false; break; } @@ -391,7 +392,7 @@ ze_result_t EventImp::waitForUserFence(uint64_t timeout) { uint64_t waitAddress = castToUint64(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset)); - if (!csrs[0]->waitUserFence(this->inOrderExecSignalValue, waitAddress, timeout)) { + if (!csrs[0]->waitUserFence(getInOrderExecSignalValue(), waitAddress, timeout)) { return ZE_RESULT_NOT_READY; } diff --git a/level_zero/core/source/helpers/in_order_cmd_helpers.h b/level_zero/core/source/helpers/in_order_cmd_helpers.h index e1bd0786eb..a3cd21abec 100644 --- a/level_zero/core/source/helpers/in_order_cmd_helpers.h +++ b/level_zero/core/source/helpers/in_order_cmd_helpers.h @@ -29,12 +29,21 @@ struct InOrderExecInfo : public NEO::NonCopyableClass { NEO::GraphicsAllocation &inOrderDependencyCounterAllocation; NEO::MemoryManager &memoryManager; + uint64_t inOrderDependencyCounter = 0; uint64_t regularCmdListSubmissionCounter = 0; bool isRegularCmdList = false; }; 
-namespace InOrderPatchCommandTypes { -enum class CmdType { +namespace InOrderPatchCommandHelpers { +inline uint64_t getAppendCounterValue(const InOrderExecInfo &inOrderExecInfo) { + if (inOrderExecInfo.isRegularCmdList && inOrderExecInfo.regularCmdListSubmissionCounter > 1) { + return inOrderExecInfo.inOrderDependencyCounter * (inOrderExecInfo.regularCmdListSubmissionCounter - 1); + } + + return 0; +} + +enum class PatchCmdType { None, Sdi, Semaphore, @@ -42,18 +51,18 @@ enum class CmdType { }; template -struct BaseCmd { - BaseCmd(void *cmd, uint64_t baseCounterValue, CmdType cmdType) : cmd(cmd), baseCounterValue(baseCounterValue), cmdType(cmdType) {} +struct PatchCmd { + PatchCmd(void *cmd, uint64_t baseCounterValue, PatchCmdType patchCmdType) : cmd(cmd), baseCounterValue(baseCounterValue), patchCmdType(patchCmdType) {} void patch(uint64_t appendCunterValue) { - switch (cmdType) { - case CmdType::Sdi: + switch (patchCmdType) { + case PatchCmdType::Sdi: patchSdi(appendCunterValue); break; - case CmdType::Semaphore: + case PatchCmdType::Semaphore: patchSemaphore(appendCunterValue); break; - case CmdType::Walker: + case PatchCmdType::Walker: patchComputeWalker(appendCunterValue); break; default: @@ -64,7 +73,7 @@ struct BaseCmd { void *cmd = nullptr; const uint64_t baseCounterValue = 0; - const CmdType cmdType = CmdType::None; + const PatchCmdType patchCmdType = PatchCmdType::None; protected: void patchSdi(uint64_t appendCunterValue) { @@ -88,12 +97,12 @@ struct BaseCmd { } } - BaseCmd() = delete; + PatchCmd() = delete; }; -} // namespace InOrderPatchCommandTypes +} // namespace InOrderPatchCommandHelpers template -using InOrderPatchCommandsContainer = std::vector>; +using InOrderPatchCommandsContainer = std::vector>; } // namespace L0 diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 9c3f4efb73..efef19264b 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ 
b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -77,7 +77,6 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::indirectAllocationsAllowed; using BaseClass::initialize; using BaseClass::inOrderAllocationOffset; - using BaseClass::inOrderDependencyCounter; using BaseClass::inOrderExecInfo; using BaseClass::inOrderPatchCmds; using BaseClass::isFlushTaskSubmissionEnabled; @@ -170,7 +169,6 @@ struct WhiteBox> using BaseClass::getHostPtrAlloc; using BaseClass::hostSynchronize; using BaseClass::immediateCmdListHeapSharing; - using BaseClass::inOrderDependencyCounter; using BaseClass::inOrderExecInfo; using BaseClass::inOrderPatchCmds; using BaseClass::isBcsSplitNeeded; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 3b0cb5e2c1..deef2301f9 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -959,7 +959,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEven EXPECT_EQ(MemoryConstants::pageSize64k, immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBufferSize()); EXPECT_TRUE(events[0]->inOrderExecEvent); - EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderExecInfo->inOrderDependencyCounter); EXPECT_EQ(&events[0]->inOrderExecInfo->inOrderDependencyCounterAllocation, &immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation); EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u); @@ -1369,17 +1369,17 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependen EXPECT_NE(nullptr, immCmdList->inOrderExecInfo.get()); EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, 
immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getAllocationType()); - EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(0u, immCmdList->inOrderExecInfo->inOrderDependencyCounter); auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); ultCsr->storeMakeResidentAllocations = true; immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); - EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(1u, immCmdList->inOrderExecInfo->inOrderDependencyCounter); EXPECT_EQ(1u, ultCsr->makeResidentAllocations[&immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation]); immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); - EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, immCmdList->inOrderExecInfo->inOrderDependencyCounter); EXPECT_EQ(2u, ultCsr->makeResidentAllocations[&immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation]); } @@ -1573,7 +1573,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent auto eventPool = createEvents(1, true); events[0]->signalScope = 0; - immCmdList->inOrderDependencyCounter = 1; + immCmdList->inOrderExecInfo->inOrderDependencyCounter = 1; EXPECT_TRUE(immCmdList->isRelaxedOrderingDispatchAllowed(0)); @@ -1582,7 +1582,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent zeCommandListAppendLaunchKernel(immCmdList->toHandle(), kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr); ASSERT_EQ(2u, immCmdList->flushData.size()); - EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, immCmdList->inOrderExecInfo->inOrderDependencyCounter); { @@ -1681,7 +1681,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenChainingWithRelaxedOrderingT auto eventPool = createEvents(1, true); events[0]->signalScope = 0; - immCmdList->inOrderDependencyCounter 
= 1; + immCmdList->inOrderExecInfo->inOrderDependencyCounter = 1; EXPECT_TRUE(immCmdList->isRelaxedOrderingDispatchAllowed(0)); @@ -1690,7 +1690,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenChainingWithRelaxedOrderingT zeCommandListAppendLaunchKernel(immCmdList->toHandle(), kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr); ASSERT_EQ(1u, immCmdList->flushCount); - EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, immCmdList->inOrderExecInfo->inOrderDependencyCounter); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingRegularEventThenClearAndChainWithSyncAllocSignaling, IsAtLeastXeHpCore) { @@ -1786,11 +1786,11 @@ HWTEST2_F(InOrderCmdListTests, givenHostVisibleEventOnLatestFlushWhenCallingSync using NonPostSyncWalkerMatcher = IsWithinGfxCore; HWTEST2_F(InOrderCmdListTests, givenNonPostSyncWalkerWhenPatchingThenThrow, NonPostSyncWalkerMatcher) { - InOrderPatchCommandTypes::BaseCmd incorrectCmd(nullptr, 1, InOrderPatchCommandTypes::CmdType::None); + InOrderPatchCommandHelpers::PatchCmd incorrectCmd(nullptr, 1, InOrderPatchCommandHelpers::PatchCmdType::None); EXPECT_ANY_THROW(incorrectCmd.patch(1)); - InOrderPatchCommandTypes::BaseCmd walkerCmd(nullptr, 1, InOrderPatchCommandTypes::CmdType::Walker); + InOrderPatchCommandHelpers::PatchCmd walkerCmd(nullptr, 1, InOrderPatchCommandHelpers::PatchCmdType::Walker); EXPECT_ANY_THROW(walkerCmd.patch(1)); } @@ -1815,7 +1815,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramP auto immCmdList = createImmCmdList(); immCmdList->inOrderAllocationOffset = 64; - immCmdList->inOrderDependencyCounter = 123; + immCmdList->inOrderExecInfo->inOrderDependencyCounter = 123; auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); @@ -1844,7 +1844,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramP EXPECT_EQ(expectedAddress, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), 
sdiCmd->getStoreQword()); - EXPECT_EQ(immCmdList->inOrderDependencyCounter, sdiCmd->getDataDword0()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounter, sdiCmd->getDataDword0()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenProgramPcAndSignalAlloc, NonPostSyncWalkerMatcher) { @@ -1854,7 +1854,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenPro auto immCmdList = createImmCmdList(); immCmdList->inOrderAllocationOffset = 64; - immCmdList->inOrderDependencyCounter = 123; + immCmdList->inOrderExecInfo->inOrderDependencyCounter = 123; auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); @@ -1892,7 +1892,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenPro EXPECT_EQ(expectedAddress, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); - EXPECT_EQ(immCmdList->inOrderDependencyCounter, sdiCmd->getDataDword0()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounter, sdiCmd->getDataDword0()); context->freeMem(hostAlloc); } @@ -2196,9 +2196,9 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateEventWhenWaitingFromRegularCmdListT ASSERT_EQ(1u, regularCmdList->inOrderPatchCmds.size()); if (NonPostSyncWalkerMatcher::isMatched()) { - EXPECT_EQ(InOrderPatchCommandTypes::CmdType::Sdi, regularCmdList->inOrderPatchCmds[0].cmdType); + EXPECT_EQ(InOrderPatchCommandHelpers::PatchCmdType::Sdi, regularCmdList->inOrderPatchCmds[0].patchCmdType); } else { - EXPECT_EQ(InOrderPatchCommandTypes::CmdType::Walker, regularCmdList->inOrderPatchCmds[0].cmdType); + EXPECT_EQ(InOrderPatchCommandHelpers::PatchCmdType::Walker, regularCmdList->inOrderPatchCmds[0].patchCmdType); } GenCmdList cmdList; @@ -2215,6 +2215,79 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateEventWhenWaitingFromRegularCmdListT EXPECT_NE(cmdList.end(), walkerItor); } +HWTEST2_F(InOrderCmdListTests, 
givenEventGeneratedByRegularCmdListWhenWaitingFromImmediateThenUseSubmissionCounter, IsAtLeastSkl) { + using WALKER_TYPE = typename FamilyType::WALKER_TYPE; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + ze_command_queue_desc_t desc = {}; + + auto mockCmdQHw = makeZeUniquePtr>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &desc); + mockCmdQHw->initialize(true, false, false); + + auto regularCmdList = createRegularCmdList(false); + auto immCmdList = createImmCmdList(); + + auto regularCmdListHandle = regularCmdList->toHandle(); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + auto offset = cmdStream->getUsed(); + + auto eventPool = createEvents(1, false); + auto eventHandle = events[0]->toHandle(); + + regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); + regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); + regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + uint64_t expectedCounterValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter; + + regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); + regularCmdList->close(); + + uint64_t expectedCounterAppendValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter; + + auto verifySemaphore = [&](uint64_t expectedValue) { + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset))); + + auto semaphoreItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), semaphoreItor); + auto semaphoreCmd = genCmdCast(*semaphoreItor); + ASSERT_NE(nullptr, semaphoreCmd); + + if (semaphoreCmd->getSemaphoreGraphicsAddress() == 
immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress()) { + // skip implicit dependency + semaphoreItor++; + semaphoreCmd = genCmdCast(*semaphoreItor); + ASSERT_NE(nullptr, semaphoreCmd); + } + + EXPECT_EQ(expectedValue, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + }; + + // 0 Execute calls + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false); + verifySemaphore(expectedCounterValue); + + // 1 Execute call + offset = cmdStream->getUsed(); + mockCmdQHw->executeCommandLists(1, &regularCmdListHandle, nullptr, false); + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false); + verifySemaphore(expectedCounterValue); + + // 2 Execute calls + offset = cmdStream->getUsed(); + mockCmdQHw->executeCommandLists(1, &regularCmdListHandle, nullptr, false); + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false); + verifySemaphore(expectedCounterValue + expectedCounterAppendValue); + + // 3 Execute calls + offset = cmdStream->getUsed(); + mockCmdQHw->executeCommandLists(1, &regularCmdListHandle, nullptr, false); + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false); + verifySemaphore(expectedCounterValue + (expectedCounterAppendValue * 2)); +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenDontSignalFromWalker, IsAtLeastXeHpCore) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; @@ -2610,7 +2683,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; auto immCmdList = createImmCmdList(); - 
immCmdList->inOrderDependencyCounter = std::numeric_limits::max() - 1; + immCmdList->inOrderExecInfo->inOrderDependencyCounter = std::numeric_limits::max() - 1; auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); @@ -2668,7 +2741,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo EXPECT_EQ(1u, sdiCmd->getDataDword0()); } - EXPECT_EQ(expectedCounter, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(expectedCounter, immCmdList->inOrderExecInfo->inOrderDependencyCounter); EXPECT_EQ(offset, immCmdList->inOrderAllocationOffset); EXPECT_EQ(expectedCounter, events[0]->inOrderExecSignalValue); @@ -2756,7 +2829,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); - EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(1u, immCmdList->inOrderExecInfo->inOrderDependencyCounter); auto offset = cmdStream->getUsed(); @@ -2820,7 +2893,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); - EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(1u, immCmdList->inOrderExecInfo->inOrderDependencyCounter); auto offset = cmdStream->getUsed(); @@ -2856,7 +2929,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); - EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(1u, immCmdList->inOrderExecInfo->inOrderDependencyCounter); auto offset = cmdStream->getUsed(); @@ -3404,10 +3477,10 @@ HWTEST2_F(MultiTileInOrderCmdListTests, whenUsingRegularCmdListThenAddWalkerToPa walkerFromParser2 = genCmdCast(*itor); } - EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, 
regularCmdList->inOrderExecInfo->inOrderDependencyCounter); auto verifyPatching = [&](uint64_t executionCounter) { - auto appendValue = regularCmdList->inOrderDependencyCounter * executionCounter; + auto appendValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter * executionCounter; EXPECT_EQ(1u + appendValue, walkerFromContainer1->getPostSync().getImmediateData()); EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData()); @@ -3629,10 +3702,10 @@ HWTEST2_F(InOrderRegularCmdListTests, whenUsingRegularCmdListThenAddCmdsToPatch, sdiFromParser2 = genCmdCast(*sdiItor); } - EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter); auto verifyPatching = [&](uint64_t executionCounter) { - auto appendValue = regularCmdList->inOrderDependencyCounter * executionCounter; + auto appendValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter * executionCounter; EXPECT_EQ(1u + appendValue, sdiFromContainer1->getDataDword0()); EXPECT_EQ(1u + appendValue, sdiFromParser1->getDataDword0()); @@ -3716,10 +3789,10 @@ HWTEST2_F(InOrderRegularCmdListTests, whenUsingRegularCmdListThenAddWalkerToPatc walkerFromParser2 = genCmdCast(*itor); } - EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter); auto verifyPatching = [&](uint64_t executionCounter) { - auto appendValue = regularCmdList->inOrderDependencyCounter * executionCounter; + auto appendValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter * executionCounter; EXPECT_EQ(1u + appendValue, walkerFromContainer1->getPostSync().getImmediateData()); EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData()); @@ -3754,9 +3827,9 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL size_t offset = cmdStream->getUsed(); - EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter); + 
EXPECT_EQ(0u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter); regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); - EXPECT_EQ(1u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(1u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter); { GenCmdList cmdList; @@ -3781,7 +3854,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL offset = cmdStream->getUsed(); regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); - EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter); { GenCmdList cmdList; @@ -3809,7 +3882,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL *hostAddr = 0x1234; regularCmdList->reset(); - EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(0u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter); EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset); EXPECT_EQ(0u, *hostAddr); } @@ -3829,7 +3902,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL size_t offset = cmdStream->getUsed(); - EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(0u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter); EXPECT_NE(nullptr, regularCmdList->inOrderExecInfo.get()); constexpr size_t size = 128 * sizeof(uint32_t);