From b4733dedb7ff999ded78318bd793f828fc550ee5 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Mon, 2 Oct 2023 09:09:37 +0000 Subject: [PATCH] feature: skip not needed event waits in in-order mode Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.h | 2 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 18 ++++-- .../source/cmdlist/cmdlist_hw_immediate.inl | 2 +- level_zero/core/source/event/event.h | 1 + .../test_cmdlist_append_launch_kernel_3.cpp | 57 +++++++++++++++++++ 5 files changed, 72 insertions(+), 8 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 239e359193..12a00676c5 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -329,7 +329,7 @@ struct CommandListCoreFamily : CommandListImp { NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel); virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; } virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {} - bool isInOrderEventWaitRequired(const Event &event) const; + bool canSkipInOrderEventWait(const Event &event) const; void handleInOrderImplicitDependencies(bool relaxedOrderingAllowed); virtual void handleInOrderDependencyCounter(Event *signalEvent); bool isQwordInOrderCounter() const { return GfxFamily::isQwordInOrderCounter; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 09515ec034..5913abfa1d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2330,8 +2330,13 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::Gr } template -bool CommandListCoreFamily::isInOrderEventWaitRequired(const Event &event) const { - return (event.getInOrderExecDataAllocation() != &inOrderExecInfo->inOrderDependencyCounterAllocation); +bool CommandListCoreFamily::canSkipInOrderEventWait(const Event &event) const { + if (isInOrderExecutionEnabled()) { + return ((this->cmdListType == TYPE_IMMEDIATE && event.getLatestUsedCmdQueue() == this->cmdQImmediate) || // 1. Immediate CmdList can skip "regular Events" from the same CmdList + (event.getInOrderExecDataAllocation() == &inOrderExecInfo->inOrderDependencyCounterAllocation)); // 2. Both Immediate and Regular CmdLists can skip "in-order Events" from the same CmdList + } + + return false; } template @@ -2375,7 +2380,8 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu for (uint32_t i = 0; i < numEvents; i++) { auto event = Event::fromHandle(phEvent[i]); - if (this->cmdListType == TYPE_IMMEDIATE && event->isAlreadyCompleted()) { + if ((this->cmdListType == TYPE_IMMEDIATE && event->isAlreadyCompleted()) || + canSkipInOrderEventWait(*event)) { continue; } @@ -2383,9 +2389,9 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu if (!event->getInOrderExecDataAllocation()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; // in-order event not signaled yet } - if (isInOrderEventWaitRequired(*event)) { - CommandListCoreFamily::appendWaitOnInOrderDependency(event->getInOrderExecDataAllocation(), event->getInOrderExecSignalValue(), event->getInOrderAllocationOffset(), relaxedOrderingAllowed, false); - } + + CommandListCoreFamily::appendWaitOnInOrderDependency(event->getInOrderExecDataAllocation(), event->getInOrderExecSignalValue(), event->getInOrderAllocationOffset(), relaxedOrderingAllowed, false); + continue; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index b1ec44a546..80cdaeef8e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -494,7 +494,7 @@ bool CommandListCoreFamilyImmediate::isSkippingInOrderBarrierAllo uint32_t eventsToWait = numWaitEvents; for (uint32_t i = 0; i < numWaitEvents; i++) { - if (!CommandListCoreFamily::isInOrderEventWaitRequired(*Event::fromHandle(phWaitEvents[i]))) { + if (CommandListCoreFamily::canSkipInOrderEventWait(*Event::fromHandle(phWaitEvents[i]))) { eventsToWait--; } } diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 78e00e7062..b69817f377 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -227,6 +227,7 @@ struct Event : _ze_event_handle_t { return &referenceTs; } void setReferenceTs(uint64_t currentCpuTimeStamp); + const CommandQueue *getLatestUsedCmdQueue() const { return latestUsedCmdQueue; } bool hasKerneMappedTsCapability = false; protected: diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index deef2301f9..c96dc97817 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -1206,6 +1206,63 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForEventFromPreviousAp EXPECT_EQ(cmdList.end(), itor); } +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForEventFromPreviousAppendOnRegularCmdListThenSkip, IsAtLeastSkl) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto regularCmdList = createRegularCmdList(false); + + auto eventPool = createEvents(1, false); + auto eventHandle = events[0]->toHandle(); + + auto cmdStream = regularCmdList->getCmdContainer().getCommandStream(); + + regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + + auto offset = cmdStream->getUsed(); + + regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset)); + + auto itor = find(cmdList.begin(), cmdList.end()); + + ASSERT_NE(cmdList.end(), itor); // implicit dependency + + itor = find(++itor, cmdList.end()); + + EXPECT_EQ(cmdList.end(), itor); +} + +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForRegularEventFromPreviousAppendThenSkip, IsAtLeastSkl) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto immCmdList = createImmCmdList(); + + auto eventPool = createEvents(1, false); + events[0]->inOrderExecEvent = false; + auto eventHandle = events[0]->toHandle(); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + + auto offset = cmdStream->getUsed(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset)); + + auto itor = find(cmdList.begin(), cmdList.end()); + + ASSERT_NE(cmdList.end(), itor); // implicit dependency + + itor = find(++itor, cmdList.end()); + + EXPECT_EQ(cmdList.end(), itor); +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForEventFromAfterResetThenDontSkip, IsAtLeastXeHpCore) { auto immCmdList = createImmCmdList();