From d582a48b1c537a717ded9dd2f9b686d0231b1386 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Fri, 25 Aug 2023 14:31:21 +0000 Subject: [PATCH] feature: make in-order events optional Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz --- .../source/cmdlist/cmdlist_hw_immediate.inl | 8 +- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 7 +- level_zero/core/source/event/event.cpp | 4 +- level_zero/core/source/event/event.h | 2 +- .../core/test/unit_tests/mocks/mock_event.h | 1 + .../test_cmdlist_append_launch_kernel_3.cpp | 114 ++++++++++++++++++ .../unit_tests/sources/event/test_event.cpp | 3 +- 7 files changed, 128 insertions(+), 11 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 4550bf9f46..503159868e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -483,7 +483,7 @@ bool CommandListCoreFamilyImmediate::isSkippingInOrderBarrierAllo auto signalEvent = Event::fromHandle(hSignalEvent); - return !(signalEvent && signalEvent->isEventTimestampFlagSet()); + return !(signalEvent && (signalEvent->isEventTimestampFlagSet() || !signalEvent->isInOrderExecEvent())); } template @@ -493,7 +493,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier(ze_even if (isInOrderExecutionEnabled()) { if (isSkippingInOrderBarrierAllowed(hSignalEvent, numWaitEvents, phWaitEvents)) { if (hSignalEvent) { - Event::fromHandle(hSignalEvent)->enableInOrderExecMode(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset); + Event::fromHandle(hSignalEvent)->updateInOrderExecState(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset); } return ZE_RESULT_SUCCESS; @@ -910,8 +910,8 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res signalEvent->setCsr(this->csr, isInOrderExecutionEnabled()); this->latestFlushIsHostVisible = signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); - if (isInOrderExecutionEnabled()) { - signalEvent->enableInOrderExecMode(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset); + if (isInOrderExecutionEnabled() && signalEvent->isInOrderExecEvent()) { + signalEvent->updateInOrderExecState(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset); } } else { this->latestFlushIsHostVisible = false; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 980301624a..3dd00b8bb2 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -166,6 +166,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K uint64_t eventAddress = 0; bool isTimestampEvent = false; + bool isInOrderExecEvent = false; bool l3FlushEnable = false; bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent; Event *compactEvent = nullptr; @@ -174,6 +175,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K event->setKernelForPrintf(kernel); } isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); + isInOrderExecEvent = event->isInOrderExecEvent(); if (compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))) { compactEvent = event; event = nullptr; @@ -288,9 +290,10 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K }; bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation && useCounterAllocationForInOrderMode()); + bool inOrderNonWalkerSignalling = event && (isTimestampEvent || !isInOrderExecEvent); if (inOrderExecSignalRequired) { - if (isTimestampEvent) { + if (inOrderNonWalkerSignalling) { dispatchEventPostSyncOperation(event, Event::STATE_CLEARED, false, false, false, false); } else { dispatchKernelArgs.eventAddress = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; @@ -316,7 +319,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } } - if (inOrderExecSignalRequired && isTimestampEvent) { + if (inOrderExecSignalRequired && inOrderNonWalkerSignalling) { appendWaitOnSingleEvent(event, false); appendSignalInOrderDependencyCounter(); diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index a168b547a3..575396baaf 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -391,9 +391,7 @@ void Event::setIsCompleted() { unsetCmdQueue(true); } -void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset) { - inOrderExecEvent = true; - +void Event::updateInOrderExecState(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset) { inOrderExecSignalValue = signalValue; inOrderExecDataAllocation = &inOrderDependenciesAllocation; inOrderAllocationOffset = allocationOffset; diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 974b499d5e..04c5d6c69b 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -215,7 +215,7 @@ struct Event : _ze_event_handle_t { void setMetricStreamer(MetricStreamer *metricStreamer) { this->metricStreamer = metricStreamer; } - void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset); + void updateInOrderExecState(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset); bool isInOrderExecEvent() const { return inOrderExecEvent; } NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; } uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } diff --git a/level_zero/core/test/unit_tests/mocks/mock_event.h b/level_zero/core/test/unit_tests/mocks/mock_event.h index ed13640eb6..c39dc99f31 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_event.h +++ b/level_zero/core/test/unit_tests/mocks/mock_event.h @@ -23,6 +23,7 @@ struct WhiteBox<::L0::Event> : public ::L0::Event { using BaseClass::Event; using BaseClass::gpuHangCheckPeriod; using BaseClass::hostAddress; + using BaseClass::inOrderExecEvent; using BaseClass::isFromIpcPool; using BaseClass::l3FlushAppliedOnKernel; using BaseClass::maxKernelCount; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 9e133307cf..1890cd04f8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -701,6 +701,8 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { for (uint32_t i = 0; i < numEvents; i++) { eventDesc.index = i; events.emplace_back(std::unique_ptr(static_cast(Event::create(eventPool.get(), &eventDesc, device)))); + EXPECT_FALSE(events.back()->inOrderExecEvent); + events.back()->inOrderExecEvent = true; } return eventPool; @@ -891,6 +893,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenEventHostSyncCalledThenCallW EXPECT_EQ(2u, ultCsr->waitUserFenecParams.callCount); // non in-order event + events[1]->inOrderExecEvent = false; events[1]->hostSynchronize(2); EXPECT_EQ(2u, ultCsr->waitUserFenecParams.callCount); } @@ -919,6 +922,20 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEven EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u); } +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenDontSetInOrderParams, IsAtLeastSkl) { + auto immCmdList = createImmCmdList(); + + auto eventPool = createEvents(1, false); + events[0]->inOrderExecEvent = false; + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); + + EXPECT_FALSE(events[0]->inOrderExecEvent); + EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u); + EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr); + EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u); +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; @@ -960,6 +977,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingSemaphoreThenProg auto eventPool = createEvents(1, false); auto eventHandle = events[0]->toHandle(); + events[0]->inOrderExecEvent = false; auto immCmdList = createImmCmdList(); @@ -996,6 +1014,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingStoreDataImmThenP auto eventPool = createEvents(1, false); auto eventHandle = events[0]->toHandle(); + events[0]->inOrderExecEvent = false; auto immCmdList = createImmCmdList(); @@ -1039,6 +1058,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetAsMaskWhenDispatchingStoreDataIm auto eventPool = createEvents(1, false); auto eventHandle = events[0]->toHandle(); + events[0]->inOrderExecEvent = false; auto immCmdList = createImmCmdList(); @@ -1456,6 +1476,62 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen EXPECT_EQ(1u, sdiCmd->getDataDword0()); } +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingRegularEventThenClearAndChainWithSyncAllocSignaling, IsAtLeastXeHpCore) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + + auto immCmdList = createImmCmdList(); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto eventPool = createEvents(1, false); + events[0]->signalScope = 0; + events[0]->inOrderExecEvent = false; + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); + + auto sdiItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + ASSERT_NE(nullptr, sdiCmd); + + EXPECT_EQ(events[0]->getCompletionFieldGpuAddress(device), sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(Event::STATE_CLEARED, sdiCmd->getDataDword0()); + + auto walkerItor = find(sdiItor, cmdList.end()); + ASSERT_NE(cmdList.end(), walkerItor); + + auto walkerCmd = genCmdCast(*walkerItor); + auto &postSync = walkerCmd->getPostSync(); + + auto eventBaseGpuVa = events[0]->getPacketAddress(device); + auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device); + + EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); + EXPECT_EQ(eventBaseGpuVa, postSync.getDestinationAddress()); + + auto semaphoreCmd = genCmdCast(++walkerCmd); + ASSERT_NE(nullptr, semaphoreCmd); + + EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + + sdiCmd = genCmdCast(++semaphoreCmd); + ASSERT_NE(nullptr, sdiCmd); + + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); +} + HWTEST2_F(InOrderCmdListTests, givenHostVisibleEventOnLatestFlushWhenCallingSynchronizeThenUseInOrderSync, IsAtLeastSkl) { auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); @@ -2199,6 +2275,43 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho EXPECT_EQ(0u, sdiCmd->getDataDword1()); } +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWithoutWaitlistAndRegularEventThenSignalSyncAllocation, IsAtLeastXeHpCore) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + + auto immCmdList = createImmCmdList(); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter); + + auto offset = cmdStream->getUsed(); + + auto eventPool = createEvents(1, false); + events[0]->inOrderExecEvent = false; + + auto eventHandle = events[0]->toHandle(); + + immCmdList->appendBarrier(eventHandle, 0, nullptr, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + (cmdStream->getUsed() - offset))); + + auto sdiItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(2u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompletion, IsAtLeastXeHpCore) { uint32_t counterOffset = 64; @@ -2872,6 +2985,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL auto eventPool = createEvents(1, true); auto eventHandle = events[0]->toHandle(); + events[0]->inOrderExecEvent = false; auto regularCmdList = createRegularCmdList(false); auto regularCopyOnlyCmdList = createRegularCmdList(true); diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 5b2cd9ea0e..5ebe6a1b69 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -3182,7 +3182,8 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostEventSyncThenExpectDownloadEventAl NEO::MockGraphicsAllocation syncAllocation(&storage, sizeof(storage)); - event->enableInOrderExecMode(syncAllocation, 1, 0); + event->inOrderExecEvent = true; + event->updateInOrderExecState(syncAllocation, 1, 0); constexpr uint64_t timeout = std::numeric_limits::max(); auto result = event->hostSynchronize(timeout);