From e66fce10b5a9fe225ed771b12cfaf1b3632b92ea Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Tue, 14 Jan 2025 01:26:36 +0100 Subject: [PATCH] Revert "performance: Signal inOrder counter with pipe control, part 6" This reverts commit fa40c1f2fb3ae8d3324cf0391c6d8fdb52057a12. Signed-off-by: Compute-Runtime-Validation --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 2 +- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 6 +- level_zero/core/source/event/event_impl.inl | 28 +---- .../cmdlist/test_cmdlist_xehp_and_later.cpp | 37 +++++- .../cmdlist/test_in_order_cmdlist_1.cpp | 91 +++++--------- .../cmdlist/test_in_order_cmdlist_2.cpp | 119 +++++++++++++++++- 6 files changed, 197 insertions(+), 86 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 5ccebfc668..701cb18622 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -428,7 +428,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_h event, launchParams); if (!launchParams.skipInOrderNonWalkerSignaling) { - handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event) && !(event && event->isCounterBased() && event->isUsingContextEndOffset()), false); + handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event), false); } appendSynchronizedDispatchCleanupSection(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index bd27a25aa6..8d32f32ab6 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -304,9 +304,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (!eventForInOrderExec->getAllocation(this->device) && Event::standaloneInOrderTimestampAllocationEnabled()) { eventForInOrderExec->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag()); } - if ((!compactEvent && this->heaplessModeEnabled) || this->asMutable() || !eventForInOrderExec->isCounterBased()) { + if (!compactEvent || this->asMutable() || !compactEvent->isCounterBased()) { dispatchEventPostSyncOperation(eventForInOrderExec, nullptr, launchParams.outListCommands, Event::STATE_CLEARED, false, false, false, false, false); - } else if (compactEvent) { + } else { eventAddress = eventForInOrderExec->getPacketAddress(this->device); isTimestampEvent = true; if (!launchParams.omitAddingEventResidency) { @@ -415,7 +415,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (inOrderExecSignalRequired) { if (inOrderNonWalkerSignalling) { - if (!launchParams.skipInOrderNonWalkerSignaling && (!(eventForInOrderExec->isCounterBased() && eventForInOrderExec->isUsingContextEndOffset()) || this->asMutable())) { + if (!launchParams.skipInOrderNonWalkerSignaling) { if (compactEvent && (compactEvent->isCounterBased() && !this->asMutable())) { auto pcCmdPtr = this->commandContainer.getCommandStream()->getSpace(0u); inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue(); diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 3b9339b822..c4edbeb82b 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -665,7 +665,7 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { } TaskCountType taskCountToWaitForL3Flush = 0; - if (((this->isCounterBased() && this->inOrderTimestampNode) || this->mitigateHostVisibleSignal) && this->device->getProductHelper().isDcFlushAllowed()) { + if (this->mitigateHostVisibleSignal && this->device->getProductHelper().isDcFlushAllowed()) { auto lock = this->csrs[0]->obtainUniqueOwnership(); this->csrs[0]->flushTagUpdate(); taskCountToWaitForL3Flush = this->csrs[0]->peekLatestFlushedTaskCount(); @@ -677,19 +677,10 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { const bool fenceWait = isKmdWaitModeEnabled() && isCounterBased() && csrs[0]->waitUserFenceSupported(); do { - if (this->isCounterBased() && this->inOrderTimestampNode) { - synchronizeTimestampCompletionWithTimeout(); - if (this->isTimestampPopulated()) { - inOrderExecInfo->setLastWaitedCounterValue(getInOrderExecSignalValueWithSubmissionCounter()); - handleSuccessfulHostSynchronization(); - ret = ZE_RESULT_SUCCESS; - } + if (fenceWait) { + ret = waitForUserFence(timeout); } else { - if (fenceWait) { - ret = waitForUserFence(timeout); - } else { - ret = queryStatus(); - } + ret = queryStatus(); } if (ret == ZE_RESULT_SUCCESS) { if (this->getKernelWithPrintfDeviceMutex() != nullptr) { @@ -799,10 +790,8 @@ template ze_result_t EventImp::queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) { ze_kernel_timestamp_result_t &result = *dstptr; - if (!this->isCounterBased() || !this->inOrderTimestampNode) { - if (queryStatus() != ZE_RESULT_SUCCESS) { - return ZE_RESULT_NOT_READY; - } + if (queryStatus() != ZE_RESULT_SUCCESS) { + return ZE_RESULT_NOT_READY; } assignKernelEventCompletionData(getHostAddress()); @@ -810,11 +799,6 @@ ze_result_t EventImp::queryKernelTimestamp(ze_kernel_timestamp_result_ if (!isTimestampPopulated()) { synchronizeTimestampCompletionWithTimeout(); - if (this->inOrderTimestampNode) { - if (!isTimestampPopulated()) { - return ZE_RESULT_NOT_READY; - } - } } auto eventTsSetFunc = [&](uint64_t ×tampFieldToCopy, uint64_t ×tampFieldForWriting) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 10cb518537..1bd695c385 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -2802,6 +2802,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, HWTEST2_F(CommandListAppendLaunchKernel, givenInOrderCmdListAndTimeStampEventWhenAppendingKernelAndEventWithOutCmdListSetThenStoreStoreDataImmClearAndSemapohreWaitPostSyncCommands, IsAtLeastXeHpCore) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using WalkerVariant = typename FamilyType::WalkerVariant; Mock<::L0::KernelImp> kernel; @@ -2812,6 +2814,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, auto result = commandList->initialize(device, NEO::EngineGroupType::compute, ZE_COMMAND_LIST_FLAG_IN_ORDER); ASSERT_EQ(ZE_RESULT_SUCCESS, result); + bool heapless = commandList->isHeaplessModeEnabled(); + auto &commandContainer = commandList->getCmdContainer(); auto cmdStream = commandContainer.getCommandStream(); @@ -2845,7 +2849,29 @@ HWTEST2_F(CommandListAppendLaunchKernel, ptrOffset(cmdStream->getCpuBase(), commandStreamOffset), cmdStream->getUsed() - commandStreamOffset)); - ASSERT_EQ(0u, outCbEventCmds.size()); + auto eventCompletionAddress = event->getCompletionFieldGpuAddress(device); + + ASSERT_EQ(heapless ? 0u : 2u, outCbEventCmds.size()); + size_t expectedSdi = heapless ? 0 : commandList->inOrderAtomicSignalingEnabled ? 1 + : 2; + + auto storeDataImmList = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(expectedSdi, storeDataImmList.size()); + auto computeWalkerList = NEO::UnitTestHelper::findAllWalkerTypeCmds(cmdList.begin(), cmdList.end()); + ASSERT_EQ(1u, computeWalkerList.size()); + auto semaphoreWaitList = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(heapless ? 0u : 1u, semaphoreWaitList.size()); + + if (!heapless) { + EXPECT_EQ(CommandToPatch::CbEventTimestampClearStoreDataImm, outCbEventCmds[0].type); + EXPECT_EQ(*storeDataImmList[0], outCbEventCmds[0].pDestination); + auto storeDataImmCmd = genCmdCast(outCbEventCmds[0].pDestination); + ASSERT_NE(nullptr, storeDataImmCmd); + EXPECT_EQ(eventCompletionAddress, storeDataImmCmd->getAddress()); + } + EXPECT_EQ(launchParams.outWalker, *computeWalkerList[0]); + + ASSERT_NE(nullptr, launchParams.outWalker); auto eventBaseAddress = event->getGpuAddress(device); WalkerVariant walkerVariant = NEO::UnitTestHelper::getWalkerVariant(launchParams.outWalker); @@ -2858,6 +2884,15 @@ HWTEST2_F(CommandListAppendLaunchKernel, } }, walkerVariant); + + if (!heapless) { + + EXPECT_EQ(CommandToPatch::CbEventTimestampPostSyncSemaphoreWait, outCbEventCmds[1].type); + EXPECT_EQ(*semaphoreWaitList[0], outCbEventCmds[1].pDestination); + auto semaphoreWaitCmd = genCmdCast(outCbEventCmds[1].pDestination); + ASSERT_NE(nullptr, semaphoreWaitCmd); + EXPECT_EQ(eventCompletionAddress, semaphoreWaitCmd->getSemaphoreGraphicsAddress()); + } } HWTEST2_F(CommandListAppendLaunchKernel, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp index d14e70e4f5..58ca0f6606 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp @@ -2616,34 +2616,42 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); - if (immCmdList->isHeaplessModeEnabled()) { - auto sdiItor = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), sdiItor); + auto sdiItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sdiItor); - auto sdiCmd = genCmdCast(*sdiItor); - ASSERT_NE(nullptr, sdiCmd); + auto sdiCmd = genCmdCast(*sdiItor); + ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(events[0]->getCompletionFieldGpuAddress(device), sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(Event::STATE_CLEARED, sdiCmd->getDataDword0()); - } + EXPECT_EQ(events[0]->getCompletionFieldGpuAddress(device), sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(Event::STATE_CLEARED, sdiCmd->getDataDword0()); auto eventBaseGpuVa = events[0]->getPacketAddress(device); + auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device); - auto walkerItor = NEO::UnitTestHelper::findWalkerTypeCmd(cmdList.begin(), cmdList.end()); + auto walkerItor = NEO::UnitTestHelper::findWalkerTypeCmd(sdiItor, cmdList.end()); ASSERT_NE(cmdList.end(), walkerItor); WalkerVariant walkerVariant = NEO::UnitTestHelper::getWalkerVariant(*walkerItor); - std::visit([eventBaseGpuVa](auto &&walker) { + std::visit([eventBaseGpuVa, eventEndGpuVa, &immCmdList, &sdiCmd](auto &&walker) { auto &postSync = walker->getPostSync(); using PostSyncType = std::decay_t; EXPECT_EQ(PostSyncType::OPERATION::OPERATION_WRITE_TIMESTAMP, postSync.getOperation()); EXPECT_EQ(eventBaseGpuVa, postSync.getDestinationAddress()); auto semaphoreCmd = genCmdCast(++walker); - ASSERT_EQ(nullptr, semaphoreCmd); - auto sdiCmd = genCmdCast(walker); - ASSERT_EQ(nullptr, sdiCmd); + ASSERT_NE(nullptr, semaphoreCmd); + + EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + + sdiCmd = genCmdCast(++semaphoreCmd); + ASSERT_NE(nullptr, sdiCmd); + + EXPECT_EQ(immCmdList->inOrderExecInfo->getBaseDeviceAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); }, walkerVariant); } @@ -2752,23 +2760,21 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), immCmdList->flushData[1])); - if (immCmdList->isHeaplessModeEnabled()) { - auto sdiItor = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), sdiItor); + auto sdiItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sdiItor); - auto sdiCmd = genCmdCast(*sdiItor); - ASSERT_NE(nullptr, sdiCmd); + auto sdiCmd = genCmdCast(*sdiItor); + ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(events[0]->getCompletionFieldGpuAddress(device), sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(Event::STATE_CLEARED, sdiCmd->getDataDword0()); + EXPECT_EQ(events[0]->getCompletionFieldGpuAddress(device), sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(Event::STATE_CLEARED, sdiCmd->getDataDword0()); - auto sdiOffset = ptrDiff(sdiCmd, cmdStream->getCpuBase()); - EXPECT_TRUE(sdiOffset >= immCmdList->flushData[0]); - EXPECT_TRUE(sdiOffset < immCmdList->flushData[1]); - } + auto sdiOffset = ptrDiff(sdiCmd, cmdStream->getCpuBase()); + EXPECT_TRUE(sdiOffset >= immCmdList->flushData[0]); + EXPECT_TRUE(sdiOffset < immCmdList->flushData[1]); - auto walkerItor = NEO::UnitTestHelper::findWalkerTypeCmd(cmdList.begin(), cmdList.end()); + auto walkerItor = NEO::UnitTestHelper::findWalkerTypeCmd(sdiItor, cmdList.end()); ASSERT_NE(cmdList.end(), walkerItor); auto eventBaseGpuVa = events[0]->getPacketAddress(device); @@ -5294,37 +5300,6 @@ HWTEST2_F(InOrderCmdListTests, givenMitigateHostVisibleSignalWhenCallingSynchron context->freeMem(hostAddress); } -HWTEST2_F(InOrderCmdListTests, givenCounterBasedTimestampHostVisibleSignalWhenCallingSynchronizeOnCbEventThenFlushDcIfSupported, MatchAny) { - auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); - - zex_counter_based_event_desc_t counterBasedDesc = {ZEX_STRUCTURE_COUNTER_BASED_EVENT_DESC}; // NOLINT(clang-analyzer-optin.core.EnumCastOutOfRange), NEO-12901 - counterBasedDesc.flags = ZEX_COUNTER_BASED_EVENT_FLAG_KERNEL_TIMESTAMP | ZEX_COUNTER_BASED_EVENT_FLAG_HOST_VISIBLE; - counterBasedDesc.signalScope = ZE_EVENT_SCOPE_FLAG_HOST; - - ze_event_handle_t handle = nullptr; - EXPECT_EQ(ZE_RESULT_SUCCESS, zexCounterBasedEventCreate2(context, device, &counterBasedDesc, &handle)); - - auto immCmdList = createImmCmdList(); - immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, handle, 0, nullptr, launchParams, false); - - EXPECT_FALSE(ultCsr->waitForTaskCountCalled); - EXPECT_FALSE(ultCsr->flushTagUpdateCalled); - - auto eventObj = Event::fromHandle(handle); - *static_cast(ptrOffset(eventObj->getHostAddress(), eventObj->getContextEndOffset())) = Event::State::STATE_SIGNALED; - EXPECT_EQ(ZE_RESULT_SUCCESS, eventObj->hostSynchronize(-1)); - - if (device->getProductHelper().isDcFlushAllowed()) { - EXPECT_TRUE(ultCsr->waitForTaskCountCalled); - EXPECT_TRUE(ultCsr->flushTagUpdateCalled); - } else { - EXPECT_FALSE(ultCsr->waitForTaskCountCalled); - EXPECT_FALSE(ultCsr->flushTagUpdateCalled); - } - - zeEventDestroy(handle); -} - HWTEST2_F(InOrderCmdListTests, givenStandaloneCbEventWhenPassingExternalInterruptIdThenAssign, MatchAny) { zex_intel_event_sync_mode_exp_desc_t syncModeDesc = {ZEX_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC}; // NOLINT(clang-analyzer-optin.core.EnumCastOutOfRange), NEO-12901 syncModeDesc.externalInterruptId = 123; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index dfa3c56a6b..71d315f984 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -1912,7 +1912,6 @@ HWTEST2_F(StandaloneInOrderTimestampAllocationTests, givenTimestampEventWhenDisp auto node2 = events[0]->inOrderTimestampNode; - *static_cast(ptrOffset(events[0]->inOrderTimestampNode->getCpuBase(), events[0]->getContextEndOffset())) = Event::State::STATE_SIGNALED; auto hostAddress = cmdList->inOrderExecInfo->getBaseHostAddress(); *hostAddress = 3; @@ -3123,6 +3122,124 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSync EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(0)); } +HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgrammingTimestampEventThenHandleChaining, IsAtLeastXeHpCore) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto immCmdList = createMultiTileImmCmdList(); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto eventPool = createEvents(1, true); + auto eventHandle = events[0]->toHandle(); + events[0]->signalScope = 0; + + bool inOrderExecSignalRequired = (immCmdList->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation && !launchParams.pipeControlSignalling); + bool inOrderNonWalkerSignalling = immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get()); + + if (!inOrderExecSignalRequired || !inOrderNonWalkerSignalling) { + GTEST_SKIP(); + } + + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, + cmdStream->getCpuBase(), + cmdStream->getUsed())); + + auto walkerItor = NEO::UnitTestHelper::findWalkerTypeCmd(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), walkerItor); + + auto semaphoreItor = find(walkerItor, cmdList.end()); + ASSERT_NE(cmdList.end(), semaphoreItor); + + auto semaphoreCmd = genCmdCast(*(semaphoreItor)); + ASSERT_NE(nullptr, semaphoreCmd); + + auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device); + + if (eventEndGpuVa != semaphoreCmd->getSemaphoreGraphicsAddress()) { + semaphoreItor = find(++semaphoreItor, cmdList.end()); + ASSERT_NE(cmdList.end(), semaphoreItor); + + semaphoreCmd = genCmdCast(*(semaphoreItor)); + ASSERT_NE(nullptr, semaphoreCmd); + } + + EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); + + semaphoreCmd = genCmdCast(++semaphoreCmd); + EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(eventEndGpuVa + events[0]->getSinglePacketSize(), semaphoreCmd->getSemaphoreGraphicsAddress()); +} + +HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgrammingTimestampEventThenHandlePacketsChaining, IsAtLeastXeHpCore) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto immCmdList = createMultiTileImmCmdList(); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto eventPool = createEvents(1, true); + auto eventHandle = events[0]->toHandle(); + events[0]->signalScope = 0; + + immCmdList->signalAllEventPackets = true; + events[0]->maxPacketCount = 4; + + bool inOrderExecSignalRequired = (immCmdList->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation && !launchParams.pipeControlSignalling); + bool inOrderNonWalkerSignalling = immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get()); + + if (!inOrderExecSignalRequired || !inOrderNonWalkerSignalling) { + GTEST_SKIP(); + } + + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, + cmdStream->getCpuBase(), + cmdStream->getUsed())); + + auto walkerItor = NEO::UnitTestHelper::findWalkerTypeCmd(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), walkerItor); + + auto semaphoreItor = find(walkerItor, cmdList.end()); + ASSERT_NE(cmdList.end(), semaphoreItor); + + auto semaphoreCmd = genCmdCast(*(semaphoreItor)); + ASSERT_NE(nullptr, semaphoreCmd); + + auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device); + + if (eventEndGpuVa != semaphoreCmd->getSemaphoreGraphicsAddress()) { + semaphoreItor = find(++semaphoreItor, cmdList.end()); + ASSERT_NE(cmdList.end(), semaphoreItor); + + semaphoreCmd = genCmdCast(*(semaphoreItor)); + ASSERT_NE(nullptr, semaphoreCmd); + } + + EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); + + semaphoreCmd = genCmdCast(++semaphoreCmd); + auto offset = events[0]->getSinglePacketSize(); + EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(eventEndGpuVa + offset, semaphoreCmd->getSemaphoreGraphicsAddress()); + + semaphoreCmd = genCmdCast(++semaphoreCmd); + offset += events[0]->getSinglePacketSize(); + EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(eventEndGpuVa + offset, semaphoreCmd->getSemaphoreGraphicsAddress()); + + semaphoreCmd = genCmdCast(++semaphoreCmd); + offset += events[0]->getSinglePacketSize(); + EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(eventEndGpuVa + offset, semaphoreCmd->getSemaphoreGraphicsAddress()); +} + HWTEST2_F(MultiTileInOrderCmdListTests, whenUsingRegularCmdListThenAddWalkerToPatch, IsAtLeastXeHpCore) { using WalkerVariant = typename FamilyType::WalkerVariant;