From ff252d870664ab3b2f79818d0eaae64cca33158e Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Mon, 12 Dec 2022 10:53:29 +0000 Subject: [PATCH] Fix issue in signal all event packets 4/n This commit fixes two issues in the signal event API call. The first fix repositions the signaling commands so that they are programmed before the stalling post sync command. The second fix makes sure copy command lists also support signaling all packets. Related-To: NEO-7490 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 13 +- .../cmdlist/test_cmdlist_xehp_and_later.cpp | 135 +++++++++++++++--- 2 files changed, 121 insertions(+), 27 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 219d92cc7b..950c2a83b0 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2007,6 +2007,10 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han args.commandWithPostSync = true; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED, args, hwInfo); + + if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) { + setRemainingEventPackets(event, Event::STATE_SIGNALED); + } } else { NEO::PipeControlArgs args; bool applyScope = !!event->signalScope; @@ -2015,6 +2019,11 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han event->setPacketsInUse(this->partitionCount); args.workloadPartitionOffset = true; } + + if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) { + setRemainingEventPackets(event, Event::STATE_SIGNALED); + } + if (applyScope || event->isEventTimestampFlagSet()) { NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandContainer.getCommandStream(), @@ -2034,10 +2043,6 @@ ze_result_t 
CommandListCoreFamily::appendSignalEvent(ze_event_han } } - if (this->signalAllEventPackets) { - setRemainingEventPackets(event, Event::STATE_SIGNALED); - } - if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 5507987c63..f21702335e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -737,6 +737,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { void testAppendSignalEvent(ze_event_pool_flags_t eventPoolFlags) { using FamilyType = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto commandList = std::make_unique>>(); auto engineType = copyOnly == 1 ? 
NEO::EngineGroupType::Copy : NEO::EngineGroupType::Compute; @@ -769,38 +770,94 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { ptrOffset(cmdStream->getCpuBase(), sizeBefore), (sizeAfter - sizeBefore))); - auto itorStoreDataImm = findAll(cmdList.begin(), cmdList.end()); + if constexpr (copyOnly == 1) { + uint32_t flushCmdWaFactor = 1; + if (EncodeMiFlushDW::getMiFlushDwWaSize() > 0) { + flushCmdWaFactor++; + } - uint32_t extraSignalStoreDataImm = 0; - if (eventPoolFlags == 0) { - extraSignalStoreDataImm = 1; // used packet reset for "non-TS, non-signal scope on DC Flush platforms" events performed by SDI command, other resets are via PIPE_CONTROL w/postsync - } + auto itorFlushDw = findAll(cmdList.begin(), cmdList.end()); - if constexpr (limitEventPacketes == 1) { - ASSERT_EQ(extraSignalStoreDataImm, itorStoreDataImm.size()); - } else { - uint32_t packetUsed = event->getPacketsInUse(); - uint32_t remainingPackets = event->getMaxPacketsCount() - packetUsed; - remainingPackets /= commandList->partitionCount; - ASSERT_EQ(remainingPackets + extraSignalStoreDataImm, static_cast(itorStoreDataImm.size())); + uint32_t expectedFlushDw = event->getMaxPacketsCount() * flushCmdWaFactor; + ASSERT_EQ(expectedFlushDw, itorFlushDw.size()); uint64_t gpuAddress = event->getGpuAddress(device); - gpuAddress += (packetUsed * event->getSinglePacketSize()); if (event->isUsingContextEndOffset()) { gpuAddress += event->getContextEndOffset(); } - for (uint32_t i = extraSignalStoreDataImm; i < itorStoreDataImm.size(); i++) { - auto cmd = genCmdCast(*itorStoreDataImm[i]); - EXPECT_EQ(gpuAddress, cmd->getAddress()); - EXPECT_FALSE(cmd->getStoreQword()); - EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0()); - if constexpr (multiTile == 1) { - EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); - } else { - EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable()); + for (uint32_t i = 0; i < expectedFlushDw; i++) { + auto cmd = genCmdCast(*itorFlushDw[i]); + if 
(flushCmdWaFactor == 2) { + // even flush commands are WAs + if ((i & 1) == 0) { + continue; + } + } + EXPECT_EQ(gpuAddress, cmd->getDestinationAddress()); + EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData()); + gpuAddress += event->getSinglePacketSize(); + } + + } else { + auto itorStoreDataImm = findAll(cmdList.begin(), cmdList.end()); + + uint64_t gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } + + uint32_t extraSignalStoreDataImm = 0; + if (eventPoolFlags == 0) { + extraSignalStoreDataImm = 1; // used packet reset for "non-TS, non-signal scope on DC Flush platforms" events performed by SDI command, other resets are via PIPE_CONTROL w/postsync + } + + if constexpr (limitEventPacketes == 1) { + ASSERT_EQ(extraSignalStoreDataImm, itorStoreDataImm.size()); + if (extraSignalStoreDataImm == 1) { + auto cmd = genCmdCast(*itorStoreDataImm[0]); + EXPECT_EQ(gpuAddress, cmd->getAddress()); + EXPECT_FALSE(cmd->getStoreQword()); + EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0()); + if constexpr (multiTile == 1) { + EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); + } else { + EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable()); + } + } + } else { + uint32_t packetUsed = event->getPacketsInUse(); + uint32_t remainingPackets = event->getMaxPacketsCount() - packetUsed; + EXPECT_EQ(0u, remainingPackets % commandList->partitionCount); + remainingPackets /= commandList->partitionCount; + ASSERT_EQ(remainingPackets + extraSignalStoreDataImm, static_cast(itorStoreDataImm.size())); + + if (extraSignalStoreDataImm == 1) { + auto cmd = genCmdCast(*itorStoreDataImm[itorStoreDataImm.size() - 1]); + EXPECT_EQ(gpuAddress, cmd->getAddress()); + EXPECT_FALSE(cmd->getStoreQword()); + EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0()); + if constexpr (multiTile == 1) { + EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); + } else { + 
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable()); + } + } + + gpuAddress += (packetUsed * event->getSinglePacketSize()); + + for (uint32_t i = 0; i < itorStoreDataImm.size() - extraSignalStoreDataImm; i++) { + auto cmd = genCmdCast(*itorStoreDataImm[i]); + EXPECT_EQ(gpuAddress, cmd->getAddress()); + EXPECT_FALSE(cmd->getStoreQword()); + EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0()); + if constexpr (multiTile == 1) { + EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); + } else { + EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable()); + } + gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount); } - gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount); } } } @@ -1344,6 +1401,14 @@ HWTEST2_F(MultiTileCommandListSignalAllEventPacketForCompactEventTest, givenSign } using CopyCommandListSignalAllEventPacketTest = Test>; +HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendSignalEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); +} + +HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsImmediateEventWhenAppendSignalEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEvent(0); +} + HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) { testAppendResetEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } @@ -1353,6 +1418,14 @@ HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsImmediateEv } using MultiTileCopyCommandListSignalAllEventPacketTest = Test>; +HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendSignalEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); +} + +HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, 
givenSignalPacketsImmediateEventWhenAppendSignalEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEvent(0); +} + HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) { testAppendResetEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } @@ -1362,6 +1435,14 @@ HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsIm } using CopyCommandListSignalAllEventPacketForCompactEventTest = Test>; +HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendSignalEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); +} + +HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsImmediateEventWhenAppendSignalEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEvent(0); +} + HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) { testAppendResetEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } @@ -1371,6 +1452,14 @@ HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPac } using MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest = Test>; +HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendSignalEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); +} + +HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsImmediateEventWhenAppendSignalEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEvent(0); +} + HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, 
givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) { testAppendResetEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); }