diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 950c2a83b0..26621b4743 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1832,6 +1832,9 @@ void CommandListCoreFamily::appendSignalEventPostWalker(Event *ev args.commandWithPostSync = true; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_SIGNALED, args, hwInfo); + if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) { + setRemainingEventPackets(event, Event::STATE_SIGNALED); + } } else { NEO::PipeControlArgs args; args.dcFlushEnable = getDcFlushRequired(!!event->signalScope); @@ -1839,6 +1842,9 @@ void CommandListCoreFamily::appendSignalEventPostWalker(Event *ev args.workloadPartitionOffset = true; event->setPacketsInUse(this->partitionCount); } + if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) { + setRemainingEventPackets(event, Event::STATE_SIGNALED); + } NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandContainer.getCommandStream(), NEO::PostSyncMode::ImmediateData, @@ -1847,9 +1853,6 @@ void CommandListCoreFamily::appendSignalEventPostWalker(Event *ev hwInfo, args); } - if (this->signalAllEventPackets) { - setRemainingEventPackets(event, Event::STATE_SIGNALED); - } } } @@ -1866,6 +1869,9 @@ void CommandListCoreFamily::appendEventForProfilingCopyCommand(Ev NEO::MiFlushArgs args; const auto &hwInfo = this->device->getHwInfo(); NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo); + if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) { + setRemainingEventPackets(event, Event::STATE_SIGNALED); + } } appendWriteKernelTimestamp(event, beforeWalker, false, false); } @@ -2199,6 +2205,10 @@ void CommandListCoreFamily::appendEventForProfiling(Event *event, bool workloadPartition = setupTimestampEventForMultiTile(event); appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition); } else { + if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) { + setRemainingEventPackets(event, Event::STATE_SIGNALED); + } + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; args.dcFlushEnable = getDcFlushRequired(!!event->signalScope); @@ -2211,9 +2221,6 @@ void CommandListCoreFamily::appendEventForProfiling(Event *event, NEO::MemorySynchronizationCommands::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, hwInfo); bool workloadPartition = isTimestampEventForMultiTile(event); appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition); - if (this->signalAllEventPackets) { - setRemainingEventPackets(event, Event::STATE_SIGNALED); - } } } } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 13f1f1505a..45e93ce875 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -932,9 +932,11 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { } template - void testAppendSignalEventImmediate() { + void testAppendSignalEventPostAppendCall(ze_event_pool_flags_t eventPoolFlags) { using FamilyType = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto commandList = std::make_unique>>(); auto engineType = copyOnly == 1 ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::Compute; @@ -945,7 +947,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; - eventPoolDesc.flags = 0; + eventPoolDesc.flags = eventPoolFlags; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; @@ -956,6 +958,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); ASSERT_NE(nullptr, event.get()); + commandList->setupTimestampEventForMultiTile(event.get()); size_t sizeBefore = cmdStream->getUsed(); commandList->appendSignalEventPostWalker(event.get()); size_t sizeAfter = cmdStream->getUsed(); @@ -967,34 +970,82 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { ptrOffset(cmdStream->getCpuBase(), sizeBefore), (sizeAfter - sizeBefore))); - auto itorStoreDataImm = findAll(cmdList.begin(), cmdList.end()); + if constexpr (copyOnly == 1) { + auto itorFlushDw = findAll(cmdList.begin(), cmdList.end()); - if constexpr (limitEventPacketes == 1) { - constexpr uint32_t expectedStoreDataImm = 0; - ASSERT_EQ(expectedStoreDataImm, itorStoreDataImm.size()); - } else { - uint32_t packetUsed = event->getPacketsInUse(); - uint32_t remainingPackets = event->getMaxPacketsCount() - packetUsed; - remainingPackets /= commandList->partitionCount; - ASSERT_EQ(remainingPackets, static_cast(itorStoreDataImm.size())); + uint32_t flushCmdWaFactor = 1; + if (EncodeMiFlushDW::getMiFlushDwWaSize() > 0) { + flushCmdWaFactor++; + } + + uint32_t expectedFlushDw = event->getMaxPacketsCount(); + expectedFlushDw *= flushCmdWaFactor; + ASSERT_EQ(expectedFlushDw, itorFlushDw.size()); uint64_t gpuAddress = event->getGpuAddress(device); - gpuAddress += (packetUsed * event->getSinglePacketSize()); if (event->isUsingContextEndOffset()) { gpuAddress += event->getContextEndOffset(); } - for (uint32_t i = 0; i < remainingPackets; i++) { - auto cmd = genCmdCast(*itorStoreDataImm[i]); - EXPECT_EQ(gpuAddress, cmd->getAddress()); - EXPECT_FALSE(cmd->getStoreQword()); - EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0()); - if constexpr (multiTile == 1) { - EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); - } else { - EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable()); + uint32_t startingSignalCmd = 0; + if (eventPoolFlags != 0) { + auto cmd = genCmdCast(*itorFlushDw[(flushCmdWaFactor - 1)]); + EXPECT_EQ(0u, cmd->getDestinationAddress()); + EXPECT_EQ(0u, cmd->getImmediateData()); + + startingSignalCmd = flushCmdWaFactor; + gpuAddress += event->getSinglePacketSize(); + } + + for (uint32_t i = startingSignalCmd; i < expectedFlushDw; i++) { + auto cmd = genCmdCast(*itorFlushDw[i]); + if (flushCmdWaFactor == 2) { + // even flush commands are WAs + if ((i & 1) == 0) { + continue; + } + } + EXPECT_EQ(gpuAddress, cmd->getDestinationAddress()); + EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData()); + gpuAddress += event->getSinglePacketSize(); + } + + } else { + auto itorStoreDataImm = findAll(cmdList.begin(), cmdList.end()); + + if constexpr (limitEventPacketes == 1) { + constexpr uint32_t expectedStoreDataImm = 0; + ASSERT_EQ(expectedStoreDataImm, itorStoreDataImm.size()); + } else { + uint32_t packetUsed = event->getPacketsInUse(); + uint32_t remainingPackets = event->getMaxPacketsCount() - packetUsed; + remainingPackets /= commandList->partitionCount; + ASSERT_EQ(remainingPackets, static_cast(itorStoreDataImm.size())); + + uint64_t gpuAddress = event->getGpuAddress(device); + gpuAddress += (packetUsed * event->getSinglePacketSize()); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } + + for (uint32_t i = 0; i < remainingPackets; i++) { + auto cmd = genCmdCast(*itorStoreDataImm[i]); + EXPECT_EQ(gpuAddress, cmd->getAddress()); + EXPECT_FALSE(cmd->getStoreQword()); + EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0()); + if constexpr (multiTile == 1) { + EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); + } else { + EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable()); + } + gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount); + } + if (remainingPackets > 0) { + auto lastIterator = itorStoreDataImm[itorStoreDataImm.size() - 1]; + ++lastIterator; + auto cmd = genCmdCast(*lastIterator); + EXPECT_NE(nullptr, cmd); } - gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount); } } } @@ -1242,7 +1293,11 @@ HWTEST2_F(CommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppend } HWTEST2_F(CommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { - testAppendSignalEventImmediate(); + testAppendSignalEventPostAppendCall(0); +} + +HWTEST2_F(CommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } HWTEST2_F(CommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) { @@ -1283,7 +1338,11 @@ HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventW } HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { - testAppendSignalEventImmediate(); + testAppendSignalEventPostAppendCall(0); +} + +HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) { @@ -1336,7 +1395,11 @@ HWTEST2_F(CommandListSignalAllEventPacketForCompactEventTest, givenSignalPackets } HWTEST2_F(CommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { - testAppendSignalEventImmediate(); + testAppendSignalEventPostAppendCall(0); +} + +HWTEST2_F(CommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } HWTEST2_F(CommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) { @@ -1377,7 +1440,11 @@ HWTEST2_F(MultiTileCommandListSignalAllEventPacketForCompactEventTest, givenSign } HWTEST2_F(MultiTileCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { - testAppendSignalEventImmediate(); + testAppendSignalEventPostAppendCall(0); +} + +HWTEST2_F(MultiTileCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } HWTEST2_F(MultiTileCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) { @@ -1409,6 +1476,14 @@ HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsImmediateEv testAppendSignalEvent(0); } +HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(0); +} + +HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); +} + HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) { testAppendResetEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } @@ -1426,6 +1501,14 @@ HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsIm testAppendSignalEvent(0); } +HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(0); +} + +HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); +} + HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) { testAppendResetEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } @@ -1443,6 +1526,14 @@ HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPac testAppendSignalEvent(0); } +HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(0); +} + +HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); +} + HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) { testAppendResetEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); } @@ -1460,6 +1551,14 @@ HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, given testAppendSignalEvent(0); } +HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(0); +} + +HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) { + testAppendSignalEventPostAppendCall(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); +} + HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) { testAppendResetEvent(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP); }