mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 18:06:32 +08:00
Fix issues in signal all event packets 5/n
This commit fixes event signaling after an append operation. The first fix adds signaling of all packets to timestamp events on copy-only lists. The second fix changes the order of dispatched commands for compute lists so that the light signal-event commands are placed before the stalling command. Related-To: NEO-7490 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e63ce337a4
commit
a734921113
@@ -1832,6 +1832,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *ev
|
||||
args.commandWithPostSync = true;
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_SIGNALED,
|
||||
args, hwInfo);
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
} else {
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = getDcFlushRequired(!!event->signalScope);
|
||||
@@ -1839,6 +1842,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *ev
|
||||
args.workloadPartitionOffset = true;
|
||||
event->setPacketsInUse(this->partitionCount);
|
||||
}
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandContainer.getCommandStream(),
|
||||
NEO::PostSyncMode::ImmediateData,
|
||||
@@ -1847,9 +1853,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *ev
|
||||
hwInfo,
|
||||
args);
|
||||
}
|
||||
if (this->signalAllEventPackets) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1866,6 +1869,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
|
||||
NEO::MiFlushArgs args;
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo);
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
}
|
||||
appendWriteKernelTimestamp(event, beforeWalker, false, false);
|
||||
}
|
||||
@@ -2199,6 +2205,10 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
bool workloadPartition = setupTimestampEventForMultiTile(event);
|
||||
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
|
||||
} else {
|
||||
if (this->signalAllEventPackets && (event->getPacketsInUse() < event->getMaxPacketsCount())) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = getDcFlushRequired(!!event->signalScope);
|
||||
@@ -2211,9 +2221,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, hwInfo);
|
||||
bool workloadPartition = isTimestampEventForMultiTile(event);
|
||||
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
|
||||
if (this->signalAllEventPackets) {
|
||||
setRemainingEventPackets(event, Event::STATE_SIGNALED);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -932,9 +932,11 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void testAppendSignalEventImmediate() {
|
||||
void testAppendSignalEventPostAppendCall(ze_event_pool_flags_t eventPoolFlags) {
|
||||
using FamilyType = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto engineType = copyOnly == 1 ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::Compute;
|
||||
@@ -945,7 +947,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = 0;
|
||||
eventPoolDesc.flags = eventPoolFlags;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
@@ -956,6 +958,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
|
||||
ASSERT_NE(nullptr, event.get());
|
||||
|
||||
commandList->setupTimestampEventForMultiTile(event.get());
|
||||
size_t sizeBefore = cmdStream->getUsed();
|
||||
commandList->appendSignalEventPostWalker(event.get());
|
||||
size_t sizeAfter = cmdStream->getUsed();
|
||||
@@ -967,34 +970,82 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
ptrOffset(cmdStream->getCpuBase(), sizeBefore),
|
||||
(sizeAfter - sizeBefore)));
|
||||
|
||||
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
if constexpr (copyOnly == 1) {
|
||||
auto itorFlushDw = findAll<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
if constexpr (limitEventPacketes == 1) {
|
||||
constexpr uint32_t expectedStoreDataImm = 0;
|
||||
ASSERT_EQ(expectedStoreDataImm, itorStoreDataImm.size());
|
||||
} else {
|
||||
uint32_t packetUsed = event->getPacketsInUse();
|
||||
uint32_t remainingPackets = event->getMaxPacketsCount() - packetUsed;
|
||||
remainingPackets /= commandList->partitionCount;
|
||||
ASSERT_EQ(remainingPackets, static_cast<uint32_t>(itorStoreDataImm.size()));
|
||||
uint32_t flushCmdWaFactor = 1;
|
||||
if (EncodeMiFlushDW<FamilyType>::getMiFlushDwWaSize() > 0) {
|
||||
flushCmdWaFactor++;
|
||||
}
|
||||
|
||||
uint32_t expectedFlushDw = event->getMaxPacketsCount();
|
||||
expectedFlushDw *= flushCmdWaFactor;
|
||||
ASSERT_EQ(expectedFlushDw, itorFlushDw.size());
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
gpuAddress += (packetUsed * event->getSinglePacketSize());
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < remainingPackets; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0());
|
||||
if constexpr (multiTile == 1) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
uint32_t startingSignalCmd = 0;
|
||||
if (eventPoolFlags != 0) {
|
||||
auto cmd = genCmdCast<MI_FLUSH_DW *>(*itorFlushDw[(flushCmdWaFactor - 1)]);
|
||||
EXPECT_EQ(0u, cmd->getDestinationAddress());
|
||||
EXPECT_EQ(0u, cmd->getImmediateData());
|
||||
|
||||
startingSignalCmd = flushCmdWaFactor;
|
||||
gpuAddress += event->getSinglePacketSize();
|
||||
}
|
||||
|
||||
for (uint32_t i = startingSignalCmd; i < expectedFlushDw; i++) {
|
||||
auto cmd = genCmdCast<MI_FLUSH_DW *>(*itorFlushDw[i]);
|
||||
if (flushCmdWaFactor == 2) {
|
||||
// even flush commands are WAs
|
||||
if ((i & 1) == 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(gpuAddress, cmd->getDestinationAddress());
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData());
|
||||
gpuAddress += event->getSinglePacketSize();
|
||||
}
|
||||
|
||||
} else {
|
||||
auto itorStoreDataImm = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
if constexpr (limitEventPacketes == 1) {
|
||||
constexpr uint32_t expectedStoreDataImm = 0;
|
||||
ASSERT_EQ(expectedStoreDataImm, itorStoreDataImm.size());
|
||||
} else {
|
||||
uint32_t packetUsed = event->getPacketsInUse();
|
||||
uint32_t remainingPackets = event->getMaxPacketsCount() - packetUsed;
|
||||
remainingPackets /= commandList->partitionCount;
|
||||
ASSERT_EQ(remainingPackets, static_cast<uint32_t>(itorStoreDataImm.size()));
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
gpuAddress += (packetUsed * event->getSinglePacketSize());
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
gpuAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < remainingPackets; i++) {
|
||||
auto cmd = genCmdCast<MI_STORE_DATA_IMM *>(*itorStoreDataImm[i]);
|
||||
EXPECT_EQ(gpuAddress, cmd->getAddress());
|
||||
EXPECT_FALSE(cmd->getStoreQword());
|
||||
EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0());
|
||||
if constexpr (multiTile == 1) {
|
||||
EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount);
|
||||
}
|
||||
if (remainingPackets > 0) {
|
||||
auto lastIterator = itorStoreDataImm[itorStoreDataImm.size() - 1];
|
||||
++lastIterator;
|
||||
auto cmd = genCmdCast<PIPE_CONTROL *>(*lastIterator);
|
||||
EXPECT_NE(nullptr, cmd);
|
||||
}
|
||||
gpuAddress += (event->getSinglePacketSize() * commandList->partitionCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1242,7 +1293,11 @@ HWTEST2_F(CommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppend
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventImmediate<gfxCoreFamily>();
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) {
|
||||
@@ -1283,7 +1338,11 @@ HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventW
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventImmediate<gfxCoreFamily>();
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) {
|
||||
@@ -1336,7 +1395,11 @@ HWTEST2_F(CommandListSignalAllEventPacketForCompactEventTest, givenSignalPackets
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventImmediate<gfxCoreFamily>();
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
@@ -1377,7 +1440,11 @@ HWTEST2_F(MultiTileCommandListSignalAllEventPacketForCompactEventTest, givenSign
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventImmediate<gfxCoreFamily>();
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
@@ -1409,6 +1476,14 @@ HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsImmediateEv
|
||||
testAppendSignalEvent<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendResetEvent<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
@@ -1426,6 +1501,14 @@ HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsIm
|
||||
testAppendSignalEvent<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatched, IsAtLeastXeHpCore) {
|
||||
testAppendResetEvent<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
@@ -1443,6 +1526,14 @@ HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPac
|
||||
testAppendSignalEvent<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendResetEvent<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
@@ -1460,6 +1551,14 @@ HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, given
|
||||
testAppendSignalEvent<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalImmediateEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(0);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsEventWhenAppendSignalTimestampEventThenAllPacketCompletionDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendSignalEventPostAppendCall<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCopyCommandListSignalAllEventPacketForCompactEventTest, givenSignalPacketsTimestampEventWhenAppendResetEventThenAllPacketResetDispatchNotNeeded, IsAtLeastXeHpCore) {
|
||||
testAppendResetEvent<gfxCoreFamily>(ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user