From 07fb7ac02e9897371e700061627cb173e9e416b2 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Fri, 4 Jul 2025 10:37:59 +0000 Subject: [PATCH] fix: copy only mi_atomic signaling for aggregated events Related-To: NEO-14557 Signed-off-by: Bartosz Dunajski --- level_zero/core/source/cmdlist/cmdlist_hw.h | 1 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 18 ++++- level_zero/core/source/event/event.h | 2 + .../cmdlist/test_in_order_cmdlist_1.cpp | 72 +++++++++++++++++++ .../cmdlist/test_in_order_cmdlist_2.cpp | 2 + 5 files changed, 92 insertions(+), 3 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index eb53c88abc..b7f49e30ce 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -309,6 +309,7 @@ struct CommandListCoreFamily : public CommandListImp { void appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool dualStreamCopyOffload, CommandToPatch::CommandType storedSemaphore); void appendSdiInOrderCounterSignalling(uint64_t baseGpuVa, uint64_t signalValue, bool copyOffloadOperation); + void appendSignalAggregatedEventAtomic(Event &event); ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions); void updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative, const ze_group_count_t &threadGroupDimensions, bool isIndirect); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 38af923a77..d7cf854af6 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1969,6 +1969,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed || emitPipeControl)) { dispatchInOrderPostOperationBarrier(signalEvent, dcFlush, isCopyOnlyEnabled); 
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false); + } else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) { + appendSignalAggregatedEventAtomic(*signalEvent); } if (!isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed) { @@ -2075,6 +2077,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false); } handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled); + } else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) { + appendSignalAggregatedEventAtomic(*signalEvent); } } else { handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled); @@ -3231,6 +3235,15 @@ void CommandListCoreFamily::appendSdiInOrderCounterSignalling(uin } } +template +void CommandListCoreFamily::appendSignalAggregatedEventAtomic(Event &event) { + using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES; + using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE; + + NEO::EncodeAtomic::programMiAtomic(*commandContainer.getCommandStream(), event.getInOrderExecInfo()->getBaseDeviceAddress(), ATOMIC_OPCODES::ATOMIC_8B_ADD, + DATA_SIZE::DATA_SIZE_QWORD, 0, 0, event.getInOrderIncrementValue(), 0); +} + template void CommandListCoreFamily::appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired) { using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES; @@ -3277,9 +3290,8 @@ void CommandListCoreFamily::appendSignalInOrderDependencyCounter( appendSdiInOrderCounterSignalling(inOrderExecInfo->getBaseHostGpuAddress(), signalValue, copyOffloadOperation); } - if (signalEvent && signalEvent->getInOrderIncrementValue() > 0) { - NEO::EncodeAtomic::programMiAtomic(*cmdStream, signalEvent->getInOrderExecInfo()->getBaseDeviceAddress(), 
ATOMIC_OPCODES::ATOMIC_8B_ADD, - DATA_SIZE::DATA_SIZE_QWORD, 0, 0, signalEvent->getInOrderIncrementValue(), 0); + if (Event::isAggregatedEvent(signalEvent)) { + appendSignalAggregatedEventAtomic(*signalEvent); } if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1 || isCopyOnly(copyOffloadOperation)) && signalEvent && signalEvent->isInterruptModeEnabled()) { diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index e691c2cd81..ffe2c66c8e 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -347,6 +347,8 @@ struct Event : _ze_event_handle_t { this->isEventOnBarrierOptimized = value; } + static bool isAggregatedEvent(const Event *event) { return (event && event->getInOrderIncrementValue() > 0); } + protected: Event(int index, Device *device) : device(device), index(index) {} diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp index 69f9d07709..aace4aa2f6 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp @@ -5940,6 +5940,78 @@ HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCallingAppendSignalInO context->freeMem(devAddress); } +HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageAndCopyOnlyCmdListWhenCallingAppendMemoryCopyWithDisabledInOrderSignalingThenSignalAtomicStorage) { + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using ATOMIC_OPCODES = typename FamilyType::MI_ATOMIC::ATOMIC_OPCODES; + using DATA_SIZE = typename FamilyType::MI_ATOMIC::DATA_SIZE; + + constexpr uint64_t incValue = static_cast(std::numeric_limits::max()) + 1234; + constexpr uint64_t counterValue = incValue * 2; + + auto devAddress = reinterpret_cast(allocDeviceMem(sizeof(uint64_t))); + + auto immCmdList = createCopyOnlyImmCmdList(); + + 
auto eventObj = createExternalSyncStorageEvent(counterValue, incValue, devAddress); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto offset = cmdStream->getUsed(); + uint32_t copyData = 0; + copyParams.forceDisableCopyOnlyInOrderSignaling = true; + + { + immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventObj->toHandle(), 0, nullptr, copyParams); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset)); + + auto it = find(cmdList.begin(), cmdList.end()); + + if (immCmdList->useAdditionalBlitProperties) { + EXPECT_EQ(cmdList.end(), it); + } else { + ASSERT_NE(cmdList.end(), it); + + auto miAtomic = genCmdCast(*it); + EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_ADD, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, miAtomic->getDataSize()); + EXPECT_EQ(getLowPart(incValue), miAtomic->getOperand1DataDword0()); + EXPECT_EQ(getHighPart(incValue), miAtomic->getOperand1DataDword1()); + + EXPECT_EQ(castToUint64(devAddress), NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + } + } + + offset = cmdStream->getUsed(); + + { + ze_copy_region_t region = {0, 0, 0, 1, 1, 1}; + + immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, eventObj->toHandle(), 0, nullptr, copyParams); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset)); + + auto it = find(cmdList.begin(), cmdList.end()); + if (immCmdList->useAdditionalBlitProperties) { + EXPECT_EQ(cmdList.end(), it); + } else { + ASSERT_NE(cmdList.end(), it); + + auto miAtomic = genCmdCast(*it); + EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_ADD, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, miAtomic->getDataSize()); + EXPECT_EQ(getLowPart(incValue), miAtomic->getOperand1DataDword0()); + EXPECT_EQ(getHighPart(incValue), 
miAtomic->getOperand1DataDword1()); + + EXPECT_EQ(castToUint64(devAddress), NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + } + } + + context->freeMem(devAddress); +} + HWTEST_F(InOrderCmdListTests, givenTimestmapEnabledWhenCreatingStandaloneCbEventThenSetCorrectPacketSize) { zex_counter_based_event_desc_t counterBasedDesc = {ZEX_STRUCTURE_COUNTER_BASED_EVENT_DESC}; counterBasedDesc.flags = ZEX_COUNTER_BASED_EVENT_FLAG_KERNEL_TIMESTAMP; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index 8e25351c24..696d4a2021 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -3977,6 +3977,8 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_ ASSERT_NE(nullptr, signalSubCopyEvent); } itor = ++flushDwItor; + } else { + ASSERT_TRUE(false); } auto semaphoreCmds = findAll(beginItor, itor);