fix: copy-only MI_ATOMIC signaling for aggregated events

Related-To: NEO-14557

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-07-04 10:37:59 +00:00
committed by Compute-Runtime-Automation
parent 9dbdbd50f2
commit 07fb7ac02e
5 changed files with 92 additions and 3 deletions

View File

@@ -309,6 +309,7 @@ struct CommandListCoreFamily : public CommandListImp {
void appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool dualStreamCopyOffload, CommandToPatch::CommandType storedSemaphore);
void appendSdiInOrderCounterSignalling(uint64_t baseGpuVa, uint64_t signalValue, bool copyOffloadOperation);
void appendSignalAggregatedEventAtomic(Event &event);
ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions);
void updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative, const ze_group_count_t &threadGroupDimensions, bool isIndirect);

View File

@@ -1969,6 +1969,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
(launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed || emitPipeControl)) {
dispatchInOrderPostOperationBarrier(signalEvent, dcFlush, isCopyOnlyEnabled);
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
} else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) {
appendSignalAggregatedEventAtomic(*signalEvent);
}
if (!isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed) {
@@ -2075,6 +2077,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled);
} else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) {
appendSignalAggregatedEventAtomic(*signalEvent);
}
} else {
handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled);
@@ -3231,6 +3235,15 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSdiInOrderCounterSignalling(uin
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalAggregatedEventAtomic(Event &event) {
    // Program an MI_ATOMIC 8-byte ADD that increments the aggregated event's
    // device-side counter by the event's in-order increment value.
    using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES;
    using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE;

    auto &cmdStream = *commandContainer.getCommandStream();
    const uint64_t counterAddress = event.getInOrderExecInfo()->getBaseDeviceAddress();
    const uint64_t incrementValue = event.getInOrderIncrementValue();

    NEO::EncodeAtomic<GfxFamily>::programMiAtomic(cmdStream, counterAddress, ATOMIC_OPCODES::ATOMIC_8B_ADD,
                                                  DATA_SIZE::DATA_SIZE_QWORD, 0, 0, incrementValue, 0);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired) {
using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES;
@@ -3277,9 +3290,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
appendSdiInOrderCounterSignalling(inOrderExecInfo->getBaseHostGpuAddress(), signalValue, copyOffloadOperation);
}
if (signalEvent && signalEvent->getInOrderIncrementValue() > 0) {
NEO::EncodeAtomic<GfxFamily>::programMiAtomic(*cmdStream, signalEvent->getInOrderExecInfo()->getBaseDeviceAddress(), ATOMIC_OPCODES::ATOMIC_8B_ADD,
DATA_SIZE::DATA_SIZE_QWORD, 0, 0, signalEvent->getInOrderIncrementValue(), 0);
if (Event::isAggregatedEvent(signalEvent)) {
appendSignalAggregatedEventAtomic(*signalEvent);
}
if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1 || isCopyOnly(copyOffloadOperation)) && signalEvent && signalEvent->isInterruptModeEnabled()) {

View File

@@ -347,6 +347,8 @@ struct Event : _ze_event_handle_t {
this->isEventOnBarrierOptimized = value;
}
static bool isAggregatedEvent(const Event *event) { return (event && event->getInOrderIncrementValue() > 0); }
protected:
Event(int index, Device *device) : device(device), index(index) {}

View File

@@ -5940,6 +5940,78 @@ HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCallingAppendSignalInO
context->freeMem(devAddress);
}
HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageAndCopyOnlyCmdListWhenCallingAppendMemoryCopyWithDisabledInOrderSignalingThenSignalAtomicStorage) {
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using ATOMIC_OPCODES = typename FamilyType::MI_ATOMIC::ATOMIC_OPCODES;
    using DATA_SIZE = typename FamilyType::MI_ATOMIC::DATA_SIZE;

    // Increment value larger than 32 bits so both operand dwords of the MI_ATOMIC are exercised.
    constexpr uint64_t incValue = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1234;
    constexpr uint64_t counterValue = incValue * 2;

    auto devAddress = reinterpret_cast<uint64_t *>(allocDeviceMem(sizeof(uint64_t)));

    auto immCmdList = createCopyOnlyImmCmdList<FamilyType::gfxCoreFamily>();
    auto eventObj = createExternalSyncStorageEvent(counterValue, incValue, devAddress);

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    uint32_t copyData = 0;
    copyParams.forceDisableCopyOnlyInOrderSignaling = true;

    // Parses commands emitted after `offset` and verifies the aggregated-event signaling:
    // when additional blit properties are used no standalone MI_ATOMIC is expected,
    // otherwise an 8B-ADD atomic on the external sync storage must be programmed.
    auto verifyAtomicSignaling = [&](size_t offset) {
        GenCmdList cmdList;
        ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset));

        auto it = find<MI_ATOMIC *>(cmdList.begin(), cmdList.end());
        if (immCmdList->useAdditionalBlitProperties) {
            EXPECT_EQ(cmdList.end(), it);
        } else {
            ASSERT_NE(cmdList.end(), it);
            auto miAtomic = genCmdCast<MI_ATOMIC *>(*it);
            EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_ADD, miAtomic->getAtomicOpcode());
            EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, miAtomic->getDataSize());
            EXPECT_EQ(getLowPart(incValue), miAtomic->getOperand1DataDword0());
            EXPECT_EQ(getHighPart(incValue), miAtomic->getOperand1DataDword1());
            EXPECT_EQ(castToUint64(devAddress), NEO::UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
        }
    };

    // 1D copy path.
    auto offset = cmdStream->getUsed();
    immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventObj->toHandle(), 0, nullptr, copyParams);
    verifyAtomicSignaling(offset);

    // Region copy path.
    offset = cmdStream->getUsed();
    ze_copy_region_t region = {0, 0, 0, 1, 1, 1};
    immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, eventObj->toHandle(), 0, nullptr, copyParams);
    verifyAtomicSignaling(offset);

    context->freeMem(devAddress);
}
HWTEST_F(InOrderCmdListTests, givenTimestmapEnabledWhenCreatingStandaloneCbEventThenSetCorrectPacketSize) {
zex_counter_based_event_desc_t counterBasedDesc = {ZEX_STRUCTURE_COUNTER_BASED_EVENT_DESC};
counterBasedDesc.flags = ZEX_COUNTER_BASED_EVENT_FLAG_KERNEL_TIMESTAMP;

View File

@@ -3977,6 +3977,8 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_
ASSERT_NE(nullptr, signalSubCopyEvent);
}
itor = ++flushDwItor;
} else {
ASSERT_TRUE(false);
}
auto semaphoreCmds = findAll<MI_SEMAPHORE_WAIT *>(beginItor, itor);