mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
fix: copy only mi_atomic signaling for aggregated events
Related-To: NEO-14557 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
9dbdbd50f2
commit
07fb7ac02e
@@ -309,6 +309,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
void appendWaitOnSingleEvent(Event *event, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool dualStreamCopyOffload, CommandToPatch::CommandType storedSemaphore);
|
||||
|
||||
void appendSdiInOrderCounterSignalling(uint64_t baseGpuVa, uint64_t signalValue, bool copyOffloadOperation);
|
||||
void appendSignalAggregatedEventAtomic(Event &event);
|
||||
|
||||
ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions);
|
||||
void updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative, const ze_group_count_t &threadGroupDimensions, bool isIndirect);
|
||||
|
||||
@@ -1969,6 +1969,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
(launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed || emitPipeControl)) {
|
||||
dispatchInOrderPostOperationBarrier(signalEvent, dcFlush, isCopyOnlyEnabled);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
|
||||
} else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) {
|
||||
appendSignalAggregatedEventAtomic(*signalEvent);
|
||||
}
|
||||
|
||||
if (!isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed) {
|
||||
@@ -2075,6 +2077,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled);
|
||||
} else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) {
|
||||
appendSignalAggregatedEventAtomic(*signalEvent);
|
||||
}
|
||||
} else {
|
||||
handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled);
|
||||
@@ -3231,6 +3235,15 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSdiInOrderCounterSignalling(uin
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalAggregatedEventAtomic(Event &event) {
|
||||
using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES;
|
||||
using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE;
|
||||
|
||||
NEO::EncodeAtomic<GfxFamily>::programMiAtomic(*commandContainer.getCommandStream(), event.getInOrderExecInfo()->getBaseDeviceAddress(), ATOMIC_OPCODES::ATOMIC_8B_ADD,
|
||||
DATA_SIZE::DATA_SIZE_QWORD, 0, 0, event.getInOrderIncrementValue(), 0);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired) {
|
||||
using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES;
|
||||
@@ -3277,9 +3290,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
|
||||
appendSdiInOrderCounterSignalling(inOrderExecInfo->getBaseHostGpuAddress(), signalValue, copyOffloadOperation);
|
||||
}
|
||||
|
||||
if (signalEvent && signalEvent->getInOrderIncrementValue() > 0) {
|
||||
NEO::EncodeAtomic<GfxFamily>::programMiAtomic(*cmdStream, signalEvent->getInOrderExecInfo()->getBaseDeviceAddress(), ATOMIC_OPCODES::ATOMIC_8B_ADD,
|
||||
DATA_SIZE::DATA_SIZE_QWORD, 0, 0, signalEvent->getInOrderIncrementValue(), 0);
|
||||
if (Event::isAggregatedEvent(signalEvent)) {
|
||||
appendSignalAggregatedEventAtomic(*signalEvent);
|
||||
}
|
||||
|
||||
if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1 || isCopyOnly(copyOffloadOperation)) && signalEvent && signalEvent->isInterruptModeEnabled()) {
|
||||
|
||||
@@ -347,6 +347,8 @@ struct Event : _ze_event_handle_t {
|
||||
this->isEventOnBarrierOptimized = value;
|
||||
}
|
||||
|
||||
/// Returns true when @p event is non-null and reports an in-order increment
/// value greater than zero; returns false otherwise (including for nullptr).
static bool isAggregatedEvent(const Event *event) {
    if (event == nullptr) {
        return false;
    }
    return event->getInOrderIncrementValue() > 0;
}
|
||||
|
||||
protected:
|
||||
Event(int index, Device *device) : device(device), index(index) {}
|
||||
|
||||
|
||||
@@ -5940,6 +5940,78 @@ HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCallingAppendSignalInO
|
||||
context->freeMem(devAddress);
|
||||
}
|
||||
|
||||
// Checks the commands emitted by appendMemoryCopy and appendMemoryCopyRegion on a
// copy-only immediate command list when the signal event is an external-sync-storage
// event and copyParams.forceDisableCopyOnlyInOrderSignaling is set.
//
// Expected for both calls:
//  - if the command list reports useAdditionalBlitProperties: no MI_ATOMIC in the
//    commands parsed for that call,
//  - otherwise: the first MI_ATOMIC found is an ATOMIC_8B_ADD / QWORD operation whose
//    operand equals incValue and whose address equals the external device allocation.
HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageAndCopyOnlyCmdListWhenCallingAppendMemoryCopyWithDisabledInOrderSignalingThenSignalAtomicStorage) {
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using ATOMIC_OPCODES = typename FamilyType::MI_ATOMIC::ATOMIC_OPCODES;
    using DATA_SIZE = typename FamilyType::MI_ATOMIC::DATA_SIZE;

    // Larger than UINT32_MAX so that both the low and the high dword of the atomic
    // operand are non-zero and can be verified independently.
    constexpr uint64_t incValue = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1234;
    constexpr uint64_t counterValue = incValue * 2;

    auto devAddress = reinterpret_cast<uint64_t *>(allocDeviceMem(sizeof(uint64_t)));

    auto immCmdList = createCopyOnlyImmCmdList<FamilyType::gfxCoreFamily>();

    auto eventObj = createExternalSyncStorageEvent(counterValue, incValue, devAddress);

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    // Only commands written after this offset belong to the call under test.
    auto offset = cmdStream->getUsed();
    uint32_t copyData = 0;
    copyParams.forceDisableCopyOnlyInOrderSignaling = true;

    {
        immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventObj->toHandle(), 0, nullptr, copyParams);

        GenCmdList cmdList;
        ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset));

        auto it = find<MI_ATOMIC *>(cmdList.begin(), cmdList.end());

        if (immCmdList->useAdditionalBlitProperties) {
            EXPECT_EQ(cmdList.end(), it);
        } else {
            ASSERT_NE(cmdList.end(), it);

            auto miAtomic = genCmdCast<MI_ATOMIC *>(*it);
            EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_ADD, miAtomic->getAtomicOpcode());
            EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, miAtomic->getDataSize());
            EXPECT_EQ(getLowPart(incValue), miAtomic->getOperand1DataDword0());
            EXPECT_EQ(getHighPart(incValue), miAtomic->getOperand1DataDword1());

            EXPECT_EQ(castToUint64(devAddress), NEO::UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
        }
    }

    // Restart parsing from the current end of the stream for the region-copy variant.
    offset = cmdStream->getUsed();

    {
        ze_copy_region_t region = {0, 0, 0, 1, 1, 1};

        immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, eventObj->toHandle(), 0, nullptr, copyParams);

        GenCmdList cmdList;
        ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset));

        auto it = find<MI_ATOMIC *>(cmdList.begin(), cmdList.end());
        if (immCmdList->useAdditionalBlitProperties) {
            EXPECT_EQ(cmdList.end(), it);
        } else {
            ASSERT_NE(cmdList.end(), it);

            auto miAtomic = genCmdCast<MI_ATOMIC *>(*it);
            EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_ADD, miAtomic->getAtomicOpcode());
            EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, miAtomic->getDataSize());
            EXPECT_EQ(getLowPart(incValue), miAtomic->getOperand1DataDword0());
            EXPECT_EQ(getHighPart(incValue), miAtomic->getOperand1DataDword1());

            EXPECT_EQ(castToUint64(devAddress), NEO::UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomic));
        }
    }

    context->freeMem(devAddress);
}
|
||||
|
||||
HWTEST_F(InOrderCmdListTests, givenTimestmapEnabledWhenCreatingStandaloneCbEventThenSetCorrectPacketSize) {
|
||||
zex_counter_based_event_desc_t counterBasedDesc = {ZEX_STRUCTURE_COUNTER_BASED_EVENT_DESC};
|
||||
counterBasedDesc.flags = ZEX_COUNTER_BASED_EVENT_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
@@ -3977,6 +3977,8 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_
|
||||
ASSERT_NE(nullptr, signalSubCopyEvent);
|
||||
}
|
||||
itor = ++flushDwItor;
|
||||
} else {
|
||||
ASSERT_TRUE(false);
|
||||
}
|
||||
|
||||
auto semaphoreCmds = findAll<MI_SEMAPHORE_WAIT *>(beginItor, itor);
|
||||
|
||||
Reference in New Issue
Block a user