diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 72f2959b2c..3871e69daf 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -183,7 +183,7 @@ void CommandListCoreFamily::handleInOrderDependencyCounter(Event this->addResidency(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation()); - if (signalEvent && signalEvent->getInOrderIncrementValue() == 0) { + if (signalEvent && signalEvent->getInOrderIncrementValue(this->partitionCount) == 0) { if (signalEvent->isCounterBased() || nonWalkerInOrderCmdsChaining || (isImmediateType() && this->duplicatedInOrderCounterStorageEnabled)) { assignInOrderExecInfoToEvent(signalEvent); } else { @@ -3321,8 +3321,10 @@ void CommandListCoreFamily::appendSignalAggregatedEventAtomic(Eve using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES; using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE; + auto incValue = event.getInOrderIncrementValue(this->partitionCount); + NEO::EncodeAtomic::programMiAtomic(*commandContainer.getCommandStream(), event.getInOrderExecInfo()->getBaseDeviceAddress(), ATOMIC_OPCODES::ATOMIC_8B_ADD, - DATA_SIZE::DATA_SIZE_QWORD, 0, 0, event.getInOrderIncrementValue(), 0); + DATA_SIZE::DATA_SIZE_QWORD, 0, 0, incValue, 0); } template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 27e90dd19c..b6b17b684c 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -1962,7 +1962,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendStagingMemoryCo return ret; } - if (event && event->isCounterBased() && event->getInOrderIncrementValue() == 0) { + if (event && event->isCounterBased() && event->getInOrderIncrementValue(this->partitionCount) == 0) { this->assignInOrderExecInfoToEvent(event); } 
else if (event && !event->isCounterBased() && !event->isEventTimestampFlagSet()) { ret = this->appendBarrier(hSignalEvent, 0, nullptr, relaxedOrdering); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 0ae95925c9..94203edf1e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -352,9 +352,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K inOrderExecInfo = this->inOrderExecInfo.get(); if (eventForInOrderExec && eventForInOrderExec->isCounterBased()) { isCounterBasedEvent = true; - if (eventForInOrderExec->getInOrderIncrementValue() > 0) { + if (eventForInOrderExec->getInOrderIncrementValue(this->partitionCount) > 0) { inOrderIncrementGpuAddress = eventForInOrderExec->getInOrderExecInfo()->getBaseDeviceAddress(); - inOrderIncrementValue = eventForInOrderExec->getInOrderIncrementValue(); + inOrderIncrementValue = eventForInOrderExec->getInOrderIncrementValue(this->partitionCount); } } } diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index b22a813a74..1c0cc7c428 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -93,10 +93,10 @@ struct BcsSplit { uint64_t aggregatedEventIncrementVal = 1; const bool useSignalEventForSubcopy = aggregatedEventsMode && cmdList->isUsingAdditionalBlitProperties() && Event::isAggregatedEvent(signalEvent) && - (signalEvent->getInOrderIncrementValue() % engineCount == 0); + (signalEvent->getInOrderIncrementValue(1) % engineCount == 0); if (useSignalEventForSubcopy) { - aggregatedEventIncrementVal = signalEvent->getInOrderIncrementValue() / engineCount; + aggregatedEventIncrementVal = signalEvent->getInOrderIncrementValue(1) / engineCount; } else { auto markerEventIndexRet = 
this->events.obtainForSplit(Context::fromHandle(cmdList->getCmdListContext()), maxEventCountInPool); if (!markerEventIndexRet.has_value()) { diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 7e29d26502..c96b726f31 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -659,6 +659,11 @@ uint64_t Event::getInOrderExecSignalValueWithSubmissionCounter() const { return (inOrderExecSignalValue + appendCounter); } +uint64_t Event::getInOrderIncrementValue(uint32_t partitionCount) const { + DEBUG_BREAK_IF(inOrderIncrementValue % partitionCount != 0); + return (inOrderIncrementValue / partitionCount); +} + void Event::setLatestUsedCmdQueue(CommandQueue *newCmdQ) { this->latestUsedCmdQueue = newCmdQ; } diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 7a6c6d343f..7c0d4c92c4 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -307,7 +307,7 @@ struct Event : _ze_event_handle_t { uint64_t getInOrderExecSignalValueWithSubmissionCounter() const; uint64_t getInOrderExecBaseSignalValue() const { return inOrderExecSignalValue; } uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; } - uint64_t getInOrderIncrementValue() const { return inOrderIncrementValue; } + uint64_t getInOrderIncrementValue(uint32_t partitionCount) const; void setLatestUsedCmdQueue(CommandQueue *newCmdQ); NEO::TimeStampData *peekReferenceTs() { return static_cast(ptrOffset(getHostAddress(), getMaxPacketsCount() * getSinglePacketSize())); @@ -349,7 +349,7 @@ struct Event : _ze_event_handle_t { this->isEventOnBarrierOptimized = value; } - static bool isAggregatedEvent(const Event *event) { return (event && event->getInOrderIncrementValue() > 0); } + static bool isAggregatedEvent(const Event *event) { return (event && event->getInOrderIncrementValue(1) > 0); } CommandList *getRecordedSignalFrom() const { return 
this->recordedSignalFrom; diff --git a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl index fa468b9428..b851eb098d 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl +++ b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl @@ -828,7 +828,7 @@ void MutableCommandListCoreFamily::storeSignalEventVariable(Mutab if (CommandListImp::isInOrderExecutionEnabled()) { mutableEventParams.eventInsideInOrder = true; mutableEventParams.counterBasedEvent = event->isCounterBased(); - mutableEventParams.inOrderIncrementEvent = event->getInOrderIncrementValue() > 0; + mutableEventParams.inOrderIncrementEvent = event->getInOrderIncrementValue(this->partitionCount) > 0; if (mutableEventParams.counterBasedEvent && CommandListCoreFamily::duplicatedInOrderCounterStorageEnabled) { mutableEventParams.counterBasedTimestampEvent = event->isEventTimestampFlagSet(); } diff --git a/level_zero/core/source/mutable_cmdlist/variable.cpp b/level_zero/core/source/mutable_cmdlist/variable.cpp index 143cc0ea38..7f72c79d63 100644 --- a/level_zero/core/source/mutable_cmdlist/variable.cpp +++ b/level_zero/core/source/mutable_cmdlist/variable.cpp @@ -121,7 +121,7 @@ ze_result_t Variable::setAsSignalEvent(Event *event, MutableComputeWalker *walke this->eventValue.event = event; this->eventValue.eventPoolAllocation = event->getAllocation(cmdList->getBase()->getDevice()); this->eventValue.counterBasedEvent = event->isCounterBased(); - this->eventValue.inOrderIncrementEvent = event->getInOrderIncrementValue() > 0; + this->eventValue.inOrderIncrementEvent = event->getInOrderIncrementValue(cmdList->getBase()->getPartitionCount()) > 0; this->eventValue.walkerCmd = walkerCmd; this->eventValue.postSyncCmd = postSyncCmd; this->eventValue.kernelCount = event->getKernelCount(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp index 2e0076ae11..157a88e7b8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp @@ -1251,7 +1251,7 @@ HWTEST2_F(AggregatedBcsSplitTests, whenObtainCalledThenAggregatedEventsCreated, EXPECT_EQ(0u, *bcsSplit->events.subcopy[i]->getInOrderExecInfo()->getBaseHostAddress()); EXPECT_FALSE(bcsSplit->events.subcopy[i]->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST)); EXPECT_TRUE(bcsSplit->events.subcopy[i]->isSignalScope(ZE_EVENT_SCOPE_FLAG_DEVICE)); - EXPECT_EQ(1u, bcsSplit->events.subcopy[i]->getInOrderIncrementValue()); + EXPECT_EQ(1u, bcsSplit->events.subcopy[i]->getInOrderIncrementValue(1)); EXPECT_EQ(static_cast(bcsSplit->cmdLists.size()), bcsSplit->events.subcopy[i]->getInOrderExecBaseSignalValue()); EXPECT_EQ(nullptr, bcsSplit->events.marker[i]->getInOrderExecInfo()); @@ -1296,7 +1296,7 @@ HWTEST2_F(AggregatedBcsSplitTests, whenObtainCalledThenAggregatedEventsCreated, for (auto &event : bcsSplit->events.subcopy) { EXPECT_TRUE(event->isCounterBased()); - EXPECT_EQ(1u, event->getInOrderIncrementValue()); + EXPECT_EQ(1u, event->getInOrderIncrementValue(1)); EXPECT_EQ(static_cast(bcsSplit->cmdLists.size()), event->getInOrderExecSignalValueWithSubmissionCounter()); } } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp index e0ca55e216..b5b8d879d8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp @@ -5716,7 +5716,7 @@ HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCreatingCounterBasedEv auto inOrderExecInfo = eventObj->getInOrderExecInfo(); - EXPECT_EQ(incValue, eventObj->getInOrderIncrementValue()); + EXPECT_EQ(incValue, 
eventObj->getInOrderIncrementValue(1)); EXPECT_EQ(counterValue, inOrderExecInfo->getCounterValue()); EXPECT_EQ(castToUint64(externalStorageAllocProperties.deviceAddress), inOrderExecInfo->getBaseDeviceAddress()); EXPECT_NE(nullptr, inOrderExecInfo->getDeviceCounterAllocation()); @@ -5775,7 +5775,7 @@ HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCallingAppendThenDontR EXPECT_EQ(inOrderExecInfo, eventObj->getInOrderExecInfo()); EXPECT_EQ(counterValue, eventObj->getInOrderExecInfo()->getCounterValue()); EXPECT_EQ(counterValue, eventObj->getInOrderExecSignalValueWithSubmissionCounter()); - EXPECT_EQ(incValue, eventObj->getInOrderIncrementValue()); + EXPECT_EQ(incValue, eventObj->getInOrderIncrementValue(1)); context->freeMem(devAddress); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index f30fcd7092..0324bb23a9 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -3549,6 +3549,115 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenStandaloneEventAndCopyOnlyCmdListWh context->freeMem(hostAddress); } +HWTEST2_F(MultiTileInOrderCmdListTests, givenExternalSyncStorageWhenCallingAppendSignalInOrderDependencyCounterThenProgramAtomicOperation, IsAtLeastXeHpcCore) { + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using ATOMIC_OPCODES = typename FamilyType::MI_ATOMIC::ATOMIC_OPCODES; + using DATA_SIZE = typename FamilyType::MI_ATOMIC::DATA_SIZE; + + const uint64_t incValue = (static_cast(std::numeric_limits::max()) + 1234) * partitionCount; + const uint64_t counterValue = incValue * 2; + const uint64_t programmedIncValue = incValue / partitionCount; + + auto devAddress = reinterpret_cast(allocDeviceMem(sizeof(uint64_t))); + + auto immCmdList = createImmCmdList(); + + auto eventObj = 
createExternalSyncStorageEvent(counterValue, incValue, devAddress); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + immCmdList->inOrderAtomicSignalingEnabled = false; + immCmdList->appendSignalInOrderDependencyCounter(eventObj.get(), false, false, false, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); + + auto it = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), it); + + auto miAtomic = genCmdCast(*it); + EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_ADD, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, miAtomic->getDataSize()); + EXPECT_EQ(getLowPart(programmedIncValue), miAtomic->getOperand1DataDword0()); + EXPECT_EQ(getHighPart(programmedIncValue), miAtomic->getOperand1DataDword1()); + + EXPECT_EQ(castToUint64(devAddress), NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + + context->freeMem(devAddress); +} + +HWTEST2_F(MultiTileInOrderCmdListTests, givenExternalSyncStorageAndCopyOnlyCmdListWhenCallingAppendMemoryCopyWithDisabledInOrderSignalingThenSignalAtomicStorage, IsAtLeastXeHpcCore) { + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using ATOMIC_OPCODES = typename FamilyType::MI_ATOMIC::ATOMIC_OPCODES; + using DATA_SIZE = typename FamilyType::MI_ATOMIC::DATA_SIZE; + + const uint64_t incValue = (static_cast(std::numeric_limits::max()) + 1234) * partitionCount; + const uint64_t counterValue = incValue * 2; + const uint64_t programmedIncValue = incValue / partitionCount; + + auto devAddress = reinterpret_cast(allocDeviceMem(sizeof(uint64_t))); + + auto immCmdList = createCopyOnlyImmCmdList(); + + auto eventObj = createExternalSyncStorageEvent(counterValue, incValue, devAddress); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto offset = cmdStream->getUsed(); + uint32_t copyData = 0; + copyParams.forceDisableCopyOnlyInOrderSignaling = true; + + { + 
immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventObj->toHandle(), 0, nullptr, copyParams); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset)); + + auto it = find(cmdList.begin(), cmdList.end()); + + if (immCmdList->useAdditionalBlitProperties) { + EXPECT_EQ(cmdList.end(), it); + } else { + ASSERT_NE(cmdList.end(), it); + + auto miAtomic = genCmdCast(*it); + EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_ADD, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, miAtomic->getDataSize()); + EXPECT_EQ(getLowPart(programmedIncValue), miAtomic->getOperand1DataDword0()); + EXPECT_EQ(getHighPart(programmedIncValue), miAtomic->getOperand1DataDword1()); + + EXPECT_EQ(castToUint64(devAddress), NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + } + } + + offset = cmdStream->getUsed(); + + { + ze_copy_region_t region = {0, 0, 0, 1, 1, 1}; + + immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, eventObj->toHandle(), 0, nullptr, copyParams); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), cmdStream->getUsed() - offset)); + + auto it = find(cmdList.begin(), cmdList.end()); + if (immCmdList->useAdditionalBlitProperties) { + EXPECT_EQ(cmdList.end(), it); + } else { + ASSERT_NE(cmdList.end(), it); + + auto miAtomic = genCmdCast(*it); + EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_ADD, miAtomic->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, miAtomic->getDataSize()); + EXPECT_EQ(getLowPart(programmedIncValue), miAtomic->getOperand1DataDword0()); + EXPECT_EQ(getHighPart(programmedIncValue), miAtomic->getOperand1DataDword1()); + + EXPECT_EQ(castToUint64(devAddress), NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic)); + } + } + + context->freeMem(devAddress); +} + HWTEST2_F(MultiTileInOrderCmdListTests, 
givenDebugFlagSetWhenAskingForAtomicSignallingThenReturnTrue, IsAtLeastXeCore) { auto immCmdList = createMultiTileImmCmdList(); auto heaplessEnabled = immCmdList->isHeaplessModeEnabled();