From 5b9d45f3bc71992a296b57be5d384a26c613c6bf Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Tue, 4 Jul 2023 10:43:51 +0000 Subject: [PATCH] feature: handle in-order allocation overflow Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.h | 2 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 14 ++-- .../source/cmdlist/cmdlist_hw_immediate.h | 4 +- .../source/cmdlist/cmdlist_hw_immediate.inl | 34 ++++++-- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 2 +- .../core/source/cmdlist/cmdlist_imp.cpp | 2 +- level_zero/core/source/cmdlist/cmdlist_imp.h | 1 + level_zero/core/source/event/event.cpp | 3 +- level_zero/core/source/event/event.h | 4 +- level_zero/core/source/event/event_impl.inl | 3 +- .../core/test/unit_tests/mocks/mock_cmdlist.h | 1 + .../test_cmdlist_append_launch_kernel_3.cpp | 79 +++++++++++++++++-- .../unit_tests/sources/event/test_event.cpp | 2 +- 13 files changed, 124 insertions(+), 27 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index d91443be0b..a8b9b25eeb 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -169,7 +169,7 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override; ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override; - void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, bool relaxedOrderingAllowed); + void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed); void appendSignalInOrderDependencyCounter(); ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, 
ze_event_handle_t *phWaitEvents) override; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 7260ec0e0a..e09339052e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2132,7 +2132,7 @@ inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint } if (hasInOrderDependencies) { - CommandListCoreFamily::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, relaxedOrderingAllowed); + CommandListCoreFamily::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed); } if (numWaitEvents > 0) { @@ -2187,14 +2187,14 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han } template -void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, bool relaxedOrderingAllowed) { +void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed) { using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; - UNRECOVERABLE_IF(waitValue >= std::numeric_limits::max()); + UNRECOVERABLE_IF(waitValue > std::numeric_limits::max()); commandContainer.addToResidencyContainer(dependencyCounterAllocation); - uint64_t gpuAddress = dependencyCounterAllocation->getGpuAddress(); + uint64_t gpuAddress = dependencyCounterAllocation->getGpuAddress() + offset; for (uint32_t i = 0; i < this->partitionCount; i++) { if (relaxedOrderingAllowed) { @@ -2262,7 +2262,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu (event->getInOrderExecSignalValue() == this->inOrderDependencyCounter); if (!eventFromPreviousAppend) { - 
CommandListCoreFamily::appendWaitOnInOrderDependency(event->getInOrderExecDataAllocation(), event->getInOrderExecSignalValue(), relaxedOrderingAllowed); + CommandListCoreFamily::appendWaitOnInOrderDependency(event->getInOrderExecDataAllocation(), event->getInOrderExecSignalValue(), event->getInOrderAllocationOffset(), relaxedOrderingAllowed); } continue; } @@ -2318,7 +2318,9 @@ void CommandListCoreFamily::appendSignalInOrderDependencyCounter( auto lowPart = static_cast(signalValue & 0x0000FFFFFFFFULL); auto highPart = static_cast(signalValue >> 32); - NEO::EncodeStoreMemory::programStoreDataImm(*commandContainer.getCommandStream(), this->inOrderDependencyCounterAllocation->getGpuAddress(), + uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; + + NEO::EncodeStoreMemory::programStoreDataImm(*commandContainer.getCommandStream(), gpuVa, lowPart, highPart, true, (this->partitionCount > 1)); } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index f94e43ca59..ce7df46ea1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -42,6 +42,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::waitForEventsFromHost() { } template -bool CommandListCoreFamilyImmediate::hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) { +bool CommandListCoreFamilyImmediate::hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const { return (!relaxedOrderingDispatch && (numWaitEvents > 0 || this->inOrderDependencyCounter > 0)); } @@ -393,7 +393,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier( if (signalEvent->isEventTimestampFlagSet()) { earlyReturn = false; } else { - signalEvent->enableInOrderExecMode(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter); + 
signalEvent->enableInOrderExecMode(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset); } } @@ -765,9 +765,7 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res if (inputRet == ZE_RESULT_SUCCESS) { if (isInOrderExecutionEnabled()) { - inOrderDependencyCounter++; - - this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); + handleInOrderDependencyCounter(); } if (this->isFlushTaskSubmissionEnabled) { @@ -784,13 +782,35 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res signalEvent->setCsr(this->csr); if (isInOrderExecutionEnabled()) { - signalEvent->enableInOrderExecMode(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter); + signalEvent->enableInOrderExecMode(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset); } } return inputRet; } +template +void CommandListCoreFamilyImmediate::handleInOrderDependencyCounter() { + if ((inOrderDependencyCounter + 1) == std::numeric_limits::max()) { + CommandListCoreFamily::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false); + + inOrderDependencyCounter = 0; + + // multitile immediate writes are uint64_t aligned + uint32_t offset = this->partitionCount * static_cast(sizeof(uint64_t)); + + inOrderAllocationOffset += offset; + + UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + + CommandListCoreFamily::appendSignalInOrderDependencyCounter(); // write 1 on new offset + } + + inOrderDependencyCounter++; + + this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); +} + template bool CommandListCoreFamilyImmediate::preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if 
(NEO::DebugManager.flags.ExperimentalForceCopyThroughLock.get() == 1) { @@ -1136,7 +1156,7 @@ ze_result_t CommandListCoreFamilyImmediate::synchronizeInOrderExe bool signaled = true; - auto hostAddress = static_cast(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress = static_cast(ptrOffset(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset)); for (uint32_t i = 0; i < this->partitionCount; i++) { if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal())) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index ea41a1d709..0ebe7daaef 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -279,7 +279,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation); if (inOrderExecSignalRequired && !event) { - dispatchKernelArgs.eventAddress = this->inOrderDependencyCounterAllocation->getGpuAddress(); + dispatchKernelArgs.eventAddress = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; dispatchKernelArgs.postSyncImmValue = this->inOrderDependencyCounter + 1; } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 264e041cf5..e60dca529d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -232,7 +232,7 @@ void CommandListImp::enableInOrderExecution() { auto device = this->device->getNEODevice(); - NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), sizeof(uint32_t), NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; + NEO::AllocationProperties 
allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index 9308242a52..470e15cc3e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -44,6 +44,7 @@ struct CommandListImp : CommandList { std::unique_ptr nonImmediateLogicalStateHelper; NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr; uint64_t inOrderDependencyCounter = 0; + uint32_t inOrderAllocationOffset = 0; bool inOrderExecutionEnabled = false; ~CommandListImp() override = default; diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index e2910d449e..3fee874ca5 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -391,11 +391,12 @@ void Event::setIsCompleted() { unsetCmdQueue(true); } -void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue) { +void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset) { inOrderExecEvent = true; inOrderExecSignalValue = signalValue; inOrderExecDataAllocation = &inOrderDependenciesAllocation; + inOrderAllocationOffset = allocationOffset; } void Event::setLatestUsedCmdQueue(CommandQueue *newCmdQ) { diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index db299ee15c..53af86ecfb 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -211,10 +211,11 @@ struct Event : _ze_event_handle_t { void setMetricStreamer(MetricStreamer *metricStreamer) { this->metricStreamer = 
metricStreamer; } - void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue); + void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset); bool isInOrderExecEvent() const { return inOrderExecEvent; } NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; } uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } + uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; } void setLatestUsedCmdQueue(CommandQueue *newCmdQ); void setReferenceTs(NEO::TimeStampData &timestamp) { referenceTs = timestamp; @@ -233,6 +234,7 @@ struct Event : _ze_event_handle_t { NEO::TimeStampData referenceTs{}; uint64_t inOrderExecSignalValue = 0; + uint32_t inOrderAllocationOffset = 0; std::chrono::microseconds gpuHangCheckPeriod{500'000}; std::bitset l3FlushAppliedOnKernel; diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 727f44f861..1855ed2ece 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -134,7 +134,7 @@ void EventImp::assignKernelEventCompletionData(void *address) { template ze_result_t EventImp::queryInOrderEventStatus() { - auto hostAddress = static_cast(this->inOrderExecDataAllocation->getUnderlyingBuffer()); + auto hostAddress = static_cast(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset)); bool signaled = true; for (uint32_t i = 0; i < this->getPacketsInUse(); i++) { @@ -403,6 +403,7 @@ ze_result_t EventImp::reset() { inOrderExecEvent = false; inOrderExecDataAllocation = nullptr; inOrderExecSignalValue = 0; + inOrderAllocationOffset = 0; } unsetCmdQueue(false); this->resetCompletionStatus(); diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h 
index fa7a88cf41..f4a3ddadfe 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -75,6 +75,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::immediateCmdListHeapSharing; using BaseClass::indirectAllocationsAllowed; using BaseClass::initialize; + using BaseClass::inOrderAllocationOffset; using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::isRelaxedOrderingDispatchAllowed; using BaseClass::isSyncModeQueue; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 251dda7ce8..9696391fe6 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -676,6 +676,7 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { using EventImp::maxPacketCount; using EventImp::inOrderExecDataAllocation; using EventImp::inOrderExecSignalValue; + using EventImp::inOrderAllocationOffset; }; void SetUp() override { @@ -766,22 +767,30 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEven immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); + EXPECT_EQ(MemoryConstants::pageSize64k, immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + EXPECT_TRUE(events[0]->inOrderExecEvent); EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderDependencyCounter); EXPECT_EQ(events[0]->inOrderExecDataAllocation, immCmdList->inOrderDependencyCounterAllocation); + EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u); + events[0]->inOrderAllocationOffset = 123; events[0]->reset(); EXPECT_FALSE(events[0]->inOrderExecEvent); EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u); 
EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr); + EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + uint32_t counterOffset = 64; + auto immCmdList = createImmCmdList(); + immCmdList->inOrderAllocationOffset = counterOffset; auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); @@ -804,7 +813,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor auto semaphoreCmd = genCmdCast(*itor); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } @@ -874,7 +883,10 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForEventFromAfterReset HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSemaphoreForEvent, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + uint32_t counterOffset = 64; + auto immCmdList = createImmCmdList(); + immCmdList->inOrderAllocationOffset = counterOffset; auto eventPool = createEvents(1, false); @@ -907,7 +919,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSem auto semaphoreCmd = genCmdCast(*itor); EXPECT_EQ(2u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, semaphoreCmd->getSemaphoreGraphicsAddress()); 
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } @@ -1038,7 +1050,10 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + uint32_t counterOffset = 64; + auto immCmdList = createImmCmdList(); + immCmdList->inOrderAllocationOffset = counterOffset; auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); @@ -1059,7 +1074,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); EXPECT_EQ(1u, postSync.getImmediateData()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, postSync.getDestinationAddress()); } auto offset = cmdStream->getUsed(); @@ -1096,13 +1111,13 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy auto sdiCmd = genCmdCast(++semaphoreCmd); ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, sdiCmd->getAddress()); EXPECT_EQ(1u, sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); } - auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset)); *hostAddress = 1; EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1)); @@ -1354,6 +1369,55 @@ HWTEST2_F(InOrderCmdListTests, 
givenInOrderModeWhenProgrammingAppendWaitOnEvents EXPECT_EQ(2u, sdiCmd->getDataDword0()); } +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflowThenHandleItCorrectly, IsAtLeastXeHpCore) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto immCmdList = createImmCmdList(); + immCmdList->inOrderDependencyCounter = std::numeric_limits::max() - 1; + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto eventPool = createEvents(1, false); + + auto eventHandle = events[0]->toHandle(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, eventHandle, 0, nullptr, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); + + auto sdiItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + + uint64_t baseGpuVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + + EXPECT_EQ(baseGpuVa, sdiCmd->getAddress()); + EXPECT_EQ(std::numeric_limits::max(), sdiCmd->getDataDword0()); + + auto semaphoreCmd = genCmdCast(++sdiCmd); + ASSERT_NE(nullptr, semaphoreCmd); + + EXPECT_EQ(std::numeric_limits::max(), semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(baseGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); + + sdiCmd = genCmdCast(++semaphoreCmd); + ASSERT_NE(nullptr, sdiCmd); + + uint32_t offset = static_cast(sizeof(uint64_t)); + + EXPECT_EQ(baseGpuVa + offset, sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); + + EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(offset, immCmdList->inOrderAllocationOffset); + + EXPECT_EQ(1u, events[0]->inOrderExecSignalValue); + EXPECT_EQ(offset, events[0]->inOrderAllocationOffset); +} + HWTEST2_F(InOrderCmdListTests, 
givenCopyOnlyInOrderModeWhenProgrammingBarrierThenSignalInOrderAllocation, IsAtLeastXeHpCore) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; @@ -1489,13 +1553,16 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompletion, IsAtLeastXeHpCore) { + uint32_t counterOffset = 64; + auto immCmdList = createImmCmdList(); + immCmdList->inOrderAllocationOffset = counterOffset; auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset)); *hostAddress = 0; const uint32_t failCounter = 3; diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 7b48d53031..3c3e399397 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -3086,7 +3086,7 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostEventSyncThenExpectDownloadEventAl NEO::MockGraphicsAllocation syncAllocation(&storage, sizeof(storage)); - event->enableInOrderExecMode(syncAllocation, 1); + event->enableInOrderExecMode(syncAllocation, 1, 0); constexpr uint64_t timeout = std::numeric_limits::max(); auto result = event->hostSynchronize(timeout);