From c8a3d7d26831f5799af8eaa3cc356194fad01278 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Tue, 5 Sep 2023 16:54:37 +0000 Subject: [PATCH] feature: enable in-order sync allocation for regular cmd lists Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.h | 2 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 144 ++++++++++++++---- .../source/cmdlist/cmdlist_hw_immediate.h | 2 - .../source/cmdlist/cmdlist_hw_immediate.inl | 26 ---- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 2 +- .../core/source/cmdlist/cmdlist_imp.cpp | 12 +- .../test_cmdlist_append_launch_kernel_3.cpp | 93 ++++++++--- 7 files changed, 194 insertions(+), 87 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index af1ae27716..2323ae78ae 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -329,8 +329,8 @@ struct CommandListCoreFamily : CommandListImp { virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; } virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {} bool isInOrderEventWaitRequired(const Event &event) const; - virtual bool useCounterAllocationForInOrderMode() const { return false; } void handleInOrderImplicitDependencies(bool relaxedOrderingAllowed); + virtual void handleInOrderDependencyCounter(); }; template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 25207acfba..bf3a6540b9 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -142,9 +142,35 @@ ze_result_t CommandListCoreFamily::reset() { inOrderDependencyCounter = 0; inOrderAllocationOffset = 0; + if (inOrderDependencyCounterAllocation) { + memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + } + return ZE_RESULT_SUCCESS; } +template +void CommandListCoreFamily::handleInOrderDependencyCounter() { + if ((inOrderDependencyCounter + 1) == std::numeric_limits::max()) { + CommandListCoreFamily::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false); + + inOrderDependencyCounter = 0; + + // multitile immediate writes are uint64_t aligned + uint32_t offset = this->partitionCount * static_cast(sizeof(uint64_t)); + + inOrderAllocationOffset += offset; + + UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + + CommandListCoreFamily::appendSignalInOrderDependencyCounter(); // write 1 on new offset + } + + inOrderDependencyCounter++; + + this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); +} + template void CommandListCoreFamily::handlePostSubmissionState() { this->commandContainer.getResidencyContainer().clear(); @@ -334,6 +360,11 @@ ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_h auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions, event, launchParams); + + if (isInOrderExecutionEnabled()) { + handleInOrderDependencyCounter(); + } + addToMappedEventList(event); if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( @@ -370,6 +401,10 @@ ze_result_t CommandListCoreFamily::appendLaunchCooperativeKernel( ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs, event, launchParams); addToMappedEventList(event); + + if (this->inOrderExecutionEnabled) { + handleInOrderDependencyCounter(); + } return ret; } @@ -402,6 +437,10 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ addToMappedEventList(event); appendSignalEventPostWalker(event); + if (isInOrderExecutionEnabled()) { + handleInOrderDependencyCounter(); + } + return ret; } @@ -486,8 +525,9 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand } } - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -524,8 +564,9 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint appendSignalEventPostWalker(signalEvent); addToMappedEventList(signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } return ZE_RESULT_SUCCESS; @@ -679,6 +720,11 @@ ze_result_t CommandListCoreFamily::appendImageCopyFromMemory(ze_i event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch); addToMappedEventList(Event::fromHandle(hEvent)); + + if (this->inOrderExecutionEnabled) { + handleInOrderDependencyCounter(); + } + return status; } @@ -838,6 +884,10 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void * addFlushRequiredCommand(allocationStruct.needsFlush, event); + if (this->inOrderExecutionEnabled) { + handleInOrderDependencyCounter(); + } + return ret; } @@ -981,6 +1031,11 @@ ze_result_t CommandListCoreFamily::appendImageCopyRegion(ze_image event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch); addToMappedEventList(event); + + if (this->inOrderExecutionEnabled) { + handleInOrderDependencyCounter(); + } + return status; } @@ -1423,12 +1478,18 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); addToMappedEventList(signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) { - if (!signalEvent && !isCopyOnly()) { - NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + if (this->inOrderExecutionEnabled) { + if (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed) { + if (!signalEvent && !isCopyOnly()) { + NEO::PipeControlArgs args; + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + } + appendSignalInOrderDependencyCounter(); + } + + if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) { + handleInOrderDependencyCounter(); } - appendSignalInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -1456,7 +1517,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, bool forceDisableCopyOnlyInOrderSignaling) { - const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling; + const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly(); NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; @@ -1516,8 +1577,14 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d addToMappedEventList(signalEvent); addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && isCopyOnly() && inOrderCopyOnlySignalingAllowed) { - appendSignalInOrderDependencyCounter(); + if (this->inOrderExecutionEnabled) { + if (inOrderCopyOnlySignalingAllowed) { + appendSignalInOrderDependencyCounter(); + } + + if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) { + handleInOrderDependencyCounter(); + } } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -1939,12 +2006,16 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && launchParams.isKernelSplitOperation) { - if (!signalEvent) { - NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + if (this->inOrderExecutionEnabled) { + if (launchParams.isKernelSplitOperation) { + if (!signalEvent) { + NEO::PipeControlArgs args; + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + } + appendSignalInOrderDependencyCounter(); } - appendSignalInOrderDependencyCounter(); + + handleInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -2005,8 +2076,9 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, appendSignalEventPostWalker(signalEvent); - if (isInOrderExecutionEnabled() && useCounterAllocationForInOrderMode()) { + if (isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } } return ZE_RESULT_SUCCESS; @@ -2173,16 +2245,12 @@ void CommandListCoreFamily::handleInOrderImplicitDependencies(boo NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers(*commandContainer.getCommandStream()); } - if (useCounterAllocationForInOrderMode()) { + if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { CommandListCoreFamily::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed); } else if (!isCopyOnly()) { appendComputeBarrierCommand(); } } - - if (cmdListType == TYPE_REGULAR && this->inOrderExecutionEnabled && !hasInOrderDependencies) { - inOrderDependencyCounter++; // First append is without dependencies. Increment counter to program barrier on next calls. - } } template @@ -2231,8 +2299,9 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet()); dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -2251,7 +2320,7 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::Gr using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; UNRECOVERABLE_IF(waitValue > std::numeric_limits::max()); - UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode()); + UNRECOVERABLE_IF(this->cmdListType == CommandListType::TYPE_REGULAR); commandContainer.addToResidencyContainer(dependencyCounterAllocation); @@ -2345,8 +2414,9 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu commandContainer.addToResidencyContainer(this->csr->getTagAllocation()); } - if (signalInOrderCompletion && useCounterAllocationForInOrderMode()) { + if (signalInOrderCompletion) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } makeResidentDummyAllocation(); @@ -2364,8 +2434,6 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu template void CommandListCoreFamily::appendSignalInOrderDependencyCounter() { - UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode()); - uint32_t signalValue = this->inOrderDependencyCounter + 1; uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; @@ -2501,8 +2569,9 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( appendSignalEventPostWalker(signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false); @@ -2997,7 +3066,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ appendEventForProfiling(signalEvent, true); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); } else if (isCopyOnly()) { NEO::MiFlushArgs args{this->dummyBlitWa}; @@ -3018,6 +3087,11 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ addToMappedEventList(signalEvent); appendSignalEventPostWalker(signalEvent); + + if (isInOrderExecutionEnabled()) { + handleInOrderDependencyCounter(); + } + return ZE_RESULT_SUCCESS; } @@ -3139,12 +3213,12 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, } UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr); + appendEventForProfiling(signalEvent, true); + if (this->inOrderExecutionEnabled) { handleInOrderImplicitDependencies(false); } - appendEventForProfiling(signalEvent, true); - commandContainer.addToResidencyContainer(srcAllocationStruct.alloc); uint64_t gpuAddress = static_cast(srcAllocationStruct.alignedAllocationPtr); NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), gpuAddress, data, comparator, false, false, false); @@ -3160,6 +3234,11 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, appendSignalEventPostWalker(signalEvent); + if (this->inOrderExecutionEnabled) { + appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); + } + return ZE_RESULT_SUCCESS; } @@ -3203,6 +3282,11 @@ ze_result_t CommandListCoreFamily::appendWriteToMemory(void *desc args); } + if (this->inOrderExecutionEnabled) { + appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); + } + return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index e48375b44f..2892da5f0d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -192,9 +192,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::flushImmediate(ze_res auto signalEvent = Event::fromHandle(hSignalEvent); if (inputRet == ZE_RESULT_SUCCESS) { - if (isInOrderExecutionEnabled()) { - handleInOrderDependencyCounter(); - } - if (this->isFlushTaskSubmissionEnabled) { if (signalEvent && (NEO::DebugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) { signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate); @@ -911,28 +907,6 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res return inputRet; } -template -void CommandListCoreFamilyImmediate::handleInOrderDependencyCounter() { - if ((inOrderDependencyCounter + 1) == std::numeric_limits::max()) { - CommandListCoreFamily::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false); - - inOrderDependencyCounter = 0; - - // multitile immediate writes are uint64_t aligned - uint32_t offset = this->partitionCount * static_cast(sizeof(uint64_t)); - - inOrderAllocationOffset += offset; - - UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); - - CommandListCoreFamily::appendSignalInOrderDependencyCounter(); // write 1 on new offset - } - - inOrderDependencyCounter++; - - this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); -} - template bool CommandListCoreFamilyImmediate::preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (NEO::DebugManager.flags.ExperimentalForceCopyThroughLock.get() == 1) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index a4e3b45c88..232c57035d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -296,7 +296,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K this->dcFlushSupport // dcFlushEnable }; - bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation && useCounterAllocationForInOrderMode()); + bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation); bool inOrderNonWalkerSignalling = event && (isTimestampEvent || !isInOrderExecEvent); if (inOrderExecSignalRequired) { diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 1d416e1f0f..5f0c352d17 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -229,17 +229,15 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s void CommandListImp::enableInOrderExecution() { UNRECOVERABLE_IF(inOrderExecutionEnabled); - if (this->cmdListType == TYPE_IMMEDIATE) { - auto device = this->device->getNEODevice(); + auto device = this->device->getNEODevice(); - NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; + NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; - inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); + inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); - UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation); + UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation); - memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); - } + memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); inOrderExecutionEnabled = true; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 8c9184296f..1890e8e9d7 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -1807,6 +1807,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne uint8_t ptr[64] = {}; + uint64_t inOrderSyncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); auto verifyPc = [](const GenCmdList::iterator &iterator) { @@ -1815,9 +1817,21 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne ASSERT_NE(nullptr, pcCmd); }; - auto verifySdi = [](GenCmdList::reverse_iterator rIterator) { + auto verifySdi = [&inOrderSyncVa](GenCmdList::reverse_iterator rIterator, GenCmdList::reverse_iterator rEnd, uint32_t signalValue) { auto sdiCmd = genCmdCast(*rIterator); - EXPECT_EQ(nullptr, sdiCmd); + while (sdiCmd == nullptr) { + sdiCmd = genCmdCast(*(++rIterator)); + if (rIterator == rEnd) { + break; + } + } + + ASSERT_NE(nullptr, sdiCmd); + + EXPECT_EQ(inOrderSyncVa, sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(signalValue, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); }; { @@ -1831,7 +1845,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 2); } { @@ -1847,7 +1861,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 3); } { @@ -1861,7 +1875,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 4); } { @@ -1877,7 +1891,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 5); } { @@ -1893,7 +1907,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 6); } } @@ -2275,7 +2289,17 @@ HWTEST2_F(InOrderCmdListTests, givenRegularInOrderCmdListWhenProgrammingAppendWa ASSERT_NE(nullptr, pcCmd); auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); + EXPECT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + ASSERT_NE(nullptr, sdiCmd); + + uint64_t syncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + + EXPECT_EQ(syncVa, sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(3u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflowThenHandleItCorrectly, IsAtLeastXeHpCore) { @@ -3171,7 +3195,9 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL auto walkerCmd = genCmdCast(*walkerItor); auto &postSync = walkerCmd->getPostSync(); - EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation()); + EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); + EXPECT_EQ(1u, postSync.getImmediateData()); + EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); auto sdiItor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), sdiItor); @@ -3180,7 +3206,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL offset = cmdStream->getUsed(); regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - EXPECT_EQ(1u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter); { GenCmdList cmdList; @@ -3195,20 +3221,25 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL auto walkerCmd = genCmdCast(*walkerItor); auto &postSync = walkerCmd->getPostSync(); - EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation()); + EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); + EXPECT_EQ(2u, postSync.getImmediateData()); + EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); auto sdiItor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), sdiItor); } regularCmdList->inOrderAllocationOffset = 123; + auto hostAddr = static_cast(regularCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + *hostAddr = 0x1234; regularCmdList->reset(); EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter); EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset); + EXPECT_EQ(0u, *hostAddr); } -HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontUpdateCounterAllocation, IsAtLeastXeHpCore) { +HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenUpdateCounterAllocation, IsAtLeastXeHpCore) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto eventPool = createEvents(1, true); @@ -3224,7 +3255,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL size_t offset = cmdStream->getUsed(); EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter); - EXPECT_EQ(nullptr, regularCmdList->inOrderDependencyCounterAllocation); + EXPECT_NE(nullptr, regularCmdList->inOrderDependencyCounterAllocation); constexpr size_t size = 128 * sizeof(uint32_t); auto data = allocHostMem(size); @@ -3245,8 +3276,8 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset))); - auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); + auto sdiCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(2u, sdiCmds.size()); } offset = copyOnlyCmdStream->getUsed(); @@ -3259,7 +3290,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL (copyOnlyCmdStream->getUsed() - offset))); auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); + EXPECT_NE(cmdList.end(), sdiItor); } context->freeMem(data); @@ -3288,7 +3319,18 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg (cmdStream->getUsed() - offset))); auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); + EXPECT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + + ASSERT_NE(nullptr, sdiCmd); + + auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + + EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } offset = cmdStream->getUsed(); @@ -3301,12 +3343,23 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset))); - auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); - auto copyCmd = genCmdCast(*cmdList.begin()); EXPECT_NE(nullptr, copyCmd); + + auto sdiItor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + + ASSERT_NE(nullptr, sdiCmd); + + auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + + EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(2u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } alignedFree(alignedPtr);