diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 7ac326266e..2a70b02c65 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -327,8 +327,8 @@ struct CommandListCoreFamily : CommandListImp { virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; } virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {} bool isInOrderEventWaitRequired(const Event &event) const; - virtual bool useCounterAllocationForInOrderMode() const { return false; } void handleInOrderImplicitDependencies(bool relaxedOrderingAllowed); + virtual void handleInOrderDependencyCounter(); }; template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 9937d84a94..4d1ddb69d7 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -141,9 +141,35 @@ ze_result_t CommandListCoreFamily::reset() { inOrderDependencyCounter = 0; inOrderAllocationOffset = 0; + if (inOrderDependencyCounterAllocation) { + memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + } + return ZE_RESULT_SUCCESS; } +template +void CommandListCoreFamily::handleInOrderDependencyCounter() { + if ((inOrderDependencyCounter + 1) == std::numeric_limits::max()) { + CommandListCoreFamily::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false); + + inOrderDependencyCounter = 0; + + // multitile immediate writes are uint64_t aligned + uint32_t offset = this->partitionCount * static_cast(sizeof(uint64_t)); + + inOrderAllocationOffset += offset; + + UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + + CommandListCoreFamily::appendSignalInOrderDependencyCounter(); // write 1 on new offset + } + + inOrderDependencyCounter++; + + this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); +} + template void CommandListCoreFamily::handlePostSubmissionState() { this->commandContainer.getResidencyContainer().clear(); @@ -327,6 +353,11 @@ ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_h auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions, event, launchParams); + + if (isInOrderExecutionEnabled()) { + handleInOrderDependencyCounter(); + } + addToMappedEventList(event); if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( @@ -363,6 +394,10 @@ ze_result_t CommandListCoreFamily::appendLaunchCooperativeKernel( ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs, event, launchParams); addToMappedEventList(event); + + if (this->inOrderExecutionEnabled) { + handleInOrderDependencyCounter(); + } return ret; } @@ -395,6 +430,10 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ addToMappedEventList(event); appendSignalEventPostWalker(event); + if (isInOrderExecutionEnabled()) { + handleInOrderDependencyCounter(); + } + return ret; } @@ -479,8 +518,9 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand } } - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -517,8 +557,9 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint appendSignalEventPostWalker(signalEvent); addToMappedEventList(signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } return ZE_RESULT_SUCCESS; @@ -672,6 +713,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyFromMemory(ze_i event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch); addToMappedEventList(Event::fromHandle(hEvent)); + return status; } @@ -974,6 +1016,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyRegion(ze_image event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch); addToMappedEventList(event); + return status; } @@ -1416,12 +1459,18 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); addToMappedEventList(signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) { - if (!signalEvent && !isCopyOnly()) { - NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + if (this->inOrderExecutionEnabled) { + if (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed) { + if (!signalEvent && !isCopyOnly()) { + NEO::PipeControlArgs args; + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + } + appendSignalInOrderDependencyCounter(); + } + + if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) { + handleInOrderDependencyCounter(); } - appendSignalInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -1449,7 +1498,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, bool forceDisableCopyOnlyInOrderSignaling) { - const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling; + const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly(); NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; @@ -1509,8 +1558,14 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d addToMappedEventList(signalEvent); addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && isCopyOnly() && inOrderCopyOnlySignalingAllowed) { - appendSignalInOrderDependencyCounter(); + if (this->inOrderExecutionEnabled) { + if (inOrderCopyOnlySignalingAllowed) { + appendSignalInOrderDependencyCounter(); + } + + if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) { + handleInOrderDependencyCounter(); + } } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -1932,12 +1987,16 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && launchParams.isKernelSplitOperation) { - if (!signalEvent) { - NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + if (this->inOrderExecutionEnabled) { + if (launchParams.isKernelSplitOperation) { + if (!signalEvent) { + NEO::PipeControlArgs args; + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + } + appendSignalInOrderDependencyCounter(); } - appendSignalInOrderDependencyCounter(); + + handleInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -1998,8 +2057,9 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, appendSignalEventPostWalker(signalEvent); - if (isInOrderExecutionEnabled() && useCounterAllocationForInOrderMode()) { + if (isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } } return ZE_RESULT_SUCCESS; @@ -2166,16 +2226,12 @@ void CommandListCoreFamily::handleInOrderImplicitDependencies(boo NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers(*commandContainer.getCommandStream()); } - if (useCounterAllocationForInOrderMode()) { + if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { CommandListCoreFamily::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed); } else if (!isCopyOnly()) { appendComputeBarrierCommand(); } } - - if (cmdListType == TYPE_REGULAR && this->inOrderExecutionEnabled && !hasInOrderDependencies) { - inOrderDependencyCounter++; // First append is without dependencies. Increment counter to program barrier on next calls. - } } template @@ -2224,8 +2280,9 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet()); dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -2244,7 +2301,7 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::Gr using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; UNRECOVERABLE_IF(waitValue > std::numeric_limits::max()); - UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode()); + UNRECOVERABLE_IF(this->cmdListType == CommandListType::TYPE_REGULAR); commandContainer.addToResidencyContainer(dependencyCounterAllocation); @@ -2338,8 +2395,9 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu commandContainer.addToResidencyContainer(this->csr->getTagAllocation()); } - if (signalInOrderCompletion && useCounterAllocationForInOrderMode()) { + if (signalInOrderCompletion) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } makeResidentDummyAllocation(); @@ -2357,8 +2415,6 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu template void CommandListCoreFamily::appendSignalInOrderDependencyCounter() { - UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode()); - uint32_t signalValue = this->inOrderDependencyCounter + 1; uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; @@ -2494,8 +2550,9 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( appendSignalEventPostWalker(signalEvent); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); } auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false); @@ -2988,7 +3045,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ appendEventForProfiling(signalEvent, true); - if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { + if (this->inOrderExecutionEnabled) { appendSignalInOrderDependencyCounter(); } else if (isCopyOnly()) { NEO::MiFlushArgs args{this->dummyBlitWa}; @@ -3009,6 +3066,11 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ addToMappedEventList(signalEvent); appendSignalEventPostWalker(signalEvent); + + if (isInOrderExecutionEnabled()) { + handleInOrderDependencyCounter(); + } + return ZE_RESULT_SUCCESS; } @@ -3130,12 +3192,12 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, } UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr); + appendEventForProfiling(signalEvent, true); + if (this->inOrderExecutionEnabled) { handleInOrderImplicitDependencies(false); } - appendEventForProfiling(signalEvent, true); - commandContainer.addToResidencyContainer(srcAllocationStruct.alloc); uint64_t gpuAddress = static_cast(srcAllocationStruct.alignedAllocationPtr); NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), gpuAddress, data, comparator, false, false, false); @@ -3151,6 +3213,11 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, appendSignalEventPostWalker(signalEvent); + if (this->inOrderExecutionEnabled) { + appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); + } + return ZE_RESULT_SUCCESS; } @@ -3194,6 +3261,11 @@ ze_result_t CommandListCoreFamily::appendWriteToMemory(void *desc args); } + if (this->inOrderExecutionEnabled) { + appendSignalInOrderDependencyCounter(); + handleInOrderDependencyCounter(); + } + return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index edea867ac5..749ac24bb3 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -189,9 +189,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::flushImmediate(ze_res auto signalEvent = Event::fromHandle(hSignalEvent); if (inputRet == ZE_RESULT_SUCCESS) { - if (isInOrderExecutionEnabled()) { - handleInOrderDependencyCounter(); - } - if (this->isFlushTaskSubmissionEnabled) { if (signalEvent && (NEO::DebugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) { signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate); @@ -904,28 +900,6 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res return inputRet; } -template -void CommandListCoreFamilyImmediate::handleInOrderDependencyCounter() { - if ((inOrderDependencyCounter + 1) == std::numeric_limits::max()) { - CommandListCoreFamily::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false); - - inOrderDependencyCounter = 0; - - // multitile immediate writes are uint64_t aligned - uint32_t offset = this->partitionCount * static_cast(sizeof(uint64_t)); - - inOrderAllocationOffset += offset; - - UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); - - CommandListCoreFamily::appendSignalInOrderDependencyCounter(); // write 1 on new offset - } - - inOrderDependencyCounter++; - - this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); -} - template bool CommandListCoreFamilyImmediate::preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (NEO::DebugManager.flags.ExperimentalForceCopyThroughLock.get() == 1) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index aac6153161..b5b19191ca 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -296,7 +296,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K this->dcFlushSupport // dcFlushEnable }; - bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation && useCounterAllocationForInOrderMode()); + bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation); bool inOrderNonWalkerSignalling = event && (isTimestampEvent || !isInOrderExecEvent); if (inOrderExecSignalRequired) { diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index c85da7297b..530b1224ce 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -228,17 +228,15 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s void CommandListImp::enableInOrderExecution() { UNRECOVERABLE_IF(inOrderExecutionEnabled); - if (this->cmdListType == TYPE_IMMEDIATE) { - auto device = this->device->getNEODevice(); + auto device = this->device->getNEODevice(); - NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; + NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; - inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); + inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); - UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation); + UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation); - memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); - } + memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); inOrderExecutionEnabled = true; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index ff9d78645f..2ab02d0f41 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -1807,6 +1807,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne uint8_t ptr[64] = {}; + uint64_t inOrderSyncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); auto verifyPc = [](const GenCmdList::iterator &iterator) { @@ -1815,9 +1817,21 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne ASSERT_NE(nullptr, pcCmd); }; - auto verifySdi = [](GenCmdList::reverse_iterator rIterator) { + auto verifySdi = [&inOrderSyncVa](GenCmdList::reverse_iterator rIterator, GenCmdList::reverse_iterator rEnd, uint32_t signalValue) { auto sdiCmd = genCmdCast(*rIterator); - EXPECT_EQ(nullptr, sdiCmd); + while (sdiCmd == nullptr) { + sdiCmd = genCmdCast(*(++rIterator)); + if (rIterator == rEnd) { + break; + } + } + + ASSERT_NE(nullptr, sdiCmd); + + EXPECT_EQ(inOrderSyncVa, sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(signalValue, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); }; { @@ -1831,7 +1845,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 2); } { @@ -1847,7 +1861,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 3); } { @@ -1861,7 +1875,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 4); } { @@ -1877,7 +1891,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 5); } { @@ -1893,7 +1907,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne (cmdStream->getUsed() - offset))); verifyPc(cmdList.begin()); - verifySdi(cmdList.rbegin()); + verifySdi(cmdList.rbegin(), cmdList.rend(), 6); } } @@ -2275,7 +2289,17 @@ HWTEST2_F(InOrderCmdListTests, givenRegularInOrderCmdListWhenProgrammingAppendWa ASSERT_NE(nullptr, pcCmd); auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); + EXPECT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + ASSERT_NE(nullptr, sdiCmd); + + uint64_t syncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + + EXPECT_EQ(syncVa, sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(3u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflowThenHandleItCorrectly, IsAtLeastXeHpCore) { @@ -3171,7 +3195,9 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL auto walkerCmd = genCmdCast(*walkerItor); auto &postSync = walkerCmd->getPostSync(); - EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation()); + EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); + EXPECT_EQ(1u, postSync.getImmediateData()); + EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); auto sdiItor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), sdiItor); @@ -3180,7 +3206,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL offset = cmdStream->getUsed(); regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - EXPECT_EQ(1u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter); { GenCmdList cmdList; @@ -3195,20 +3221,25 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL auto walkerCmd = genCmdCast(*walkerItor); auto &postSync = walkerCmd->getPostSync(); - EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation()); + EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); + EXPECT_EQ(2u, postSync.getImmediateData()); + EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); auto sdiItor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), sdiItor); } regularCmdList->inOrderAllocationOffset = 123; + auto hostAddr = static_cast(regularCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + *hostAddr = 0x1234; regularCmdList->reset(); EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter); EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset); + EXPECT_EQ(0u, *hostAddr); } -HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontUpdateCounterAllocation, IsAtLeastXeHpCore) { +HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenUpdateCounterAllocation, IsAtLeastXeHpCore) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto eventPool = createEvents(1, true); @@ -3224,7 +3255,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL size_t offset = cmdStream->getUsed(); EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter); - EXPECT_EQ(nullptr, regularCmdList->inOrderDependencyCounterAllocation); + EXPECT_NE(nullptr, regularCmdList->inOrderDependencyCounterAllocation); constexpr size_t size = 128 * sizeof(uint32_t); auto data = allocHostMem(size); @@ -3245,8 +3276,8 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset))); - auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); + auto sdiCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(2u, sdiCmds.size()); } offset = copyOnlyCmdStream->getUsed(); @@ -3259,7 +3290,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL (copyOnlyCmdStream->getUsed() - offset))); auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); + EXPECT_NE(cmdList.end(), sdiItor); } context->freeMem(data); @@ -3288,7 +3319,18 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg (cmdStream->getUsed() - offset))); auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); + EXPECT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + + ASSERT_NE(nullptr, sdiCmd); + + auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + + EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } offset = cmdStream->getUsed(); @@ -3301,12 +3343,23 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset))); - auto sdiItor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), sdiItor); - auto copyCmd = genCmdCast(*cmdList.begin()); EXPECT_NE(nullptr, copyCmd); + + auto sdiItor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), sdiItor); + + auto sdiCmd = genCmdCast(*sdiItor); + + ASSERT_NE(nullptr, sdiCmd); + + auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + + EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); + EXPECT_EQ(2u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } alignedFree(alignedPtr);