From 6011eb147fb98fa2a3b2c7afe55bb7c20cb1d584 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Fri, 23 Feb 2024 10:25:06 +0000 Subject: [PATCH] refactor: add skip event residency flag to compacted event Related-To: NEO-10064 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdlist/cmdlist_hw.h | 8 +-- level_zero/core/source/cmdlist/cmdlist_hw.inl | 50 ++++++++++--------- .../cmdlist/cmdlist_hw_skl_to_tgllp.inl | 6 +-- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 10 ++-- level_zero/core/source/device/bcs_split.h | 6 +-- .../sources/cmdlist/test_cmdlist_1.cpp | 2 +- .../test_cmdlist_append_signal_event.cpp | 6 +-- .../cmdlist/test_cmdlist_xehp_and_later.cpp | 36 ++++++++++++- .../xe_hpc_core/test_cmdlist_xe_hpc_core.cpp | 2 +- 9 files changed, 81 insertions(+), 45 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 48085d4ecb..9594e699b0 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -182,7 +182,7 @@ struct CommandListCoreFamily : public CommandListImp { uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override; void appendMultiPartitionPrologue(uint32_t partitionDataSize) override; void appendMultiPartitionEpilogue() override; - void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent); + void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency); ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency); ze_result_t reserveSpace(size_t size, void **ptr) override; @@ -289,7 +289,7 @@ struct CommandListCoreFamily : public CommandListImp { void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition); void appendEventForProfiling(Event *event, bool beforeWalker, bool skipBarrierForEndProfiling); void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker); - void appendSignalEventPostWalker(Event *event, bool skipBarrierForEndProfiling); + void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency); virtual void programStateBaseAddress(NEO::CommandContainer &container, bool useSbaProperties); void appendComputeBarrierCommand(); NEO::PipeControlArgs createBarrierFlags(); @@ -320,9 +320,9 @@ struct CommandListCoreFamily : public CommandListImp { CmdListEventOperation estimateEventPostSync(Event *event, uint32_t operations); void dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition); void dispatchPostSyncCompute(uint64_t gpuAddress, uint32_t value, bool workloadPartition); - void dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, uint32_t value, bool useLastPipeControl, bool signalScope, bool skipPartitionOffsetProgramming); + void dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, void **syncCmdBuffer, uint32_t value, bool useLastPipeControl, bool signalScope, bool skipPartitionOffsetProgramming); void dispatchEventRemainingPacketsPostSyncOperation(Event *event); - void dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl, bool skipPartitionOffsetProgramming); + void dispatchEventPostSyncOperation(Event *event, void **syncCmdBuffer, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl, bool skipPartitionOffsetProgramming); bool isKernelUncachedMocsRequired(bool kernelState) { this->containsStatelessUncachedResource |= kernelState; if (this->stateBaseAddressTracking) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 8bcaf8cfe0..000f1a0400 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -465,7 +465,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer, nullptr, launchParams); addToMappedEventList(event); - appendSignalEventPostWalker(event, false); + appendSignalEventPostWalker(event, nullptr, false, false); handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event)); @@ -516,7 +516,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd } } addToMappedEventList(event); - appendSignalEventPostWalker(event, false); + appendSignalEventPostWalker(event, nullptr, false, false); return ret; } @@ -554,7 +554,7 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand bool useMaxPackets = event->isEventTimestampFlagSet() || (event->getPacketsInUse() < this->partitionCount); bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet()); - dispatchEventPostSyncOperation(event, Event::STATE_CLEARED, false, useMaxPackets, appendPipeControlWithPostSync, false); + dispatchEventPostSyncOperation(event, nullptr, Event::STATE_CLEARED, false, useMaxPackets, appendPipeControlWithPostSync, false); if (!isCopyOnly()) { if (this->partitionCount > 1) { @@ -602,7 +602,7 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint appendEventForProfiling(signalEvent, true, false); applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges); - appendSignalEventPostWalker(signalEvent, false); + appendSignalEventPostWalker(signalEvent, nullptr, false, false); addToMappedEventList(signalEvent); if (this->isInOrderExecutionEnabled()) { @@ -1272,7 +1272,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(Ali } makeResidentDummyAllocation(); - appendSignalEventPostWalker(signalEvent, false); + appendSignalEventPostWalker(signalEvent, nullptr, false, false); return ZE_RESULT_SUCCESS; } @@ -1301,7 +1301,7 @@ ze_result_t CommandListCoreFamily::appendCopyImageBlit(NEO::Graph NEO::BlitCommandsHelper::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), dummyBlitWa); makeResidentDummyAllocation(); - appendSignalEventPostWalker(signalEvent, false); + appendSignalEventPostWalker(signalEvent, nullptr, false, false); return ZE_RESULT_SUCCESS; } @@ -1459,7 +1459,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, launchParams.pipeControlSignalling = (signalEvent && singlePipeControlPacket) || dstAllocationStruct.needsFlush; - appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket); + appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false); if (isCopyOnly()) { ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, @@ -1524,7 +1524,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, } } - appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); + appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false); addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); addToMappedEventList(signalEvent); @@ -1922,7 +1922,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0); bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush); - appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket); + appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false); if (fillArguments.leftRemainingBytes > 0) { launchParams.numKernelsInSplitLaunch++; @@ -2067,7 +2067,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } addToMappedEventList(signalEvent); - appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); + appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false); addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent); bool nonWalkerInOrderCmdChaining = false; @@ -2144,7 +2144,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, this->dummyBlitWa); makeResidentDummyAllocation(); - appendSignalEventPostWalker(signalEvent, false); + appendSignalEventPostWalker(signalEvent, nullptr, false, false); if (isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(signalEvent); @@ -2155,7 +2155,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, } template -void CommandListCoreFamily::appendSignalEventPostWalker(Event *event, bool skipBarrierForEndProfiling) { +void CommandListCoreFamily::appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) { if (event == nullptr || !event->getPoolAllocation(this->device)) { return; } @@ -2163,10 +2163,12 @@ void CommandListCoreFamily::appendSignalEventPostWalker(Event *ev appendEventForProfiling(event, false, skipBarrierForEndProfiling); } else { event->resetKernelCountAndPacketUsedCount(); - commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device)); + if (!skipAddingEventToResidency) { + commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device)); + } event->setPacketsInUse(this->partitionCount); - dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, !isCopyOnly(), false); + dispatchEventPostSyncOperation(event, syncCmdBuffer, Event::STATE_SIGNALED, false, false, !isCopyOnly(), false); } } @@ -2183,7 +2185,7 @@ void CommandListCoreFamily::appendEventForProfilingCopyCommand(Ev NEO::MiFlushArgs args{this->dummyBlitWa}; NEO::EncodeMiFlushDW::programWithWa(*commandContainer.getCommandStream(), 0, 0, args); makeResidentDummyAllocation(); - dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, true, false, false, false); + dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, false); } appendWriteKernelTimestamp(event, beforeWalker, false, false); } @@ -2376,7 +2378,7 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han event->setPacketsInUse(this->partitionCount); bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet()); - dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false); + dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false); if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(event); @@ -2643,7 +2645,7 @@ void CommandListCoreFamily::appendEventForProfiling(Event *event, bool workloadPartition = setupTimestampEventForMultiTile(event); appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition); } else { - dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, true, false, false, true); + dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, true); const auto &rootDeviceEnvironment = this->device->getNEODevice()->getRootDeviceEnvironment(); @@ -2713,7 +2715,7 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( args); } - appendSignalEventPostWalker(signalEvent, false); + appendSignalEventPostWalker(signalEvent, nullptr, false, false); if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(signalEvent); @@ -3260,7 +3262,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ } addToMappedEventList(signalEvent); - appendSignalEventPostWalker(signalEvent, this->isInOrderExecutionEnabled()); + appendSignalEventPostWalker(signalEvent, nullptr, this->isInOrderExecutionEnabled(), false); if (isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(signalEvent); @@ -3425,7 +3427,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, NEO::MemorySynchronizationCommands::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, rootDeviceEnvironment); } - appendSignalEventPostWalker(signalEvent, false); + appendSignalEventPostWalker(signalEvent, nullptr, false, false); if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(signalEvent); @@ -3553,7 +3555,7 @@ void CommandListCoreFamily::dispatchPostSyncCompute(uint64_t gpuA } template -void CommandListCoreFamily::dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, uint32_t value, bool useLastPipeControl, bool signalScope, bool skipPartitionOffsetProgramming) { +void CommandListCoreFamily::dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, void **syncCmdBuffer, uint32_t value, bool useLastPipeControl, bool signalScope, bool skipPartitionOffsetProgramming) { decltype(&CommandListCoreFamily::dispatchPostSyncCompute) dispatchFunction = &CommandListCoreFamily::dispatchPostSyncCompute; if (isCopyOnly()) { dispatchFunction = &CommandListCoreFamily::dispatchPostSyncCopy; @@ -3603,7 +3605,7 @@ void CommandListCoreFamily::dispatchPostSyncCommands(const CmdLis } template -void CommandListCoreFamily::dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl, bool skipPartitionOffsetProgramming) { +void CommandListCoreFamily::dispatchEventPostSyncOperation(Event *event, void **syncCmdBuffer, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl, bool skipPartitionOffsetProgramming) { uint32_t packets = event->getPacketsInUse(); if (this->signalAllEventPackets || useMax) { packets = event->getMaxPacketsCount(); @@ -3616,7 +3618,7 @@ void CommandListCoreFamily::dispatchEventPostSyncOperation(Event eventPostSync.operationCount--; } - dispatchPostSyncCommands(eventPostSync, gpuAddress, value, useLastPipeControl, event->isSignalScope(), skipPartitionOffsetProgramming); + dispatchPostSyncCommands(eventPostSync, gpuAddress, syncCmdBuffer, value, useLastPipeControl, event->isSignalScope(), skipPartitionOffsetProgramming); } template @@ -3629,7 +3631,7 @@ void CommandListCoreFamily::dispatchEventRemainingPacketsPostSync eventAddress += event->getSinglePacketSize() * event->getPacketsInUse(); constexpr bool appendLastPipeControl = false; - dispatchPostSyncCommands(remainingPacketsOperation, eventAddress, Event::STATE_SIGNALED, appendLastPipeControl, event->isSignalScope(), false); + dispatchPostSyncCommands(remainingPacketsOperation, eventAddress, nullptr, Event::STATE_SIGNALED, appendLastPipeControl, event->isSignalScope(), false); } } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index 9e7bf94000..6c589d4075 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -247,7 +247,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K *reinterpret_cast(surfaceStateSpace) = surfaceState; } - appendSignalEventPostWalker(event, false); + appendSignalEventPostWalker(event, nullptr, false, false); commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation()); auto &residencyContainer = kernel->getResidencyContainer(); @@ -329,11 +329,11 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(Kernel } template -void CommandListCoreFamily::appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent) { +void CommandListCoreFamily::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) { if (beforeWalker) { appendEventForProfiling(event, true, false); } else { - appendSignalEventPostWalker(event, false); + appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency); } } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 32f161ab43..1ae10afb7e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -286,7 +286,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K std::list additionalCommands; if (compactEvent) { - appendEventForProfilingAllWalkers(compactEvent, true, true); + appendEventForProfilingAllWalkers(compactEvent, nullptr, true, true, false); } bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation && !launchParams.pipeControlSignalling); @@ -297,7 +297,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (inOrderExecSignalRequired) { if (inOrderNonWalkerSignalling) { - dispatchEventPostSyncOperation(eventForInOrderExec, Event::STATE_CLEARED, false, false, false, false); + dispatchEventPostSyncOperation(eventForInOrderExec, nullptr, Event::STATE_CLEARED, false, false, false, false); } else { inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue(); inOrderExecInfo = this->inOrderExecInfo.get(); @@ -348,7 +348,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } if (compactEvent) { - appendEventForProfilingAllWalkers(compactEvent, false, true); + appendEventForProfilingAllWalkers(compactEvent, nullptr, false, true, launchParams.omitAddingEventResidency); } else if (event) { event->setPacketsInUse(partitionCount); if (l3FlushEnable) { @@ -506,12 +506,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(Kernel } template -void CommandListCoreFamily::appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent) { +void CommandListCoreFamily::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) { if (isCopyOnly() || singlePacketEvent) { if (beforeWalker) { appendEventForProfiling(event, true, false); } else { - appendSignalEventPostWalker(event, false); + appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency); } } else { if (event) { diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index f8809369b7..18e985c276 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -109,7 +109,7 @@ struct BcsSplit { cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false, true); if (signalEvent && i == 0u) { - cmdList->appendEventForProfilingAllWalkers(signalEvent, true, true); + cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, true, true, false); } auto localSize = totalSize / engineCount; @@ -137,9 +137,9 @@ struct BcsSplit { cmdList->addEventsToCmdList(static_cast(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false, true); if (signalEvent) { - cmdList->appendEventForProfilingAllWalkers(signalEvent, false, true); + cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, false, true, false); } - cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], false, true); + cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], nullptr, false, true, false); if (cmdList->isInOrderExecutionEnabled()) { cmdList->appendSignalInOrderDependencyCounter(signalEvent); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 68eb335c6e..d1a0ce0eed 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -2945,7 +2945,7 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing auto usedBefore = commandList->getCmdContainer().getCommandStream()->getUsed(); - commandList->appendSignalEventPostWalker(nullptr, false); + commandList->appendSignalEventPostWalker(nullptr, nullptr, false, false); EXPECT_EQ(commandList->getCmdContainer().getCommandStream()->getUsed(), usedBefore); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index 304ac30244..03f003b99b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -412,7 +412,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; - commandList->appendSignalEventPostWalker(event.get(), false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); EXPECT_EQ(packets, event->getPacketsInUse()); auto gpuAddress = event->getCompletionFieldGpuAddress(device); @@ -472,7 +472,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; - commandList->appendSignalEventPostWalker(event.get(), false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); EXPECT_EQ(packets, event->getPacketsInUse()); auto gpuAddress = event->getCompletionFieldGpuAddress(device); @@ -645,7 +645,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->setEventTimestampFlag(false); - commandList->appendSignalEventPostWalker(event.get(), false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); size_t usedAfterSize = cmdStream->getUsed(); GenCmdList cmdList; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index d26c2e3096..33afe5e44f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -950,7 +950,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { commandList->setupTimestampEventForMultiTile(event.get()); size_t sizeBefore = cmdStream->getUsed(); - commandList->appendSignalEventPostWalker(event.get(), false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); size_t sizeAfter = cmdStream->getUsed(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -2397,5 +2397,39 @@ HWTEST2_F(CommandListCreate, givenPlatformSupportsHdcUntypedCacheFlushWhenAppend EXPECT_TRUE(timestampPostSyncFound); } +HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThenEventAllocationNotAddedToResidency, IsAtLeastXeHpCore) { + auto commandList = std::make_unique>>(); + auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = 0; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = 0; + + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + ASSERT_NE(nullptr, event.get()); + + auto &residencyContainer = commandList->getCmdContainer().getResidencyContainer(); + auto eventAllocation = event->getPoolAllocation(device); + + bool skipAdd = true; + commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, false, true, skipAdd); + + auto eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation); + EXPECT_EQ(residencyContainer.end(), eventAllocIt); + + skipAdd = false; + + commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, false, true, skipAdd); + eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation); + EXPECT_NE(residencyContainer.end(), eventAllocIt); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index 27648d346a..c55ba91e92 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -686,7 +686,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithRegularEventA auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendSignalEventPostWalker(event.get(), false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(