From 91d28f17ccd7e3637dc0045a484c1b37120f3586 Mon Sep 17 00:00:00 2001 From: Bellekallu Rajkiran Date: Wed, 24 Sep 2025 12:05:44 +0000 Subject: [PATCH] fix: Add RAII wrapper for insert sw tag Add wrapper to not miss capturing end tags for early returns. Signed-off-by: Bellekallu Rajkiran --- level_zero/core/source/cmdlist/cmdlist.h | 1 + level_zero/core/source/cmdlist/cmdlist_hw.h | 4 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 130 ++++-------------- .../core/source/gen12lp/cmdlist_gen12lp.cpp | 2 +- level_zero/core/source/helpers/sw_tag_scope.h | 40 ++++++ 5 files changed, 72 insertions(+), 105 deletions(-) create mode 100644 level_zero/core/source/helpers/sw_tag_scope.h diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index d7c189a268..d524655881 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -644,6 +644,7 @@ struct CommandList : _ze_command_list_handle_t { bool isWalkerWithProfilingEnqueued = false; bool shouldRegisterEnqueuedWalkerWithProfiling = false; bool inOrderWaitsDisabled = false; + bool swTagsEnabled = false; }; using CommandListAllocatorFn = CommandList *(*)(uint32_t); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 29f6d0cecd..af829c874c 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -29,6 +29,9 @@ enum class Builtin : uint32_t; struct Event; struct EventPool; +template +class SWTagScope; + #pragma pack(1) struct EventData { uint64_t address; @@ -446,6 +449,7 @@ struct CommandListCoreFamily : public CommandListImp { virtual uint32_t getIohSizeForPrefetch(const Kernel &kernel, uint32_t reserveExtraSpace) const; virtual void ensureCmdBufferSpaceForPrefetch() {} bool transferDirectionRequiresBcsSplit(NEO::TransferDirection direction) const; + std::optional> emplaceSWTagScope(const char *callName); template void addResidency(const Container &allocs) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 4b5f3213f0..915d29bf82 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -57,6 +57,7 @@ #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" +#include "level_zero/core/source/helpers/sw_tag_scope.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/kernel/kernel.h" #include "level_zero/core/source/kernel/kernel_imp.h" @@ -100,24 +101,6 @@ void CommandListCoreFamily::postInitComputeSetup() { currentBindingTablePoolBaseAddress = NEO::StreamProperty64::initValue; } -template - requires( - std::is_same_v || - std::is_same_v) -inline void insertSWTagWithCallId( - NEO::Device &device, - NEO::LinearStream &cmdStream, - const char *callName, - uint32_t &callId) { - if (NEO::debugManager.flags.EnableSWTags.get()) { - if constexpr (std::is_same_v) { - callId = device.getRootDeviceEnvironment().tagsManager->incrementAndGetCurrentCallCount(); - } - device.getRootDeviceEnvironment().tagsManager->insertTag( - cmdStream, device, callName, callId); - } -} - template ze_result_t CommandListCoreFamily::reset() { this->storeFillPatternResourcesForReuse(); @@ -293,6 +276,7 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO this->defaultPipelinedThreadArbitrationPolicy = NEO::debugManager.flags.OverrideThreadArbitrationPolicy.get(); } this->statelessBuiltinsEnabled = compilerProductHelper.isForceToStatelessRequired(); + this->swTagsEnabled = NEO::debugManager.flags.EnableSWTags.get(); this->commandContainer.doubleSbaWaRef() = this->doubleSbaWa; this->commandContainer.l1CachePolicyDataRef() = &this->l1CachePolicyData; @@ -415,6 +399,18 @@ void CommandListCoreFamily::prefetchKernelMemory(NEO::LinearStrea } } +template +std::optional::GfxFamily>> +CommandListCoreFamily::emplaceSWTagScope(const char *callName) { + if (this->swTagsEnabled) { + return std::make_optional>( + *device->getNEODevice(), + *commandContainer.getCommandStream(), + callName); + } + return std::nullopt; +} + template uint32_t CommandListCoreFamily::getIohSizeForPrefetch(const Kernel &kernel, uint32_t reserveExtraSpace) const { return kernel.getIndirectSize() + reserveExtraSpace; @@ -428,13 +424,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_h ze_event_handle_t *phWaitEvents, CmdListKernelLaunchParams &launchParams) { - NEO::Device *neoDevice = device->getNEODevice(); - uint32_t callId = 0; - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - launchParams.isCooperative ? "zeCommandListAppendLaunchCooperativeKernel" : "zeCommandListAppendLaunchKernel", - callId); + auto swTagScope = emplaceSWTagScope(launchParams.isCooperative ? "zeCommandListAppendLaunchCooperativeKernel" : "zeCommandListAppendLaunchKernel"); auto kernel = Kernel::fromHandle(kernelHandle); @@ -484,11 +474,6 @@ ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_h } addToMappedEventList(event); - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - launchParams.isCooperative ? "zeCommandListAppendLaunchCooperativeKernel" : "zeCommandListAppendLaunchKernel", - callId); return res; } @@ -660,12 +645,7 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand } NEO::Device *neoDevice = device->getNEODevice(); - uint32_t callId = 0; - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendEventReset", - callId); + auto swTagScope = emplaceSWTagScope("zeCommandListAppendEventReset"); if (this->isInOrderExecutionEnabled()) { handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false), false); @@ -697,12 +677,6 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand appendSynchronizedDispatchCleanupSection(); - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendEventReset", - callId); - return ZE_RESULT_SUCCESS; } @@ -1827,15 +1801,9 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) { - NEO::Device *neoDevice = device->getNEODevice(); bool sharedSystemEnabled = isSharedSystemEnabled(); - uint32_t callId = 0; - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendMemoryCopy", - callId); + auto swTagScope = emplaceSWTagScope("zeCommandListAppendMemoryCopy"); auto allocSize = NEO::getIfValid(memoryCopyParams.bcsSplitTotalDstSize, size); auto dstAllocationStruct = getAlignedAllocationData(this->device, sharedSystemEnabled, NEO::getIfValid(memoryCopyParams.bcsSplitBaseDstPtr, dstptr), allocSize, false, isCopyOffloadEnabled()); @@ -2038,12 +2006,6 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, } appendSynchronizedDispatchCleanupSection(); - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendMemoryCopy", - callId); - return ret; } @@ -2074,15 +2036,9 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) { - NEO::Device *neoDevice = device->getNEODevice(); bool sharedSystemEnabled = isSharedSystemEnabled(); - uint32_t callId = 0; - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendMemoryCopyRegion", - callId); + auto swTagScope = emplaceSWTagScope("zeCommandListAppendMemoryCopyRegion"); auto dstSize = this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch); auto srcSize = this->getTotalSizeForCopyRegion(srcRegion, srcPitch, srcSlicePitch); @@ -2168,12 +2124,6 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled); } - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendMemoryCopyRegion", - callId); - return ZE_RESULT_SUCCESS; } @@ -2441,12 +2391,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, NEO::Device *neoDevice = device->getNEODevice(); bool sharedSystemEnabled = isSharedSystemEnabled(); - uint32_t callId = 0; - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendMemoryFill", - callId); + auto swTagScope = emplaceSWTagScope("zeCommandListAppendMemoryFill"); CmdListKernelLaunchParams launchParams = {}; @@ -2599,10 +2544,10 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, if (patternAllocationSize > MemoryConstants::cacheLineSize) { patternGfxAlloc = device->obtainReusableAllocation(patternAllocationSize, NEO::AllocationType::fillPattern); if (patternGfxAlloc == nullptr) { - NEO::AllocationProperties allocationProperties{device->getNEODevice()->getRootDeviceIndex(), + NEO::AllocationProperties allocationProperties{neoDevice->getRootDeviceIndex(), patternAllocationSize, NEO::AllocationType::fillPattern, - device->getNEODevice()->getDeviceBitfield()}; + neoDevice->getDeviceBitfield()}; allocationProperties.alignment = MemoryConstants::pageSize; patternGfxAlloc = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); } @@ -2710,12 +2655,6 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendSynchronizedDispatchCleanupSection(); - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendMemoryFill", - callId); - return res; } @@ -2756,7 +2695,6 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, cons return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - auto neoDevice = device->getNEODevice(); if (isCopyOnlySignaling) { appendEventForProfiling(signalEvent, nullptr, true, false, false, true); } @@ -2787,7 +2725,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, cons commandContainer.addToResidencyContainer(gpuAllocation); blitProperties = NEO::BlitProperties::constructPropertiesForMemoryFill(gpuAllocation, 0, size, patternToCommand, patternSize, offset); - size_t nBlits = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, patternSize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + size_t nBlits = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, patternSize, neoDevice->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); useAdditionalTimestamp = nBlits > 1; } else if (sharedSystemEnabled == true) { if (NEO::debugManager.flags.EmitMemAdvisePriorToCopyForNonUsm.get() == 1) { @@ -3053,13 +2991,8 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han } commandContainer.addToResidencyContainer(event->getAllocation(this->device)); - NEO::Device *neoDevice = device->getNEODevice(); - uint32_t callId = 0; - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendSignalEvent", - callId); + + auto swTagScope = emplaceSWTagScope("zeCommandListAppendSignalEvent"); event->setPacketsInUse(this->partitionCount); bool appendPipeControlWithPostSync = (!isCopyOnly(false)) && (event->isSignalScope() || event->isEventTimestampFlagSet()); @@ -3077,12 +3010,6 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han } handleInOrderDependencyCounter(event, false, false); - insertSWTagWithCallId( - *neoDevice, - *commandContainer.getCommandStream(), - "zeCommandListAppendSignalEvent", - callId); - return ZE_RESULT_SUCCESS; } @@ -3232,10 +3159,8 @@ bool CommandListCoreFamily::canSkipInOrderEventWait(Event &event, template ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) { - NEO::Device *neoDevice = device->getNEODevice(); - uint32_t callId = 0; - insertSWTagWithCallId( - *neoDevice, *commandContainer.getCommandStream(), "zeCommandListAppendWaitOnEvents", callId); + + auto swTagScope = emplaceSWTagScope("zeCommandListAppendWaitOnEvents"); const bool dualStreamCopyOffload = isDualStreamCopyOffloadOperation(copyOffloadOperation); @@ -3315,9 +3240,6 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu handleInOrderDependencyCounter(nullptr, false, false); } - insertSWTagWithCallId( - *neoDevice, *commandContainer.getCommandStream(), "zeCommandListAppendWaitOnEvents", callId); - return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/gen12lp/cmdlist_gen12lp.cpp b/level_zero/core/source/gen12lp/cmdlist_gen12lp.cpp index 0ab60112cf..ca114d4b77 100644 --- a/level_zero/core/source/gen12lp/cmdlist_gen12lp.cpp +++ b/level_zero/core/source/gen12lp/cmdlist_gen12lp.cpp @@ -191,7 +191,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } - if (NEO::debugManager.flags.EnableSWTags.get()) { + if (this->swTagsEnabled) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, diff --git a/level_zero/core/source/helpers/sw_tag_scope.h b/level_zero/core/source/helpers/sw_tag_scope.h new file mode 100644 index 0000000000..7bad4ee2e1 --- /dev/null +++ b/level_zero/core/source/helpers/sw_tag_scope.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/command_stream/linear_stream.h" +#include "shared/source/device/device.h" +#include "shared/source/helpers/non_copyable_or_moveable.h" +#include "shared/source/utilities/software_tags_manager.h" + +namespace L0 { +template +class SWTagScope : public NEO::NonCopyableAndNonMovableClass { + public: + SWTagScope() = delete; + + SWTagScope(NEO::Device &device, NEO::LinearStream &cmdStream, const char *callName) + : device(device), cmdStream(cmdStream), callName(callName) { + tagsManager = device.getRootDeviceEnvironment().tagsManager.get(); + callId = tagsManager->incrementAndGetCurrentCallCount(); + tagsManager->insertTag(cmdStream, device, callName, callId); + } + + ~SWTagScope() { + tagsManager->insertTag(cmdStream, device, callName, callId); + } + + private: + NEO::Device &device; + NEO::LinearStream &cmdStream; + const char *callName = nullptr; + NEO::SWTagsManager *tagsManager = nullptr; + uint32_t callId = 0; +}; + +} // namespace L0 \ No newline at end of file