// Patch summary and review notes. These lines sit ahead of the first `diff --git`
// header, i.e. outside every hunk; the patch text below is unchanged.
//
// What the patch does:
//   * Level Zero: adds CommandList::registerWalkerWithProfilingEnqueued(Event *event)
//     (declared in cmdlist.h, defined in cmdlist.cpp). It sets
//     isWalkerWithProfilingEnqueued = true only when
//     shouldRegisterEnqueuedWalkerWithProfiling is set, `event` is non-null and
//     event->isEventTimestampFlagSet() is true. The five open-coded copies of that
//     check in cmdlist_hw.inl (appendLaunchKernel, appendLaunchKernelIndirect,
//     the multiple-kernels-indirect append, appendMemoryCopyKernelWithGA,
//     appendMemoryFill) are replaced by a call to the helper.
//   * OpenCL: adds CommandQueue::registerWalkerWithProfilingEnqueued(Event *event)
//     (command_queue.cpp). It sets the flag when
//     shouldRegisterEnqueuedWalkerWithProfiling, isProfilingEnabled() and a non-null
//     `event` all hold. The header drops getShouldRegisterEnqueuedWalkerWithProfiling()
//     and the parameterless registerWalkerWithProfilingEnqueued(); dispatchWalker now
//     passes walkerArgs.event to the new helper instead of testing the conditions itself.
//   * Tests: the Mock specialization in mock_cmdlist.h re-exports the helper and the two
//     flags via using-declarations, and a new unit test calls the helper with nullptr
//     for both values of shouldRegisterEnqueuedWalkerWithProfiling and expects
//     isWalkerWithProfilingEnqueued to be false each time.
//
// NOTE(review): the new test is named ...ThenReturnFalse, but the helper returns void;
//   what is asserted is that the flag is not set. Consider a name that says so.
// NOTE(review): in appendMemoryCopyKernelWithGA the remaining `if (signalEvent)` wraps
//   nothing but the helper call, and the helper null-checks its argument itself, so the
//   guard looks redundant - confirm and drop it in a follow-up.
// NOTE(review): the OpenCL getter and the zero-argument overload are removed outright;
//   only dispatchWalker is migrated in this patch. Verify there are no other callers.
// NOTE(review): only the null-event cases are exercised by the added test; a positive
//   case (flag enabled + timestamp event) is not part of this patch - TODO confirm it is
//   covered elsewhere.
// NOTE(review): this copy of the patch has no line breaks inside hunks and template
//   argument lists appear to be missing (e.g. `static_cast(queue)`, `std::map hostPtrMap`),
//   presumably an extraction artifact; it will not apply as-is.
diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index cbdc37dbde..38f36c3c98 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -233,4 +233,11 @@ NEO::CommandStreamReceiver *CommandList::getCsr(bool copyOffload) const { return static_cast(queue)->getCsr(); } + +void CommandList::registerWalkerWithProfilingEnqueued(Event *event) { + if (this->shouldRegisterEnqueuedWalkerWithProfiling && event && event->isEventTimestampFlagSet()) { + this->isWalkerWithProfilingEnqueued = true; + } +} + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index f69ab59388..91dc0f384a 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -480,6 +480,7 @@ struct CommandList : _ze_command_list_handle_t { bool isDualStreamCopyOffloadOperation(bool offloadOperation) const { return (getCopyOffloadModeForOperation(offloadOperation) == CopyOffloadModes::dualStream); } bool isNonDualStreamCopyOffloadOperation(bool offloadOperation) const { return offloadOperation && !isDualStreamCopyOffloadOperation(offloadOperation); } + void registerWalkerWithProfilingEnqueued(Event *event); std::map hostPtrMap; NEO::PrivateAllocsToReuseContainer ownedPrivateAllocations; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 8ff43d894d..2503a144ff 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -451,9 +451,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_h event->resetKernelCountAndPacketUsedCount(); } - if (event->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) { - this->isWalkerWithProfilingEnqueued = true; - } + registerWalkerWithProfilingEnqueued(event); } if (!handleCounterBasedEventOperations(event, 
launchParams.omitAddingEventResidency)) { @@ -507,10 +505,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex()); } launchParams.isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); - - if (event->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) { - this->isWalkerWithProfilingEnqueued = true; - } + registerWalkerWithProfilingEnqueued(event); } if (!handleCounterBasedEventOperations(event, false)) { @@ -557,10 +552,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd if (hEvent) { event = Event::fromHandle(hEvent); launchParams.isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); - - if (this->shouldRegisterEnqueuedWalkerWithProfiling && event->isEventTimestampFlagSet()) { - this->isWalkerWithProfilingEnqueued = true; - } + registerWalkerWithProfilingEnqueued(event); } if (!handleCounterBasedEventOperations(event, false)) { @@ -1491,9 +1483,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA(v auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); if (signalEvent) { - if (signalEvent->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) { - this->isWalkerWithProfilingEnqueued = true; - } + registerWalkerWithProfilingEnqueued(signalEvent); } Kernel *builtinKernel = nullptr; @@ -2371,9 +2361,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, signalEvent = Event::fromHandle(hSignalEvent); launchParams.isHostSignalScopeEvent = signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); dcFlush = getDcFlushRequired(signalEvent->isSignalScope()); - if (signalEvent->isEventTimestampFlagSet() && this->shouldRegisterEnqueuedWalkerWithProfiling) { - this->isWalkerWithProfilingEnqueued = true; - } + registerWalkerWithProfilingEnqueued(signalEvent); } if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) { diff --git 
a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 7bc8a35d62..ee66639bc3 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -348,6 +348,9 @@ using CommandList = WhiteBox<::L0::CommandListImp>; template <> struct Mock : public CommandList { using BaseClass = CommandList; + using BaseClass::isWalkerWithProfilingEnqueued; + using BaseClass::registerWalkerWithProfilingEnqueued; + using BaseClass::shouldRegisterEnqueuedWalkerWithProfiling; Mock(Device *device = nullptr); ~Mock() override; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 69a8990ca2..1da4083a4b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -1106,6 +1106,22 @@ TEST(CommandList, WhenConsumeTextureCacheFlushPendingThenReturnsCurrentValueAndC } } +TEST(CommandList, givenNullEventWhenRegisterWalkerWithProfilingEnqueuedThenReturnFalse) { + MockCommandList cmdlist; + + { + cmdlist.shouldRegisterEnqueuedWalkerWithProfiling = false; + cmdlist.registerWalkerWithProfilingEnqueued(nullptr); + EXPECT_FALSE(cmdlist.isWalkerWithProfilingEnqueued); + } + + { + cmdlist.shouldRegisterEnqueuedWalkerWithProfiling = true; + cmdlist.registerWalkerWithProfilingEnqueued(nullptr); + EXPECT_FALSE(cmdlist.isWalkerWithProfilingEnqueued); + } +} + class MockCommandQueueIndirectAccess : public Mock { public: MockCommandQueueIndirectAccess(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : Mock(device, csr, desc) {} diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index bd3d474425..5265beca65 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ 
b/opencl/source/command_queue/command_queue.cpp @@ -1574,4 +1574,10 @@ size_t CommandQueue::calculateHostPtrSizeForImage(const size_t *region, size_t r return Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, image->getImageDesc().image_type); } +void CommandQueue::registerWalkerWithProfilingEnqueued(Event *event) { + if (this->shouldRegisterEnqueuedWalkerWithProfiling && isProfilingEnabled() && event) { + this->isWalkerWithProfilingEnqueued = true; + } +} + } // namespace NEO diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index b827c3f780..ca3c5ff24a 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -415,14 +415,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { return this->isCacheFlushOnNextBcsWriteRequired && this->isImageWriteOperation(cmdType); } - bool getShouldRegisterEnqueuedWalkerWithProfiling() { - return this->shouldRegisterEnqueuedWalkerWithProfiling; - } - - void registerWalkerWithProfilingEnqueued() { - this->isWalkerWithProfilingEnqueued = true; - } - + void registerWalkerWithProfilingEnqueued(Event *event); bool getAndClearIsWalkerWithProfilingEnqueued() { bool retVal = this->isWalkerWithProfilingEnqueued; this->isWalkerWithProfilingEnqueued = false; diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index 4aff39c990..a0c6171a14 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -155,9 +155,7 @@ void HardwareInterface::dispatchWalker( dispatchInfo.dispatchEpilogueCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getRootDeviceEnvironment()); } - if (commandQueue.getShouldRegisterEnqueuedWalkerWithProfiling() && commandQueue.isProfilingEnabled() && walkerArgs.event) { - 
commandQueue.registerWalkerWithProfilingEnqueued(); - } + commandQueue.registerWalkerWithProfilingEnqueued(walkerArgs.event); if (PauseOnGpuProperties::gpuScratchRegWriteAllowed(debugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) { uint32_t registerOffset = debugManager.flags.GpuScratchRegWriteRegisterOffset.get();