From aea5f435db673669291ed4abe2625d802a65bb55 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Thu, 22 Jun 2023 16:38:44 +0000 Subject: [PATCH] feature: unregister CSR client on Event host synchronize Related-To: NEO-7458 Signed-off-by: Dunajski, Bartosz --- .../source/cmdlist/cmdlist_hw_immediate.inl | 7 +- level_zero/core/source/cmdqueue/cmdqueue.cpp | 11 +- level_zero/core/source/cmdqueue/cmdqueue.h | 3 +- .../core/source/cmdqueue/cmdqueue_imp.h | 1 + level_zero/core/source/event/event.cpp | 12 ++ level_zero/core/source/event/event.h | 5 + level_zero/core/source/event/event_impl.inl | 2 + .../sources/cmdlist/test_cmdlist_1.cpp | 108 ++++++++++++++++++ .../debug_settings/debug_variables_base.inl | 1 + shared/test/common/test_files/igdrcl.config | 1 + 10 files changed, 145 insertions(+), 6 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 07f90f7e1e..f9822470ff 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -727,6 +727,8 @@ ze_result_t CommandListCoreFamilyImmediate::hostSynchronize(uint6 template ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) { + auto signalEvent = Event::fromHandle(hSignalEvent); + if (inputRet == ZE_RESULT_SUCCESS) { if (isInOrderExecutionEnabled()) { inOrderDependencyCounter++; @@ -735,14 +737,15 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res } if (this->isFlushTaskSubmissionEnabled) { + if (signalEvent && (NEO::DebugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) { + signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate); + } inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies); } else { inputRet = 
executeCommandListImmediate(performMigration); } } - auto signalEvent = Event::fromHandle(hSignalEvent); - if (signalEvent) { signalEvent->setCsr(this->csr); diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index 75e6a79559..7511f0687b 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -53,9 +53,7 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr } ze_result_t CommandQueueImp::destroy() { - if (this->clientId != CommandQueue::clientNotRegistered) { - this->csr->unregisterClient(); - } + unregisterCsrClient(); if (commandStream.getCpuBase() != nullptr) { commandStream.replaceGraphicsAllocation(nullptr); @@ -252,6 +250,13 @@ CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO:: return commandQueue; } +void CommandQueueImp::unregisterCsrClient() { + if (getClientId() != CommandQueue::clientNotRegistered) { + this->csr->unregisterClient(); + setClientId(CommandQueue::clientNotRegistered); + } +} + ze_command_queue_mode_t CommandQueueImp::getSynchronousMode() const { return desc.mode; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue.h b/level_zero/core/source/cmdqueue/cmdqueue.h index 5221f779f0..63515fc4c0 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.h +++ b/level_zero/core/source/cmdqueue/cmdqueue.h @@ -60,8 +60,9 @@ struct CommandQueue : _ze_command_queue_handle_t { bool peekIsCopyOnlyCommandQueue() const { return this->isCopyOnlyCommandQueue; } - uint32_t getClientId() { return this->clientId; } + uint32_t getClientId() const { return this->clientId; } void setClientId(uint32_t value) { this->clientId = value; } + virtual void unregisterCsrClient() = 0; static constexpr uint32_t clientNotRegistered = std::numeric_limits<uint32_t>::max(); diff --git a/level_zero/core/source/cmdqueue/cmdqueue_imp.h b/level_zero/core/source/cmdqueue/cmdqueue_imp.h index 7288e03d34..c2bfebb7f9 100644 --- 
a/level_zero/core/source/cmdqueue/cmdqueue_imp.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_imp.h @@ -91,6 +91,7 @@ struct CommandQueueImp : public CommandQueue { void makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) override; void printKernelsPrintfOutput(bool hangDetected); void checkAssert(); + void unregisterCsrClient() override; protected: MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 2256faafa2..e2910d449e 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -388,6 +388,7 @@ void Event::setIsCompleted() { if (this->isCompleted.load() == STATE_CLEARED) { this->isCompleted = STATE_SIGNALED; } + unsetCmdQueue(true); } void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue) { @@ -397,4 +398,15 @@ void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAl inOrderExecDataAllocation = &inOrderDependenciesAllocation; } +void Event::setLatestUsedCmdQueue(CommandQueue *newCmdQ) { + this->latestUsedCmdQueue = newCmdQ; +} + +void Event::unsetCmdQueue(bool unregisterClient) { + if (latestUsedCmdQueue && unregisterClient) { + latestUsedCmdQueue->unregisterCsrClient(); + } + latestUsedCmdQueue = nullptr; +} + } // namespace L0 diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 2e844ecab8..db299ee15c 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -37,6 +37,7 @@ struct EventPool; struct MetricStreamer; struct ContextImp; struct Context; +struct CommandQueue; struct DriverHandle; struct DriverHandleImp; struct Device; @@ -214,6 +215,7 @@ struct Event : _ze_event_handle_t { bool isInOrderExecEvent() const { return 
inOrderExecEvent; } NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; } uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } + void setLatestUsedCmdQueue(CommandQueue *newCmdQ); void setReferenceTs(NEO::TimeStampData &timestamp) { referenceTs = timestamp; } @@ -222,6 +224,8 @@ struct Event : _ze_event_handle_t { protected: Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {} + void unsetCmdQueue(bool unregisterClient); + uint64_t globalStartTS = 1; uint64_t globalEndTS = 1; uint64_t contextStartTS = 1; @@ -253,6 +257,7 @@ struct Event : _ze_event_handle_t { EventPool *eventPool = nullptr; Kernel *kernelWithPrintf = nullptr; NEO::GraphicsAllocation *inOrderExecDataAllocation = nullptr; + CommandQueue *latestUsedCmdQueue = nullptr; uint32_t maxKernelCount = 0; uint32_t kernelCount = 1u; diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 7b166f85b0..b59c27d449 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -163,6 +163,7 @@ void EventImp::handleSuccessfulHostSynchronization() { } } this->setIsCompleted(); + unsetCmdQueue(true); for (auto &csr : csrs) { csr->getInternalAllocationStorage()->cleanAllocationList(csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION); } @@ -396,6 +397,7 @@ ze_result_t EventImp::reset() { inOrderExecDataAllocation = nullptr; inOrderExecSignalValue = 0; } + unsetCmdQueue(false); this->resetCompletionStatus(); this->resetDeviceCompletionData(false); this->l3FlushAppliedOnKernel.reset(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 97994c2042..bffe52aebe 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -10,6 +10,7 @@ #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/relaxed_ordering_commands_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" @@ -1108,6 +1109,113 @@ HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHp EXPECT_EQ(ultCsr->latestFlushedBatchBuffer.numCsrClients, ultCsr->getNumClients()); } +HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + auto whiteBoxCmdList = static_cast(commandList.get()); + + Mock<::L0::Kernel> kernel; + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + + auto ultCsr = static_cast *>(whiteBoxCmdList->csr); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 3; + + ze_event_desc_t eventDesc = {}; + + ze_event_handle_t event1 = nullptr; + ze_event_handle_t event2 = nullptr; + ze_event_handle_t event3 = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + + ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event1)); + ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event2)); + ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event3)); + + EXPECT_EQ(ultCsr->getNumClients(), 0u); + + { + 
commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event1, 0, nullptr, launchParams, false); + EXPECT_EQ(ultCsr->getNumClients(), 1u); + + Event::fromHandle(event1)->setIsCompleted(); + + zeEventHostSynchronize(event1, std::numeric_limits::max()); + EXPECT_EQ(ultCsr->getNumClients(), 0u); + } + + { + commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event2, 0, nullptr, launchParams, false); + EXPECT_EQ(ultCsr->getNumClients(), 1u); + + *reinterpret_cast(Event::fromHandle(event2)->getHostAddress()) = static_cast(Event::STATE_SIGNALED); + + zeEventHostSynchronize(event2, std::numeric_limits::max()); + EXPECT_EQ(ultCsr->getNumClients(), 0u); + } + + { + commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event3, 0, nullptr, launchParams, false); + EXPECT_EQ(ultCsr->getNumClients(), 1u); + + zeEventHostReset(event3); + + zeEventHostSynchronize(event3, 1); + EXPECT_EQ(ultCsr->getNumClients(), 1u); + } + + zeEventDestroy(event1); + zeEventDestroy(event2); + zeEventDestroy(event3); +} + +HWTEST_F(CommandListCreate, givenDebugFlagSetWhenCallingSynchronizeThenDontUnregister) { + DebugManagerStateRestore restore; + DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + auto whiteBoxCmdList = static_cast(commandList.get()); + + Mock<::L0::Kernel> kernel; + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + + auto ultCsr = static_cast *>(whiteBoxCmdList->csr); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDesc = {}; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, 
&eventPoolDesc, returnValue)); + + ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event)); + + EXPECT_EQ(ultCsr->getNumClients(), 0u); + commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event, 0, nullptr, launchParams, false); + EXPECT_EQ(ultCsr->getNumClients(), 1u); + + Event::fromHandle(event)->setIsCompleted(); + + zeEventHostSynchronize(event, std::numeric_limits::max()); + + EXPECT_EQ(ultCsr->getNumClients(), 1u); + + zeEventDestroy(event); +} + HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingThenPassRelaxedOrderingDependenciesInfo, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore; DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 962d31cda0..025490d3d8 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -245,6 +245,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerification, -1, "-1: default, 0: disable, 1: enable check of device state before submit on Windows") DECLARE_DEBUG_VARIABLE(int32_t, EnableDynamicPostSyncAllocLayout, -1, "-1: default, 0: Keep Timestamp size layout, 1: Use write immediate layout (qword) and switch dynamically to TS for profiling") DECLARE_DEBUG_VARIABLE(int32_t, PrintTimestampPacketUsage, -1, "-1: default, 0: Disabled, 1: Print when TSP is allocated, initialized, returned to pool, etc.") +DECLARE_DEBUG_VARIABLE(int32_t, TrackNumCsrClientsOnSyncPoints, -1, "-1: default, 0: Disabled, 1: If set, synchronization points like zeEventHostSynchronize will unregister CmdQ from CSR clients") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level") diff --git 
a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index d6e04f48d7..c1be2d3b32 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -536,5 +536,6 @@ VfBarResourceAllocationWa = 1 EnableDynamicPostSyncAllocLayout = -1 ForceNumberOfThreadsInGpgpuThreadGroup = -1 PrintTimestampPacketUsage = -1 +TrackNumCsrClientsOnSyncPoints = -1 CommandListTimestampRefreshIntervalInMilliSec = -1 # Please don't edit below this line