From 171f1e27a3f92714ccbce5c4912c342f5076aaef Mon Sep 17 00:00:00 2001 From: Jitendra Sharma Date: Tue, 22 Oct 2024 18:31:55 +0000 Subject: [PATCH] fix: Add debug variables for configurable timeouts in debugger Related-To: NEO-13046 Signed-off-by: Jitendra Sharma --- .../tools/source/debug/debug_session_imp.cpp | 2 ++ .../tools/source/debug/debug_session_imp.h | 2 +- .../source/debug/linux/debug_session.cpp | 2 -- .../tools/source/debug/linux/debug_session.h | 1 + .../source/debug/linux/xe/debug_session.cpp | 12 +++++-- .../sources/debug/debug_session_tests.cpp | 6 ++-- .../prelim/debug_session_fixtures_linux.h | 1 + .../linux/prelim/test_debug_api_linux.cpp | 2 +- .../prelim/tile_debug_session_linux_tests.cpp | 2 +- .../xe/debug_session_fixtures_linux_xe.h | 2 ++ .../linux/xe/test_debug_api_linux_xe.cpp | 36 +++++++++++++++++-- .../sources/debug/mock_debug_session.h | 3 +- .../debug_settings/debug_variables_base.inl | 4 ++- shared/test/common/test_files/igdrcl.config | 4 ++- 14 files changed, 62 insertions(+), 17 deletions(-) diff --git a/level_zero/tools/source/debug/debug_session_imp.cpp b/level_zero/tools/source/debug/debug_session_imp.cpp index 8999268110..5bab39ace4 100644 --- a/level_zero/tools/source/debug/debug_session_imp.cpp +++ b/level_zero/tools/source/debug/debug_session_imp.cpp @@ -1757,7 +1757,9 @@ void DebugSessionImp::pollFifo() { auto timeSinceLastFifoRead = currentTime - lastFifoReadTime; if (timeSinceLastFifoRead.count() > fifoPollInterval) { + PRINT_DEBUGGER_FIFO_LOG("%s", "Polling FIFO start\n"); handleStoppedThreads(); + PRINT_DEBUGGER_FIFO_LOG("%s", "Polling FIFO ends\n"); } } diff --git a/level_zero/tools/source/debug/debug_session_imp.h b/level_zero/tools/source/debug/debug_session_imp.h index 0172cae9b4..dc6f26e6fd 100644 --- a/level_zero/tools/source/debug/debug_session_imp.h +++ b/level_zero/tools/source/debug/debug_session_imp.h @@ -70,7 +70,6 @@ struct DebugSessionImp : DebugSession { static const SIP::regset_desc 
*getDebugScratchRegsetDesc(); static const SIP::regset_desc *getThreadScratchRegsetDesc(); static uint32_t typeToRegsetFlags(uint32_t type); - constexpr static int64_t interruptTimeout = 2000; using ApiEventQueue = std::queue; @@ -176,6 +175,7 @@ struct DebugSessionImp : DebugSession { void handleStoppedThreads(); void pollFifo(); int32_t fifoPollInterval = 150; + int64_t interruptTimeout = 2000; std::unordered_map attentionEventContext{}; std::chrono::milliseconds lastFifoReadTime = std::chrono::milliseconds(0); virtual void updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) = 0; diff --git a/level_zero/tools/source/debug/linux/debug_session.cpp b/level_zero/tools/source/debug/linux/debug_session.cpp index 8e81249a5d..3247b82d99 100644 --- a/level_zero/tools/source/debug/linux/debug_session.cpp +++ b/level_zero/tools/source/debug/linux/debug_session.cpp @@ -337,7 +337,6 @@ ze_result_t DebugSessionLinux::readGpuMemory(uint64_t vmHandle, char *output, si } else { size_t pendingSize = size; uint8_t retry = 0; - const uint8_t maxRetries = 3; size_t retrySize = size; do { PRINT_DEBUGGER_MEM_ACCESS_LOG("Reading (pread) memory from gpu va = %#" PRIx64 ", size = %zu\n", gpuVa, pendingSize); @@ -400,7 +399,6 @@ ze_result_t DebugSessionLinux::writeGpuMemory(uint64_t vmHandle, const char *inp } else { size_t pendingSize = size; uint8_t retry = 0; - const uint8_t maxRetries = 3; size_t retrySize = size; do { PRINT_DEBUGGER_MEM_ACCESS_LOG("Writing (pwrite) memory to gpu va = %#" PRIx64 ", size = %zu\n", gpuVa, pendingSize); diff --git a/level_zero/tools/source/debug/linux/debug_session.h b/level_zero/tools/source/debug/linux/debug_session.h index 331e122dfd..3b6b1f8ca6 100644 --- a/level_zero/tools/source/debug/linux/debug_session.h +++ b/level_zero/tools/source/debug/linux/debug_session.h @@ -250,6 +250,7 @@ struct DebugSessionLinux : DebugSessionImp { }; void 
handlePageFaultEvent(PageFaultEvent &pfEvent); + uint8_t maxRetries = 3; std::unique_ptr ioctlHandler; }; } // namespace L0 \ No newline at end of file diff --git a/level_zero/tools/source/debug/linux/xe/debug_session.cpp b/level_zero/tools/source/debug/linux/xe/debug_session.cpp index 4620d9198f..f87d706c7f 100644 --- a/level_zero/tools/source/debug/linux/xe/debug_session.cpp +++ b/level_zero/tools/source/debug/linux/xe/debug_session.cpp @@ -75,17 +75,23 @@ bool DebugSessionLinuxXe::handleInternalEvent() { void *DebugSessionLinuxXe::asyncThreadFunction(void *arg) { DebugSessionLinuxXe *self = reinterpret_cast(arg); - if (NEO::debugManager.flags.FifoPollInterval.get() != -1) { - self->fifoPollInterval = NEO::debugManager.flags.FifoPollInterval.get(); + if (NEO::debugManager.flags.DebugUmdFifoPollInterval.get() != -1) { + self->fifoPollInterval = NEO::debugManager.flags.DebugUmdFifoPollInterval.get(); + } + if (NEO::debugManager.flags.DebugUmdInterruptTimeout.get() != -1) { + self->interruptTimeout = NEO::debugManager.flags.DebugUmdInterruptTimeout.get(); + } + if (NEO::debugManager.flags.DebugUmdMaxReadWriteRetry.get() != -1) { + self->maxRetries = NEO::debugManager.flags.DebugUmdMaxReadWriteRetry.get(); } PRINT_DEBUGGER_INFO_LOG("Debugger async thread start\n", ""); while (self->asyncThread.threadActive) { self->handleEventsAsync(); + self->pollFifo(); self->generateEventsAndResumeStoppedThreads(); self->sendInterrupts(); - self->pollFifo(); } PRINT_DEBUGGER_INFO_LOG("Debugger async thread closing\n", ""); diff --git a/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp b/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp index 91c0df9ef8..4b30992e44 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp @@ -621,7 +621,7 @@ TEST(DebugSessionTest, givenPendingInterruptAfterTimeoutWhenGenerateEventsAndRes 
sessionMock->triggerEvents = false; sessionMock->interruptSent = true; - sessionMock->returnTimeDiff = 5 * DebugSessionImp::interruptTimeout; + sessionMock->returnTimeDiff = 5 * sessionMock->interruptTimeout; sessionMock->generateEventsAndResumeStoppedThreads(); @@ -651,7 +651,7 @@ TEST(DebugSessionTest, givenPendingInterruptBeforeTimeoutWhenGenerateEventsAndRe sessionMock->triggerEvents = false; sessionMock->interruptSent = true; - sessionMock->returnTimeDiff = DebugSessionImp::interruptTimeout / 2; + sessionMock->returnTimeDiff = sessionMock->interruptTimeout / 2; sessionMock->generateEventsAndResumeStoppedThreads(); @@ -1963,7 +1963,7 @@ TEST_F(DebugSessionTestSwFifoFixture, GivenSwFifoWithHeadAndTailIndexEqualWhenPo stateSaveAreaHeaderPtr->regHeaderV3.fifo_head = fifoTail; DebugManagerStateRestore stateRestore; - debugManager.flags.FifoPollInterval.set(0); + debugManager.flags.DebugUmdFifoPollInterval.set(0); session->attentionEventContext[10] = {11, 12, 1}; session->pollFifo(); diff --git a/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/debug_session_fixtures_linux.h b/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/debug_session_fixtures_linux.h index f78a5de338..81496b84af 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/debug_session_fixtures_linux.h +++ b/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/debug_session_fixtures_linux.h @@ -140,6 +140,7 @@ struct MockDebugSessionLinuxi915 : public L0::DebugSessionLinuxi915 { using L0::DebugSessionImp::fillResumeAndStoppedThreadsFromNewlyStopped; using L0::DebugSessionImp::generateEventsForPendingInterrupts; using L0::DebugSessionImp::interruptSent; + using L0::DebugSessionImp::interruptTimeout; using L0::DebugSessionImp::isValidGpuAddress; using L0::DebugSessionImp::newAttentionRaised; using L0::DebugSessionImp::sipSupportsSlm; diff --git a/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp 
b/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp index 586d72edcb..6833c1e62e 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/test_debug_api_linux.cpp @@ -7185,7 +7185,7 @@ TEST_F(DebugApiLinuxAsyncThreadTest, GivenInterruptedThreadsWhenNoAttentionEvent auto handler = new MockIoctlHandlerI915; session->ioctlHandler.reset(handler); - session->returnTimeDiff = DebugSessionLinuxi915::interruptTimeout * 10; + session->returnTimeDiff = session->interruptTimeout * 10; session->synchronousInternalEventRead = true; ze_device_thread_t thread = {0, 0, 0, UINT32_MAX}; diff --git a/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/tile_debug_session_linux_tests.cpp b/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/tile_debug_session_linux_tests.cpp index e43eb78dd0..29b9cb1a03 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/tile_debug_session_linux_tests.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/linux/prelim/tile_debug_session_linux_tests.cpp @@ -1149,7 +1149,7 @@ using TileAttachAsyncThreadTest = Test>; TEST_F(TileAttachAsyncThreadTest, GivenInterruptedThreadsWhenNoAttentionEventIsReadThenThreadUnavailableEventIsGenerated) { rootSession->tileSessions[0].second = true; - tileSessions[0]->returnTimeDiff = DebugSessionLinuxi915::interruptTimeout * 10; + tileSessions[0]->returnTimeDiff = rootSession->interruptTimeout * 10; ze_device_thread_t thread = {0, 0, 0, 0}; auto result = tileSessions[0]->interrupt(thread); diff --git a/level_zero/tools/test/unit_tests/sources/debug/linux/xe/debug_session_fixtures_linux_xe.h b/level_zero/tools/test/unit_tests/sources/debug/linux/xe/debug_session_fixtures_linux_xe.h index cf6ac91687..10664a545a 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/linux/xe/debug_session_fixtures_linux_xe.h +++ 
b/level_zero/tools/test/unit_tests/sources/debug/linux/xe/debug_session_fixtures_linux_xe.h @@ -150,10 +150,12 @@ struct MockDebugSessionLinuxXe : public L0::DebugSessionLinuxXe { using L0::DebugSessionImp::expectedAttentionEvents; using L0::DebugSessionImp::fifoPollInterval; using L0::DebugSessionImp::interruptSent; + using L0::DebugSessionImp::interruptTimeout; using L0::DebugSessionImp::readFifo; using L0::DebugSessionImp::stateSaveAreaHeader; using L0::DebugSessionImp::triggerEvents; using L0::DebugSessionLinux::getClientConnection; + using L0::DebugSessionLinux::maxRetries; using L0::DebugSessionLinux::updateStoppedThreadsAndCheckTriggerEvents; using L0::DebugSessionLinuxXe::addThreadToNewlyStoppedFromRaisedAttentionForTileSession; using L0::DebugSessionLinuxXe::asyncThread; diff --git a/level_zero/tools/test/unit_tests/sources/debug/linux/xe/test_debug_api_linux_xe.cpp b/level_zero/tools/test/unit_tests/sources/debug/linux/xe/test_debug_api_linux_xe.cpp index 44adfcd55d..bc3d2dab87 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/linux/xe/test_debug_api_linux_xe.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/linux/xe/test_debug_api_linux_xe.cpp @@ -2244,7 +2244,7 @@ TEST_F(DebugApiLinuxTestXe, GivenInterruptedThreadsWhenNoAttentionEventIsReadThe auto handler = new MockIoctlHandlerXe; session->ioctlHandler.reset(handler); - session->returnTimeDiff = DebugSessionLinuxXe::interruptTimeout * 10; + session->returnTimeDiff = session->interruptTimeout * 10; session->synchronousInternalEventRead = true; ze_device_thread_t thread = {0, 0, 0, UINT32_MAX}; @@ -2308,9 +2308,9 @@ TEST_F(DebugApiLinuxTestXe, GivenBindInfoForVmHandleWhenReadingModuleDebugAreaTh EXPECT_EQ(4u, session->debugArea.pgsize); } -TEST_F(DebugApiLinuxTestXe, GivenFifoPollEnvironmentVariableWhenAsyncThreadLaunchedThenFifoPollIntervalUpdated) { +TEST_F(DebugApiLinuxTestXe, GivenFifoPollEnvironmentVariableWhenAsyncThreadLaunchedThenDebugUmdFifoPollIntervalUpdated) { 
DebugManagerStateRestore stateRestore; - NEO::debugManager.flags.FifoPollInterval.set(100); + NEO::debugManager.flags.DebugUmdFifoPollInterval.set(100); auto session = std::make_unique(zet_debug_config_t{0x1234}, device, 10); ASSERT_NE(nullptr, session); @@ -2323,6 +2323,36 @@ TEST_F(DebugApiLinuxTestXe, GivenFifoPollEnvironmentVariableWhenAsyncThreadLaunc session->closeAsyncThread(); } +TEST_F(DebugApiLinuxTestXe, GivenInterruptTimeoutProvidedByDebugVariablesWhenAsyncThreadLaunchedThenInterruptTimeoutCorrectlyRead) { + DebugManagerStateRestore stateRestore; + NEO::debugManager.flags.DebugUmdInterruptTimeout.set(5000); + auto session = std::make_unique(zet_debug_config_t{0x1234}, device, 10); + ASSERT_NE(nullptr, session); + + EXPECT_EQ(2000, session->interruptTimeout); + session->asyncThread.threadActive = false; + session->asyncThreadFunction(session.get()); + + EXPECT_EQ(5000, session->interruptTimeout); + + session->closeAsyncThread(); +} + +TEST_F(DebugApiLinuxTestXe, GivenMaxRetriesProvidedByDebugVariablesWhenAsyncThreadLaunchedThenMaxRetriesCorrectlyRead) { + DebugManagerStateRestore stateRestore; + NEO::debugManager.flags.DebugUmdMaxReadWriteRetry.set(10); + auto session = std::make_unique(zet_debug_config_t{0x1234}, device, 10); + ASSERT_NE(nullptr, session); + + EXPECT_EQ(3, session->maxRetries); + session->asyncThread.threadActive = false; + session->asyncThreadFunction(session.get()); + + EXPECT_EQ(10, session->maxRetries); + + session->closeAsyncThread(); +} + TEST(DebugSessionLinuxXeTest, GivenRootDebugSessionWhenCreateTileSessionCalledThenSessionIsNotCreated) { auto hwInfo = *NEO::defaultHwInfo.get(); NEO::MockDevice *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); diff --git a/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h b/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h index 3b8826db6e..23fa12c60f 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h 
+++ b/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h @@ -161,6 +161,7 @@ struct MockDebugSession : public L0::DebugSessionImp { using L0::DebugSessionImp::generateEventsForStoppedThreads; using L0::DebugSessionImp::getRegisterSize; using L0::DebugSessionImp::getStateSaveAreaHeader; + using L0::DebugSessionImp::interruptTimeout; using L0::DebugSessionImp::isValidNode; using L0::DebugSessionImp::newAttentionRaised; using L0::DebugSessionImp::pollFifo; @@ -586,7 +587,7 @@ struct MockDebugSession : public L0::DebugSessionImp { std::vector readMemoryBuffer; uint64_t regs[16]; - int returnTimeDiff = -1; + int64_t returnTimeDiff = -1; bool returnStateSaveAreaGpuVa = true; bool forceZeroStateSaveAreaSize = false; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 0936236025..d89ba257e9 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -294,7 +294,9 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlushWithWrite, -1, "- DECLARE_DEBUG_VARIABLE(int32_t, DeferStateInitSubmissionToFirstRegularUsage, -1, "-1: ignore, 0: disabled, 1: enabled. If set, instead of initializing at Device creation, submit initial state during first usage (eg. kernel submission)") DECLARE_DEBUG_VARIABLE(int32_t, ForceNonWalkerSplitMemoryCopy, -1, "-1: default, 0: disabled, 1: enabled. If set, memory copy will be executed as single byte copy Walker without performance optimizations") DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampWidth, -1, "-1: default from KMD, > 0: Override timestamp width used for profiling. 
Requires XeKMD kernel.") -DECLARE_DEBUG_VARIABLE(int32_t, FifoPollInterval, -1, "-1: default , > 0: Fifo will be polled based on input in milliseconds.") +DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdFifoPollInterval, -1, "-1: default , > 0: Fifo will be polled based on input in milliseconds.") +DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdInterruptTimeout, -1, "-1: default , > 0: Debugger interrupt timeout in milliseconds. Default is 2000 milliseconds") +DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > 0: Maximum number of retries for debugger GPU memory read/write. Default is 3") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 0b44c0c9b5..c419dd4b1a 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -628,8 +628,10 @@ ForceNonWalkerSplitMemoryCopy = -1 DirectSubmissionSwitchSemaphoreMode = -1 OverrideTimestampWidth = -1 IgnoreZebinUnknownAttributes = 0 -FifoPollInterval = -1 +DebugUmdFifoPollInterval = -1 MaxSubSlicesSupportedOverride = -1 ForceWddmHugeChunkSizeMB = -1 DirectSubmissionControllerIdleDetection = -1 +DebugUmdInterruptTimeout = -1 +DebugUmdMaxReadWriteRetry = -1 # Please don't edit below this line