From 8b508fd15f697ee12e83b350b0c84c3b91f4b278 Mon Sep 17 00:00:00 2001 From: Jemale Lockett Date: Fri, 9 May 2025 15:48:51 +0000 Subject: [PATCH] feature: Add sync host event handling on windows Resolves: NEO-13744 Signed-off-by: Jemale Lockett --- .../tools/source/debug/debug_session_imp.cpp | 4 +- .../tools/source/debug/debug_session_imp.h | 2 +- .../source/debug/linux/debug_session.cpp | 5 +- .../tools/source/debug/linux/debug_session.h | 2 +- .../debug/linux/prelim/debug_session.cpp | 4 +- .../source/debug/linux/xe/debug_session.cpp | 4 +- .../source/debug/windows/debug_session.cpp | 140 +++++++++++------- .../source/debug/windows/debug_session.h | 3 +- .../sources/debug/mock_debug_session.h | 2 +- .../debug/windows/test_debug_api_windows.cpp | 39 +++++ 10 files changed, 146 insertions(+), 59 deletions(-) diff --git a/level_zero/tools/source/debug/debug_session_imp.cpp b/level_zero/tools/source/debug/debug_session_imp.cpp index 3725c3af1f..12b5eb3986 100644 --- a/level_zero/tools/source/debug/debug_session_imp.cpp +++ b/level_zero/tools/source/debug/debug_session_imp.cpp @@ -1682,7 +1682,9 @@ ze_result_t DebugSessionImp::isValidNode(uint64_t vmHandle, uint64_t gpuVa, SIP: ze_result_t DebugSessionImp::readFifo(uint64_t vmHandle, std::vector &threadsWithAttention) { auto stateSaveAreaHeader = getStateSaveAreaHeader(); - if (stateSaveAreaHeader->versionHeader.version.major != 3) { + if (!stateSaveAreaHeader) { + return ZE_RESULT_ERROR_UNKNOWN; + } else if (stateSaveAreaHeader->versionHeader.version.major != 3) { return ZE_RESULT_SUCCESS; } diff --git a/level_zero/tools/source/debug/debug_session_imp.h b/level_zero/tools/source/debug/debug_session_imp.h index b06827f566..81480fa257 100644 --- a/level_zero/tools/source/debug/debug_session_imp.h +++ b/level_zero/tools/source/debug/debug_session_imp.h @@ -180,7 +180,7 @@ struct DebugSessionImp : DebugSession { int64_t interruptTimeout = 2000; std::unordered_map attentionEventContext{}; std::chrono::milliseconds lastFifoReadTime = std::chrono::milliseconds(0); - virtual void updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) = 0; + virtual ze_result_t updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) = 0; std::chrono::high_resolution_clock::time_point interruptTime; std::atomic interruptSent = false; diff --git a/level_zero/tools/source/debug/linux/debug_session.cpp b/level_zero/tools/source/debug/linux/debug_session.cpp index 8e96251434..635ff7a8c0 100644 --- a/level_zero/tools/source/debug/linux/debug_session.cpp +++ b/level_zero/tools/source/debug/linux/debug_session.cpp @@ -684,10 +684,10 @@ ze_result_t DebugSessionLinux::getElfOffset(const zet_debug_memory_space_desc_t return status; } -void DebugSessionLinux::updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) { +ze_result_t DebugSessionLinux::updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) { auto vmHandle = getVmHandleFromClientAndlrcHandle(attention.clientHandle, attention.lrcHandle); if (vmHandle == invalidHandle) { - return; + return ZE_RESULT_ERROR_UNKNOWN; } auto hwInfo = connectedDevice->getHwInfo(); @@ -745,6 +745,7 @@ void DebugSessionLinux::updateStoppedThreadsAndCheckTriggerEvents(const Attentio } else { checkTriggerEventsForAttention(); } + return ZE_RESULT_SUCCESS; } ze_result_t DebugSessionLinux::getISAVMHandle(uint32_t deviceIndex, const zet_debug_memory_space_desc_t *desc, size_t size, uint64_t &vmHandle) { diff --git a/level_zero/tools/source/debug/linux/debug_session.h b/level_zero/tools/source/debug/linux/debug_session.h index 77e82f8d73..0a388d2b6f 100644 --- a/level_zero/tools/source/debug/linux/debug_session.h +++ b/level_zero/tools/source/debug/linux/debug_session.h @@ -253,7 +253,7 @@ struct DebugSessionLinux : DebugSessionImp { return allInstancesRemoved; } - void updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) override; + ze_result_t updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) override; virtual void updateContextAndLrcHandlesForThreadsWithAttention(EuThread::ThreadId threadId, const AttentionEventFields &attention) = 0; virtual uint64_t getVmHandleFromClientAndlrcHandle(uint64_t clientHandle, uint64_t lrcHandle) = 0; virtual std::unique_lock getThreadStateMutexForTileSession(uint32_t tileIndex) = 0; diff --git a/level_zero/tools/source/debug/linux/prelim/debug_session.cpp b/level_zero/tools/source/debug/linux/prelim/debug_session.cpp index a8e2eeea85..19ef5f4a7c 100644 --- a/level_zero/tools/source/debug/linux/prelim/debug_session.cpp +++ b/level_zero/tools/source/debug/linux/prelim/debug_session.cpp @@ -1003,7 +1003,9 @@ void DebugSessionLinuxi915::handleAttentionEvent(prelim_drm_i915_debug_event_eu_ attentionEventFields.contextHandle = attention->ctx_handle; attentionEventFields.lrcHandle = attention->lrc_handle; - return updateStoppedThreadsAndCheckTriggerEvents(attentionEventFields, tileIndex, threadsWithAttention); + if (updateStoppedThreadsAndCheckTriggerEvents(attentionEventFields, tileIndex, threadsWithAttention) != ZE_RESULT_SUCCESS) { + PRINT_DEBUGGER_ERROR_LOG("Failed to update stopped threads and check trigger events\n", ""); + } } std::unique_lock DebugSessionLinuxi915::getThreadStateMutexForTileSession(uint32_t tileIndex) { diff --git a/level_zero/tools/source/debug/linux/xe/debug_session.cpp b/level_zero/tools/source/debug/linux/xe/debug_session.cpp index 764171156f..e3eab7250c 100644 --- a/level_zero/tools/source/debug/linux/xe/debug_session.cpp +++ b/level_zero/tools/source/debug/linux/xe/debug_session.cpp @@ -749,7 +749,9 @@ void DebugSessionLinuxXe::handleAttentionEvent(NEO::EuDebugEventEuAttention *att attentionEventFields.contextHandle = attention->execQueueHandle; attentionEventFields.lrcHandle = attention->lrcHandle; - return updateStoppedThreadsAndCheckTriggerEvents(attentionEventFields, 0, threadsWithAttention); + if (updateStoppedThreadsAndCheckTriggerEvents(attentionEventFields, 0, threadsWithAttention) != ZE_RESULT_SUCCESS) { + PRINT_DEBUGGER_ERROR_LOG("Failed to update stopped threads and check trigger events\n", ""); + } } int DebugSessionLinuxXe::threadControlInterruptAll() { diff --git a/level_zero/tools/source/debug/windows/debug_session.cpp b/level_zero/tools/source/debug/windows/debug_session.cpp index 03f2e4acab..6acf1542dd 100644 --- a/level_zero/tools/source/debug/windows/debug_session.cpp +++ b/level_zero/tools/source/debug/windows/debug_session.cpp @@ -126,6 +126,7 @@ void *DebugSessionWindows::asyncThreadFunction(void *arg) { while (self->asyncThread.threadActive) { self->readAndHandleEvent(100); + self->pollFifo(); self->generateEventsAndResumeStoppedThreads(); self->sendInterrupts(); } @@ -195,6 +196,8 @@ ze_result_t DebugSessionWindows::readAndHandleEvent(uint64_t timeoutMs) { return handleDeviceCreateDestroyEvent(eventParamsBuffer.eventParamsBuffer.DeviceCreateDestroyEventParams); case DBGUMD_READ_EVENT_CREATE_DEBUG_DATA: return handleCreateDebugDataEvent(eventParamsBuffer.eventParamsBuffer.ReadCreateDebugDataParams); + case DBGUMD_READ_EVENT_SYNC_HOST: + return handleSyncHostEvent(eventParamsBuffer.eventParamsBuffer.SyncHostDataParams); default: break; } @@ -242,57 +245,15 @@ ze_result_t DebugSessionWindows::handleEuAttentionBitsEvent(DBGUMD_READ_EVENT_EU euAttentionBitsParams.hContextHandle, euAttentionBitsParams.LRCA, euAttentionBitsParams.BitMaskSizeInBytes, &euAttentionBitsParams.BitmaskArrayPtr); - auto hwInfo = connectedDevice->getHwInfo(); - auto &l0GfxCoreHelper = connectedDevice->getNEODevice()->getRootDeviceEnvironment().getHelper(); + std::vector threadsWithAttention; + newAttentionRaised(); + AttentionEventFields attentionEventFields; + attentionEventFields.bitmask = reinterpret_cast(&euAttentionBitsParams.BitmaskArrayPtr); + attentionEventFields.bitmaskSize = euAttentionBitsParams.BitMaskSizeInBytes; + attentionEventFields.contextHandle = euAttentionBitsParams.hContextHandle; + attentionEventFields.lrcHandle = euAttentionBitsParams.LRCA; - auto threadsWithAttention = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0u, - reinterpret_cast(&euAttentionBitsParams.BitmaskArrayPtr), - euAttentionBitsParams.BitMaskSizeInBytes); - - printBitmask(reinterpret_cast(&euAttentionBitsParams.BitmaskArrayPtr), euAttentionBitsParams.BitMaskSizeInBytes); - - PRINT_DEBUGGER_THREAD_LOG("ATTENTION received for thread count = %d\n", (int)threadsWithAttention.size()); - - if (threadsWithAttention.size() > 0) { - - uint64_t memoryHandle = DebugSessionWindows::invalidHandle; - { - if (allContexts.empty()) { - return ZE_RESULT_ERROR_UNINITIALIZED; - } - memoryHandle = *allContexts.begin(); - } - - auto gpuVa = getContextStateSaveAreaGpuVa(memoryHandle); - auto stateSaveAreaSize = getContextStateSaveAreaSize(memoryHandle); - auto stateSaveReadResult = ZE_RESULT_ERROR_UNKNOWN; - - std::unique_lock lock(threadStateMutex); - - if (gpuVa != 0 && stateSaveAreaSize != 0) { - std::vector newThreads; - getNotStoppedThreads(threadsWithAttention, newThreads); - if (newThreads.size() > 0) { - allocateStateSaveAreaMemory(stateSaveAreaSize); - stateSaveReadResult = readGpuMemory(memoryHandle, stateSaveAreaMemory.data(), stateSaveAreaSize, gpuVa); - } - } else { - PRINT_DEBUGGER_ERROR_LOG("Context state save area bind info invalid\n", ""); - DEBUG_BREAK_IF(true); - } - - if (stateSaveReadResult == ZE_RESULT_SUCCESS) { - - for (auto &threadId : threadsWithAttention) { - PRINT_DEBUGGER_THREAD_LOG("ATTENTION event for thread: %s\n", EuThread::toString(threadId).c_str()); - addThreadToNewlyStoppedFromRaisedAttention(threadId, memoryHandle, stateSaveAreaMemory.data()); - } - } - } - - checkTriggerEventsForAttention(); - - return ZE_RESULT_SUCCESS; + return updateStoppedThreadsAndCheckTriggerEvents(attentionEventFields, 0, threadsWithAttention); } ze_result_t DebugSessionWindows::handleAllocationDataEvent(uint32_t seqNo, DBGUMD_READ_EVENT_READ_ALLOCATION_DATA_PARAMS &allocationDataParams) { @@ -395,6 +356,85 @@ ze_result_t DebugSessionWindows::handleCreateDebugDataEvent(DBGUMD_READ_EVENT_CR return ZE_RESULT_SUCCESS; } +ze_result_t DebugSessionWindows::handleSyncHostEvent(DBGUMD_READ_EVENT_SYNC_HOST_DATA_PARAMS &readEventSyncHostDataParams) { + PRINT_DEBUGGER_INFO_LOG("DBGUMD_READ_EVENT_SYNC_HOST: hContextHandle=0x%llX\n", + readEventSyncHostDataParams.hContextHandle); + + uint64_t memoryHandle = DebugSessionWindows::invalidHandle; + { + std::unique_lock lock(asyncThreadMutex); + if (allContexts.empty()) { + return ZE_RESULT_ERROR_UNINITIALIZED; + } + memoryHandle = *allContexts.begin(); + } + + AttentionEventFields attentionEventFields = {}; + attentionEventFields.clientHandle = debugHandle; + attentionEventFields.contextHandle = readEventSyncHostDataParams.hContextHandle; + + attentionEventContext[memoryHandle] = attentionEventFields; + + handleStoppedThreads(); + return ZE_RESULT_SUCCESS; +} + +ze_result_t DebugSessionWindows::updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) { + auto hwInfo = connectedDevice->getHwInfo(); + auto &l0GfxCoreHelper = connectedDevice->getL0GfxCoreHelper(); + + if (threadsWithAttention.size() == 0) { + threadsWithAttention = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, 0u, + attention.bitmask, attention.bitmaskSize); + + printBitmask(attention.bitmask, attention.bitmaskSize); + } + + PRINT_DEBUGGER_THREAD_LOG("ATTENTION for tile = %d thread count = %d\n", tileIndex, (int)threadsWithAttention.size()); + + if (threadsWithAttention.size() > 0) { + + uint64_t memoryHandle = DebugSessionWindows::invalidHandle; + { + std::unique_lock lock(asyncThreadMutex); + if (allContexts.empty()) { + PRINT_DEBUGGER_ERROR_LOG("No contexts found\n", ""); + return ZE_RESULT_ERROR_UNINITIALIZED; + } + memoryHandle = *allContexts.begin(); + } + + auto gpuVa = getContextStateSaveAreaGpuVa(memoryHandle); + auto stateSaveAreaSize = getContextStateSaveAreaSize(memoryHandle); + auto stateSaveReadResult = ZE_RESULT_ERROR_UNKNOWN; + + std::unique_lock lock(threadStateMutex); + + if (gpuVa != 0 && stateSaveAreaSize != 0) { + std::vector newThreads; + getNotStoppedThreads(threadsWithAttention, newThreads); + if (newThreads.size() > 0) { + allocateStateSaveAreaMemory(stateSaveAreaSize); + stateSaveReadResult = readGpuMemory(memoryHandle, stateSaveAreaMemory.data(), stateSaveAreaSize, gpuVa); + } + } else { + PRINT_DEBUGGER_ERROR_LOG("Context state save area bind info invalid\n", ""); + DEBUG_BREAK_IF(true); + } + + if (stateSaveReadResult == ZE_RESULT_SUCCESS) { + + for (auto &threadId : threadsWithAttention) { + PRINT_DEBUGGER_THREAD_LOG("ATTENTION event for thread: %s\n", EuThread::toString(threadId).c_str()); + addThreadToNewlyStoppedFromRaisedAttention(threadId, memoryHandle, stateSaveAreaMemory.data()); + } + } + } + + checkTriggerEventsForAttention(); + return ZE_RESULT_SUCCESS; +} + ze_result_t DebugSessionWindows::acknowledgeEventImp(uint32_t seqNo, uint32_t eventType) { PRINT_DEBUGGER_INFO_LOG("DBGUMD_ACTION_ACKNOWLEDGE_EVENT: seqNo: %d eventType: %d\n", seqNo, eventType); KM_ESCAPE_INFO escapeInfo = {}; diff --git a/level_zero/tools/source/debug/windows/debug_session.h b/level_zero/tools/source/debug/windows/debug_session.h index 8c8922ff4d..6569284daa 100644 --- a/level_zero/tools/source/debug/windows/debug_session.h +++ b/level_zero/tools/source/debug/windows/debug_session.h @@ -78,6 +78,7 @@ struct DebugSessionWindows : DebugSessionImp { ze_result_t handleContextCreateDestroyEvent(DBGUMD_READ_EVENT_CONTEXT_CREATE_DESTROY_EVENT_PARAMS &contextCreateDestroyParams); ze_result_t handleDeviceCreateDestroyEvent(DBGUMD_READ_EVENT_DEVICE_CREATE_DESTROY_EVENT_PARAMS &deviceCreateDestroyParams); ze_result_t handleCreateDebugDataEvent(DBGUMD_READ_EVENT_CREATE_DEBUG_DATA_PARAMS &createDebugDataParams); + ze_result_t handleSyncHostEvent(DBGUMD_READ_EVENT_SYNC_HOST_DATA_PARAMS &syncHostDataParams); ze_result_t readAllocationDebugData(uint32_t seqNo, uint64_t umdDataBufferPtr, void *outBuf, size_t outBufSize); void enqueueApiEvent(zet_debug_event_t &debugEvent) override; @@ -95,7 +96,7 @@ struct DebugSessionWindows : DebugSessionImp { UNRECOVERABLE_IF(true); } - void updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) override {} + ze_result_t updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) override; static void *asyncThreadFunction(void *arg); diff --git a/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h b/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h index ae1c04b052..7edd8d817f 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h +++ b/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h @@ -351,7 +351,7 @@ struct MockDebugSession : public L0::DebugSessionImp { return DebugSessionImp::readThreadScratchRegisters(thread, start, count, pRegisterValues); } - void updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) override {} + ze_result_t updateStoppedThreadsAndCheckTriggerEvents(const AttentionEventFields &attention, uint32_t tileIndex, std::vector &threadsWithAttention) override { return ZE_RESULT_SUCCESS; } void resumeAccidentallyStoppedThreads(const std::vector &threadIds) override { resumeAccidentallyStoppedCalled++; return DebugSessionImp::resumeAccidentallyStoppedThreads(threadIds); diff --git a/level_zero/tools/test/unit_tests/sources/debug/windows/test_debug_api_windows.cpp b/level_zero/tools/test/unit_tests/sources/debug/windows/test_debug_api_windows.cpp index 2cbabd5805..47d0419101 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/windows/test_debug_api_windows.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/windows/test_debug_api_windows.cpp @@ -33,6 +33,7 @@ struct MockDebugSessionWindows : DebugSessionWindows { using DebugSessionWindows::allModules; using DebugSessionWindows::allThreads; using DebugSessionWindows::asyncThread; + using DebugSessionWindows::attentionEventContext; using DebugSessionWindows::calculateThreadSlotOffset; using DebugSessionWindows::closeAsyncThread; using DebugSessionWindows::debugArea; @@ -2021,5 +2022,43 @@ TEST_F(DebugApiWindowsTest, GivenResumeImpCalledThenBitmaskIsCorrect) { EXPECT_EQ(0u, bitmask[4]); } +TEST_F(DebugApiWindowsTest, givenSyncHostEventReceivedThenEventIsHandledAndAttentionEventContextUpdated) { + zet_debug_config_t config = {}; + config.pid = 0x1234; + + auto session = std::make_unique(config, device); + session->wddm = mockWddm; + + session->allContexts = {}; + session->allContexts.insert(0x01); + auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); + if (l0GfxCoreHelper.threadResumeRequiresUnlock()) { + mockWddm->numEvents = 1; + mockWddm->eventQueue[0].readEventType = DBGUMD_READ_EVENT_SYNC_HOST; + mockWddm->eventQueue[0].eventParamsBuffer.eventParamsBuffer.SyncHostDataParams.hContextHandle = 0x12345; + EXPECT_EQ(ZE_RESULT_SUCCESS, session->readAndHandleEvent(100)); + EXPECT_EQ(1u, session->attentionEventContext.size()); + } +} + +TEST_F(DebugApiWindowsTest, givenErrorCasesWhenHandlingSyncHostThenErrorIsReturned) { + zet_debug_config_t config = {}; + config.pid = 0x1234; + + auto session = std::make_unique(config, device); + session->wddm = mockWddm; + + session->allContexts = {}; + + auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); + if (l0GfxCoreHelper.threadResumeRequiresUnlock()) { + mockWddm->numEvents = 1; + mockWddm->eventQueue[0].readEventType = DBGUMD_READ_EVENT_SYNC_HOST; + mockWddm->eventQueue[0].eventParamsBuffer.eventParamsBuffer.SyncHostDataParams.hContextHandle = 0x12345; + + EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, session->readAndHandleEvent(100)); + } +} + } // namespace ult } // namespace L0