diff --git a/level_zero/tools/source/debug/debug_session_imp.cpp b/level_zero/tools/source/debug/debug_session_imp.cpp index 15ac868f5d..42c074fde0 100644 --- a/level_zero/tools/source/debug/debug_session_imp.cpp +++ b/level_zero/tools/source/debug/debug_session_imp.cpp @@ -821,7 +821,12 @@ void DebugSessionImp::fillResumeAndStoppedThreadsFromNewlyStopped(std::vectorgetPageFault()) { + const uint32_t cr0PFBit16 = 0x10000; + reg[1] = reg[1] | cr0PFBit16; + writeRegistersImp(newlyStopped, ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, 0, 1, reg.get()); + } + if (isForceExceptionOrForceExternalHaltOnlyExceptionReason(reg.get()) && !allThreads[newlyStopped]->getPageFault()) { bool threadWasInterrupted = false; for (auto &request : pendingInterrupts) { @@ -845,6 +850,7 @@ void DebugSessionImp::fillResumeAndStoppedThreadsFromNewlyStopped(std::vectortoString().c_str(), reg[1]); + allThreads[newlyStopped]->setPageFault(false); stoppedThreadsToReport.push_back(newlyStopped); } } diff --git a/level_zero/tools/source/debug/eu_thread.h b/level_zero/tools/source/debug/eu_thread.h index fb2218aa5f..4cb2f22216 100644 --- a/level_zero/tools/source/debug/eu_thread.h +++ b/level_zero/tools/source/debug/eu_thread.h @@ -163,6 +163,13 @@ class EuThread { DEBUG_BREAK_IF(reportedAsStopped && state != State::Stopped); return reportedAsStopped; } + void setPageFault(bool value) { + hasPageFault = value; + } + bool getPageFault() { + return hasPageFault; + ; + } public: static constexpr uint64_t invalidHandle = std::numeric_limits::max(); @@ -173,6 +180,7 @@ class EuThread { uint8_t systemRoutineCounter = 0; std::atomic memoryHandle = invalidHandle; std::atomic reportedAsStopped = false; + bool hasPageFault = false; }; static_assert(sizeof(EuThread::ThreadId) == sizeof(uint64_t)); diff --git a/level_zero/tools/source/debug/linux/prelim/debug_session.cpp b/level_zero/tools/source/debug/linux/prelim/debug_session.cpp index ea446a0f45..2e19197fcb 100644 --- a/level_zero/tools/source/debug/linux/prelim/debug_session.cpp +++ b/level_zero/tools/source/debug/linux/prelim/debug_session.cpp @@ -718,7 +718,6 @@ void DebugSessionLinux::handleEvent(prelim_drm_i915_debug_event *event) { (int)attention->base.flags, (uint64_t)attention->base.seqno, (uint64_t)attention->base.size, (uint64_t)attention->client_handle, (uint64_t)attention->flags, (uint32_t)attention->ci.engine_class, (uint32_t)attention->ci.engine_instance, (uint32_t)attention->bitmask_size, uint64_t(attention->ctx_handle)); - handleAttentionEvent(attention); } break; @@ -726,6 +725,17 @@ void DebugSessionLinux::handleEvent(prelim_drm_i915_debug_event *event) { prelim_drm_i915_debug_event_engines *engines = reinterpret_cast(event); handleEnginesEvent(engines); } break; + + case PRELIM_DRM_I915_DEBUG_EVENT_PAGE_FAULT: { + prelim_drm_i915_debug_event_page_fault *pf = reinterpret_cast(event); + PRINT_DEBUGGER_INFO_LOG("PRELIM_I915_DEBUG_IOCTL_READ_EVENT type: PRELIM_DRM_I915_DEBUG_EVENT_PAGE_FAULT flags = %d, address = %llu seqno = %d, size = %llu" + " client_handle = %llu flags = %llu class = %lu instance = %lu bitmask_size = %lu ctx_handle = %llu\n", + (int)pf->base.flags, (uint64_t)pf->page_fault_address, (uint64_t)pf->base.seqno, (uint64_t)pf->base.size, + (uint64_t)pf->client_handle, (uint64_t)pf->flags, (uint32_t)pf->ci.engine_class, + (uint32_t)pf->ci.engine_instance, (uint32_t)pf->bitmask_size, uint64_t(pf->ctx_handle)); + handlePageFaultEvent(pf); + } break; + default: PRINT_DEBUGGER_INFO_LOG("PRELIM_I915_DEBUG_IOCTL_READ_EVENT type: UNHANDLED %d flags = %d size = %llu\n", (int)event->type, (int)event->flags, (uint64_t)event->size); break; @@ -805,20 +815,14 @@ void DebugSessionLinux::readStateSaveAreaHeader() { ze_result_t DebugSessionLinux::readEventImp(prelim_drm_i915_debug_event *drmDebugEvent) { auto ret = ioctl(PRELIM_I915_DEBUG_IOCTL_READ_EVENT, drmDebugEvent); - if (ret != 0) { PRINT_DEBUGGER_ERROR_LOG("PRELIM_I915_DEBUG_IOCTL_READ_EVENT failed: retCode: %d errno = %d\n", ret, errno); - } else { - if ((drmDebugEvent->flags & PRELIM_DRM_I915_DEBUG_EVENT_CREATE) == 0 && - (drmDebugEvent->flags & PRELIM_DRM_I915_DEBUG_EVENT_DESTROY) == 0 && - (drmDebugEvent->flags & PRELIM_DRM_I915_DEBUG_EVENT_STATE_CHANGE) == 0) { - - PRINT_DEBUGGER_ERROR_LOG("PRELIM_I915_DEBUG_IOCTL_READ_EVENT unsupported flag = %d\n", (int)drmDebugEvent->flags); - return ZE_RESULT_ERROR_UNKNOWN; - } - return ZE_RESULT_SUCCESS; + return ZE_RESULT_NOT_READY; + } else if (drmDebugEvent->flags & ~static_cast(PRELIM_DRM_I915_DEBUG_EVENT_CREATE | PRELIM_DRM_I915_DEBUG_EVENT_DESTROY | PRELIM_DRM_I915_DEBUG_EVENT_STATE_CHANGE | PRELIM_DRM_I915_DEBUG_EVENT_NEED_ACK)) { + PRINT_DEBUGGER_ERROR_LOG("PRELIM_I915_DEBUG_IOCTL_READ_EVENT unsupported flag = %d\n", (int)drmDebugEvent->flags); + return ZE_RESULT_ERROR_UNKNOWN; } - return ZE_RESULT_NOT_READY; + return ZE_RESULT_SUCCESS; } bool DebugSessionLinux::handleVmBindEvent(prelim_drm_i915_debug_event_vm_bind *vmBind) { @@ -1254,6 +1258,25 @@ void DebugSessionLinux::handleContextParamEvent(prelim_drm_i915_debug_event_cont } } +uint64_t DebugSessionLinux::getVmHandleFromClientAndlrcHandle(uint64_t clientHandle, uint64_t lrcHandle) { + + if (clientHandleToConnection.find(clientHandle) == clientHandleToConnection.end()) { + return invalidHandle; + } + + auto &clientConnection = clientHandleToConnection[clientHandle]; + if (clientConnection->lrcToContextHandle.find(lrcHandle) == clientConnection->lrcToContextHandle.end()) { + return invalidHandle; + } + + auto contextHandle = clientConnection->lrcToContextHandle[lrcHandle]; + if (clientConnection->contextsCreated.find(contextHandle) == clientConnection->contextsCreated.end()) { + return invalidHandle; + } + + return clientConnection->contextsCreated[contextHandle].vm; +} + void DebugSessionLinux::handleAttentionEvent(prelim_drm_i915_debug_event_eu_attention *attention) { NEO::EngineClassInstance engineClassInstance = {attention->ci.engine_class, attention->ci.engine_instance}; auto tileIndex = DrmHelper::getEngineTileIndex(connectedDevice, engineClassInstance); @@ -1264,23 +1287,10 @@ void DebugSessionLinux::handleAttentionEvent(prelim_drm_i915_debug_event_eu_atte return; } - newAttentionRaised(tileIndex); + newAttentionRaised( + tileIndex); - if (clientHandleToConnection.find(attention->client_handle) == clientHandleToConnection.end()) { - return; - } - - auto &clientConnection = clientHandleToConnection[attention->client_handle]; - if (clientConnection->lrcToContextHandle.find(attention->lrc_handle) == clientConnection->lrcToContextHandle.end()) { - return; - } - - auto contextHandle = clientConnection->lrcToContextHandle[attention->lrc_handle]; - if (clientConnection->contextsCreated.find(contextHandle) == clientConnection->contextsCreated.end()) { - return; - } - - auto vmHandle = clientConnection->contextsCreated[contextHandle].vm; + auto vmHandle = getVmHandleFromClientAndlrcHandle(attention->client_handle, attention->lrc_handle); if (vmHandle == invalidHandle) { return; } @@ -1356,6 +1366,78 @@ void DebugSessionLinux::handleAttentionEvent(prelim_drm_i915_debug_event_eu_atte } } +void DebugSessionLinux::handlePageFaultEvent(prelim_drm_i915_debug_event_page_fault *pf) { + NEO::EngineClassInstance engineClassInstance = {pf->ci.engine_class, pf->ci.engine_instance}; + auto tileIndex = DrmHelper::getEngineTileIndex(connectedDevice, engineClassInstance); + + DEBUG_BREAK_IF(pf->bitmask_size % 3u != 0u); + size_t size = pf->bitmask_size / 3; + uint8_t *bitmaskBefore = &pf->bitmask[0]; + uint8_t *bitmaskAfter = &pf->bitmask[size]; + uint8_t *bitmaskResolved = &pf->bitmask[size * 2]; + PRINT_DEBUGGER_INFO_LOG("PageFault event BEFORE", 0); + printBitmask(bitmaskBefore, size); + PRINT_DEBUGGER_INFO_LOG("PageFault event AFTER", 0); + printBitmask(bitmaskAfter, size); + PRINT_DEBUGGER_INFO_LOG("PageFault event RESOLVED", 0); + printBitmask(bitmaskResolved, size); + + auto vmHandle = getVmHandleFromClientAndlrcHandle(pf->client_handle, pf->lrc_handle); + if (vmHandle == invalidHandle) { + return; + } + + if (!connectedDevice->getNEODevice()->getDeviceBitfield().test(tileIndex)) { + return; + } + + std::unique_ptr bitmaskPF = std::make_unique(size); + std::transform(bitmaskAfter, bitmaskAfter + size, bitmaskResolved, bitmaskPF.get(), std::bit_xor()); + auto hwInfo = connectedDevice->getHwInfo(); + auto &l0GfxCoreHelper = connectedDevice->getL0GfxCoreHelper(); + auto threadsWithPF = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, tileIndex, bitmaskPF.get(), size); + auto stoppedThreads = l0GfxCoreHelper.getThreadsFromAttentionBitmask(hwInfo, tileIndex, bitmaskResolved, size); + + if (threadsWithPF.size() == 0) { + zet_debug_event_t debugEvent = {}; + debugEvent.type = ZET_DEBUG_EVENT_TYPE_PAGE_FAULT; + PRINT_DEBUGGER_INFO_LOG("PageFault event for unknown thread", 0); + enqueueApiEvent(debugEvent); + } + + auto gpuVa = getContextStateSaveAreaGpuVa(vmHandle); + auto stateSaveAreaSize = getContextStateSaveAreaSize(vmHandle); + allocateStateSaveAreaMemory(stateSaveAreaSize); + auto stateSaveReadResult = readGpuMemory(vmHandle, stateSaveAreaMemory.data(), stateSaveAreaSize, gpuVa); + if (stateSaveReadResult == ZE_RESULT_SUCCESS) { + + std::unique_lock lock; + if (tileSessionsEnabled) { + lock = std::unique_lock(static_cast(tileSessions[tileIndex].first)->threadStateMutex); + } else { + lock = std::unique_lock(threadStateMutex); + } + for (auto threadId : threadsWithPF) { + PRINT_DEBUGGER_INFO_LOG("PageFault event for thread %s", EuThread::toString(threadId).c_str()); + allThreads[threadId]->setPageFault(true); + } + for (auto threadId : stoppedThreads) { + if (tileSessionsEnabled) { + static_cast(tileSessions[tileIndex].first)->addThreadToNewlyStoppedFromRaisedAttention(threadId, vmHandle, stateSaveAreaMemory.data()); + } else { + addThreadToNewlyStoppedFromRaisedAttention(threadId, vmHandle, stateSaveAreaMemory.data()); + } + } + } + + if (tileSessionsEnabled) { + static_cast(tileSessions[tileIndex].first)->checkTriggerEventsForAttention(); + } else { + checkTriggerEventsForAttention(); + } + return; +} + void DebugSessionLinux::handleEnginesEvent(prelim_drm_i915_debug_event_engines *engines) { PRINT_DEBUGGER_INFO_LOG("ENGINES event: client_handle = %llu, ctx_handle = %llu, num_engines = %llu %s\n", (uint64_t)engines->client_handle, diff --git a/level_zero/tools/source/debug/linux/prelim/debug_session.h b/level_zero/tools/source/debug/linux/prelim/debug_session.h index 4730ab4ccb..e0b83153a8 100644 --- a/level_zero/tools/source/debug/linux/prelim/debug_session.h +++ b/level_zero/tools/source/debug/linux/prelim/debug_session.h @@ -245,10 +245,12 @@ struct DebugSessionLinux : DebugSessionImp { return 0.5; } + uint64_t getVmHandleFromClientAndlrcHandle(uint64_t clientHandle, uint64_t lrcHandle); bool handleVmBindEvent(prelim_drm_i915_debug_event_vm_bind *vmBind); void handleContextParamEvent(prelim_drm_i915_debug_event_context_param *contextParam); void handleAttentionEvent(prelim_drm_i915_debug_event_eu_attention *attention); void handleEnginesEvent(prelim_drm_i915_debug_event_engines *engines); + void handlePageFaultEvent(prelim_drm_i915_debug_event_page_fault *pf); virtual bool ackIsaEvents(uint32_t deviceIndex, uint64_t isaVa); virtual bool ackModuleEvents(uint32_t deviceIndex, uint64_t moduleUuidHandle); diff --git a/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp b/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp index 42a005a69d..d43353a259 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp @@ -756,6 +756,45 @@ TEST(DebugSessionTest, givenStoppedThreadsWhenFillingResumeAndStoppedThreadsFrom } } +TEST(DebugSessionTest, givenThreadsStoppedWithPageFaultWhenCallingfillResumeAndStoppedThreadsFromNewlyStoppedThenCRIsWritten) { + zet_debug_config_t config = {}; + config.pid = 0x1234; + auto hwInfo = *NEO::defaultHwInfo.get(); + + NEO::MockDevice *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); + Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); + + auto sessionMock = std::make_unique(config, &deviceImp); + + EuThread::ThreadId thread = {0, 0, 0, 0, 1}; + + sessionMock->newlyStoppedThreads.push_back(thread); + sessionMock->onlyForceException = true; + + std::vector resumeThreads; + std::vector stoppedThreads; + std::vector interruptedThreads; + + sessionMock->allThreads[thread]->stopThread(1u); + sessionMock->allThreads[thread]->setPageFault(true); + + sessionMock->fillResumeAndStoppedThreadsFromNewlyStopped(resumeThreads, stoppedThreads, interruptedThreads); + EXPECT_EQ(0u, resumeThreads.size()); + EXPECT_EQ(1u, stoppedThreads.size()); + EXPECT_EQ(1u, sessionMock->writeRegistersCallCount); + EXPECT_EQ(ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, sessionMock->writeRegistersReg); + EXPECT_EQ(false, sessionMock->allThreads[thread]->getPageFault()); + + resumeThreads.clear(); + stoppedThreads.clear(); + sessionMock->newlyStoppedThreads.push_back(thread); + sessionMock->onlyForceException = true; + sessionMock->fillResumeAndStoppedThreadsFromNewlyStopped(resumeThreads, stoppedThreads, interruptedThreads); + EXPECT_EQ(1u, resumeThreads.size()); + EXPECT_EQ(0u, stoppedThreads.size()); + EXPECT_EQ(1u, sessionMock->writeRegistersCallCount); +} + TEST(DebugSessionTest, givenNoThreadsStoppedWhenCallingfillResumeAndStoppedThreadsFromNewlyStoppedThenReadStateSaveAreaNotCalled) { zet_debug_config_t config = {}; config.pid = 0x1234; diff --git a/level_zero/tools/test/unit_tests/sources/debug/eu_thread_tests.cpp b/level_zero/tools/test/unit_tests/sources/debug/eu_thread_tests.cpp index c2ecddaf2a..8b97556195 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/eu_thread_tests.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/eu_thread_tests.cpp @@ -255,5 +255,17 @@ TEST(EuThread, GivenEuThreadWhenGettingLastCounterThenCorrectValueIsReturned) { EXPECT_EQ(9u, euThread.getLastCounter()); } +TEST(EuThread, GivenEuThreadWhenGettingPageFaultThenCorrectValueIsReturned) { + ze_device_thread_t devThread = {3, 4, 5, 6}; + EuThread::ThreadId threadId(0, devThread); + EuThread euThread(threadId); + + EXPECT_EQ(false, euThread.getPageFault()); + euThread.setPageFault(true); + EXPECT_EQ(true, euThread.getPageFault()); + euThread.setPageFault(false); + EXPECT_EQ(false, euThread.getPageFault()); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/tools/test/unit_tests/sources/debug/linux/debug_session_fixtures_linux.h b/level_zero/tools/test/unit_tests/sources/debug/linux/debug_session_fixtures_linux.h index 3d3b315211..50d9cb47ec 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/linux/debug_session_fixtures_linux.h +++ b/level_zero/tools/test/unit_tests/sources/debug/linux/debug_session_fixtures_linux.h @@ -20,6 +20,7 @@ #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/tools/source/debug/linux/prelim/debug_session.h" +#include "level_zero/tools/test/unit_tests/sources/debug/debug_session_common.h" #include "common/StateSaveAreaHeader.h" @@ -619,6 +620,61 @@ struct DebugApiLinuxFixture : public DeviceFixture { static constexpr uint8_t bufferSize = 16; }; +struct DebugApiPageFaultEventFixture : public DebugApiLinuxFixture { + void setUp() { + DebugApiLinuxFixture::setUp(); + zet_debug_config_t config = {}; + config.pid = 0x1234; + + sessionMock = std::make_unique(config, device, 10); + ASSERT_NE(nullptr, sessionMock); + sessionMock->clientHandle = MockDebugSessionLinux::mockClientHandle; + + auto handler = new MockIoctlHandler; + sessionMock->ioctlHandler.reset(handler); + SIP::version version = {2, 0, 0}; + initStateSaveArea(sessionMock->stateSaveAreaHeader, version, device); + handler->setPreadMemory(sessionMock->stateSaveAreaHeader.data(), sessionMock->stateSaveAreaHeader.size(), 0x1000); + sessionMock->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->contextsCreated[ctxHandle].vm = vmHandle; + sessionMock->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->lrcToContextHandle[lrcHandle] = ctxHandle; + DebugSessionLinux::BindInfo cssaInfo = {0x1000, sessionMock->stateSaveAreaHeader.size()}; + sessionMock->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->vmToContextStateSaveAreaBindInfo[vmHandle] = cssaInfo; + } + + void tearDown() { + DebugApiLinuxFixture::tearDown(); + } + + void buildPfi915Event() { + buildPfi915Event(MockDebugSessionLinux::mockClientHandle); + } + void buildPfi915Event(uint64_t clientHandle) { + prelim_drm_i915_debug_event_page_fault pf = {}; + pf.base.type = PRELIM_DRM_I915_DEBUG_EVENT_PAGE_FAULT; + pf.base.flags = 0; + pf.base.size = sizeof(prelim_drm_i915_debug_event_page_fault) + (bitmaskSize * 3u); + pf.client_handle = clientHandle; + pf.lrc_handle = lrcHandle; + pf.flags = 0; + pf.ci.engine_class = 0; + pf.ci.engine_instance = 0; + pf.bitmask_size = static_cast(bitmaskSize * 3); + + memcpy(data, &pf, sizeof(prelim_drm_i915_debug_event_page_fault)); + memcpy(ptrOffset(data, offsetof(prelim_drm_i915_debug_event_page_fault, bitmask)), bitmaskBefore.get(), bitmaskSize); + memcpy(ptrOffset(data, offsetof(prelim_drm_i915_debug_event_page_fault, bitmask) + bitmaskSize), bitmaskAfter.get(), bitmaskSize); + memcpy(ptrOffset(data, offsetof(prelim_drm_i915_debug_event_page_fault, bitmask) + (2 * bitmaskSize)), bitmaskResolved.get(), bitmaskSize); + } + + size_t bitmaskSize = 256; + uint8_t data[sizeof(prelim_drm_i915_debug_event_page_fault) + (256 * 3)]; + std::unique_ptr bitmaskBefore, bitmaskAfter, bitmaskResolved; + std::unique_ptr sessionMock; + uint64_t ctxHandle = 2; + uint64_t vmHandle = 7; + uint64_t lrcHandle = 8; +}; + struct DebugApiLinuxMultiDeviceFixture : public MultipleDevicesWithCustomHwInfo { void setUp(); diff --git a/level_zero/tools/test/unit_tests/sources/debug/linux/test_debug_api_linux.cpp b/level_zero/tools/test/unit_tests/sources/debug/linux/test_debug_api_linux.cpp index b4fd58470e..2c82d92682 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/linux/test_debug_api_linux.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/linux/test_debug_api_linux.cpp @@ -3044,7 +3044,6 @@ TEST_F(DebugApiLinuxTest, GivenDebugSessionWhenClientCreateAndDestroyEventsReadO EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result); EXPECT_EQ(eventsCount, static_cast(session->getInternalEventCounter.load())); } - TEST_F(DebugApiLinuxTest, GivenEventWithInvalidFlagsWhenReadingEventThenUnknownErrorIsReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; @@ -3068,7 +3067,7 @@ TEST_F(DebugApiLinuxTest, GivenEventWithInvalidFlagsWhenReadingEventThenUnknownE auto memory = std::make_unique(MockDebugSessionLinux::maxEventSize / sizeof(uint64_t)); prelim_drm_i915_debug_event *event = reinterpret_cast(memory.get()); event->type = PRELIM_DRM_I915_DEBUG_EVENT_READ; - event->flags = 0; + event->flags = 0x8000; event->size = MockDebugSessionLinux::maxEventSize; ze_result_t result = session->readEventImp(event); @@ -3099,33 +3098,6 @@ TEST_F(DebugApiLinuxTest, GivenDebugSessionInitializationWhenNoValidEventsAreRea EXPECT_EQ(ZE_RESULT_NOT_READY, result); } -TEST_F(DebugApiLinuxTest, GivenInvalidFlagsWhenReadingEventThenUnknownErrorIsReturned) { - zet_debug_config_t config = {}; - config.pid = 0x1234; - - auto session = std::make_unique(config, device, 10); - ASSERT_NE(nullptr, session); - - prelim_drm_i915_debug_event_client clientInvalidFlag = {}; - clientInvalidFlag.base.type = PRELIM_DRM_I915_DEBUG_EVENT_CLIENT; - clientInvalidFlag.base.flags = 0x8000; - clientInvalidFlag.base.size = sizeof(prelim_drm_i915_debug_event_client); - clientInvalidFlag.handle = 1; - - auto handler = new MockIoctlHandler; - handler->eventQueue.push({reinterpret_cast(&clientInvalidFlag), static_cast(clientInvalidFlag.base.size)}); - handler->pollRetVal = 1; - - session->ioctlHandler.reset(handler); - - uint64_t data[512]; - auto drmDebugEvent = reinterpret_cast(data); - - ze_result_t result = session->readEventImp(drmDebugEvent); - EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result); - EXPECT_EQ(1u, static_cast(handler->ioctlCalled)); -} - TEST_F(DebugApiLinuxTest, GivenValidFlagsWhenReadingEventThenEventIsNotProcessed) { zet_debug_config_t config = {}; config.pid = 0x1234; @@ -5871,6 +5843,130 @@ TEST_F(DebugApiLinuxTest, givenTileAttachEnabledWhenDeviceDoesNotHaveTilesThenTi EXPECT_EQ(0u, session->tileSessions.size()); } +using DebugApiLinuxPageFaultEventTest = Test; + +TEST_F(DebugApiLinuxPageFaultEventTest, GivenNoPageFaultingThreadWhenHandlingPageFaultEventThenL0ApiEventGenerated) { + auto &hwInfo = neoDevice->getHardwareInfo(); + auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); + + std::vector threads{ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1}, + {0, 0, 0, 0, 2}, + {0, 0, 0, 0, 3}, + {0, 0, 0, 0, 4}, + {0, 0, 0, 0, 5}, + {0, 0, 0, 0, 6}}; + + for (auto thread : threads) { + sessionMock->stoppedThreads[thread.packed] = 1; + } + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmaskBefore, bitmaskSize); + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmaskAfter, bitmaskSize); + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmaskResolved, bitmaskSize); + + bitmaskSize = std::min(size_t(128), bitmaskSize); + buildPfi915Event(); + sessionMock->handleEvent(reinterpret_cast(data)); + EXPECT_EQ(threads.size(), sessionMock->newlyStoppedThreads.size()); + for (auto thread : threads) { + EXPECT_FALSE(sessionMock->allThreads[thread]->getPageFault()); + } + ASSERT_EQ(1u, sessionMock->apiEvents.size()); + auto event = sessionMock->apiEvents.front(); + ASSERT_EQ(event.type, ZET_DEBUG_EVENT_TYPE_PAGE_FAULT); +} + +TEST_F(DebugApiLinuxPageFaultEventTest, GivenPageFaultEventWIthInvalidClientHandleThenNoThreadsReportedStopped) { + + auto &hwInfo = neoDevice->getHardwareInfo(); + auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); + + std::vector threads; + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmaskBefore, bitmaskSize); + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmaskAfter, bitmaskSize); + + threads.push_back({0, 0, 0, 0, 0}); + threads.push_back({0, 0, 0, 0, 2}); + threads.push_back({0, 0, 0, 0, 3}); + threads.push_back({0, 0, 0, 0, 4}); + threads.push_back({0, 0, 0, 0, 6}); + for (auto thread : threads) { + sessionMock->stoppedThreads[thread.packed] = 1; + } + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmaskResolved, bitmaskSize); + + bitmaskSize = std::min(size_t(128), bitmaskSize); + buildPfi915Event(MockDebugSessionLinux::invalidClientHandle); + sessionMock->handleEvent(reinterpret_cast(data)); + + EXPECT_EQ(0u, sessionMock->newlyStoppedThreads.size()); +} + +TEST_F(DebugApiLinuxPageFaultEventTest, GivenPageFaultEventWhenHandlingEventThenThreadsReportedStoppedAndPfSet) { + + auto &hwInfo = neoDevice->getHardwareInfo(); + auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); + + std::vector threads; + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmaskBefore, bitmaskSize); + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmaskAfter, bitmaskSize); + + threads.push_back({0, 0, 0, 0, 0}); + threads.push_back({0, 0, 0, 0, 2}); + threads.push_back({0, 0, 0, 0, 3}); + threads.push_back({0, 0, 0, 0, 4}); + threads.push_back({0, 0, 0, 0, 6}); + for (auto thread : threads) { + sessionMock->stoppedThreads[thread.packed] = 1; + } + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmaskResolved, bitmaskSize); + + bitmaskSize = std::min(size_t(128), bitmaskSize); + buildPfi915Event(); + sessionMock->handleEvent(reinterpret_cast(data)); + + EXPECT_EQ(threads.size(), sessionMock->newlyStoppedThreads.size()); + for (auto thread : threads) { + EXPECT_TRUE(sessionMock->allThreads[thread]->getPageFault()); + } +} + +TEST_F(DebugApiLinuxPageFaultEventTest, GivenPageFaultEventWhenHandlingEventThenThreadsNotNewlyResolvedAreNotMarkedAsPf) { + + auto &hwInfo = neoDevice->getHardwareInfo(); + auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); + + std::vector threadsBefore, threadsAfter, threadsResolved; + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threadsBefore, hwInfo, bitmaskBefore, bitmaskSize); + threadsAfter.push_back({0, 0, 0, 0, 0}); + threadsAfter.push_back({0, 0, 0, 0, 1}); + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threadsAfter, hwInfo, bitmaskAfter, bitmaskSize); + threadsResolved.push_back({0, 0, 0, 0, 0}); + threadsResolved.push_back({0, 0, 0, 0, 1}); + threadsResolved.push_back({0, 0, 0, 0, 2}); + threadsResolved.push_back({0, 0, 0, 0, 3}); + + for (auto thread : threadsResolved) { + sessionMock->stoppedThreads[thread.packed] = 1; + } + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads(threadsResolved, hwInfo, bitmaskResolved, bitmaskSize); + + bitmaskSize = std::min(size_t(128), bitmaskSize); + buildPfi915Event(); + sessionMock->handleEvent(reinterpret_cast(data)); + + EXPECT_EQ(threadsResolved.size(), sessionMock->newlyStoppedThreads.size()); + + for (auto thread : threadsResolved) { + if (std::find(threadsAfter.begin(), threadsAfter.end(), thread) == threadsAfter.end()) { + EXPECT_TRUE(sessionMock->allThreads[thread]->getPageFault()); + } else { + EXPECT_FALSE(sessionMock->allThreads[thread]->getPageFault()); + } + } +} + using DebugApiLinuxAttentionTest = Test; TEST_F(DebugApiLinuxAttentionTest, GivenEuAttentionEventForThreadsWhenHandlingEventThenNewlyStoppedThreadsSaved) { @@ -8533,6 +8629,37 @@ TEST_F(AffinityMaskMultipleSubdevicesTestLinux, GivenEventWithAckFlagAndTileNotW EXPECT_EQ(vmBindIsa->base.seqno, handler->debugEventAcked.seqno); } +TEST_F(AffinityMaskMultipleSubdevicesTestLinux, GivenPfEventForTileNotWithinBitfieldWhenHandlingEventThenEventIsSkipped) { + auto debugSession = std::make_unique(zet_debug_config_t{1234}, deviceImp, 10); + + uint64_t ctxHandle = 2; + uint64_t vmHandle = 7; + uint64_t lrcHandle = 8; + + debugSession->clientHandleToConnection[debugSession->clientHandle]->contextsCreated[ctxHandle].vm = vmHandle; + debugSession->clientHandleToConnection[debugSession->clientHandle]->lrcToContextHandle[lrcHandle] = ctxHandle; + debugSession->clientHandleToConnection[debugSession->clientHandle]->vmToTile[vmHandle] = 2; + + prelim_drm_i915_debug_event_page_fault pf = {}; + pf.base.type = PRELIM_DRM_I915_DEBUG_EVENT_PAGE_FAULT; + pf.base.flags = 0; + pf.base.size = sizeof(prelim_drm_i915_debug_event_page_fault); + pf.client_handle = MockDebugSessionLinux::mockClientHandle; + pf.lrc_handle = lrcHandle; + pf.flags = 0; + + auto engineInfo = mockDrm->getEngineInfo(); + auto ci = engineInfo->getEngineInstance(2, hwInfo.capabilityTable.defaultEngineType); + pf.ci.engine_class = ci->engineClass; + pf.ci.engine_instance = ci->engineInstance; + + ze_device_thread_t thread = {0, 0, 0, UINT32_MAX}; + debugSession->pendingInterrupts.push_back(std::pair(thread, false)); + + debugSession->handleEvent(&pf.base); + EXPECT_FALSE(debugSession->triggerEvents); +} + TEST_F(AffinityMaskMultipleSubdevicesTestLinux, GivenAttEventForTileNotWithinBitfieldWhenHandlingEventThenEventIsSkipped) { auto debugSession = std::make_unique(zet_debug_config_t{1234}, deviceImp, 10); diff --git a/level_zero/tools/test/unit_tests/sources/debug/linux/tile_debug_session_linux_tests.cpp b/level_zero/tools/test/unit_tests/sources/debug/linux/tile_debug_session_linux_tests.cpp index eed7653796..e00f614c0a 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/linux/tile_debug_session_linux_tests.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/linux/tile_debug_session_linux_tests.cpp @@ -819,6 +819,70 @@ TEST_F(TileAttachTest, givenStoppedThreadsWhenHandlingAttentionEventThenStoppedT EXPECT_TRUE(tileSessions[1]->triggerEvents); } +TEST_F(TileAttachTest, givenStoppedThreadsWhenHandlingPageFaultEventThenStoppedThreadsFromEventAreProcessed) { + // debug attach both tiles + rootSession->tileSessions[0].second = true; + rootSession->tileSessions[1].second = true; + + uint64_t ctxHandle = 2; + uint64_t vmHandle = 7; + uint64_t lrcHandle = 8; + + rootSession->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->contextsCreated[ctxHandle].vm = vmHandle; + rootSession->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->lrcToContextHandle[lrcHandle] = ctxHandle; + rootSession->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->vmToTile[vmHandle] = 1; + + SIP::version version = {2, 0, 0}; + initStateSaveArea(rootSession->stateSaveAreaHeader, version, deviceImp); + DebugSessionLinux::BindInfo cssaInfo = {reinterpret_cast(rootSession->stateSaveAreaHeader.data()), rootSession->stateSaveAreaHeader.size()}; + rootSession->clientHandleToConnection[MockDebugSessionLinux::mockClientHandle]->vmToContextStateSaveAreaBindInfo[vmHandle] = cssaInfo; + + auto handler = new MockIoctlHandler; + rootSession->ioctlHandler.reset(handler); + handler->setPreadMemory(rootSession->stateSaveAreaHeader.data(), rootSession->stateSaveAreaHeader.size(), reinterpret_cast(rootSession->stateSaveAreaHeader.data())); + + uint8_t data[sizeof(prelim_drm_i915_debug_event_page_fault) + 128 * 3]; + + auto engineInfo = mockDrm->getEngineInfo(); + auto engineInstance = engineInfo->getEngineInstance(1, hwInfo.capabilityTable.defaultEngineType); + + EuThread::ThreadId thread = {1, 0, 0, 0, 0}; + tileSessions[1]->stoppedThreads[thread.packed] = 1; + + std::unique_ptr bitmaskBefore, bitmaskAfter, bitmaskResolved; + size_t bitmaskSize = 0; + auto &hwInfo = neoDevice->getHardwareInfo(); + auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); + + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads({thread}, hwInfo, bitmaskBefore, bitmaskSize); + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads({thread}, hwInfo, bitmaskAfter, bitmaskSize); + l0GfxCoreHelper.getAttentionBitmaskForSingleThreads({thread}, hwInfo, bitmaskResolved, bitmaskSize); + + prelim_drm_i915_debug_event_page_fault pf = {}; + pf.base.type = PRELIM_DRM_I915_DEBUG_EVENT_PAGE_FAULT; + pf.base.flags = PRELIM_DRM_I915_DEBUG_EVENT_STATE_CHANGE; + pf.base.size = sizeof(prelim_drm_i915_debug_event_page_fault); + pf.base.seqno = 2; + pf.client_handle = MockDebugSessionLinux::mockClientHandle; + pf.lrc_handle = lrcHandle; + pf.flags = 0; + pf.ci.engine_class = engineInstance->engineClass; + pf.ci.engine_instance = engineInstance->engineInstance; + pf.bitmask_size = static_cast(bitmaskSize * 3u); + + bitmaskSize = std::min(size_t(128), bitmaskSize); + memcpy(data, &pf, sizeof(prelim_drm_i915_debug_event_page_fault)); + memcpy(ptrOffset(data, offsetof(prelim_drm_i915_debug_event_page_fault, bitmask)), bitmaskBefore.get(), bitmaskSize); + memcpy(ptrOffset(data, offsetof(prelim_drm_i915_debug_event_page_fault, bitmask) + bitmaskSize), bitmaskAfter.get(), bitmaskSize); + memcpy(ptrOffset(data, offsetof(prelim_drm_i915_debug_event_page_fault, bitmask) + (2 * bitmaskSize)), bitmaskResolved.get(), bitmaskSize); + + rootSession->handleEvent(reinterpret_cast(data)); + + auto expectedThreadsToCheck = hwInfo.capabilityTable.fusedEuEnabled ? 2u : 1u; + EXPECT_EQ(expectedThreadsToCheck, tileSessions[1]->newlyStoppedThreads.size()); + EXPECT_TRUE(tileSessions[1]->triggerEvents); +} + TEST_F(TileAttachTest, GivenBlockingOnCpuDetachedTileAndZebinModulesWithEventsToAckWhenDetachingTileThenNoAckIoctlIsCalled) { auto handler = new MockIoctlHandler; rootSession->ioctlHandler.reset(handler); diff --git a/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h b/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h index 2b1d3070e8..ef1889dca5 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h +++ b/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h @@ -298,7 +298,6 @@ struct MockDebugSession : public L0::DebugSessionImp { [[maybe_unused]] auto offset = ptrDiff(gpuVa, reinterpret_cast(stateSaveAreaHeader.data())); memcpy_s(reinterpret_cast(gpuVa), size, input, size); } - return writeMemoryResult; } diff --git a/shared/source/os_interface/linux/i915_prelim.h b/shared/source/os_interface/linux/i915_prelim.h index 739ca8b093..07391b548a 100644 --- a/shared/source/os_interface/linux/i915_prelim.h +++ b/shared/source/os_interface/linux/i915_prelim.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -51,6 +51,7 @@ using NEO::PrelimI915::prelim_drm_i915_debug_event_context; using NEO::PrelimI915::prelim_drm_i915_debug_event_context_param; using NEO::PrelimI915::prelim_drm_i915_debug_event_engines; using NEO::PrelimI915::prelim_drm_i915_debug_event_eu_attention; +using NEO::PrelimI915::prelim_drm_i915_debug_event_page_fault; using NEO::PrelimI915::prelim_drm_i915_debug_event_uuid; using NEO::PrelimI915::prelim_drm_i915_debug_event_vm; using NEO::PrelimI915::prelim_drm_i915_debug_event_vm_bind; diff --git a/third_party/uapi/prelim/drm/i915_drm_prelim.h b/third_party/uapi/prelim/drm/i915_drm_prelim.h index 25e5318042..5e38ec915c 100644 --- a/third_party/uapi/prelim/drm/i915_drm_prelim.h +++ b/third_party/uapi/prelim/drm/i915_drm_prelim.h @@ -772,7 +772,8 @@ struct prelim_drm_i915_debug_event { #define PRELIM_DRM_I915_DEBUG_EVENT_CONTEXT_PARAM 7 #define PRELIM_DRM_I915_DEBUG_EVENT_EU_ATTENTION 8 #define PRELIM_DRM_I915_DEBUG_EVENT_ENGINES 9 -#define PRELIM_DRM_I915_DEBUG_EVENT_MAX_EVENT PRELIM_DRM_I915_DEBUG_EVENT_ENGINES +#define PRELIM_DRM_I915_DEBUG_EVENT_PAGE_FAULT 10 +#define PRELIM_DRM_I915_DEBUG_EVENT_MAX_EVENT PRELIM_DRM_I915_DEBUG_EVENT_PAGE_FAULT __u32 flags; #define PRELIM_DRM_I915_DEBUG_EVENT_CREATE (1 << 31) @@ -870,6 +871,34 @@ struct prelim_drm_i915_debug_event_eu_attention { __u8 bitmask[0]; } __attribute__((packed)); +struct prelim_drm_i915_debug_event_page_fault { + struct prelim_drm_i915_debug_event base; + __u64 client_handle; + __u64 ctx_handle; + __u64 lrc_handle; + + __u32 flags; + + struct i915_engine_class_instance ci; + + __u64 page_fault_address; + + /** + * Size of one bitmask: sum of size before/after/resolved att bits. + * It has three times the size of prelim_drm_i915_debug_event_eu_attention.bitmask_size. + */ + __u32 bitmask_size; + + /** + * Bitmask of thread attentions starting from natural + * hardware order of slice=0,subslice=0,eu=0, 8 attention + * bits per eu. + * The order of the bitmask array is before, after, resolved. + */ + + __u8 bitmask[0]; +} __attribute__((packed)); + struct prelim_drm_i915_debug_read_uuid { __u64 client_handle; __u64 handle;