From ec009cf9e338b8cfcd3a7b76d5a0ae95fbbd889e Mon Sep 17 00:00:00 2001 From: Young Jin Yoon Date: Thu, 21 Mar 2024 18:02:47 +0000 Subject: [PATCH] fix: abort only when disabling scratch page Modified checkResetStatus to abort only when scratch page is disabled. Removed an incorrect UNRECOVERABLE_IF statement based on the status: validPageFault can be true when banned flag is not set, if CAT error does not occur as a result of page fault. Related-To: GSD-5673 Signed-off-by: Young Jin Yoon --- shared/source/os_interface/linux/drm_neo.cpp | 8 ++++---- shared/test/unit_test/os_interface/linux/drm_tests.cpp | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index 31e276de08..a5554c816a 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -273,10 +273,10 @@ bool Drm::checkResetStatus(OsContext &osContext) { uint32_t status = 0; const auto retVal{ioctlHelper->getResetStats(resetStats, &status, &fault)}; UNRECOVERABLE_IF(retVal != 0); - if (ioctlHelper->validPageFault(fault.flags)) { - UNRECOVERABLE_IF((status & ioctlHelper->getStatusForResetStats(true)) == 0); - PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stderr, "ERROR: Unexpected page fault from GPU at 0x%llx, type: %d, level: %d, access: %d, aborting.\n", - fault.addr, fault.type, fault.level, fault.access); + if (disableScratch && ioctlHelper->validPageFault(fault.flags)) { + bool banned = ((status & ioctlHelper->getStatusForResetStats(true)) == 0); + PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stderr, "ERROR: Unexpected page fault from GPU at 0x%llx, type: %d, level: %d, access: %d, banned: %d, aborting.\n", + fault.addr, fault.type, fault.level, fault.access, banned); UNRECOVERABLE_IF(true); } if (resetStats.batchActive > 0 || resetStats.batchPending > 0) { diff --git 
a/shared/test/unit_test/os_interface/linux/drm_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_tests.cpp index a029a495cc..0fd634182b 100644 --- a/shared/test/unit_test/os_interface/linux/drm_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_tests.cpp @@ -1408,8 +1408,10 @@ class MockIoctlHelperResetStats : public MockIoctlHelper { }; TEST(DrmDeathTest, GivenResetStatsWithValidFaultWhenIsGpuHangIsCalledThenProcessTerminated) { - MockExecutionEnvironment executionEnvironment{}; + DebugManagerStateRestore restore; + debugManager.flags.DisableScratchPages.set(true); + MockExecutionEnvironment executionEnvironment{}; DrmMock drm{*executionEnvironment.rootDeviceEnvironments[0]}; uint32_t contextId{0}; EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::regular})};