mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
fix: Don't abort application due to gpu fault when debugging is enabled
Signed-off-by: Brandon Yates <brandon.yates@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c306c457db
commit
106e8be9a9
@@ -253,7 +253,8 @@ bool Drm::checkResetStatus(OsContext &osContext) {
|
||||
uint32_t status = 0;
|
||||
const auto retVal{ioctlHelper->getResetStats(resetStats, &status, &fault)};
|
||||
UNRECOVERABLE_IF(retVal != 0);
|
||||
if (checkToDisableScratchPage() && ioctlHelper->validPageFault(fault.flags)) {
|
||||
auto debuggingEnabled = rootDeviceEnvironment.executionEnvironment.isDebuggingEnabled();
|
||||
if (!debuggingEnabled && checkToDisableScratchPage() && ioctlHelper->validPageFault(fault.flags)) {
|
||||
bool banned = ((status & ioctlHelper->getStatusForResetStats(true)) != 0);
|
||||
IoFunctions::fprintf(stderr, "Segmentation fault from GPU at 0x%llx, ctx_id: %u (%s) type: %d (%s), level: %d (%s), access: %d (%s), banned: %d, aborting.\n",
|
||||
fault.addr,
|
||||
|
||||
@@ -1469,6 +1469,39 @@ class MockIoctlHelperResetStats : public MockIoctlHelper {
|
||||
ResetStatsFault resetStatsFaultReturnValue{};
|
||||
};
|
||||
|
||||
// With scratch pages disabled and the execution environment switched to online
// debugging, a fault record returned by the mocked ioctl helper must not make
// Drm::isGpuHangDetected() report a hang: the call is expected to return false.
TEST(DrmTest, GivenResetStatsWithValidFaultAndDebuggingEnabledWhenIsGpuHangIsCalledThenProcessNotTerminated) {
    DebugManagerStateRestore stateRestorer;
    debugManager.flags.DisableScratchPages.set(true);

    // Fixture data that does not depend on any mock object.
    ResetStatsFault reportedFault{};
    reportedFault.flags = 1; // NOTE(review): presumably a value the mock helper treats as a valid page fault - confirm
    reportedFault.addr = 0x1234;
    reportedFault.type = 2;
    reportedFault.level = 3;

    ResetStats queuedResetStats{};
    queuedResetStats.contextId = 0;

    // Device under test, with debugging enabled before the policy is configured.
    MockExecutionEnvironment mockEnvironment{};
    DrmMock drmUnderTest{*mockEnvironment.rootDeviceEnvironments[0]};
    mockEnvironment.setDebuggingMode(NEO::DebuggingMode::online);
    drmUnderTest.configureScratchPagePolicy();
    drmUnderTest.configureGpuFaultCheckThreshold();

    uint32_t osContextId{0};
    EngineDescriptor bcsDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::regular})};
    auto resetStatsHelper = std::make_unique<MockIoctlHelperResetStats>(drmUnderTest);

    MockOsContextLinux osContext{drmUnderTest, 0, osContextId, bcsDescriptor};
    osContext.drmContextIds.push_back(0);

    drmUnderTest.resetStatsToReturn.push_back(queuedResetStats);

    // Program the mock helper with the status and fault to hand back, then install it.
    resetStatsHelper->statusReturnValue = 2u;
    resetStatsHelper->resetStatsFaultReturnValue = reportedFault;
    drmUnderTest.ioctlHelper = std::move(resetStatsHelper);

    EXPECT_FALSE(drmUnderTest.isGpuHangDetected(osContext));
}
|
||||
|
||||
TEST(DrmDeathTest, GivenResetStatsWithValidFaultWhenIsGpuHangIsCalledThenProcessTerminated) {
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.DisableScratchPages.set(true);
|
||||
|
||||
Reference in New Issue
Block a user