Implement GPU hang detection

This change uses DRM_IOCTL_I915_GET_RESET_STATS to detect
GPU hangs. When such situation is encountered, then
zeCommandQueueSynchronize returns ZE_RESULT_ERROR_DEVICE_LOST.

Related-To: NEO-5313
Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:
Patryk Wrobel
2022-01-20 16:56:19 +00:00
committed by Compute-Runtime-Automation
parent 543c854a3b
commit 498cf5e871
37 changed files with 556 additions and 101 deletions

View File

@ -1487,7 +1487,7 @@ struct TestEventCsr : public UltCommandStreamReceiver<GfxFamily> {
TestEventCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield)
: UltCommandStreamReceiver<GfxFamily>(const_cast<ExecutionEnvironment &>(executionEnvironment), 0, deviceBitfield) {}
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) override {
WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) override {
waitForCompletionWithTimeoutCalled++;
waitForCompletionWithTimeoutParamsPassed.push_back({enableTimeout, timeoutMs, taskCountToWait});
return waitForCompletionWithTimeoutResult;
@ -1500,7 +1500,7 @@ struct TestEventCsr : public UltCommandStreamReceiver<GfxFamily> {
};
uint32_t waitForCompletionWithTimeoutCalled = 0u;
bool waitForCompletionWithTimeoutResult = true;
WaitStatus waitForCompletionWithTimeoutResult = WaitStatus::Ready;
StackVec<WaitForCompletionWithTimeoutParams, 1> waitForCompletionWithTimeoutParamsPassed{};
};