Implement GPU hang detection

This change uses DRM_IOCTL_I915_GET_RESET_STATS to detect
GPU hangs. When such situation is encountered, then
zeCommandQueueSynchronize returns ZE_RESULT_ERROR_DEVICE_LOST.

Related-To: NEO-5313
Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:
Patryk Wrobel
2022-01-20 16:56:19 +00:00
committed by Compute-Runtime-Automation
parent 543c854a3b
commit 498cf5e871
37 changed files with 556 additions and 101 deletions

View File

@@ -492,13 +492,13 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
}
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
flushSubmissionsAndDownloadAllocations(taskCountToWait);
BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
}
template <typename GfxFamily>
bool TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
flushSubmissionsAndDownloadAllocations(taskCountToWait);
return BaseClass::waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, taskCountToWait);
}