Implement GPU hang detection

This change uses DRM_IOCTL_I915_GET_RESET_STATS to detect GPU hangs. When such a situation is encountered, zeCommandQueueSynchronize returns ZE_RESULT_ERROR_DEVICE_LOST. Related-To: NEO-5313 Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
2025-12-25 13:33:02 +08:00 · 2022-01-20 16:56:19 +00:00
parent 543c854a3b
commit 498cf5e871
37 changed files with 556 additions and 101 deletions
--- a/shared/source/command_stream/aub_command_stream_receiver_hw.h
+++ b/shared/source/command_stream/aub_command_stream_receiver_hw.h
@@ -63,7 +63,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
    MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
    void pollForCompletion() override;
    void pollForCompletionImpl() override;
-    void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;

    uint32_t getDumpHandle();
    MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);
--- a/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl
+++ b/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl
@@ -606,9 +606,11 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
 }

 template <typename GfxFamily>
-inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
-    CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
+inline WaitStatus AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
+    const auto result = CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
    pollForCompletion();
+
+    return result;
 }

 template <typename GfxFamily>
--- a/shared/source/command_stream/command_stream_receiver.cpp
+++ b/shared/source/command_stream/command_stream_receiver.cpp
@@ -227,6 +227,10 @@ bool CommandStreamReceiver::skipResourceCleanup() const {
    return this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->skipResourceCleanup();
 }

+bool CommandStreamReceiver::isGpuHangDetected() const { // false when no OS interface or driver model is set; otherwise asks the driver model about this CSR's OS context id
+    return this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->isGpuHangDetected(osContext->getContextId());
+}
+
 void CommandStreamReceiver::cleanupResources() {
    if (this->skipResourceCleanup()) {
        return;
@@ -286,19 +290,21 @@ void CommandStreamReceiver::cleanupResources() {
    }
 }

-bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
+WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
    uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
    if (latestSentTaskCount < taskCountToWait) {
        if (!this->flushBatchedSubmissions()) {
-            return false;
+            const auto isGpuHang{isGpuHangDetected()};
+            return isGpuHang ? WaitStatus::GpuHang : WaitStatus::NotReady;
        }
    }

    return baseWaitFunction(getTagAddress(), enableTimeout, timeoutMicroseconds, taskCountToWait);
 }

-bool CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
-    std::chrono::high_resolution_clock::time_point time1, time2;
+WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
+    std::chrono::microseconds elapsedTimeSinceGpuHangCheck{0};
+    std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, currentTime;
    int64_t timeDiff = 0;

    uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
@@ -308,23 +314,33 @@ bool CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, boo

    volatile uint32_t *partitionAddress = pollAddress;

-    time1 = std::chrono::high_resolution_clock::now();
+    waitStartTime = std::chrono::high_resolution_clock::now();
+    lastHangCheckTime = waitStartTime;
    for (uint32_t i = 0; i < activePartitions; i++) {
        while (*partitionAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
            if (WaitUtils::waitFunction(partitionAddress, taskCountToWait)) {
                break;
            }

+            currentTime = std::chrono::high_resolution_clock::now();
+            elapsedTimeSinceGpuHangCheck = std::chrono::duration_cast<std::chrono::microseconds>(currentTime - lastHangCheckTime);
+
+            if (elapsedTimeSinceGpuHangCheck.count() >= gpuHangCheckPeriod.count()) {
+                lastHangCheckTime = currentTime;
+                if (isGpuHangDetected()) {
+                    return WaitStatus::GpuHang;
+                }
+            }
+
            if (enableTimeout) {
-                time2 = std::chrono::high_resolution_clock::now();
-                timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(time2 - time1).count();
+                timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(currentTime - waitStartTime).count();
            }
        }

        partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset);
    }

-    return testTaskCountReady(pollAddress, taskCountToWait);
+    return testTaskCountReady(pollAddress, taskCountToWait) ? WaitStatus::Ready : WaitStatus::NotReady;
 }

 void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
--- a/shared/source/command_stream/command_stream_receiver.h
+++ b/shared/source/command_stream/command_stream_receiver.h
@@ -26,6 +26,7 @@
 #include "shared/source/os_interface/os_thread.h"
 #include "shared/source/utilities/spinlock.h"

+#include <chrono>
 #include <cstddef>
 #include <cstdint>

@@ -63,6 +64,12 @@ enum class DispatchMode {
    BatchedDispatch             // dispatching is batched, explicit clFlush is required
 };

+enum class WaitStatus { // result of the CommandStreamReceiver wait functions
+    NotReady = 0, // task count not confirmed as reached and no GPU hang detected
+    Ready = 1,    // awaited task count was reached
+    GpuHang = 2,  // the driver model reported a GPU hang
+};
+
 class CommandStreamReceiver {
  public:
    enum class SamplerCacheFlushState {
@@ -158,9 +165,9 @@ class CommandStreamReceiver {
    void requestStallingCommandsOnNextFlush() { stallingCommandsOnNextFlushRequired = true; }
    bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; }

-    virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
-    virtual bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
-    bool baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
+    virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
+    virtual WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
+    WaitStatus baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
    bool testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait);
    virtual void downloadAllocations(){};

@@ -316,6 +323,7 @@ class CommandStreamReceiver {
    void printDeviceIndex();
    void checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation);
    bool checkImplicitFlushForGpuIdle();
+    bool isGpuHangDetected() const;
    MOCKABLE_VIRTUAL std::unique_lock<MutexType> obtainHostPtrSurfaceCreationLock();

    std::unique_ptr<FlushStampTracker> flushStamp;
@@ -373,6 +381,7 @@ class CommandStreamReceiver {
    SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired;
    PreemptionMode lastPreemptionMode = PreemptionMode::Initial;

+    std::chrono::microseconds gpuHangCheckPeriod{500'000};
    uint32_t lastSentL3Config = 0;
    uint32_t latestSentStatelessMocsConfig = 0;
    uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber;
--- a/shared/source/command_stream/command_stream_receiver_hw.h
+++ b/shared/source/command_stream/command_stream_receiver_hw.h
@@ -77,7 +77,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
    bool isPipelineSelectAlreadyProgrammed() const;
    void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo);

-    void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;

    void collectStateBaseAddresPatchInfo(
        uint64_t commandBufferAddress,
--- a/shared/source/command_stream/command_stream_receiver_hw_base.inl
+++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl
@@ -859,7 +859,7 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPipelineSelect()
 }

 template <typename GfxFamily>
-inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
+inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
    int64_t waitTimeout = 0;
    bool enableTimeout = false;

@@ -870,12 +870,18 @@ inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFal
                       "\nWaiting for task count %u at location %p. Current value: %u\n",
                       taskCountToWait, getTagAddress(), *getTagAddress());

-    bool status = waitForCompletionWithTimeout(enableTimeout, waitTimeout, taskCountToWait);
-    if (!status) {
+    auto status = waitForCompletionWithTimeout(enableTimeout, waitTimeout, taskCountToWait);
+    if (status == WaitStatus::NotReady) {
        waitForFlushStamp(flushStampToWait);
        //now call blocking wait, this is to ensure that task count is reached
        status = waitForCompletionWithTimeout(false, 0, taskCountToWait);
    }
+
+    // If a GPU hang occurred, propagate it to the caller.
+    if (status == WaitStatus::GpuHang) {
+        return status;
+    }
+
    UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait);

    if (kmdNotifyHelper->quickKmdSleepForSporadicWaitsEnabled()) {
@@ -884,6 +890,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFal

    PRINT_DEBUG_STRING(DebugManager.flags.LogWaitingForCompletion.get(), stdout,
                       "\nWaiting completed. Current value: %u\n", *getTagAddress());
+
+    return WaitStatus::Ready;
 }

 template <typename GfxFamily>
--- a/shared/source/command_stream/command_stream_receiver_with_aub_dump.h
+++ b/shared/source/command_stream/command_stream_receiver_with_aub_dump.h
@@ -39,8 +39,8 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR {
        return CommandStreamReceiverType::CSR_HW_WITH_AUB;
    }

-    void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
-                                               bool useQuickKmdSleep, bool forcePowerSavingMode) override;
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
+                                                     bool useQuickKmdSleep, bool forcePowerSavingMode) override;

    size_t getPreferredTagPoolSize() const override { return 1; }

--- a/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl
+++ b/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl
@@ -78,13 +78,13 @@ void CommandStreamReceiverWithAUBDump<BaseCSR>::setupContext(OsContext &osContex
 }

 template <typename BaseCSR>
-void CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
-                                                                                      bool useQuickKmdSleep, bool forcePowerSavingMode) {
+WaitStatus CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
+                                                                                            bool useQuickKmdSleep, bool forcePowerSavingMode) {
    if (aubCSR) {
        aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
    }

-    BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
+    return BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
 }

 template <typename BaseCSR>
--- a/shared/source/command_stream/tbx_command_stream_receiver_hw.h
+++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.h
@@ -42,8 +42,8 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa

    SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;

-    void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
-    bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override;
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
+    WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override;
    void downloadAllocations() override;
    void downloadAllocation(GraphicsAllocation &gfxAllocation) override;

--- a/shared/source/command_stream/tbx_command_stream_receiver_hw.inl
+++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.inl
@@ -492,13 +492,13 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
 }

 template <typename GfxFamily>
-void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
+WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
    flushSubmissionsAndDownloadAllocations(taskCountToWait);
-    BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
+    return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
 }

 template <typename GfxFamily>
-bool TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
+WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
    flushSubmissionsAndDownloadAllocations(taskCountToWait);
    return BaseClass::waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, taskCountToWait);
 }
--- a/shared/source/os_interface/linux/drm_neo.cpp
+++ b/shared/source/os_interface/linux/drm_neo.cpp
@@ -317,6 +317,28 @@ int Drm::queryGttSize(uint64_t &gttSizeOutput) {
    return ret;
 }

+bool Drm::isGpuHangDetected(uint32_t contextId) { // true if any DRM context behind the given OS context has non-zero reset counters
+    const auto &engines = this->rootDeviceEnvironment.executionEnvironment.memoryManager->getRegisteredEngines();
+    UNRECOVERABLE_IF(engines.size() <= contextId); // contextId is used below as an index into the registered engines
+
+    const auto osContextLinux = static_cast<OsContextLinux *>(engines[contextId].osContext);
+    const auto &drmContextIds = osContextLinux->getDrmContextIds();
+
+    for (const auto drmContextId : drmContextIds) { // query reset statistics for each DRM context id
+        drm_i915_reset_stats reset_stats{};
+        reset_stats.ctx_id = drmContextId;
+
+        const auto retVal{ioctl(DRM_IOCTL_I915_GET_RESET_STATS, &reset_stats)};
+        UNRECOVERABLE_IF(retVal != 0); // a failing query is treated as unrecoverable
+
+        if (reset_stats.batch_active > 0 || reset_stats.batch_pending > 0) { // either counter being non-zero is reported as a hang
+            return true;
+        }
+    }
+
+    return false;
+}
+
 void Drm::checkPreemptionSupport() {
    int value = 0;
    auto ret = getParamIoctl(I915_PARAM_HAS_SCHEDULER, &value);
--- a/shared/source/os_interface/linux/drm_neo.h
+++ b/shared/source/os_interface/linux/drm_neo.h
@@ -148,6 +148,7 @@ class Drm : public DriverModel {
    MOCKABLE_VIRTUAL void getPrelimVersion(std::string &prelimVersion);

    PhysicalDevicePciBusInfo getPciBusInfo() const override;
+    bool isGpuHangDetected(uint32_t contextId) override;

    bool areNonPersistentContextsSupported() const { return nonPersistentContextsSupported; }
    void checkNonPersistentContextsSupport();
--- a/shared/source/os_interface/os_interface.h
+++ b/shared/source/os_interface/os_interface.h
@@ -85,6 +85,10 @@ class DriverModel : public NonCopyableClass {
        return false;
    }

+    virtual bool isGpuHangDetected(uint32_t contextId) { // base implementation never reports a hang; contextId is unused here
+        return false;
+    }
+
  protected:
    DriverModelType driverModelType;
 };
--- a/shared/source/os_interface/windows/os_interface_win.cpp
+++ b/shared/source/os_interface/windows/os_interface_win.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -17,4 +17,5 @@ bool OSInterface::requiresSupportForWddmTrimNotification = true;
 bool OSInterface::isDebugAttachAvailable() const {
    return false;
 }
+
 } // namespace NEO
--- a/shared/test/common/libult/linux/drm_mock.cpp
+++ b/shared/test/common/libult/linux/drm_mock.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2021 Intel Corporation
+ * Copyright (C) 2019-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -204,6 +204,18 @@ int DrmMock::ioctl(unsigned long request, void *arg) {
    if (request == DRM_IOCTL_GEM_CLOSE) {
        return 0;
    }
+    if (request == DRM_IOCTL_I915_GET_RESET_STATS && arg != nullptr) {
+        auto outResetStats = static_cast<drm_i915_reset_stats *>(arg);
+        for (const auto &resetStats : resetStatsToReturn) {
+            if (resetStats.ctx_id == outResetStats->ctx_id) {
+                *outResetStats = resetStats;
+                return 0;
+            }
+        }
+
+        return -1;
+    }
+
    if (request == DRM_IOCTL_I915_QUERY && arg != nullptr) {
        auto queryArg = static_cast<drm_i915_query *>(arg);
        auto queryItemArg = reinterpret_cast<drm_i915_query_item *>(queryArg->items_ptr);
--- a/shared/test/common/libult/linux/drm_mock.h
+++ b/shared/test/common/libult/linux/drm_mock.h
@@ -17,6 +17,7 @@
 #include <fstream>
 #include <limits.h>
 #include <map>
+#include <vector>

 using namespace NEO;

@@ -145,11 +146,11 @@ class DrmMock : public Drm {
    int storedExecSoftPin = 0;
    int storedRetValForVmId = 1;
    int storedCsTimestampFrequency = 1000;
-
    bool disableSomeTopology = false;
    bool allowDebugAttach = false;
    bool allowDebugAttachCallBase = false;
    uint32_t passedContextDebugId = std::numeric_limits<uint32_t>::max();
+    std::vector<drm_i915_reset_stats> resetStatsToReturn{};

    drm_i915_gem_context_create_ext_setparam receivedContextCreateSetParam = {};
    uint32_t receivedContextCreateFlags = 0;
--- a/shared/test/common/libult/ult_command_stream_receiver.h
+++ b/shared/test/common/libult/ult_command_stream_receiver.h
@@ -18,6 +18,7 @@

 #include <map>
 #include <memory>
+#include <optional>

 namespace NEO {

@@ -77,6 +78,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
    using BaseClass::CommandStreamReceiver::experimentalCmdBuffer;
    using BaseClass::CommandStreamReceiver::flushStamp;
    using BaseClass::CommandStreamReceiver::globalFenceAllocation;
+    using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod;
    using BaseClass::CommandStreamReceiver::GSBAFor32BitProgrammed;
    using BaseClass::CommandStreamReceiver::initDirectSubmission;
    using BaseClass::CommandStreamReceiver::internalAllocationStorage;
@@ -122,7 +124,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
                             uint32_t rootDeviceIndex,
                             const DeviceBitfield deviceBitfield)
        : BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield), recursiveLockCounter(0),
-          recordedDispatchFlags(DispatchFlagsHelper::createDefaultDispatchFlags()) {}
+          recordedDispatchFlags(DispatchFlagsHelper::createDefaultDispatchFlags()) {
+    }
    static CommandStreamReceiver *create(bool withAubDump,
                                         ExecutionEnvironment &executionEnvironment,
                                         uint32_t rootDeviceIndex,
@@ -169,7 +172,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
        downloadAllocationCalled = true;
    }

-    bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
+    WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
        latestWaitForCompletionWithTimeoutTaskCount.store(taskCountToWait);
        waitForCompletionWithTimeoutTaskCountCalled++;
        if (callBaseWaitForCompletionWithTimeout) {
@@ -222,6 +225,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
    }
    bool flushBatchedSubmissions() override {
        flushBatchedSubmissionsCalled = true;
+
+        if (shouldFailFlushBatchedSubmissions) {
+            return false;
+        }
+
        return CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions();
    }
    void initProgrammingFlags() override {
@@ -328,6 +336,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
    bool blitterDirectSubmissionAvailable = false;
    bool callBaseIsMultiOsContextCapable = false;
    bool callBaseWaitForCompletionWithTimeout = true;
-    bool returnWaitForCompletionWithTimeout = true;
+    bool shouldFailFlushBatchedSubmissions = false;
+    WaitStatus returnWaitForCompletionWithTimeout = WaitStatus::Ready;
 };
 } // namespace NEO
--- a/shared/test/common/mocks/CMakeLists.txt
+++ b/shared/test/common/mocks/CMakeLists.txt
@@ -45,6 +45,7 @@ set(NEO_CORE_tests_mocks
    ${CMAKE_CURRENT_SOURCE_DIR}/mock_direct_submission_diagnostic_collector.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mock_direct_submission_hw.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mock_dispatch_kernel_encoder_interface.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_model.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mock_elf.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mock_execution_environment.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mock_experimental_command_buffer.h
@@ -105,6 +106,7 @@ else()
       ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_allocation.h
       ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_command_stream_receiver.h
       ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_memory_manager.h
+       ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_os_context_linux.h
  )
 endif()

--- a/shared/test/common/mocks/linux/mock_os_context_linux.h
+++ b/shared/test/common/mocks/linux/mock_os_context_linux.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+
+#include "shared/source/os_interface/linux/os_context_linux.h"
+
+class MockOsContextLinux : public NEO::OsContextLinux { // test helper derived from OsContextLinux
+  public:
+    using NEO::OsContextLinux::drmContextIds;  // made publicly accessible on the mock
+    using NEO::OsContextLinux::OsContextLinux; // inherit the base class constructors
+};
--- a/shared/test/common/mocks/mock_aub_csr.h
+++ b/shared/test/common/mocks/mock_aub_csr.h
@@ -121,8 +121,8 @@ struct MockAubCsr : public AUBCommandStreamReceiverHw<GfxFamily> {
        expectMemoryCompressedCalled = true;
        return AUBCommandStreamReceiverHw<GfxFamily>::expectMemoryCompressed(gfxAddress, srcAddress, length);
    }
-    bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
-        return true;
+    WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
+        return NEO::WaitStatus::Ready;
    }
    void addAubComment(const char *message) override {
        AUBCommandStreamReceiverHw<GfxFamily>::addAubComment(message);
--- a/shared/test/common/mocks/mock_command_stream_receiver.h
+++ b/shared/test/common/mocks/mock_command_stream_receiver.h
@@ -50,9 +50,9 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
        memset(const_cast<uint32_t *>(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(uint32_t));
    }

-    bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
+    WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
        waitForCompletionWithTimeoutCalled++;
-        return true;
+        return NEO::WaitStatus::Ready;
    }
    SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;

@@ -86,7 +86,8 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
        return true;
    }

-    void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
+    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
+        return WaitStatus::Ready;
    }

    uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
@@ -197,7 +198,8 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
    using CommandStreamReceiver::useNewResourceImplicitFlush;

    MockCsrHw2(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
-        : CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
+        : CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {
+    }

    SubmissionAggregator *peekSubmissionAggregator() {
        return this->submissionAggregator.get();
--- a/shared/test/common/mocks/mock_driver_model.h
+++ b/shared/test/common/mocks/mock_driver_model.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+
+#include "shared/source/os_interface/driver_info.h"
+#include "shared/source/os_interface/os_interface.h"
+
+#include <cstdint>
+#include <functional>
+
+class MockDriverModel : public NEO::DriverModel { // DriverModel test double with a configurable GPU hang result
+  public:
+    MockDriverModel() : NEO::DriverModel(NEO::DriverModelType::UNKNOWN) {}
+
+    void setGmmInputArgs(void *args) override {} // intentionally a no-op
+
+    uint32_t getDeviceHandle() const override { return {}; } // value-initialized handle (0)
+
+    NEO::PhysicalDevicePciBusInfo getPciBusInfo() const override { return pciBusInfo; }
+
+    size_t getMaxMemAllocSize() const override {
+        return 0;
+    }
+
+    bool isGpuHangDetected(uint32_t contextId) override { // contextId is ignored by the mock
+        if (isGpuHangDetectedSideEffect) { // run the optional callback before answering
+            std::invoke(isGpuHangDetectedSideEffect);
+        }
+
+        return isGpuHangDetectedToReturn;
+    }
+
+    NEO::PhysicalDevicePciBusInfo pciBusInfo{};          // returned by getPciBusInfo()
+    bool isGpuHangDetectedToReturn{};                    // returned by isGpuHangDetected(); defaults to false
+    std::function<void()> isGpuHangDetectedSideEffect{}; // optional hook invoked on each isGpuHangDetected() call
+};
--- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp
+++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp
@@ -13,6 +13,7 @@
 #include "shared/source/memory_manager/surface.h"
 #include "shared/source/os_interface/device_factory.h"
 #include "shared/source/os_interface/hw_info_config.h"
+#include "shared/source/os_interface/os_interface.h"
 #include "shared/source/utilities/tag_allocator.h"
 #include "shared/test/common/fixtures/device_fixture.h"
 #include "shared/test/common/helpers/debug_manager_state_restore.h"
@@ -20,6 +21,7 @@
 #include "shared/test/common/helpers/unit_test_helper.h"
 #include "shared/test/common/mocks/mock_allocation_properties.h"
 #include "shared/test/common/mocks/mock_csr.h"
+#include "shared/test/common/mocks/mock_driver_model.h"
 #include "shared/test/common/mocks/mock_execution_environment.h"
 #include "shared/test/common/mocks/mock_memory_manager.h"
 #include "shared/test/common/mocks/ult_device_factory.h"
@@ -30,10 +32,15 @@

 #include "gmock/gmock.h"

+#include <chrono>
+#include <functional>
+#include <limits>
+
 namespace NEO {
 extern ApiSpecificConfig::ApiType apiTypeForUlts;
 } // namespace NEO
 using namespace NEO;
+using namespace std::chrono_literals;

 struct CommandStreamReceiverTest : public DeviceFixture,
                                   public ::testing::Test {
@@ -165,6 +172,99 @@ HWTEST_F(CommandStreamReceiverTest, whenStoreAllocationThenStoredAllocationHasTa
    EXPECT_EQ(csr.peekTaskCount(), allocation->getTaskCount(csr.getOsContext().getContextId()));
 }

+HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForCompletionWithTimeoutThenGpuHangIsReturned) {
+    // Driver model mock configured to report a GPU hang whenever it is queried.
+    auto hangingDriverModel = std::make_unique<MockDriverModel>();
+    hangingDriverModel->isGpuHangDetectedToReturn = true;
+    auto mockedOsInterface = std::make_unique<OSInterface>();
+    mockedOsInterface->setDriverModel(std::move(hangingDriverModel));
+
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    csr.executionEnvironment.rootDeviceEnvironments[csr.rootDeviceIndex]->osInterface = std::move(mockedOsInterface);
+    csr.callBaseWaitForCompletionWithTimeout = true;
+    csr.gpuHangCheckPeriod = 0us;
+    csr.activePartitions = 1;
+
+    // Zero-filled tag storage that this test body never modifies.
+    volatile std::uint32_t tagStorage[16] = {};
+    csr.tagAddress = tagStorage;
+
+    constexpr auto waitWithTimeout = false;
+    constexpr auto maxTimeoutUs = std::numeric_limits<std::int64_t>::max();
+    constexpr auto awaitedTaskCount = 1;
+    const auto result = csr.waitForCompletionWithTimeout(waitWithTimeout, maxTimeoutUs, awaitedTaskCount);
+    EXPECT_EQ(WaitStatus::GpuHang, result);
+}
+
+HWTEST_F(CommandStreamReceiverTest, givenNoGpuHangWhenWaititingForCompletionWithTimeoutThenReadyIsReturned) {
+    // Zero-filled tag storage shared between the mock's side effect and the CSR.
+    volatile std::uint32_t tagStorage[16] = {};
+
+    // The mock reports "no hang", but every hang query bumps the first tag entry by one.
+    auto healthyDriverModel = std::make_unique<MockDriverModel>();
+    healthyDriverModel->isGpuHangDetectedToReturn = false;
+    healthyDriverModel->isGpuHangDetectedSideEffect = [&tagStorage] {
+        tagStorage[0] = tagStorage[0] + 1;
+    };
+    auto mockedOsInterface = std::make_unique<OSInterface>();
+    mockedOsInterface->setDriverModel(std::move(healthyDriverModel));
+
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    csr.executionEnvironment.rootDeviceEnvironments[csr.rootDeviceIndex]->osInterface = std::move(mockedOsInterface);
+    csr.tagAddress = tagStorage;
+    csr.callBaseWaitForCompletionWithTimeout = true;
+    csr.gpuHangCheckPeriod = 0us;
+    csr.activePartitions = 1;
+
+    constexpr auto waitWithTimeout = false;
+    constexpr auto maxTimeoutUs = std::numeric_limits<std::int64_t>::max();
+    constexpr auto awaitedTaskCount = 1;
+    const auto result = csr.waitForCompletionWithTimeout(waitWithTimeout, maxTimeoutUs, awaitedTaskCount);
+    EXPECT_EQ(WaitStatus::Ready, result);
+}
+
+HWTEST_F(CommandStreamReceiverTest, givenFailingFlushSubmissionsAndGpuHangWhenWaititingForCompletionWithTimeoutThenGpuHangIsReturned) {
+    // Driver model mock configured to report a GPU hang whenever it is queried.
+    auto hangingDriverModel = std::make_unique<MockDriverModel>();
+    hangingDriverModel->isGpuHangDetectedToReturn = true;
+    auto mockedOsInterface = std::make_unique<OSInterface>();
+    mockedOsInterface->setDriverModel(std::move(hangingDriverModel));
+
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    csr.executionEnvironment.rootDeviceEnvironments[csr.rootDeviceIndex]->osInterface = std::move(mockedOsInterface);
+    csr.callBaseWaitForCompletionWithTimeout = true;
+    // Mock flag for a failing batched-submission flush; latest flushed task count (0) is below the awaited one (1).
+    csr.shouldFailFlushBatchedSubmissions = true;
+    csr.latestFlushedTaskCount = 0;
+
+    constexpr auto waitWithTimeout = false;
+    constexpr auto maxTimeoutUs = std::numeric_limits<std::int64_t>::max();
+    constexpr auto awaitedTaskCount = 1;
+    const auto result = csr.waitForCompletionWithTimeout(waitWithTimeout, maxTimeoutUs, awaitedTaskCount);
+    EXPECT_EQ(WaitStatus::GpuHang, result);
+}
+
+HWTEST_F(CommandStreamReceiverTest, givenFailingFlushSubmissionsAndNoGpuHangWhenWaititingForCompletionWithTimeoutThenNotReadyIsReturned) {
+    // Driver model mock configured to report no GPU hang when it is queried.
+    auto healthyDriverModel = std::make_unique<MockDriverModel>();
+    healthyDriverModel->isGpuHangDetectedToReturn = false;
+    auto mockedOsInterface = std::make_unique<OSInterface>();
+    mockedOsInterface->setDriverModel(std::move(healthyDriverModel));
+
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    csr.executionEnvironment.rootDeviceEnvironments[csr.rootDeviceIndex]->osInterface = std::move(mockedOsInterface);
+    csr.callBaseWaitForCompletionWithTimeout = true;
+    // Mock flag for a failing batched-submission flush; latest flushed task count (0) is below the awaited one (1).
+    csr.shouldFailFlushBatchedSubmissions = true;
+    csr.latestFlushedTaskCount = 0;
+
+    constexpr auto waitWithTimeout = false;
+    constexpr auto maxTimeoutUs = std::numeric_limits<std::int64_t>::max();
+    constexpr auto awaitedTaskCount = 1;
+    const auto result = csr.waitForCompletionWithTimeout(waitWithTimeout, maxTimeoutUs, awaitedTaskCount);
+    EXPECT_EQ(WaitStatus::NotReady, result);
+}
+
 HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenCheckedForInitialStatusOfStatelessMocsIndexThenUnknownMocsIsReturend) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_EQ(CacheSettings::unknownMocs, csr.latestSentStatelessMocsConfig);
--- a/shared/test/unit_test/os_interface/device_uuid_tests.cpp
+++ b/shared/test/unit_test/os_interface/device_uuid_tests.cpp
@@ -9,6 +9,7 @@
 #include "shared/test/common/helpers/debug_manager_state_restore.h"
 #include "shared/test/common/helpers/ult_hw_config.h"
 #include "shared/test/common/mocks/mock_device.h"
+#include "shared/test/common/mocks/mock_driver_model.h"
 #include "shared/test/common/mocks/mock_execution_environment.h"
 #include "shared/test/common/mocks/mock_memory_manager.h"
 #include "shared/test/common/mocks/ult_device_factory.h"
@@ -28,17 +29,6 @@ class MockMemoryManagerOsAgnosticContext : public MockMemoryManager {
    }
 };

-struct MockDriverModel : NEO::DriverModel {
-    PhysicalDevicePciBusInfo pciBusInfo{};
-    MockDriverModel() : NEO::DriverModel(NEO::DriverModelType::UNKNOWN) {}
-    void setGmmInputArgs(void *args) override {}
-    uint32_t getDeviceHandle() const override { return {}; }
-    PhysicalDevicePciBusInfo getPciBusInfo() const override { return pciBusInfo; }
-    size_t getMaxMemAllocSize() const override {
-        return 0;
-    }
-};
-
 template <PRODUCT_FAMILY gfxProduct>
 class MockHwInfoConfigHw : public HwInfoConfigHw<gfxProduct> {
  public: