mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Implement GPU hang detection
This change uses DRM_IOCTL_I915_GET_RESET_STATS to detect GPU hangs. When a hang is detected, zeCommandQueueSynchronize returns ZE_RESULT_ERROR_DEVICE_LOST. Related-To: NEO-5313 Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
543c854a3b
commit
498cf5e871
@ -1886,5 +1886,5 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenWaitForCompletionWithTimeoutI
|
||||
mockCsr.latestSentTaskCount = 1;
|
||||
auto cmdBuffer = std::make_unique<CommandBuffer>(*pDevice);
|
||||
mockCsr.submissionAggregator->recordCommandBuffer(cmdBuffer.release());
|
||||
EXPECT_FALSE(mockCsr.waitForCompletionWithTimeout(false, 0, 1));
|
||||
EXPECT_EQ(NEO::WaitStatus::NotReady, mockCsr.waitForCompletionWithTimeout(false, 0, 1));
|
||||
}
|
||||
|
@ -738,6 +738,6 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo
|
||||
CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress;
|
||||
CpuIntrinsicsTests::pauseValue = taskCountToWait;
|
||||
|
||||
bool ret = mockCsr->waitForCompletionWithTimeout(false, 1, taskCountToWait);
|
||||
EXPECT_TRUE(ret);
|
||||
const auto ret = mockCsr->waitForCompletionWithTimeout(false, 1, taskCountToWait);
|
||||
EXPECT_EQ(NEO::WaitStatus::Ready, ret);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -468,7 +468,7 @@ HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideDisableWhenComputeSup
|
||||
HWTEST_F(UltCommandStreamReceiverTest, givenSinglePartitionWhenCallingWaitKmdNotifyThenExpectImplicitBusyLoopWaitCalled) {
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.callBaseWaitForCompletionWithTimeout = false;
|
||||
commandStreamReceiver.returnWaitForCompletionWithTimeout = false;
|
||||
commandStreamReceiver.returnWaitForCompletionWithTimeout = NEO::WaitStatus::NotReady;
|
||||
|
||||
commandStreamReceiver.waitForTaskCountWithKmdNotifyFallback(0, 0, false, false);
|
||||
EXPECT_EQ(2u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled);
|
||||
@ -477,7 +477,7 @@ HWTEST_F(UltCommandStreamReceiverTest, givenSinglePartitionWhenCallingWaitKmdNot
|
||||
HWTEST_F(UltCommandStreamReceiverTest, givenMultiplePartitionsWhenCallingWaitKmdNotifyThenExpectExplicitBusyLoopWaitCalled) {
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.callBaseWaitForCompletionWithTimeout = false;
|
||||
commandStreamReceiver.returnWaitForCompletionWithTimeout = false;
|
||||
commandStreamReceiver.returnWaitForCompletionWithTimeout = NEO::WaitStatus::NotReady;
|
||||
|
||||
commandStreamReceiver.waitForTaskCountWithKmdNotifyFallback(0, 0, false, false);
|
||||
EXPECT_EQ(2u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -610,13 +610,14 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
|
||||
public:
|
||||
using UltCommandStreamReceiver<FamilyType>::UltCommandStreamReceiver;
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, bool forcePowerSavingMode) override {
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, bool forcePowerSavingMode) override {
|
||||
waitForTaskCountWithKmdNotifyFallbackCalled++;
|
||||
taskCountToWaitPassed = taskCountToWait;
|
||||
flushStampToWaitPassed = flushStampToWait;
|
||||
useQuickKmdSleepPassed = useQuickKmdSleep;
|
||||
forcePowerSavingModePassed = forcePowerSavingMode;
|
||||
return WaitStatus::Ready;
|
||||
}
|
||||
|
||||
FlushStamp flushStampToWaitPassed = 0;
|
||||
|
Reference in New Issue
Block a user