fix: check for gpu hang during wait for ring completion

Related-To: NEO-13490

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-01-09 15:34:25 +00:00
committed by Compute-Runtime-Automation
parent 3e9f1d0730
commit f3c9362fc5
7 changed files with 60 additions and 10 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -82,6 +82,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass::getSizeNewResourceHandler;
using BaseClass::getSizeSwitchRingBufferSection;
using BaseClass::getTagAddressValue;
using BaseClass::gpuHangCheckPeriod;
using BaseClass::handleNewResourcesSubmission;
using BaseClass::handleResidency;
using BaseClass::handleSwitchRingBuffers;
@@ -1098,3 +1099,28 @@ HWTEST_F(DrmDirectSubmissionTest,
executionEnvironment.memoryManager->freeGraphicsMemory(commandBuffer);
*drmDirectSubmission.tagAddress = 1;
}
HWTEST_F(DrmDirectSubmissionTest, givenGpuHangWhenWaitCalledThenGpuHangDetected) {
using Dispatcher = RenderDispatcher<FamilyType>;
VariableBackup<bool> backupWaitpkgUse(&WaitUtils::waitpkgUse, false);
VariableBackup<uint32_t> backupWaitCount(&WaitUtils::waitCount, 1);
MockDrmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device->getDefaultEngine().commandStreamReceiver);
directSubmission.gpuHangCheckPeriod = {};
bool ret = directSubmission.allocateResources();
EXPECT_TRUE(ret);
auto pollAddress = directSubmission.tagAddress;
*pollAddress = 0;
auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>());
ResetStats resetStats{};
resetStats.contextId = 0;
resetStats.batchActive = 1;
drm->resetStatsToReturn.push_back(resetStats);
EXPECT_EQ(0, drm->ioctlCount.getResetStats);
directSubmission.wait(1);
EXPECT_EQ(1, drm->ioctlCount.getResetStats);
}