fix: make gpuFaultCheckCounter more robust

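Modified drm_neo.h and drm_neo.cpp so that the page-fault check is triggered
when gpuFaultCheckCounter is greater than or equal to gpuFaultCheckThreshold,
instead of only when the two are exactly equal, and changed
gpuFaultCheckCounter to be atomic.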

Related-To: GSD-5673
Signed-off-by: Young Jin Yoon <young.jin.yoon@intel.com>
This commit is contained in:
Young Jin Yoon
2024-03-15 07:52:00 +00:00
committed by Compute-Runtime-Automation
parent 470def9be6
commit 9633f49dab
2 changed files with 3 additions and 2 deletions

View File

@@ -251,7 +251,7 @@ int Drm::queryGttSize(uint64_t &gttSizeOutput) {
bool Drm::isGpuHangDetected(OsContext &osContext) {
bool ret = checkResetStatus(osContext);
if (gpuFaultCheckThreshold != 0) {
if (gpuFaultCheckCounter == gpuFaultCheckThreshold) {
if (gpuFaultCheckCounter >= gpuFaultCheckThreshold) {
auto memoryManager = static_cast<DrmMemoryManager *>(this->rootDeviceEnvironment.executionEnvironment.memoryManager.get());
memoryManager->checkUnexpectedGpuPageFault();
gpuFaultCheckCounter = 0;

View File

@@ -19,6 +19,7 @@
#include "igfxfmid.h"
#include <array>
#include <atomic>
#include <cstdint>
#include <limits>
#include <memory>
@@ -346,7 +347,7 @@ class Drm : public DriverModel {
bool disableScratch = false;
uint32_t gpuFaultCheckThreshold = 0u;
uint32_t gpuFaultCheckCounter = 0u;
std::atomic<uint32_t> gpuFaultCheckCounter{0u};
private:
int getParamIoctl(DrmParam param, int *dstValue);