mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
refactor: Merge Ras prelim files with non-prelim files
Related-To: NEO-9469 Signed-off-by: Bellekallu Rajkiran <bellekallu.rajkiran@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
616ef4c9c7
commit
16725e2438
@@ -8,21 +8,9 @@ if(UNIX)
|
||||
target_sources(${L0_STATIC_LIB_NAME}
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_gt.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_hbm.cpp
|
||||
)
|
||||
|
||||
if(NEO_ENABLE_i915_PRELIM_DETECTION)
|
||||
target_sources(${L0_STATIC_LIB_NAME}
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_prelim.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_prelim.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_gt.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_hbm.cpp
|
||||
)
|
||||
else()
|
||||
target_sources(${L0_STATIC_LIB_NAME}
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp.h
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -8,35 +8,46 @@
|
||||
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp.h"
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/os_interface/linux/system_info.h"
|
||||
|
||||
#include "level_zero/sysman/source/shared/linux/zes_os_sysman_imp.h"
|
||||
|
||||
#include <cstring>
|
||||
#include "drm/intel_hwconfig_types.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace Sysman {
|
||||
|
||||
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) {
|
||||
pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
|
||||
pFsAccess = &pLinuxSysmanImp->getFsAccess();
|
||||
static bool isMemoryTypeHbm(LinuxSysmanImp *pLinuxSysmanImp) {
|
||||
uint32_t memType = pLinuxSysmanImp->getMemoryType();
|
||||
if (memType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2e || memType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId) {}
|
||||
void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId) {
|
||||
|
||||
ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
constexpr auto maxErrorTypes = 2;
|
||||
LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, isSubDevice, subDeviceId);
|
||||
if (errorType.size() < maxErrorTypes) {
|
||||
auto pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
|
||||
if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
|
||||
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, isSubDevice, subDeviceId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ze_result_t LinuxRasImp::osRasGetConfig(zes_ras_config_t *config) {
|
||||
config->totalThreshold = totalThreshold;
|
||||
memcpy(config->detailedThresholds.category, categoryThreshold, maxRasErrorCategoryCount * sizeof(uint64_t));
|
||||
memcpy_s(config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t), categoryThreshold, maxRasErrorCategoryCount * sizeof(uint64_t));
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) {
|
||||
if (pFsAccess->isRootUser() == true) {
|
||||
totalThreshold = config->totalThreshold;
|
||||
memcpy(categoryThreshold, config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t));
|
||||
memcpy_s(categoryThreshold, maxRasErrorCategoryCount * sizeof(uint64_t), config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t));
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
|
||||
@@ -51,6 +62,42 @@ ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
|
||||
if (clear == true) {
|
||||
if (pFsAccess->isRootUser() == false) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
|
||||
return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
|
||||
}
|
||||
}
|
||||
|
||||
ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
for (auto &rasSource : rasSources) {
|
||||
zes_ras_state_t localState = {};
|
||||
ze_result_t localResult = rasSource->osRasGetState(localState, clear);
|
||||
if (localResult != ZE_RESULT_SUCCESS) {
|
||||
continue;
|
||||
}
|
||||
for (uint32_t i = 0; i < maxRasErrorCategoryCount; i++) {
|
||||
state.category[i] += localState.category[i];
|
||||
}
|
||||
result = ZE_RESULT_SUCCESS;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void LinuxRasImp::initSources() {
|
||||
rasSources.push_back(std::make_unique<L0::Sysman::LinuxRasSourceGt>(pLinuxSysmanImp, osRasErrorType, isSubdevice, subdeviceId));
|
||||
if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
|
||||
rasSources.push_back(std::make_unique<L0::Sysman::LinuxRasSourceHbm>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
|
||||
}
|
||||
}
|
||||
|
||||
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) {
|
||||
pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
|
||||
pFsAccess = &pLinuxSysmanImp->getFsAccess();
|
||||
initSources();
|
||||
}
|
||||
|
||||
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) {
|
||||
LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman, type, onSubdevice, subdeviceId);
|
||||
return static_cast<OsRas *>(pLinuxRasImp);
|
||||
|
||||
@@ -9,12 +9,28 @@
|
||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||
|
||||
#include "level_zero/sysman/source/api/ras/sysman_os_ras.h"
|
||||
#include "level_zero/sysman/source/device/sysman_device_imp.h"
|
||||
#include "level_zero/sysman/source/shared/linux/pmu/sysman_pmu_imp.h"
|
||||
#include "level_zero/sysman/source/shared/linux/sysman_fs_access.h"
|
||||
#include "level_zero/sysman/source/sysman_const.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace L0 {
|
||||
namespace Sysman {
|
||||
|
||||
class LinuxSysmanImp;
|
||||
class FirmwareUtil;
|
||||
|
||||
class LinuxRasSources : NEO::NonCopyableOrMovableClass {
|
||||
public:
|
||||
virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) = 0;
|
||||
virtual ~LinuxRasSources() = default;
|
||||
};
|
||||
|
||||
class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
|
||||
public:
|
||||
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
|
||||
@@ -29,13 +45,70 @@ class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
|
||||
zes_ras_error_type_t osRasErrorType = {};
|
||||
FsAccess *pFsAccess = nullptr;
|
||||
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
|
||||
std::vector<std::unique_ptr<L0::Sysman::LinuxRasSources>> rasSources = {};
|
||||
|
||||
private:
|
||||
void initSources();
|
||||
bool isSubdevice = false;
|
||||
uint32_t subdeviceId = 0;
|
||||
uint64_t totalThreshold = 0;
|
||||
uint64_t categoryThreshold[maxRasErrorCategoryCount] = {0};
|
||||
};
|
||||
|
||||
class LinuxRasSourceGt : public LinuxRasSources {
|
||||
public:
|
||||
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
|
||||
static void getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId);
|
||||
LinuxRasSourceGt(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
|
||||
LinuxRasSourceGt() = default;
|
||||
~LinuxRasSourceGt() override;
|
||||
|
||||
protected:
|
||||
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
|
||||
zes_ras_error_type_t osRasErrorType = {};
|
||||
PmuInterface *pPmuInterface = nullptr;
|
||||
FsAccess *pFsAccess = nullptr;
|
||||
SysfsAccess *pSysfsAccess = nullptr;
|
||||
|
||||
private:
|
||||
void initRasErrors(ze_bool_t clear);
|
||||
ze_result_t getPmuConfig(
|
||||
const std::string &eventDirectory,
|
||||
const std::vector<std::string> &listOfEvents,
|
||||
const std::string &errorFileToGetConfig,
|
||||
std::string &pmuConfig);
|
||||
ze_result_t getBootUpErrorCountFromSysfs(
|
||||
std::string nameOfError,
|
||||
const std::string &errorCounterDir,
|
||||
uint64_t &errorVal);
|
||||
void closeFds();
|
||||
int64_t groupFd = -1;
|
||||
std::vector<int64_t> memberFds = {};
|
||||
uint64_t initialErrorCount[maxRasErrorCategoryCount] = {0};
|
||||
std::map<zes_ras_error_cat_t, uint64_t> errorCategoryToEventCount;
|
||||
uint64_t totalEventCount = 0;
|
||||
bool isSubdevice = false;
|
||||
uint32_t subdeviceId = 0;
|
||||
};
|
||||
|
||||
class LinuxRasSourceHbm : public LinuxRasSources {
|
||||
public:
|
||||
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
|
||||
static void getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId);
|
||||
LinuxRasSourceHbm(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, uint32_t subdeviceId);
|
||||
LinuxRasSourceHbm() = default;
|
||||
~LinuxRasSourceHbm() override{};
|
||||
|
||||
protected:
|
||||
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
|
||||
zes_ras_error_type_t osRasErrorType = {};
|
||||
FirmwareUtil *pFwInterface = nullptr;
|
||||
SysmanDeviceImp *pDevice = nullptr;
|
||||
|
||||
private:
|
||||
uint64_t errorBaseline = 0;
|
||||
uint32_t subdeviceId = 0;
|
||||
};
|
||||
|
||||
} // namespace Sysman
|
||||
} // namespace L0
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
|
||||
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp_prelim.h"
|
||||
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp.h"
|
||||
#include "level_zero/sysman/source/shared/linux/zes_os_sysman_imp.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
|
||||
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp_prelim.h"
|
||||
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp.h"
|
||||
#include "level_zero/sysman/source/shared/firmware_util/sysman_firmware_util.h"
|
||||
#include "level_zero/sysman/source/shared/linux/zes_os_sysman_imp.h"
|
||||
|
||||
|
||||
@@ -1,107 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp_prelim.h"
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/os_interface/linux/system_info.h"
|
||||
|
||||
#include "level_zero/sysman/source/shared/linux/zes_os_sysman_imp.h"
|
||||
|
||||
#include "drm/intel_hwconfig_types.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace Sysman {
|
||||
|
||||
// Returns true when the memory type reported by pLinuxSysmanImp->getMemoryType()
// equals INTEL_HWCONFIG_MEMORY_TYPE_HBM2 or INTEL_HWCONFIG_MEMORY_TYPE_HBM2e,
// false for any other value.
static bool isMemoryTypeHbm(LinuxSysmanImp *pLinuxSysmanImp) {
    const uint32_t reportedType = pLinuxSysmanImp->getMemoryType();
    return (reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2e) || (reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2);
}
|
||||
|
||||
void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId) {
|
||||
|
||||
constexpr auto maxErrorTypes = 2;
|
||||
LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, isSubDevice, subDeviceId);
|
||||
if (errorType.size() < maxErrorTypes) {
|
||||
auto pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
|
||||
if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
|
||||
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, isSubDevice, subDeviceId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the thresholds currently stored in this object (totalThreshold and
// the per-category categoryThreshold array) into *config.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t LinuxRasImp::osRasGetConfig(zes_ras_config_t *config) {
    // Source and destination are both treated as maxRasErrorCategoryCount 64-bit counters.
    const auto thresholdBytes = maxRasErrorCategoryCount * sizeof(uint64_t);

    config->totalThreshold = totalThreshold;
    memcpy_s(config->detailedThresholds.category, thresholdBytes, categoryThreshold, thresholdBytes);
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Stores the thresholds from *config in this object.
//
// Only the root user (as reported by pFsAccess->isRootUser()) may change the
// thresholds; any other caller gets ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS
// and the stored values are left untouched. Returns ZE_RESULT_SUCCESS otherwise.
ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) {
    if (!pFsAccess->isRootUser()) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
        return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
    }

    const auto thresholdBytes = maxRasErrorCategoryCount * sizeof(uint64_t);
    totalThreshold = config->totalThreshold;
    memcpy_s(categoryThreshold, thresholdBytes, config->detailedThresholds.category, thresholdBytes);
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Reports the identity of this RAS handle: its error type and the sub-device
// information captured at construction. pNext is reset to nullptr.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
    properties.type = osRasErrorType;
    properties.onSubdevice = isSubdevice;
    properties.subdeviceId = subdeviceId;
    properties.pNext = nullptr;
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Sums the per-category error counters of every entry in rasSources into state.
//
// - When clear is requested by a non-root user, nothing is queried and
//   ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS is returned.
// - Sources whose osRasGetState() does not return ZE_RESULT_SUCCESS are ignored.
// - Returns ZE_RESULT_SUCCESS if at least one source succeeded, otherwise
//   ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
//
// Note: counters are added on top of whatever state.category already holds;
// this function does not reset them first.
ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
    // The root check is evaluated only when a clear was actually requested.
    if ((clear == true) && !pFsAccess->isRootUser()) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
        return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
    }

    ze_result_t aggregateResult = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    for (auto &source : rasSources) {
        zes_ras_state_t sourceState = {};
        if (source->osRasGetState(sourceState, clear) == ZE_RESULT_SUCCESS) {
            for (uint32_t idx = 0; idx < maxRasErrorCategoryCount; ++idx) {
                state.category[idx] += sourceState.category[idx];
            }
            aggregateResult = ZE_RESULT_SUCCESS;
        }
    }
    return aggregateResult;
}
|
||||
|
||||
void LinuxRasImp::initSources() {
|
||||
rasSources.push_back(std::make_unique<L0::Sysman::LinuxRasSourceGt>(pLinuxSysmanImp, osRasErrorType, isSubdevice, subdeviceId));
|
||||
if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
|
||||
rasSources.push_back(std::make_unique<L0::Sysman::LinuxRasSourceHbm>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a RAS handle for the given error type and (sub)device.
// pOsSysman is downcast to LinuxSysmanImp, its FsAccess is cached, and the
// list of RAS sources is created via initSources().
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId)
    : osRasErrorType(type),
      pLinuxSysmanImp(static_cast<LinuxSysmanImp *>(pOsSysman)),
      isSubdevice(onSubdevice),
      subdeviceId(subdeviceId) {
    pFsAccess = &pLinuxSysmanImp->getFsAccess();
    initSources();
}
|
||||
|
||||
// Linux factory for OsRas: allocates a LinuxRasImp with new and returns it
// through the OsRas interface. The object is not released by this function.
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) {
    OsRas *pOsRas = new LinuxRasImp(pOsSysman, type, onSubdevice, subdeviceId);
    return pOsRas;
}
|
||||
|
||||
} // namespace Sysman
|
||||
} // namespace L0
|
||||
@@ -1,114 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||
|
||||
#include "level_zero/sysman/source/api/ras/sysman_os_ras.h"
|
||||
#include "level_zero/sysman/source/device/sysman_device_imp.h"
|
||||
#include "level_zero/sysman/source/shared/linux/pmu/sysman_pmu_imp.h"
|
||||
#include "level_zero/sysman/source/shared/linux/sysman_fs_access.h"
|
||||
#include "level_zero/sysman/source/sysman_const.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace L0 {
|
||||
namespace Sysman {
|
||||
|
||||
class LinuxSysmanImp;
|
||||
class FirmwareUtil;
|
||||
|
||||
class LinuxRasSources : NEO::NonCopyableOrMovableClass {
|
||||
public:
|
||||
virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) = 0;
|
||||
virtual ~LinuxRasSources() = default;
|
||||
};
|
||||
|
||||
class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
|
||||
public:
|
||||
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
|
||||
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
|
||||
ze_result_t osRasGetConfig(zes_ras_config_t *config) override;
|
||||
ze_result_t osRasSetConfig(const zes_ras_config_t *config) override;
|
||||
LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
|
||||
LinuxRasImp() = default;
|
||||
~LinuxRasImp() override = default;
|
||||
|
||||
protected:
|
||||
zes_ras_error_type_t osRasErrorType = {};
|
||||
FsAccess *pFsAccess = nullptr;
|
||||
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
|
||||
std::vector<std::unique_ptr<L0::Sysman::LinuxRasSources>> rasSources = {};
|
||||
|
||||
private:
|
||||
void initSources();
|
||||
bool isSubdevice = false;
|
||||
uint32_t subdeviceId = 0;
|
||||
uint64_t totalThreshold = 0;
|
||||
uint64_t categoryThreshold[maxRasErrorCategoryCount] = {0};
|
||||
};
|
||||
|
||||
class LinuxRasSourceGt : public LinuxRasSources {
|
||||
public:
|
||||
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
|
||||
static void getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId);
|
||||
LinuxRasSourceGt(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
|
||||
LinuxRasSourceGt() = default;
|
||||
~LinuxRasSourceGt() override;
|
||||
|
||||
protected:
|
||||
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
|
||||
zes_ras_error_type_t osRasErrorType = {};
|
||||
PmuInterface *pPmuInterface = nullptr;
|
||||
FsAccess *pFsAccess = nullptr;
|
||||
SysfsAccess *pSysfsAccess = nullptr;
|
||||
|
||||
private:
|
||||
void initRasErrors(ze_bool_t clear);
|
||||
ze_result_t getPmuConfig(
|
||||
const std::string &eventDirectory,
|
||||
const std::vector<std::string> &listOfEvents,
|
||||
const std::string &errorFileToGetConfig,
|
||||
std::string &pmuConfig);
|
||||
ze_result_t getBootUpErrorCountFromSysfs(
|
||||
std::string nameOfError,
|
||||
const std::string &errorCounterDir,
|
||||
uint64_t &errorVal);
|
||||
void closeFds();
|
||||
int64_t groupFd = -1;
|
||||
std::vector<int64_t> memberFds = {};
|
||||
uint64_t initialErrorCount[maxRasErrorCategoryCount] = {0};
|
||||
std::map<zes_ras_error_cat_t, uint64_t> errorCategoryToEventCount;
|
||||
uint64_t totalEventCount = 0;
|
||||
bool isSubdevice = false;
|
||||
uint32_t subdeviceId = 0;
|
||||
};
|
||||
|
||||
class LinuxRasSourceHbm : public LinuxRasSources {
|
||||
public:
|
||||
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
|
||||
static void getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId);
|
||||
LinuxRasSourceHbm(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, uint32_t subdeviceId);
|
||||
LinuxRasSourceHbm() = default;
|
||||
~LinuxRasSourceHbm() override{};
|
||||
|
||||
protected:
|
||||
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
|
||||
zes_ras_error_type_t osRasErrorType = {};
|
||||
FirmwareUtil *pFwInterface = nullptr;
|
||||
SysmanDeviceImp *pDevice = nullptr;
|
||||
|
||||
private:
|
||||
uint64_t errorBaseline = 0;
|
||||
uint32_t subdeviceId = 0;
|
||||
};
|
||||
|
||||
} // namespace Sysman
|
||||
} // namespace L0
|
||||
Reference in New Issue
Block a user