mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
Update Sysman RAS Module
Change-Id: I2b99dae4336811ea4b539da48c1434657a9cf62a Signed-off-by: mraghuwa <mayank.raghuwanshi@intel.com>
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
#
|
||||
|
||||
set(L0_SRCS_TOOLS_SYSMAN_RAS_LINUX
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/os_ras_imp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp.h
|
||||
)
|
||||
|
||||
|
||||
@@ -11,32 +11,22 @@
|
||||
|
||||
namespace L0 {
|
||||
|
||||
const std::string LinuxRasImp::rasCounterDir("/var/lib/libze_intel_gpu/");
|
||||
const std::string LinuxRasImp::resetCounter("ras_reset_count");
|
||||
const std::string LinuxRasImp::resetCounterFile = rasCounterDir + resetCounter;
|
||||
|
||||
void LinuxRasImp::setRasErrorType(zes_ras_error_type_t type) {
|
||||
osRasErrorType = type;
|
||||
}
|
||||
bool LinuxRasImp::isRasSupported(void) {
|
||||
if (false == pFsAccess->fileExists(rasCounterDir)) {
|
||||
return false;
|
||||
}
|
||||
if (osRasErrorType == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
|
||||
return false;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
ze_result_t OsRas::getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman) {
|
||||
LinuxSysmanImp *pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
|
||||
pFsAccess = &pLinuxSysmanImp->getFsAccess();
|
||||
osRasErrorType = ZES_RAS_ERROR_TYPE_UNCORRECTABLE;
|
||||
ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
|
||||
properties.pNext = nullptr;
|
||||
properties.type = osRasErrorType;
|
||||
properties.onSubdevice = false;
|
||||
properties.subdeviceId = 0;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type) : osRasErrorType(type) {
|
||||
}
|
||||
|
||||
OsRas *OsRas::create(OsSysman *pOsSysman) {
|
||||
LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman);
|
||||
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type) {
|
||||
LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman, type);
|
||||
return static_cast<OsRas *>(pLinuxRasImp);
|
||||
}
|
||||
|
||||
|
||||
@@ -18,21 +18,13 @@ namespace L0 {
|
||||
class FsAccess;
|
||||
class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
|
||||
public:
|
||||
LinuxRasImp(OsSysman *pOsSysman);
|
||||
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
|
||||
LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type);
|
||||
LinuxRasImp() = default;
|
||||
~LinuxRasImp() override = default;
|
||||
bool isRasSupported(void) override;
|
||||
void setRasErrorType(zes_ras_error_type_t rasErrorType) override;
|
||||
|
||||
protected:
|
||||
FsAccess *pFsAccess = nullptr;
|
||||
zes_ras_error_type_t osRasErrorType;
|
||||
|
||||
private:
|
||||
static const std::string rasCounterDir;
|
||||
static const std::string resetCounter;
|
||||
static const std::string resetCounterFile;
|
||||
std::vector<std::string> rasCounterDirFileList = {};
|
||||
zes_ras_error_type_t osRasErrorType = {};
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -9,14 +9,16 @@
|
||||
|
||||
#include <level_zero/zes_api.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
struct OsSysman;
|
||||
class OsRas {
|
||||
public:
|
||||
virtual bool isRasSupported(void) = 0;
|
||||
virtual void setRasErrorType(zes_ras_error_type_t type) = 0;
|
||||
static OsRas *create(OsSysman *pOsSysman);
|
||||
virtual ze_result_t osRasGetProperties(zes_ras_properties_t &properties) = 0;
|
||||
static OsRas *create(OsSysman *pOsSysman, zes_ras_error_type_t type);
|
||||
static ze_result_t getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman);
|
||||
virtual ~OsRas() = default;
|
||||
};
|
||||
|
||||
|
||||
@@ -18,15 +18,15 @@ RasHandleContext::~RasHandleContext() {
|
||||
}
|
||||
void RasHandleContext::createHandle(zes_ras_error_type_t type) {
|
||||
Ras *pRas = new RasImp(pOsSysman, type);
|
||||
if (pRas->isRasErrorSupported == true) {
|
||||
handleList.push_back(pRas);
|
||||
} else {
|
||||
delete pRas;
|
||||
}
|
||||
handleList.push_back(pRas);
|
||||
}
|
||||
|
||||
void RasHandleContext::init() {
|
||||
createHandle(ZES_RAS_ERROR_TYPE_UNCORRECTABLE);
|
||||
createHandle(ZES_RAS_ERROR_TYPE_CORRECTABLE);
|
||||
std::vector<zes_ras_error_type_t> errorType = {};
|
||||
OsRas::getSupportedRasErrorTypes(errorType, pOsSysman);
|
||||
for (const auto &type : errorType) {
|
||||
createHandle(type);
|
||||
}
|
||||
}
|
||||
ze_result_t RasHandleContext::rasGet(uint32_t *pCount,
|
||||
zes_ras_handle_t *phRas) {
|
||||
|
||||
@@ -15,9 +15,6 @@
|
||||
namespace L0 {
|
||||
|
||||
ze_result_t RasImp::rasGetProperties(zes_ras_properties_t *pProperties) {
|
||||
rasProperties.type = this->rasErrorType;
|
||||
rasProperties.onSubdevice = false;
|
||||
rasProperties.subdeviceId = 0;
|
||||
*pProperties = rasProperties;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -35,13 +32,11 @@ ze_result_t RasImp::rasGetState(const zes_ras_state_t *pState) {
|
||||
}
|
||||
|
||||
void RasImp::init() {
|
||||
pOsRas->setRasErrorType(this->rasErrorType);
|
||||
isRasErrorSupported = pOsRas->isRasSupported();
|
||||
pOsRas->osRasGetProperties(rasProperties);
|
||||
}
|
||||
|
||||
RasImp::RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type) {
|
||||
pOsRas = OsRas::create(pOsSysman);
|
||||
this->rasErrorType = type;
|
||||
pOsRas = OsRas::create(pOsSysman, type);
|
||||
init();
|
||||
}
|
||||
|
||||
|
||||
@@ -10,17 +10,18 @@
|
||||
namespace L0 {
|
||||
|
||||
class WddmRasImp : public OsRas {
|
||||
bool isRasSupported(void) override;
|
||||
void setRasErrorType(zes_ras_error_type_t type) override;
|
||||
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
|
||||
};
|
||||
|
||||
bool WddmRasImp::isRasSupported(void) {
|
||||
return false;
|
||||
ze_result_t OsRas::getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
void WddmRasImp::setRasErrorType(zes_ras_error_type_t type) {}
|
||||
ze_result_t WddmRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
OsRas *OsRas::create(OsSysman *pOsSysman) {
|
||||
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type) {
|
||||
WddmRasImp *pWddmRasImp = new WddmRasImp();
|
||||
return static_cast<OsRas *>(pWddmRasImp);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user