Update Sysman RAS Module

Change-Id: I2b99dae4336811ea4b539da48c1434657a9cf62a
Signed-off-by: mraghuwa <mayank.raghuwanshi@intel.com>
This commit is contained in:
mraghuwa
2020-09-22 18:12:08 +05:30
committed by sys_ocldev
parent fc090f74c6
commit 2643346b48
11 changed files with 84 additions and 158 deletions

View File

@@ -5,7 +5,7 @@
#
# Source list stored in L0_SRCS_TOOLS_SYSMAN_RAS_LINUX.
# NOTE(review): os_ras_imp.cpp is listed twice, once directly under
# CMAKE_CURRENT_SOURCE_DIR and once under the ${BRANCH_DIR_SUFFIX} variant.
# This looks like the removed and added lines of a diff shown together;
# confirm which entry is meant to be live.
set(L0_SRCS_TOOLS_SYSMAN_RAS_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/os_ras_imp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp.h
)

View File

@@ -11,32 +11,22 @@
namespace L0 {
const std::string LinuxRasImp::rasCounterDir("/var/lib/libze_intel_gpu/");
const std::string LinuxRasImp::resetCounter("ras_reset_count");
const std::string LinuxRasImp::resetCounterFile = rasCounterDir + resetCounter;
// Records `type` in the osRasErrorType member; has no other effect.
void LinuxRasImp::setRasErrorType(zes_ras_error_type_t type) {
osRasErrorType = type;
}
bool LinuxRasImp::isRasSupported(void) {
if (false == pFsAccess->fileExists(rasCounterDir)) {
return false;
}
if (osRasErrorType == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
return false;
} else {
return false;
}
// Stub defined alongside LinuxRasImp: leaves `errorType` untouched and always
// returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE. Neither parameter is read.
ze_result_t OsRas::getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman) {
LinuxSysmanImp *pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
pFsAccess = &pLinuxSysmanImp->getFsAccess();
osRasErrorType = ZES_RAS_ERROR_TYPE_UNCORRECTABLE;
// Fills `properties` for this RAS object and reports success.
//
// The error type comes from the osRasErrorType member; the remaining fields
// are fixed values (no extension chain, not on a sub-device, sub-device id 0).
// Always returns ZE_RESULT_SUCCESS.
ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
    // Sub-device fields: this implementation always reports "not on a sub-device".
    properties.subdeviceId = 0;
    properties.onSubdevice = false;

    // Error category handled by this object.
    properties.type = osRasErrorType;

    // No extension structure is chained.
    properties.pNext = nullptr;

    return ZE_RESULT_SUCCESS;
}
// Initialises osRasErrorType from `type`. `pOsSysman` is accepted but not used
// by this constructor, and the body is empty.
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type) : osRasErrorType(type) {
}
OsRas *OsRas::create(OsSysman *pOsSysman) {
LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman);
// Factory: heap-allocates a LinuxRasImp for the given OS interface and error
// type and hands it back through the OsRas base pointer.
// NOTE(review): the object is created with `new` and returned raw; the caller
// presumably owns and deletes it — confirm at the call site.
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type) {
    return static_cast<OsRas *>(new LinuxRasImp(pOsSysman, type));
}

View File

@@ -18,21 +18,13 @@ namespace L0 {
class FsAccess;
// OsRas implementation declared for Linux; derives from OsRas and from
// NEO::NonCopyableOrMovableClass.
// NOTE(review): this listing appears to interleave the pre- and post-change
// versions of the class (two OsSysman constructors, isRasSupported /
// setRasErrorType next to osRasGetProperties, and osRasErrorType declared both
// as a protected and as a private member). Confirm against the real header
// which declarations are current.
class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
public:
LinuxRasImp(OsSysman *pOsSysman);
// Fills `properties` for this RAS object.
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
// Constructs the object for a specific RAS error type.
LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type);
LinuxRasImp() = default;
~LinuxRasImp() override = default;
bool isRasSupported(void) override;
void setRasErrorType(zes_ras_error_type_t rasErrorType) override;
protected:
// File-system accessor; null until assigned.
FsAccess *pFsAccess = nullptr;
zes_ras_error_type_t osRasErrorType;
private:
// Path components for the RAS counter files (values are defined in the .cpp).
static const std::string rasCounterDir;
static const std::string resetCounter;
static const std::string resetCounterFile;
std::vector<std::string> rasCounterDirFileList = {};
// Error type handled by this object; value-initialised by default.
zes_ras_error_type_t osRasErrorType = {};
};
} // namespace L0

View File

@@ -9,14 +9,16 @@
#include <level_zero/zes_api.h>
#include <vector>
namespace L0 {
struct OsSysman;
// Abstract OS-level RAS interface; concrete implementations are obtained via
// the static create() factory.
// NOTE(review): both the one-argument and two-argument create() and both the
// isRasSupported/setRasErrorType and osRasGetProperties virtuals are shown;
// this looks like old and new diff lines rendered together — confirm which
// members are current.
class OsRas {
public:
virtual bool isRasSupported(void) = 0;
virtual void setRasErrorType(zes_ras_error_type_t type) = 0;
static OsRas *create(OsSysman *pOsSysman);
// Fills `properties` for the implementing object.
virtual ze_result_t osRasGetProperties(zes_ras_properties_t &properties) = 0;
// Factory taking the RAS error type the new object should represent.
static OsRas *create(OsSysman *pOsSysman, zes_ras_error_type_t type);
// Reports supported error types through `errorType`.
static ze_result_t getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman);
virtual ~OsRas() = default;
};

View File

@@ -18,15 +18,15 @@ RasHandleContext::~RasHandleContext() {
}
// Allocates a RasImp for `type` and stores the pointer in handleList.
// NOTE(review): the conditional push/delete is followed by an unconditional
// push_back of the same pointer. These look like the pre- and post-change
// bodies of a diff shown together; if both were live, pRas would be stored
// twice, or stored after being deleted. Confirm which lines are current.
void RasHandleContext::createHandle(zes_ras_error_type_t type) {
Ras *pRas = new RasImp(pOsSysman, type);
if (pRas->isRasErrorSupported == true) {
handleList.push_back(pRas);
} else {
delete pRas;
}
handleList.push_back(pRas);
}
// Creates RAS handles via createHandle().
// The status returned by OsRas::getSupportedRasErrorTypes is not checked; only
// the contents of `errorType` drive the loop, so an empty vector creates no
// handles from it.
// NOTE(review): the two hard-coded createHandle calls and the query-driven loop
// both appear here, which looks like old and new diff lines rendered together —
// confirm which form is current.
void RasHandleContext::init() {
createHandle(ZES_RAS_ERROR_TYPE_UNCORRECTABLE);
createHandle(ZES_RAS_ERROR_TYPE_CORRECTABLE);
std::vector<zes_ras_error_type_t> errorType = {};
OsRas::getSupportedRasErrorTypes(errorType, pOsSysman);
for (const auto &type : errorType) {
createHandle(type);
}
}
ze_result_t RasHandleContext::rasGet(uint32_t *pCount,
zes_ras_handle_t *phRas) {

View File

@@ -15,9 +15,6 @@
namespace L0 {
// Copies the rasProperties member into *pProperties and returns
// ZE_RESULT_SUCCESS. pProperties is dereferenced without a null check.
// NOTE(review): the three assignments to rasProperties may be removed diff
// lines shown next to the surviving copy; confirm against the current file.
ze_result_t RasImp::rasGetProperties(zes_ras_properties_t *pProperties) {
rasProperties.type = this->rasErrorType;
rasProperties.onSubdevice = false;
rasProperties.subdeviceId = 0;
*pProperties = rasProperties;
return ZE_RESULT_SUCCESS;
}
@@ -35,13 +32,11 @@ ze_result_t RasImp::rasGetState(const zes_ras_state_t *pState) {
}
// Initialises this object from its OS backend (pOsRas).
// The result of osRasGetProperties is ignored, so rasProperties keeps whatever
// the backend wrote (or did not write) into it.
// NOTE(review): setRasErrorType/isRasSupported and osRasGetProperties appear
// together; this looks like old and new diff lines — confirm which are live.
void RasImp::init() {
pOsRas->setRasErrorType(this->rasErrorType);
isRasErrorSupported = pOsRas->isRasSupported();
pOsRas->osRasGetProperties(rasProperties);
}
// Creates the OS backend through OsRas::create and then runs init().
// NOTE(review): pOsRas is assigned twice (one-argument and two-argument
// create). This looks like old and new diff lines rendered together; if both
// were live, the first allocation would be overwritten without being freed.
// Confirm which lines are current.
RasImp::RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type) {
pOsRas = OsRas::create(pOsSysman);
this->rasErrorType = type;
pOsRas = OsRas::create(pOsSysman, type);
init();
}

View File

@@ -10,17 +10,18 @@
namespace L0 {
// OsRas implementation named for WDDM. No access specifier is given, so all
// members are private to the class and reachable only through the OsRas
// virtual interface.
// NOTE(review): isRasSupported/setRasErrorType and osRasGetProperties appear
// together, which looks like old and new diff lines — confirm which overrides
// are current.
class WddmRasImp : public OsRas {
bool isRasSupported(void) override;
void setRasErrorType(zes_ras_error_type_t type) override;
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
};
bool WddmRasImp::isRasSupported(void) {
return false;
// Stub defined alongside WddmRasImp: leaves `errorType` untouched and always
// returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE. Neither parameter is read.
ze_result_t OsRas::getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Intentionally empty: the supplied error type is ignored.
void WddmRasImp::setRasErrorType(zes_ras_error_type_t type) {}
// Stub: does not write to `properties` and always returns
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t WddmRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
OsRas *OsRas::create(OsSysman *pOsSysman) {
// Factory: heap-allocates a default-constructed WddmRasImp and hands it back
// through the OsRas base pointer. Neither `pOsSysman` nor `type` is consulted.
// NOTE(review): the object is created with `new` and returned raw; the caller
// presumably owns and deletes it — confirm at the call site.
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type) {
    return static_cast<OsRas *>(new WddmRasImp());
}