Implement zesRasGetConfig and zesRasSetConfig

Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
Mayank Raghuwanshi
2021-02-18 11:22:59 +05:30
committed by Compute-Runtime-Automation
parent 0c035cfcc9
commit 0f973f146e
11 changed files with 155 additions and 8 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -273,6 +273,10 @@ std::string FsAccess::getDirName(const std::string path) {
return path.substr(0, pos);
}
// Reports whether the effective user id of the calling process is 0.
bool FsAccess::isRootUser() {
    const bool effectiveUidIsZero = (0 == geteuid());
    return effectiveUidIsZero;
}
// Procfs Access
const std::string ProcfsAccess::procDir = "/proc/";
const std::string ProcfsAccess::fdDir = "/fd/";

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -44,6 +44,7 @@ class FsAccess {
virtual ze_result_t readSymLink(const std::string path, std::string &buf);
virtual ze_result_t getRealPath(const std::string path, std::string &buf);
virtual ze_result_t listDirectory(const std::string path, std::vector<std::string> &list);
virtual bool isRootUser();
std::string getBaseName(const std::string path);
std::string getDirName(const std::string path);
virtual bool fileExists(const std::string file);

View File

@@ -19,6 +19,21 @@ ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear)
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Copies the thresholds currently stored on this object into *config.
// Always returns ZE_RESULT_SUCCESS; config is dereferenced without a null check.
ze_result_t LinuxRasImp::osRasGetConfig(zes_ras_config_t *config) {
    // Reference to the caller's per-category array, so sizeof yields the full array size.
    auto &reportedCategories = config->detailedThresholds.category;
    memcpy(reportedCategories, categoryThreshold, sizeof(reportedCategories));
    config->totalThreshold = totalThreshold;
    return ZE_RESULT_SUCCESS;
}
// Stores the thresholds from *config on this object.
// Returns ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, leaving the stored values
// untouched, when pFsAccess->isRootUser() reports false; ZE_RESULT_SUCCESS otherwise.
ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) {
    if (!pFsAccess->isRootUser()) {
        return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
    }

    // Reference to the caller's per-category array, so sizeof yields the full array size.
    const auto &requestedCategories = config->detailedThresholds.category;
    memcpy(categoryThreshold, requestedCategories, sizeof(requestedCategories));
    totalThreshold = config->totalThreshold;
    return ZE_RESULT_SUCCESS;
}
ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
properties.pNext = nullptr;
properties.type = osRasErrorType;
@@ -27,6 +42,8 @@ ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
return ZE_RESULT_SUCCESS;
}
// Records the error type and sub-device identity, then caches the Linux sysman
// object and the filesystem accessor it exposes.
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId)
    : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) {
    auto *linuxSysman = static_cast<LinuxSysmanImp *>(pOsSysman);
    pLinuxSysmanImp = linuxSysman;
    // Keep a non-owning pointer to the accessor returned by the sysman object.
    pFsAccess = &linuxSysman->getFsAccess();
}
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) {

View File

@@ -11,21 +11,28 @@
#include "level_zero/tools/source/sysman/ras/os_ras.h"
namespace L0 {
class FsAccess;
class LinuxSysmanImp;
// Linux implementation of the OsRas interface.
// The threshold configuration handled by osRasGetConfig/osRasSetConfig is kept
// in the totalThreshold and categoryThreshold members of this object.
class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
public:
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
// Copies the stored thresholds into *config.
ze_result_t osRasGetConfig(zes_ras_config_t *config) override;
// Stores the thresholds from *config; rejected unless pFsAccess reports a root user.
ze_result_t osRasSetConfig(const zes_ras_config_t *config) override;
LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
// Leaves pFsAccess and pLinuxSysmanImp as nullptr.
LinuxRasImp() = default;
~LinuxRasImp() override = default;
protected:
zes_ras_error_type_t osRasErrorType = {};
// Non-owning; set by the four-argument constructor from pLinuxSysmanImp->getFsAccess().
FsAccess *pFsAccess = nullptr;
// Non-owning; the OsSysman passed to the constructor, downcast to its Linux type.
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
private:
bool isSubdevice = false;
uint32_t subdeviceId = 0;
// Last values accepted by osRasSetConfig; zero until then.
uint64_t totalThreshold = 0;
uint64_t categoryThreshold[ZES_MAX_RAS_ERROR_CATEGORY_COUNT] = {0};
};
} // namespace L0

View File

@@ -18,6 +18,8 @@ class OsRas {
public:
virtual ze_result_t osRasGetProperties(zes_ras_properties_t &properties) = 0;
virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) = 0;
virtual ze_result_t osRasGetConfig(zes_ras_config_t *config) = 0;
virtual ze_result_t osRasSetConfig(const zes_ras_config_t *config) = 0;
static OsRas *create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
static ze_result_t getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle);
virtual ~OsRas() = default;

View File

@@ -20,11 +20,11 @@ ze_result_t RasImp::rasGetProperties(zes_ras_properties_t *pProperties) {
}
// Forwards the threshold query to the OS-specific RAS backend and returns its result.
// pConfig is passed through unchanged; the backend fills it in.
ze_result_t RasImp::rasGetConfig(zes_ras_config_t *pConfig) {
    return pOsRas->osRasGetConfig(pConfig);
}
// Forwards the requested thresholds to the OS-specific RAS backend and returns its result.
// pConfig is passed through unchanged.
ze_result_t RasImp::rasSetConfig(const zes_ras_config_t *pConfig) {
    return pOsRas->osRasSetConfig(pConfig);
}
ze_result_t RasImp::rasGetState(zes_ras_state_t *pState, ze_bool_t clear) {

View File

@@ -12,6 +12,8 @@ namespace L0 {
// OsRas implementation whose overrides visible in this file all report
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
// No access specifier is given, so the overrides are private to the class.
class WddmRasImp : public OsRas {
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
ze_result_t osRasGetConfig(zes_ras_config_t *config) override;
ze_result_t osRasSetConfig(const zes_ras_config_t *config) override;
};
ze_result_t OsRas::getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {
@@ -22,6 +24,14 @@ ze_result_t WddmRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Threshold queries are not supported by this backend; config is left untouched.
ze_result_t WddmRasImp::osRasGetConfig(zes_ras_config_t *config) {
    const ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return unsupported;
}
// Threshold updates are not supported by this backend; config is ignored.
ze_result_t WddmRasImp::osRasSetConfig(const zes_ras_config_t *config) {
    const ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return unsupported;
}
// State queries are not supported by this backend; state and clear are ignored.
ze_result_t WddmRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
    const ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return unsupported;
}

View File

@@ -357,6 +357,7 @@ void testSysmanRas(ze_device_handle_t &device) {
std::cout << std::endl
<< " ---- Ras tests ---- " << std::endl;
uint32_t count = 0;
bool iamroot = (geteuid() == 0);
VALIDATECALL(zesDeviceEnumRasErrorSets(device, &count, nullptr));
if (count == 0) {
std::cout << "Could not retrieve Ras Error Sets" << std::endl;
@@ -397,6 +398,34 @@ void testSysmanRas(ze_device_handle_t &device) {
std::cout << "Number of correctable display errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS] << std::endl;
}
}
if (iamroot) {
zes_ras_config_t getConfig = {};
zes_ras_config_t setConfig = {};
setConfig.totalThreshold = 14;
memset(setConfig.detailedThresholds.category, 0, sizeof(setConfig.detailedThresholds.category));
VALIDATECALL(zesRasSetConfig(handle, &setConfig));
if (verbose) {
std::cout << "Setting Total threshold = " << setConfig.totalThreshold << std::endl;
std::cout << "Setting Threshold for Engine Resets = " << setConfig.detailedThresholds.category[0] << std::endl;
std::cout << "Setting Threshold for Programming Errors = " << setConfig.detailedThresholds.category[1] << std::endl;
std::cout << "Setting Threshold for Driver Errors = " << setConfig.detailedThresholds.category[2] << std::endl;
std::cout << "Setting Threshold for Compute Errors = " << setConfig.detailedThresholds.category[3] << std::endl;
std::cout << "Setting Threshold for Non Compute Errors = " << setConfig.detailedThresholds.category[4] << std::endl;
std::cout << "Setting Threshold for Cache Errors = " << setConfig.detailedThresholds.category[5] << std::endl;
std::cout << "Setting Threshold for Display Errors = " << setConfig.detailedThresholds.category[6] << std::endl;
}
VALIDATECALL(zesRasGetConfig(handle, &getConfig));
if (verbose) {
std::cout << "Getting Total threshold = " << getConfig.totalThreshold << std::endl;
std::cout << "Getting Threshold for Engine Resets = " << getConfig.detailedThresholds.category[0] << std::endl;
std::cout << "Getting Threshold for Programming Errors = " << getConfig.detailedThresholds.category[1] << std::endl;
std::cout << "Getting Threshold for Driver Errors = " << getConfig.detailedThresholds.category[2] << std::endl;
std::cout << "Getting Threshold for Compute Errors = " << getConfig.detailedThresholds.category[3] << std::endl;
std::cout << "Getting Threshold for Non Compute Errors = " << getConfig.detailedThresholds.category[4] << std::endl;
std::cout << "Getting Threshold for Cache Errors = " << getConfig.detailedThresholds.category[5] << std::endl;
std::cout << "Getting Threshold for Display Errors = " << getConfig.detailedThresholds.category[6] << std::endl;
}
}
}
}
std::string getStandbyType(zes_standby_type_t standbyType) {
@@ -722,6 +751,12 @@ void testSysmanListenEvents(ze_driver_handle_t driver, std::vector<ze_device_han
if (pEvents[index] & ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH) {
std::cout << "Device " << index << "got DEVICE_ATTACH event" << std::endl;
}
if (pEvents[index] & ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS) {
std::cout << "Device " << index << "got RAS UNCORRECTABLE event" << std::endl;
}
if (pEvents[index] & ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS) {
std::cout << "Device " << index << "got RAS CORRECTABLE event" << std::endl;
}
}
}
}
@@ -981,10 +1016,10 @@ int main(int argc, char *argv[]) {
case 'E':
std::for_each(devices.begin(), devices.end(), [&](auto device) {
zesDeviceEventRegister(device,
ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH);
ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH | ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS);
});
testSysmanListenEvents(driver, devices,
ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH);
ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH | ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS);
break;
case 'F':
std::for_each(devices.begin(), devices.end(), [&](auto device) {

View File

@@ -125,5 +125,15 @@ TEST_F(SysmanMultiDeviceFixture, GivenValidDeviceHandleHavingSubdevicesWhenValid
}
}
// Checks that isRootUser() agrees with the effective user id of the test process.
TEST_F(SysmanMultiDeviceFixture, GivenValidEffectiveUserIdCheckWhetherPermissionsReturnedByIsRootUserAreCorrect) {
    // Bind by reference so the query is made on the accessor owned by
    // pLinuxSysmanImp rather than on a sliced copy of it.
    auto &fsAccess = pLinuxSysmanImp->getFsAccess();
    const bool expectRoot = (geteuid() == 0);
    EXPECT_EQ(expectRoot, fsAccess.isRootUser());
}
} // namespace ult
} // namespace L0

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -15,5 +15,19 @@
namespace L0 {
namespace ult {
// Empty FsAccess subclass; it adds no members and is the type that the
// Mock<RasFsAccess> specialization below derives from.
class RasFsAccess : public FsAccess {};
// Mock of RasFsAccess that replaces isRootUser() with a gmock method.
template <>
struct Mock<RasFsAccess> : public RasFsAccess {
    MOCK_METHOD(bool, isRootUser, (), (override));

    // Canned answers that a test can install as the default action of isRootUser().
    bool userIsRoot() {
        return true;
    }
    bool userIsNotRoot() {
        return false;
    }

    // Declared with the injected class name: a template-id is not a valid
    // constructor name in C++20.
    Mock() = default;
};
} // namespace ult
} // namespace L0

View File

@@ -19,9 +19,16 @@ namespace ult {
constexpr uint32_t mockHandleCount = 0;
struct SysmanRasFixture : public SysmanDeviceFixture {
protected:
std::unique_ptr<Mock<RasFsAccess>> pFsAccess;
std::vector<ze_device_handle_t> deviceHandles;
FsAccess *pFsAccessOriginal = nullptr;
void SetUp() override {
SysmanDeviceFixture::SetUp();
pFsAccess = std::make_unique<NiceMock<Mock<RasFsAccess>>>();
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
ON_CALL(*pFsAccess.get(), isRootUser())
.WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock<RasFsAccess>::userIsRoot));
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
uint32_t subDeviceCount = 0;
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr);
@@ -35,6 +42,7 @@ struct SysmanRasFixture : public SysmanDeviceFixture {
}
// Runs the base fixture teardown, then puts back the FsAccess pointer that
// SetUp saved in pFsAccessOriginal before installing the mock.
// NOTE(review): the restore dereferences pLinuxSysmanImp after
// SysmanDeviceFixture::TearDown() has run — confirm the object is still alive
// at that point, or restore before calling the base teardown.
void TearDown() override {
SysmanDeviceFixture::TearDown();
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
}
std::vector<zes_ras_handle_t> get_ras_handles(uint32_t count) {
@@ -108,5 +116,44 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhileCallingZesRasGetStateThenFailur
delete pTestRasImp;
}
// Round-trip check: thresholds written with zesRasSetConfig must be returned
// unchanged by zesRasGetConfig on the same handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetConfigAfterzesRasSetConfigThenSuccessIsReturned) {
    auto &registeredHandles = pSysmanDeviceImp->pRasHandleContext->handleList;
    RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle());
    registeredHandles.push_back(pTestRasImp);

    for (auto rasHandle : get_ras_handles(mockHandleCount + 1)) {
        zes_ras_config_t requested = {};
        requested.totalThreshold = 50;
        // Fills every byte of the per-category array with 0x01.
        memset(requested.detailedThresholds.category, 1, sizeof(requested.detailedThresholds.category));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasSetConfig(rasHandle, &requested));

        zes_ras_config_t readBack = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetConfig(rasHandle, &readBack));
        EXPECT_EQ(requested.totalThreshold, readBack.totalThreshold);
        EXPECT_EQ(0, std::memcmp(requested.detailedThresholds.category, readBack.detailedThresholds.category, sizeof(requested.detailedThresholds.category)));
    }

    // Unregister the handle before freeing it so the list never holds a dangling pointer.
    registeredHandles.pop_back();
    delete pTestRasImp;
}
// With the mocked accessor reporting a non-root user, zesRasSetConfig must
// fail with ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasSetConfigWithoutPermissionThenFailureIsReturned) {
    // Override the fixture default so isRootUser() answers false for this test.
    auto *mockFsAccess = pFsAccess.get();
    ON_CALL(*mockFsAccess, isRootUser())
        .WillByDefault(::testing::Invoke(mockFsAccess, &Mock<RasFsAccess>::userIsNotRoot));

    auto &registeredHandles = pSysmanDeviceImp->pRasHandleContext->handleList;
    RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle());
    registeredHandles.push_back(pTestRasImp);

    for (auto rasHandle : get_ras_handles(mockHandleCount + 1)) {
        zes_ras_config_t requested = {};
        requested.totalThreshold = 50;
        // Fills every byte of the per-category array with 0x01.
        memset(requested.detailedThresholds.category, 1, sizeof(requested.detailedThresholds.category));
        EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, zesRasSetConfig(rasHandle, &requested));
    }

    // Unregister the handle before freeing it so the list never holds a dangling pointer.
    registeredHandles.pop_back();
    delete pTestRasImp;
}
} // namespace ult
} // namespace L0