mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Implement zesRasGetConfig and zesRasSetConfig
Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0c035cfcc9
commit
0f973f146e
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -273,6 +273,10 @@ std::string FsAccess::getDirName(const std::string path) {
|
||||
return path.substr(0, pos);
|
||||
}
|
||||
|
||||
// Reports whether the calling process has an effective user ID of 0 (root).
bool FsAccess::isRootUser() {
    const auto effectiveUid = geteuid();
    return effectiveUid == 0;
}
|
||||
|
||||
// Procfs Access
|
||||
// Path fragments owned by ProcfsAccess.
// NOTE(review): presumably combined as procDir + <pid> + fdDir to reach a
// process's open-descriptor directory — confirm against the callers.
const std::string ProcfsAccess::procDir = "/proc/";
|
||||
const std::string ProcfsAccess::fdDir = "/fd/";
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -44,6 +44,7 @@ class FsAccess {
|
||||
virtual ze_result_t readSymLink(const std::string path, std::string &buf);
|
||||
virtual ze_result_t getRealPath(const std::string path, std::string &buf);
|
||||
virtual ze_result_t listDirectory(const std::string path, std::vector<std::string> &list);
|
||||
virtual bool isRootUser();
|
||||
std::string getBaseName(const std::string path);
|
||||
std::string getDirName(const std::string path);
|
||||
virtual bool fileExists(const std::string file);
|
||||
|
||||
@@ -19,6 +19,21 @@ ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear)
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
// Copies the thresholds currently held by this object into *config.
// Always returns ZE_RESULT_SUCCESS; config is dereferenced unconditionally,
// so the caller must pass a valid pointer.
ze_result_t LinuxRasImp::osRasGetConfig(zes_ras_config_t *config) {
    auto &reportedCategories = config->detailedThresholds.category;
    memcpy(reportedCategories, categoryThreshold, sizeof(reportedCategories));
    config->totalThreshold = totalThreshold;
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Stores the thresholds from *config in this object.
// Only permitted when pFsAccess reports a root user; otherwise nothing is
// modified and ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS is returned.
ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) {
    if (!pFsAccess->isRootUser()) {
        return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
    }

    const auto &requestedCategories = config->detailedThresholds.category;
    memcpy(categoryThreshold, requestedCategories, sizeof(requestedCategories));
    totalThreshold = config->totalThreshold;
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
|
||||
properties.pNext = nullptr;
|
||||
properties.type = osRasErrorType;
|
||||
@@ -27,6 +42,8 @@ ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
// Builds the Linux RAS backend for one error type.
// pOsSysman is downcast to LinuxSysmanImp, and its FsAccess instance is
// cached so that later calls can query the caller's privileges.
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId)
    : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) {
    auto *linuxSysman = static_cast<LinuxSysmanImp *>(pOsSysman);
    pLinuxSysmanImp = linuxSysman;
    pFsAccess = &linuxSysman->getFsAccess();
}
|
||||
|
||||
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) {
|
||||
|
||||
@@ -11,21 +11,28 @@
|
||||
#include "level_zero/tools/source/sysman/ras/os_ras.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
class FsAccess;
|
||||
class LinuxSysmanImp;
|
||||
class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
|
||||
public:
|
||||
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
|
||||
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
|
||||
ze_result_t osRasGetConfig(zes_ras_config_t *config) override;
|
||||
ze_result_t osRasSetConfig(const zes_ras_config_t *config) override;
|
||||
LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
|
||||
LinuxRasImp() = default;
|
||||
~LinuxRasImp() override = default;
|
||||
|
||||
protected:
|
||||
zes_ras_error_type_t osRasErrorType = {};
|
||||
FsAccess *pFsAccess = nullptr;
|
||||
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
|
||||
|
||||
private:
|
||||
bool isSubdevice = false;
|
||||
uint32_t subdeviceId = 0;
|
||||
uint64_t totalThreshold = 0;
|
||||
uint64_t categoryThreshold[ZES_MAX_RAS_ERROR_CATEGORY_COUNT] = {0};
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -18,6 +18,8 @@ class OsRas {
|
||||
public:
|
||||
virtual ze_result_t osRasGetProperties(zes_ras_properties_t &properties) = 0;
|
||||
virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) = 0;
|
||||
virtual ze_result_t osRasGetConfig(zes_ras_config_t *config) = 0;
|
||||
virtual ze_result_t osRasSetConfig(const zes_ras_config_t *config) = 0;
|
||||
static OsRas *create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
|
||||
static ze_result_t getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle);
|
||||
virtual ~OsRas() = default;
|
||||
|
||||
@@ -20,11 +20,11 @@ ze_result_t RasImp::rasGetProperties(zes_ras_properties_t *pProperties) {
|
||||
}
|
||||
|
||||
// Retrieves the RAS threshold configuration by delegating to the OS-specific
// backend; the backend's result code is returned unchanged.
ze_result_t RasImp::rasGetConfig(zes_ras_config_t *pConfig) {
    return pOsRas->osRasGetConfig(pConfig);
}
|
||||
|
||||
// Applies a RAS threshold configuration by delegating to the OS-specific
// backend; the backend's result code (e.g. insufficient permissions) is
// returned unchanged.
ze_result_t RasImp::rasSetConfig(const zes_ras_config_t *pConfig) {
    return pOsRas->osRasSetConfig(pConfig);
}
|
||||
|
||||
ze_result_t RasImp::rasGetState(zes_ras_state_t *pState, ze_bool_t clear) {
|
||||
|
||||
@@ -12,6 +12,8 @@ namespace L0 {
|
||||
class WddmRasImp : public OsRas {
|
||||
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
|
||||
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
|
||||
ze_result_t osRasGetConfig(zes_ras_config_t *config) override;
|
||||
ze_result_t osRasSetConfig(const zes_ras_config_t *config) override;
|
||||
};
|
||||
|
||||
ze_result_t OsRas::getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {
|
||||
@@ -22,6 +24,14 @@ ze_result_t WddmRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
// Querying RAS thresholds is not supported by this backend; config is left untouched.
ze_result_t WddmRasImp::osRasGetConfig(zes_ras_config_t *config) {
    const ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return unsupported;
}
|
||||
|
||||
// Setting RAS thresholds is not supported by this backend; config is ignored.
ze_result_t WddmRasImp::osRasSetConfig(const zes_ras_config_t *config) {
    const ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return unsupported;
}
|
||||
|
||||
// Reading RAS error counters is not supported by this backend; state is left
// untouched and the clear flag is ignored.
ze_result_t WddmRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
    const ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return unsupported;
}
|
||||
|
||||
@@ -357,6 +357,7 @@ void testSysmanRas(ze_device_handle_t &device) {
|
||||
std::cout << std::endl
|
||||
<< " ---- Ras tests ---- " << std::endl;
|
||||
uint32_t count = 0;
|
||||
bool iamroot = (geteuid() == 0);
|
||||
VALIDATECALL(zesDeviceEnumRasErrorSets(device, &count, nullptr));
|
||||
if (count == 0) {
|
||||
std::cout << "Could not retrieve Ras Error Sets" << std::endl;
|
||||
@@ -397,6 +398,34 @@ void testSysmanRas(ze_device_handle_t &device) {
|
||||
std::cout << "Number of correctable display errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS] << std::endl;
|
||||
}
|
||||
}
|
||||
if (iamroot) {
|
||||
zes_ras_config_t getConfig = {};
|
||||
zes_ras_config_t setConfig = {};
|
||||
setConfig.totalThreshold = 14;
|
||||
memset(setConfig.detailedThresholds.category, 0, sizeof(setConfig.detailedThresholds.category));
|
||||
VALIDATECALL(zesRasSetConfig(handle, &setConfig));
|
||||
if (verbose) {
|
||||
std::cout << "Setting Total threshold = " << setConfig.totalThreshold << std::endl;
|
||||
std::cout << "Setting Threshold for Engine Resets = " << setConfig.detailedThresholds.category[0] << std::endl;
|
||||
std::cout << "Setting Threshold for Programming Errors = " << setConfig.detailedThresholds.category[1] << std::endl;
|
||||
std::cout << "Setting Threshold for Driver Errors = " << setConfig.detailedThresholds.category[2] << std::endl;
|
||||
std::cout << "Setting Threshold for Compute Errors = " << setConfig.detailedThresholds.category[3] << std::endl;
|
||||
std::cout << "Setting Threshold for Non Compute Errors = " << setConfig.detailedThresholds.category[4] << std::endl;
|
||||
std::cout << "Setting Threshold for Cache Errors = " << setConfig.detailedThresholds.category[5] << std::endl;
|
||||
std::cout << "Setting Threshold for Display Errors = " << setConfig.detailedThresholds.category[6] << std::endl;
|
||||
}
|
||||
VALIDATECALL(zesRasGetConfig(handle, &getConfig));
|
||||
if (verbose) {
|
||||
std::cout << "Getting Total threshold = " << getConfig.totalThreshold << std::endl;
|
||||
std::cout << "Getting Threshold for Engine Resets = " << getConfig.detailedThresholds.category[0] << std::endl;
|
||||
std::cout << "Getting Threshold for Programming Errors = " << getConfig.detailedThresholds.category[1] << std::endl;
|
||||
std::cout << "Getting Threshold for Driver Errors = " << getConfig.detailedThresholds.category[2] << std::endl;
|
||||
std::cout << "Getting Threshold for Compute Errors = " << getConfig.detailedThresholds.category[3] << std::endl;
|
||||
std::cout << "Getting Threshold for Non Compute Errors = " << getConfig.detailedThresholds.category[4] << std::endl;
|
||||
std::cout << "Getting Threshold for Cache Errors = " << getConfig.detailedThresholds.category[5] << std::endl;
|
||||
std::cout << "Getting Threshold for Display Errors = " << getConfig.detailedThresholds.category[6] << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string getStandbyType(zes_standby_type_t standbyType) {
|
||||
@@ -722,6 +751,12 @@ void testSysmanListenEvents(ze_driver_handle_t driver, std::vector<ze_device_han
|
||||
if (pEvents[index] & ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH) {
|
||||
std::cout << "Device " << index << "got DEVICE_ATTACH event" << std::endl;
|
||||
}
|
||||
if (pEvents[index] & ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS) {
|
||||
std::cout << "Device " << index << "got RAS UNCORRECTABLE event" << std::endl;
|
||||
}
|
||||
if (pEvents[index] & ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS) {
|
||||
std::cout << "Device " << index << "got RAS CORRECTABLE event" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -981,10 +1016,10 @@ int main(int argc, char *argv[]) {
|
||||
case 'E':
|
||||
std::for_each(devices.begin(), devices.end(), [&](auto device) {
|
||||
zesDeviceEventRegister(device,
|
||||
ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH);
|
||||
ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH | ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS);
|
||||
});
|
||||
testSysmanListenEvents(driver, devices,
|
||||
ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH);
|
||||
ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH | ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS);
|
||||
break;
|
||||
case 'F':
|
||||
std::for_each(devices.begin(), devices.end(), [&](auto device) {
|
||||
|
||||
@@ -125,5 +125,15 @@ TEST_F(SysmanMultiDeviceFixture, GivenValidDeviceHandleHavingSubdevicesWhenValid
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that FsAccess::isRootUser() agrees with the effective user ID of
// the process running the test.
TEST_F(SysmanMultiDeviceFixture, GivenValidEffectiveUserIdCheckWhetherPermissionsReturnedByIsRootUserAreCorrect) {
    // Bind by reference: a plain `auto` would copy the FsAccess object rather
    // than exercise the instance owned by pLinuxSysmanImp.
    auto &fsAccess = pLinuxSysmanImp->getFsAccess();
    const bool expectRoot = (geteuid() == 0);
    EXPECT_EQ(expectRoot, fsAccess.isRootUser());
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -15,5 +15,19 @@
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
class RasFsAccess : public FsAccess {};
|
||||
template <>
|
||||
struct Mock<RasFsAccess> : public RasFsAccess {
|
||||
MOCK_METHOD(bool, isRootUser, (), (override));
|
||||
bool userIsRoot() {
|
||||
return true;
|
||||
}
|
||||
bool userIsNotRoot() {
|
||||
return false;
|
||||
}
|
||||
Mock<RasFsAccess>() = default;
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -19,9 +19,16 @@ namespace ult {
|
||||
constexpr uint32_t mockHandleCount = 0;
|
||||
struct SysmanRasFixture : public SysmanDeviceFixture {
|
||||
protected:
|
||||
std::unique_ptr<Mock<RasFsAccess>> pFsAccess;
|
||||
std::vector<ze_device_handle_t> deviceHandles;
|
||||
FsAccess *pFsAccessOriginal = nullptr;
|
||||
void SetUp() override {
|
||||
SysmanDeviceFixture::SetUp();
|
||||
pFsAccess = std::make_unique<NiceMock<Mock<RasFsAccess>>>();
|
||||
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
|
||||
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
|
||||
ON_CALL(*pFsAccess.get(), isRootUser())
|
||||
.WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock<RasFsAccess>::userIsRoot));
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
uint32_t subDeviceCount = 0;
|
||||
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr);
|
||||
@@ -35,6 +42,7 @@ struct SysmanRasFixture : public SysmanDeviceFixture {
|
||||
}
|
||||
// Undoes SetUp in reverse order: the original FsAccess pointer is put back
// first, so the swap is reverted while pLinuxSysmanImp is still fully set up
// and the base fixture never tears down with the mock installed.
void TearDown() override {
    pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
    SysmanDeviceFixture::TearDown();
}
|
||||
|
||||
std::vector<zes_ras_handle_t> get_ras_handles(uint32_t count) {
|
||||
@@ -108,5 +116,44 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhileCallingZesRasGetStateThenFailur
|
||||
delete pTestRasImp;
|
||||
}
|
||||
|
||||
// Round-trip check: a configuration written with zesRasSetConfig must be
// returned unchanged by zesRasGetConfig on the same handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetConfigAfterzesRasSetConfigThenSuccessIsReturned) {
    auto *pRasUnderTest = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle());
    auto &handleList = pSysmanDeviceImp->pRasHandleContext->handleList;
    handleList.push_back(pRasUnderTest);

    const auto rasHandles = get_ras_handles(mockHandleCount + 1);

    for (auto rasHandle : rasHandles) {
        zes_ras_config_t written = {};
        written.totalThreshold = 50;
        memset(written.detailedThresholds.category, 1, sizeof(written.detailedThresholds.category));

        zes_ras_config_t readBack = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasSetConfig(rasHandle, &written));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetConfig(rasHandle, &readBack));

        EXPECT_EQ(written.totalThreshold, readBack.totalThreshold);
        EXPECT_EQ(0, std::memcmp(written.detailedThresholds.category, readBack.detailedThresholds.category, sizeof(written.detailedThresholds.category)));
    }

    // Detach the handle from the context before freeing it ourselves.
    handleList.pop_back();
    delete pRasUnderTest;
}
|
||||
|
||||
// With the mocked FsAccess reporting a non-root user, zesRasSetConfig must be
// rejected with ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasSetConfigWithoutPermissionThenFailureIsReturned) {
    auto *pMockFsAccess = pFsAccess.get();
    ON_CALL(*pMockFsAccess, isRootUser())
        .WillByDefault(::testing::Invoke(pMockFsAccess, &Mock<RasFsAccess>::userIsNotRoot));

    auto *pRasUnderTest = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle());
    auto &handleList = pSysmanDeviceImp->pRasHandleContext->handleList;
    handleList.push_back(pRasUnderTest);

    const auto rasHandles = get_ras_handles(mockHandleCount + 1);

    for (auto rasHandle : rasHandles) {
        zes_ras_config_t requested = {};
        requested.totalThreshold = 50;
        memset(requested.detailedThresholds.category, 1, sizeof(requested.detailedThresholds.category));
        EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, zesRasSetConfig(rasHandle, &requested));
    }

    // Detach the handle from the context before freeing it ourselves.
    handleList.pop_back();
    delete pRasUnderTest;
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user