From 0f973f146eea9a5cda11173b4ed27b11aaa50481 Mon Sep 17 00:00:00 2001 From: Mayank Raghuwanshi Date: Thu, 18 Feb 2021 11:22:59 +0530 Subject: [PATCH] Implement zesRasGetConfig and zesRasSetConfig Signed-off-by: Mayank Raghuwanshi --- .../tools/source/sysman/linux/fs_access.cpp | 6 ++- .../tools/source/sysman/linux/fs_access.h | 3 +- .../source/sysman/ras/linux/os_ras_imp.cpp | 17 +++++++ .../source/sysman/ras/linux/os_ras_imp.h | 9 +++- level_zero/tools/source/sysman/ras/os_ras.h | 2 + .../tools/source/sysman/ras/ras_imp.cpp | 4 +- .../source/sysman/ras/windows/os_ras_imp.cpp | 10 ++++ .../test/black_box_tests/zello_sysman.cpp | 39 ++++++++++++++- .../sources/sysman/linux/test_sysman.cpp | 10 ++++ .../sources/sysman/ras/linux/mock_fs_ras.h | 16 ++++++- .../sources/sysman/ras/linux/test_zes_ras.cpp | 47 +++++++++++++++++++ 11 files changed, 155 insertions(+), 8 deletions(-) diff --git a/level_zero/tools/source/sysman/linux/fs_access.cpp b/level_zero/tools/source/sysman/linux/fs_access.cpp index 4f0b2a583e..89bda11619 100644 --- a/level_zero/tools/source/sysman/linux/fs_access.cpp +++ b/level_zero/tools/source/sysman/linux/fs_access.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -273,6 +273,10 @@ std::string FsAccess::getDirName(const std::string path) { return path.substr(0, pos); } +bool FsAccess::isRootUser() { + return (geteuid() == 0); +} + // Procfs Access const std::string ProcfsAccess::procDir = "/proc/"; const std::string ProcfsAccess::fdDir = "/fd/"; diff --git a/level_zero/tools/source/sysman/linux/fs_access.h b/level_zero/tools/source/sysman/linux/fs_access.h index 4bdd9c7b1d..bea87e1884 100644 --- a/level_zero/tools/source/sysman/linux/fs_access.h +++ b/level_zero/tools/source/sysman/linux/fs_access.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -44,6 +44,7 
@@ class FsAccess { virtual ze_result_t readSymLink(const std::string path, std::string &buf); virtual ze_result_t getRealPath(const std::string path, std::string &buf); virtual ze_result_t listDirectory(const std::string path, std::vector<std::string> &list); + virtual bool isRootUser(); std::string getBaseName(const std::string path); std::string getDirName(const std::string path); virtual bool fileExists(const std::string file); diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp index cb84d8fb5d..fedbfd32e3 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp @@ -19,6 +19,21 @@ ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } +ze_result_t LinuxRasImp::osRasGetConfig(zes_ras_config_t *config) { + config->totalThreshold = totalThreshold; + memcpy(config->detailedThresholds.category, categoryThreshold, sizeof(config->detailedThresholds.category)); + return ZE_RESULT_SUCCESS; +} + +ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) { + if (pFsAccess->isRootUser() == true) { + totalThreshold = config->totalThreshold; + memcpy(categoryThreshold, config->detailedThresholds.category, sizeof(config->detailedThresholds.category)); + return ZE_RESULT_SUCCESS; + } + return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; +} + ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) { properties.pNext = nullptr; properties.type = osRasErrorType; @@ -27,6 +42,8 @@ ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) { return ZE_RESULT_SUCCESS; } LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) { + pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman); + pFsAccess = 
&pLinuxSysmanImp->getFsAccess(); } OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) { diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h index 51cd0da218..148dfb89bd 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h @@ -11,21 +11,28 @@ #include "level_zero/tools/source/sysman/ras/os_ras.h" namespace L0 { - +class FsAccess; +class LinuxSysmanImp; class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass { public: ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override; ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override; + ze_result_t osRasGetConfig(zes_ras_config_t *config) override; + ze_result_t osRasSetConfig(const zes_ras_config_t *config) override; LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId); LinuxRasImp() = default; ~LinuxRasImp() override = default; protected: zes_ras_error_type_t osRasErrorType = {}; + FsAccess *pFsAccess = nullptr; + LinuxSysmanImp *pLinuxSysmanImp = nullptr; private: bool isSubdevice = false; uint32_t subdeviceId = 0; + uint64_t totalThreshold = 0; + uint64_t categoryThreshold[ZES_MAX_RAS_ERROR_CATEGORY_COUNT] = {0}; }; } // namespace L0 diff --git a/level_zero/tools/source/sysman/ras/os_ras.h b/level_zero/tools/source/sysman/ras/os_ras.h index 8a30e85c94..84a945bf7a 100644 --- a/level_zero/tools/source/sysman/ras/os_ras.h +++ b/level_zero/tools/source/sysman/ras/os_ras.h @@ -18,6 +18,8 @@ class OsRas { public: virtual ze_result_t osRasGetProperties(zes_ras_properties_t &properties) = 0; virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) = 0; + virtual ze_result_t osRasGetConfig(zes_ras_config_t *config) = 0; + virtual ze_result_t osRasSetConfig(const zes_ras_config_t *config) = 0; static OsRas 
*create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId); static ze_result_t getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle); virtual ~OsRas() = default; diff --git a/level_zero/tools/source/sysman/ras/ras_imp.cpp b/level_zero/tools/source/sysman/ras/ras_imp.cpp index 0e977a7f0e..1b58182521 100644 --- a/level_zero/tools/source/sysman/ras/ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/ras_imp.cpp @@ -20,11 +20,11 @@ ze_result_t RasImp::rasGetProperties(zes_ras_properties_t *pProperties) { } ze_result_t RasImp::rasGetConfig(zes_ras_config_t *pConfig) { - return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + return pOsRas->osRasGetConfig(pConfig); } ze_result_t RasImp::rasSetConfig(const zes_ras_config_t *pConfig) { - return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + return pOsRas->osRasSetConfig(pConfig); } ze_result_t RasImp::rasGetState(zes_ras_state_t *pState, ze_bool_t clear) { diff --git a/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp b/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp index 59b00973ca..6d890294d4 100644 --- a/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp @@ -12,6 +12,8 @@ namespace L0 { class WddmRasImp : public OsRas { ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override; ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override; + ze_result_t osRasGetConfig(zes_ras_config_t *config) override; + ze_result_t osRasSetConfig(const zes_ras_config_t *config) override; }; ze_result_t OsRas::getSupportedRasErrorTypes(std::vector<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) { @@ -22,6 +24,14 @@ ze_result_t WddmRasImp::osRasGetProperties(zes_ras_properties_t &properties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } +ze_result_t WddmRasImp::osRasGetConfig(zes_ras_config_t *config) { + return 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ze_result_t WddmRasImp::osRasSetConfig(const zes_ras_config_t *config) { + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + ze_result_t WddmRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/level_zero/tools/test/black_box_tests/zello_sysman.cpp b/level_zero/tools/test/black_box_tests/zello_sysman.cpp index dd6003d6ca..ae832c6a97 100644 --- a/level_zero/tools/test/black_box_tests/zello_sysman.cpp +++ b/level_zero/tools/test/black_box_tests/zello_sysman.cpp @@ -357,6 +357,7 @@ void testSysmanRas(ze_device_handle_t &device) { std::cout << std::endl << " ---- Ras tests ---- " << std::endl; uint32_t count = 0; + bool iamroot = (geteuid() == 0); VALIDATECALL(zesDeviceEnumRasErrorSets(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve Ras Error Sets" << std::endl; @@ -397,6 +398,34 @@ void testSysmanRas(ze_device_handle_t &device) { std::cout << "Number of correctable display errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS] << std::endl; } } + if (iamroot) { + zes_ras_config_t getConfig = {}; + zes_ras_config_t setConfig = {}; + setConfig.totalThreshold = 14; + memset(setConfig.detailedThresholds.category, 0, sizeof(setConfig.detailedThresholds.category)); + VALIDATECALL(zesRasSetConfig(handle, &setConfig)); + if (verbose) { + std::cout << "Setting Total threshold = " << setConfig.totalThreshold << std::endl; + std::cout << "Setting Threshold for Engine Resets = " << setConfig.detailedThresholds.category[0] << std::endl; + std::cout << "Setting Threshold for Programming Errors = " << setConfig.detailedThresholds.category[1] << std::endl; + std::cout << "Setting Threshold for Driver Errors = " << setConfig.detailedThresholds.category[2] << std::endl; + std::cout << "Setting Threshold for Compute Errors = " << setConfig.detailedThresholds.category[3] << std::endl; + std::cout << "Setting Threshold 
for Non Compute Errors = " << setConfig.detailedThresholds.category[4] << std::endl; + std::cout << "Setting Threshold for Cache Errors = " << setConfig.detailedThresholds.category[5] << std::endl; + std::cout << "Setting Threshold for Display Errors = " << setConfig.detailedThresholds.category[6] << std::endl; + } + VALIDATECALL(zesRasGetConfig(handle, &getConfig)); + if (verbose) { + std::cout << "Getting Total threshold = " << getConfig.totalThreshold << std::endl; + std::cout << "Getting Threshold for Engine Resets = " << getConfig.detailedThresholds.category[0] << std::endl; + std::cout << "Getting Threshold for Programming Errors = " << getConfig.detailedThresholds.category[1] << std::endl; + std::cout << "Getting Threshold for Driver Errors = " << getConfig.detailedThresholds.category[2] << std::endl; + std::cout << "Getting Threshold for Compute Errors = " << getConfig.detailedThresholds.category[3] << std::endl; + std::cout << "Getting Threshold for Non Compute Errors = " << getConfig.detailedThresholds.category[4] << std::endl; + std::cout << "Getting Threshold for Cache Errors = " << getConfig.detailedThresholds.category[5] << std::endl; + std::cout << "Getting Threshold for Display Errors = " << getConfig.detailedThresholds.category[6] << std::endl; + } + } } } std::string getStandbyType(zes_standby_type_t standbyType) { @@ -722,6 +751,12 @@ void testSysmanListenEvents(ze_driver_handle_t driver, std::vectorgetFsAccess(); + if (euid == 0) { + EXPECT_EQ(true, pFsAccess.isRootUser()); + } else { + EXPECT_EQ(false, pFsAccess.isRootUser()); + } +} + } // namespace ult } // namespace L0 diff --git a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras.h b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras.h index a23c8c3191..e3b31334b1 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras.h +++ b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras.h @@ -1,5 +1,5 @@ /* - * 
Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,5 +15,19 @@ namespace L0 { namespace ult { + +class RasFsAccess : public FsAccess {}; +template <> +struct Mock<RasFsAccess> : public RasFsAccess { + MOCK_METHOD(bool, isRootUser, (), (override)); + bool userIsRoot() { + return true; + } + bool userIsNotRoot() { + return false; + } + Mock<RasFsAccess>() = default; +}; + } // namespace ult } // namespace L0 diff --git a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp index f7771e7895..8c72f29689 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp +++ b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp @@ -19,9 +19,16 @@ namespace ult { constexpr uint32_t mockHandleCount = 0; struct SysmanRasFixture : public SysmanDeviceFixture { protected: + std::unique_ptr<Mock<RasFsAccess>> pFsAccess; std::vector<ze_device_handle_t> deviceHandles; + FsAccess *pFsAccessOriginal = nullptr; void SetUp() override { SysmanDeviceFixture::SetUp(); + pFsAccess = std::make_unique<NiceMock<Mock<RasFsAccess>>>(); + pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; + pLinuxSysmanImp->pFsAccess = pFsAccess.get(); + ON_CALL(*pFsAccess.get(), isRootUser()) + .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock<RasFsAccess>::userIsRoot)); pSysmanDeviceImp->pRasHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); @@ -35,6 +42,7 @@ struct SysmanRasFixture : public SysmanDeviceFixture { } void TearDown() override { SysmanDeviceFixture::TearDown(); + pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; } std::vector<zes_ras_handle_t> get_ras_handles(uint32_t count) { @@ -108,5 +116,44 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhileCallingZesRasGetStateThenFailur delete pTestRasImp; } +TEST_F(SysmanRasFixture, 
GivenValidRasHandleWhenCallingzesRasGetConfigAfterzesRasSetConfigThenSuccessIsReturned) { + RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); + pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); + + auto handles = get_ras_handles(mockHandleCount + 1); + + for (auto handle : handles) { + zes_ras_config_t setConfig = {}; + zes_ras_config_t getConfig = {}; + setConfig.totalThreshold = 50; + memset(setConfig.detailedThresholds.category, 1, sizeof(setConfig.detailedThresholds.category)); + EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasSetConfig(handle, &setConfig)); + EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetConfig(handle, &getConfig)); + EXPECT_EQ(setConfig.totalThreshold, getConfig.totalThreshold); + int compare = std::memcmp(setConfig.detailedThresholds.category, getConfig.detailedThresholds.category, sizeof(setConfig.detailedThresholds.category)); + EXPECT_EQ(0, compare); + } + pSysmanDeviceImp->pRasHandleContext->handleList.pop_back(); + delete pTestRasImp; +} + +TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasSetConfigWithoutPermissionThenFailureIsReturned) { + ON_CALL(*pFsAccess.get(), isRootUser()) + .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock<RasFsAccess>::userIsNotRoot)); + RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); + pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); + + auto handles = get_ras_handles(mockHandleCount + 1); + + for (auto handle : handles) { + zes_ras_config_t setConfig = {}; + setConfig.totalThreshold = 50; + memset(setConfig.detailedThresholds.category, 1, sizeof(setConfig.detailedThresholds.category)); + EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, zesRasSetConfig(handle, &setConfig)); + } + pSysmanDeviceImp->pRasHandleContext->handleList.pop_back(); + delete pTestRasImp; +} + } // namespace ult } // namespace L0