diff --git a/level_zero/tools/source/sysman/linux/fs_access.h b/level_zero/tools/source/sysman/linux/fs_access.h index b390520dab..580311ea15 100644 --- a/level_zero/tools/source/sysman/linux/fs_access.h +++ b/level_zero/tools/source/sysman/linux/fs_access.h @@ -44,7 +44,7 @@ class FsAccess { virtual ze_result_t readSymLink(const std::string path, std::string &buf); virtual ze_result_t getRealPath(const std::string path, std::string &buf); - ze_result_t listDirectory(const std::string path, std::vector &list); + virtual ze_result_t listDirectory(const std::string path, std::vector &list); std::string getBaseName(const std::string path); std::string getDirName(const std::string path); virtual bool fileExists(const std::string file); diff --git a/level_zero/tools/source/sysman/ras/linux/CMakeLists.txt b/level_zero/tools/source/sysman/ras/linux/CMakeLists.txt index 061b4e99b2..eae0933b47 100755 --- a/level_zero/tools/source/sysman/ras/linux/CMakeLists.txt +++ b/level_zero/tools/source/sysman/ras/linux/CMakeLists.txt @@ -5,7 +5,7 @@ # set(L0_SRCS_TOOLS_SYSMAN_RAS_LINUX - ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/os_ras_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp.h ) diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp index a7eb0aa644..7620c632a2 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp @@ -11,32 +11,22 @@ namespace L0 { -const std::string LinuxRasImp::rasCounterDir("/var/lib/libze_intel_gpu/"); -const std::string LinuxRasImp::resetCounter("ras_reset_count"); -const std::string LinuxRasImp::resetCounterFile = rasCounterDir + resetCounter; - -void LinuxRasImp::setRasErrorType(zes_ras_error_type_t type) { - osRasErrorType = type; -} -bool LinuxRasImp::isRasSupported(void) { - if (false == pFsAccess->fileExists(rasCounterDir)) { - return false; - } - if (osRasErrorType == ZES_RAS_ERROR_TYPE_CORRECTABLE) { - return false; - } else { - return false; - } +ze_result_t OsRas::getSupportedRasErrorTypes(std::vector &errorType, OsSysman *pOsSysman) { + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } -LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman) { - LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); - pFsAccess = &pLinuxSysmanImp->getFsAccess(); - osRasErrorType = ZES_RAS_ERROR_TYPE_UNCORRECTABLE; +ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) { + properties.pNext = nullptr; + properties.type = osRasErrorType; + properties.onSubdevice = false; + properties.subdeviceId = 0; + return ZE_RESULT_SUCCESS; +} +LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type) : osRasErrorType(type) { } -OsRas *OsRas::create(OsSysman *pOsSysman) { - LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman); +OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type) { + LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman, type); return static_cast(pLinuxRasImp); } diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h index 9646a293b9..1878d1f8c2 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h @@ -18,21 +18,13 @@ namespace L0 { class FsAccess; class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass { public: - LinuxRasImp(OsSysman *pOsSysman); + ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override; + LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type); LinuxRasImp() = default; ~LinuxRasImp() override = default; - bool isRasSupported(void) override; - void setRasErrorType(zes_ras_error_type_t rasErrorType) override; protected: - FsAccess *pFsAccess = nullptr; - zes_ras_error_type_t osRasErrorType; - - private: - static const std::string rasCounterDir; - static const std::string resetCounter; - static const std::string resetCounterFile; - std::vector rasCounterDirFileList = {}; + zes_ras_error_type_t osRasErrorType = {}; }; } // namespace L0 diff --git a/level_zero/tools/source/sysman/ras/os_ras.h b/level_zero/tools/source/sysman/ras/os_ras.h index bd7df0cb60..6324fda103 100644 --- a/level_zero/tools/source/sysman/ras/os_ras.h +++ b/level_zero/tools/source/sysman/ras/os_ras.h @@ -9,14 +9,16 @@ #include +#include + namespace L0 { struct OsSysman; class OsRas { public: - virtual bool isRasSupported(void) = 0; - virtual void setRasErrorType(zes_ras_error_type_t type) = 0; - static OsRas *create(OsSysman *pOsSysman); + virtual ze_result_t osRasGetProperties(zes_ras_properties_t &properties) = 0; + static OsRas *create(OsSysman *pOsSysman, zes_ras_error_type_t type); + static ze_result_t getSupportedRasErrorTypes(std::vector &errorType, OsSysman *pOsSysman); virtual ~OsRas() = default; }; diff --git a/level_zero/tools/source/sysman/ras/ras.cpp b/level_zero/tools/source/sysman/ras/ras.cpp index 8eafca4090..20d0848660 100644 --- a/level_zero/tools/source/sysman/ras/ras.cpp +++ b/level_zero/tools/source/sysman/ras/ras.cpp @@ -18,15 +18,15 @@ RasHandleContext::~RasHandleContext() { } void RasHandleContext::createHandle(zes_ras_error_type_t type) { Ras *pRas = new RasImp(pOsSysman, type); - if (pRas->isRasErrorSupported == true) { - handleList.push_back(pRas); - } else { - delete pRas; - } + handleList.push_back(pRas); } + void RasHandleContext::init() { - createHandle(ZES_RAS_ERROR_TYPE_UNCORRECTABLE); - createHandle(ZES_RAS_ERROR_TYPE_CORRECTABLE); + std::vector errorType = {}; + OsRas::getSupportedRasErrorTypes(errorType, pOsSysman); + for (const auto &type : errorType) { + createHandle(type); + } } ze_result_t RasHandleContext::rasGet(uint32_t *pCount, zes_ras_handle_t *phRas) { diff --git a/level_zero/tools/source/sysman/ras/ras_imp.cpp b/level_zero/tools/source/sysman/ras/ras_imp.cpp index c8b0f09f18..b9b5892e47 100644 --- a/level_zero/tools/source/sysman/ras/ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/ras_imp.cpp @@ -15,9 +15,6 @@ namespace L0 { ze_result_t RasImp::rasGetProperties(zes_ras_properties_t *pProperties) { - rasProperties.type = this->rasErrorType; - rasProperties.onSubdevice = false; - rasProperties.subdeviceId = 0; *pProperties = rasProperties; return ZE_RESULT_SUCCESS; } @@ -35,13 +32,11 @@ ze_result_t RasImp::rasGetState(const zes_ras_state_t *pState) { } void RasImp::init() { - pOsRas->setRasErrorType(this->rasErrorType); - isRasErrorSupported = pOsRas->isRasSupported(); + pOsRas->osRasGetProperties(rasProperties); } RasImp::RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type) { - pOsRas = OsRas::create(pOsSysman); - this->rasErrorType = type; + pOsRas = OsRas::create(pOsSysman, type); init(); } diff --git a/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp b/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp index b037865781..3433a9b18d 100644 --- a/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp @@ -10,17 +10,18 @@ namespace L0 { class WddmRasImp : public OsRas { - bool isRasSupported(void) override; - void setRasErrorType(zes_ras_error_type_t type) override; + ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override; }; -bool WddmRasImp::isRasSupported(void) { - return false; +ze_result_t OsRas::getSupportedRasErrorTypes(std::vector &errorType, OsSysman *pOsSysman) { + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } -void WddmRasImp::setRasErrorType(zes_ras_error_type_t type) {} +ze_result_t WddmRasImp::osRasGetProperties(zes_ras_properties_t &properties) { + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; +} -OsRas *OsRas::create(OsSysman *pOsSysman) { +OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type) { WddmRasImp *pWddmRasImp = new WddmRasImp(); return static_cast(pWddmRasImp); } diff --git a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/CMakeLists.txt b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/CMakeLists.txt index 67d99cc6d3..ccac6aa6cf 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/CMakeLists.txt +++ b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/CMakeLists.txt @@ -8,7 +8,8 @@ if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt - ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_ras.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/mock_fs_ras.h + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/test_zes_ras.cpp + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mock_fs_ras.h ) endif() + diff --git a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras.h b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras.h index 1bb115be27..a23c8c3191 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras.h +++ b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras.h @@ -15,89 +15,5 @@ namespace L0 { namespace ult { - -constexpr uint64_t mockResetCount = 42; -constexpr uint64_t mockComputeErrorCount = 142; -constexpr uint64_t mockNonComputeErrorCount = 242; -constexpr uint64_t mockCacheErrorCount = 342; -constexpr uint64_t mockProgrammingErrorCount = 442; -constexpr uint64_t mockDisplayErrorCount = 542; -constexpr uint64_t mockDriverErrorCount = 642; - -class RasFsAccess : public FsAccess {}; -template <> -struct Mock : public RasFsAccess { - std::string mockrasCounterDir = "/var/lib/libze_intel_gpu/"; - std::string mockresetCounter = "ras_reset_count"; - std::string mockComputeErrorCounter = "ras_compute_error_count"; - std::string mockNonComputeErrorCounter = "ras_non_compute_error_count"; - std::string mockCacheErrorCounter = "ras_cache_error_count"; - std::string mockProgrammingErrorCounter = "ras_programming_error_count"; - std::string mockDisplayErrorCounter = "ras_display_error_count"; - std::string mockDriverErrorCounter = "ras_driver_error_count"; - MOCK_METHOD(ze_result_t, read, (const std::string file, uint64_t &val), (override)); - MOCK_METHOD(bool, fileExists, (const std::string file), (override)); - - ze_result_t setResetCounterFileName(const std::string file) { - mockresetCounter.assign(file); - return ZE_RESULT_SUCCESS; - } - ze_result_t setRasDirName(const std::string dirName) { - mockrasCounterDir.assign(dirName); - return ZE_RESULT_SUCCESS; - } - ze_result_t getVal(const std::string file, uint64_t &val) { - if (file.compare(mockrasCounterDir + mockresetCounter) == 0) { - val = mockResetCount; - return ZE_RESULT_SUCCESS; - } - if (file.compare(mockrasCounterDir + mockComputeErrorCounter) == 0) { - val = mockComputeErrorCount; - return ZE_RESULT_SUCCESS; - } - if (file.compare(mockrasCounterDir + mockNonComputeErrorCounter) == 0) { - val = mockNonComputeErrorCount; - return ZE_RESULT_SUCCESS; - } - if (file.compare(mockrasCounterDir + mockCacheErrorCounter) == 0) { - val = mockCacheErrorCount; - return ZE_RESULT_SUCCESS; - } - if (file.compare(mockrasCounterDir + mockProgrammingErrorCounter) == 0) { - val = mockProgrammingErrorCount; - return ZE_RESULT_SUCCESS; - } - if (file.compare(mockrasCounterDir + mockDisplayErrorCounter) == 0) { - val = mockDisplayErrorCount; - return ZE_RESULT_SUCCESS; - } - if (file.compare(mockrasCounterDir + mockDriverErrorCounter) == 0) { - val = mockDriverErrorCount; - return ZE_RESULT_SUCCESS; - } - return ZE_RESULT_ERROR_NOT_AVAILABLE; - } - - bool checkFileExists(const std::string file) { - if ((file.compare(mockrasCounterDir + mockresetCounter) == 0) || - (file.compare(mockrasCounterDir + mockComputeErrorCounter) == 0) || - (file.compare(mockrasCounterDir + mockNonComputeErrorCounter) == 0) || - (file.compare(mockrasCounterDir + mockCacheErrorCounter) == 0) || - (file.compare(mockrasCounterDir + mockProgrammingErrorCounter) == 0) || - (file.compare(mockrasCounterDir + mockDisplayErrorCounter) == 0) || - (file.compare(mockrasCounterDir + mockDriverErrorCounter) == 0) || - (file.compare(mockrasCounterDir) == 0)) { - return true; - } - return false; - } - Mock() = default; - ~Mock() override = default; -}; - -class PublicLinuxRasImp : public L0::LinuxRasImp { - public: - using LinuxRasImp::pFsAccess; -}; } // namespace ult } // namespace L0 diff --git a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp index 7cd6356df3..88f874de4d 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp +++ b/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/test_zes_ras.cpp @@ -16,26 +16,22 @@ using ::testing::NiceMock; namespace L0 { namespace ult { +constexpr uint32_t mockHandleCount = 0; struct SysmanRasFixture : public SysmanDeviceFixture { protected: - std::unique_ptr> ptestFsAccess; - FsAccess *pOriginalFsAccess = nullptr; - void SetUp() override { SysmanDeviceFixture::SetUp(); - ptestFsAccess = std::make_unique>>(); - pOriginalFsAccess = pLinuxSysmanImp->pFsAccess; - pLinuxSysmanImp->pFsAccess = ptestFsAccess.get(); - ON_CALL(*ptestFsAccess.get(), read(_, Matcher(_))) - .WillByDefault(::testing::Invoke(ptestFsAccess.get(), &Mock::getVal)); - ON_CALL(*ptestFsAccess.get(), fileExists(_)) - .WillByDefault(::testing::Invoke(ptestFsAccess.get(), &Mock::checkFileExists)); pSysmanDeviceImp->pRasHandleContext->handleList.clear(); pSysmanDeviceImp->pRasHandleContext->init(); } void TearDown() override { SysmanDeviceFixture::TearDown(); - pLinuxSysmanImp->pFsAccess = pOriginalFsAccess; + } + + std::vector get_ras_handles(uint32_t count) { + std::vector handles(count, nullptr); + EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); + return handles; } }; @@ -43,17 +39,50 @@ TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasZeroHandlesInRet uint32_t count = 0; ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &count, NULL); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(count, 0u); + EXPECT_EQ(count, mockHandleCount); uint32_t testcount = count + 1; result = zesDeviceEnumRasErrorSets(device->toHandle(), &testcount, NULL); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(testcount, 0u); + EXPECT_EQ(testcount, mockHandleCount); count = 0; std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); - EXPECT_EQ(count, 0u); + EXPECT_EQ(count, mockHandleCount); + + RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE); + pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); + EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); + EXPECT_EQ(count, mockHandleCount + 1); + + testcount = count; + + handles.resize(testcount); + EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &testcount, handles.data()), ZE_RESULT_SUCCESS); + EXPECT_EQ(testcount, mockHandleCount + 1); + EXPECT_NE(nullptr, handles.data()); + + pSysmanDeviceImp->pRasHandleContext->handleList.pop_back(); + delete pTestRasImp; +} + +TEST_F(SysmanRasFixture, GivenValidRasHandleWhenGettingRasPropertiesThenSuccessIsReturned) { + RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE); + pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); + + auto handles = get_ras_handles(mockHandleCount + 1); + + for (auto handle : handles) { + zes_ras_properties_t properties = {}; + EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(handle, &properties)); + EXPECT_EQ(properties.pNext, nullptr); + EXPECT_EQ(properties.onSubdevice, false); + EXPECT_EQ(properties.subdeviceId, 0u); + EXPECT_EQ(properties.type, ZES_RAS_ERROR_TYPE_CORRECTABLE); + } + pSysmanDeviceImp->pRasHandleContext->handleList.pop_back(); + delete pTestRasImp; } } // namespace ult