refactor: Merge Ras prelim files with non-prelim files

Related-To: NEO-9469

Signed-off-by: Bellekallu Rajkiran <bellekallu.rajkiran@intel.com>
This commit is contained in:
Bellekallu Rajkiran 2023-11-09 13:20:25 +00:00 committed by Compute-Runtime-Automation
parent 616ef4c9c7
commit 16725e2438
12 changed files with 695 additions and 1022 deletions

View File

@ -8,21 +8,9 @@ if(UNIX)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp.h
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_gt.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_hbm.cpp
)
if(NEO_ENABLE_i915_PRELIM_DETECTION)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_prelim.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_prelim.h
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_gt.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp_hbm.cpp
)
else()
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sysman_os_ras_imp.h
)
endif()
endif()

View File

@ -8,35 +8,46 @@
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/linux/system_info.h"
#include "level_zero/sysman/source/shared/linux/zes_os_sysman_imp.h"
#include <cstring>
#include "drm/intel_hwconfig_types.h"
namespace L0 {
namespace Sysman {
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) {
pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
pFsAccess = &pLinuxSysmanImp->getFsAccess();
// Reports whether the memory type returned by LinuxSysmanImp::getMemoryType()
// is one of the two HBM variants checked here (HBM2e or HBM2).
static bool isMemoryTypeHbm(LinuxSysmanImp *pLinuxSysmanImp) {
    const uint32_t reportedType = pLinuxSysmanImp->getMemoryType();
    const bool isHbm2e = (reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
    const bool isHbm2 = (reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2);
    return isHbm2e || isHbm2;
}
void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId) {}
void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId) {
ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
constexpr auto maxErrorTypes = 2;
LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, isSubDevice, subDeviceId);
if (errorType.size() < maxErrorTypes) {
auto pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, isSubDevice, subDeviceId);
}
}
}
ze_result_t LinuxRasImp::osRasGetConfig(zes_ras_config_t *config) {
config->totalThreshold = totalThreshold;
memcpy(config->detailedThresholds.category, categoryThreshold, maxRasErrorCategoryCount * sizeof(uint64_t));
memcpy_s(config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t), categoryThreshold, maxRasErrorCategoryCount * sizeof(uint64_t));
return ZE_RESULT_SUCCESS;
}
ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) {
if (pFsAccess->isRootUser() == true) {
totalThreshold = config->totalThreshold;
memcpy(categoryThreshold, config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t));
memcpy_s(categoryThreshold, maxRasErrorCategoryCount * sizeof(uint64_t), config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t));
return ZE_RESULT_SUCCESS;
}
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
@ -51,6 +62,42 @@ ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
return ZE_RESULT_SUCCESS;
}
// Adds the per-category error counters of every registered RAS source to `state`.
//
// state: accumulator; counters are added to the values already present in it.
// clear: forwarded to each source; when set, the caller must be the root user.
// Returns ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS when a clear is requested by a
// non-root user, ZE_RESULT_SUCCESS when at least one source reported successfully,
// and ZE_RESULT_ERROR_UNSUPPORTED_FEATURE otherwise.
ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
    // The root check is only evaluated when a clear was actually requested.
    const bool clearDenied = (clear == true) && (pFsAccess->isRootUser() == false);
    if (clearDenied) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
        return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
    }

    ze_result_t aggregateStatus = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    for (auto &source : rasSources) {
        zes_ras_state_t sourceState = {};
        // A source that fails is ignored; it contributes nothing to the totals.
        if (source->osRasGetState(sourceState, clear) == ZE_RESULT_SUCCESS) {
            for (uint32_t cat = 0; cat < maxRasErrorCategoryCount; cat++) {
                state.category[cat] += sourceState.category[cat];
            }
            aggregateStatus = ZE_RESULT_SUCCESS;
        }
    }
    return aggregateStatus;
}
// Populates rasSources: the GT source is always registered, the HBM source only
// when isMemoryTypeHbm() reports an HBM memory type.
void LinuxRasImp::initSources() {
    rasSources.emplace_back(std::make_unique<L0::Sysman::LinuxRasSourceGt>(pLinuxSysmanImp, osRasErrorType, isSubdevice, subdeviceId));
    if (isMemoryTypeHbm(pLinuxSysmanImp) == false) {
        return;
    }
    rasSources.emplace_back(std::make_unique<L0::Sysman::LinuxRasSourceHbm>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
}
// Stores the error type and sub-device identity, caches the Linux sysman object
// and its FsAccess, then registers the RAS sources via initSources().
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) {
    auto *linuxSysman = static_cast<LinuxSysmanImp *>(pOsSysman);
    pLinuxSysmanImp = linuxSysman;
    pFsAccess = &linuxSysman->getFsAccess();
    // Must run after the pointers above are set: initSources() reads pLinuxSysmanImp.
    initSources();
}
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) {
LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman, type, onSubdevice, subdeviceId);
return static_cast<OsRas *>(pLinuxRasImp);

View File

@ -9,12 +9,28 @@
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "level_zero/sysman/source/api/ras/sysman_os_ras.h"
#include "level_zero/sysman/source/device/sysman_device_imp.h"
#include "level_zero/sysman/source/shared/linux/pmu/sysman_pmu_imp.h"
#include "level_zero/sysman/source/shared/linux/sysman_fs_access.h"
#include "level_zero/sysman/source/sysman_const.h"
#include <map>
#include <memory>
#include <string>
#include <vector>
namespace L0 {
namespace Sysman {
class LinuxSysmanImp;
class FirmwareUtil;
// Abstract interface for one provider of RAS error counters. LinuxRasImp keeps a
// list of these (rasSources) and sums their results in LinuxRasImp::osRasGetState.
class LinuxRasSources : NEO::NonCopyableOrMovableClass {
public:
// Reports this source's counters through `state`; `clear` is the flag the caller
// passed to LinuxRasImp::osRasGetState, forwarded unchanged.
virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) = 0;
// Virtual so sources can be destroyed through a pointer to this interface.
virtual ~LinuxRasSources() = default;
};
class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
public:
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
@ -29,13 +45,70 @@ class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
zes_ras_error_type_t osRasErrorType = {};
FsAccess *pFsAccess = nullptr;
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
std::vector<std::unique_ptr<L0::Sysman::LinuxRasSources>> rasSources = {};
private:
void initSources();
bool isSubdevice = false;
uint32_t subdeviceId = 0;
uint64_t totalThreshold = 0;
uint64_t categoryThreshold[maxRasErrorCategoryCount] = {0};
};
// RAS error source for the GT. Only the declaration is visible here.
// NOTE(review): judging by the members, counters presumably come from PMU events
// plus sysfs boot-up values — confirm against sysman_os_ras_imp_gt.cpp.
class LinuxRasSourceGt : public LinuxRasSources {
public:
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
// Called by OsRas::getSupportedRasErrorTypes with the set it is filling in.
static void getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId);
LinuxRasSourceGt(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
LinuxRasSourceGt() = default;
// Declared here, defined out of line.
~LinuxRasSourceGt() override;
protected:
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
zes_ras_error_type_t osRasErrorType = {};
PmuInterface *pPmuInterface = nullptr;
FsAccess *pFsAccess = nullptr;
SysfsAccess *pSysfsAccess = nullptr;
private:
void initRasErrors(ze_bool_t clear);
ze_result_t getPmuConfig(
const std::string &eventDirectory,
const std::vector<std::string> &listOfEvents,
const std::string &errorFileToGetConfig,
std::string &pmuConfig);
ze_result_t getBootUpErrorCountFromSysfs(
std::string nameOfError,
const std::string &errorCounterDir,
uint64_t &errorVal);
void closeFds();
// -1 is the initial value of the group descriptor.
// NOTE(review): presumably means "no descriptor open" — confirm.
int64_t groupFd = -1;
std::vector<int64_t> memberFds = {};
uint64_t initialErrorCount[maxRasErrorCategoryCount] = {0};
std::map<zes_ras_error_cat_t, uint64_t> errorCategoryToEventCount;
uint64_t totalEventCount = 0;
bool isSubdevice = false;
uint32_t subdeviceId = 0;
};
// RAS error source for HBM memory. LinuxRasImp::initSources registers it only when
// the reported memory type is HBM2 or HBM2e.
class LinuxRasSourceHbm : public LinuxRasSources {
  public:
    ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
    // Called by OsRas::getSupportedRasErrorTypes with the set it is filling in.
    static void getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId);
    LinuxRasSourceHbm(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, uint32_t subdeviceId);
    LinuxRasSourceHbm() = default;
    // Defaulted instead of an empty user-provided body: the destructor performs no
    // cleanup of the pointers held below.
    ~LinuxRasSourceHbm() override = default;

  protected:
    LinuxSysmanImp *pLinuxSysmanImp = nullptr;
    zes_ras_error_type_t osRasErrorType = {};
    FirmwareUtil *pFwInterface = nullptr;
    SysmanDeviceImp *pDevice = nullptr;

  private:
    uint64_t errorBaseline = 0;
    uint32_t subdeviceId = 0;
};
} // namespace Sysman
} // namespace L0

View File

@ -7,7 +7,7 @@
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp_prelim.h"
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp.h"
#include "level_zero/sysman/source/shared/linux/zes_os_sysman_imp.h"
#include <cstring>

View File

@ -8,7 +8,7 @@
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp_prelim.h"
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp.h"
#include "level_zero/sysman/source/shared/firmware_util/sysman_firmware_util.h"
#include "level_zero/sysman/source/shared/linux/zes_os_sysman_imp.h"

View File

@ -1,107 +0,0 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp_prelim.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/linux/system_info.h"
#include "level_zero/sysman/source/shared/linux/zes_os_sysman_imp.h"
#include "drm/intel_hwconfig_types.h"
namespace L0 {
namespace Sysman {
// Reports whether the memory type returned by LinuxSysmanImp::getMemoryType()
// is one of the two HBM variants checked here (HBM2e or HBM2).
static bool isMemoryTypeHbm(LinuxSysmanImp *pLinuxSysmanImp) {
    const uint32_t reportedType = pLinuxSysmanImp->getMemoryType();
    const bool isHbm2e = (reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
    const bool isHbm2 = (reportedType == INTEL_HWCONFIG_MEMORY_TYPE_HBM2);
    return isHbm2e || isHbm2;
}
void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId) {
constexpr auto maxErrorTypes = 2;
LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, isSubDevice, subDeviceId);
if (errorType.size() < maxErrorTypes) {
auto pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
if (isMemoryTypeHbm(pLinuxSysmanImp) == true) {
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, isSubDevice, subDeviceId);
}
}
}
// Copies the stored total threshold and the per-category thresholds into `config`.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t LinuxRasImp::osRasGetConfig(zes_ras_config_t *config) {
    // Byte size of the per-category threshold array; used for both source and destination.
    constexpr auto thresholdBytes = maxRasErrorCategoryCount * sizeof(uint64_t);
    config->totalThreshold = totalThreshold;
    memcpy_s(config->detailedThresholds.category, thresholdBytes, categoryThreshold, thresholdBytes);
    return ZE_RESULT_SUCCESS;
}
// Stores the thresholds from `config` in this object. Only the root user may do so;
// any other caller gets ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS and nothing is changed.
ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) {
    if (pFsAccess->isRootUser() != true) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
        return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
    }

    // Byte size of the per-category threshold array; used for both source and destination.
    constexpr auto thresholdBytes = maxRasErrorCategoryCount * sizeof(uint64_t);
    totalThreshold = config->totalThreshold;
    memcpy_s(categoryThreshold, thresholdBytes, config->detailedThresholds.category, thresholdBytes);
    return ZE_RESULT_SUCCESS;
}
// Fills `properties` from the values captured at construction (error type and
// sub-device identity) and resets pNext. Always returns ZE_RESULT_SUCCESS.
ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
    properties.type = osRasErrorType;
    properties.subdeviceId = subdeviceId;
    properties.onSubdevice = isSubdevice;
    properties.pNext = nullptr;
    return ZE_RESULT_SUCCESS;
}
// Adds the per-category error counters of every registered RAS source to `state`.
//
// state: accumulator; counters are added to the values already present in it.
// clear: forwarded to each source; when set, the caller must be the root user.
// Returns ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS when a clear is requested by a
// non-root user, ZE_RESULT_SUCCESS when at least one source reported successfully,
// and ZE_RESULT_ERROR_UNSUPPORTED_FEATURE otherwise.
ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
    // The root check is only evaluated when a clear was actually requested.
    const bool clearDenied = (clear == true) && (pFsAccess->isRootUser() == false);
    if (clearDenied) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
        return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
    }

    ze_result_t aggregateStatus = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    for (auto &source : rasSources) {
        zes_ras_state_t sourceState = {};
        // A source that fails is ignored; it contributes nothing to the totals.
        if (source->osRasGetState(sourceState, clear) == ZE_RESULT_SUCCESS) {
            for (uint32_t cat = 0; cat < maxRasErrorCategoryCount; cat++) {
                state.category[cat] += sourceState.category[cat];
            }
            aggregateStatus = ZE_RESULT_SUCCESS;
        }
    }
    return aggregateStatus;
}
// Populates rasSources: the GT source is always registered, the HBM source only
// when isMemoryTypeHbm() reports an HBM memory type.
void LinuxRasImp::initSources() {
    rasSources.emplace_back(std::make_unique<L0::Sysman::LinuxRasSourceGt>(pLinuxSysmanImp, osRasErrorType, isSubdevice, subdeviceId));
    if (isMemoryTypeHbm(pLinuxSysmanImp) == false) {
        return;
    }
    rasSources.emplace_back(std::make_unique<L0::Sysman::LinuxRasSourceHbm>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
}
// Stores the error type and sub-device identity, caches the Linux sysman object
// and its FsAccess, then registers the RAS sources via initSources().
LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) {
    auto *linuxSysman = static_cast<LinuxSysmanImp *>(pOsSysman);
    pLinuxSysmanImp = linuxSysman;
    pFsAccess = &linuxSysman->getFsAccess();
    // Must run after the pointers above are set: initSources() reads pLinuxSysmanImp.
    initSources();
}
// Factory for the Linux RAS backend: heap-allocates a LinuxRasImp and returns it
// through the OsRas interface pointer.
OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) {
    // LinuxRasImp derives publicly from OsRas, so the pointer converts implicitly.
    return new LinuxRasImp(pOsSysman, type, onSubdevice, subdeviceId);
}
} // namespace Sysman
} // namespace L0

View File

@ -1,114 +0,0 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "level_zero/sysman/source/api/ras/sysman_os_ras.h"
#include "level_zero/sysman/source/device/sysman_device_imp.h"
#include "level_zero/sysman/source/shared/linux/pmu/sysman_pmu_imp.h"
#include "level_zero/sysman/source/shared/linux/sysman_fs_access.h"
#include "level_zero/sysman/source/sysman_const.h"
#include <map>
#include <memory>
#include <string>
#include <vector>
namespace L0 {
namespace Sysman {
class LinuxSysmanImp;
class FirmwareUtil;
// Abstract interface for one provider of RAS error counters. LinuxRasImp keeps a
// list of these (rasSources) and sums their results in LinuxRasImp::osRasGetState.
class LinuxRasSources : NEO::NonCopyableOrMovableClass {
public:
// Reports this source's counters through `state`; `clear` is the flag the caller
// passed to LinuxRasImp::osRasGetState, forwarded unchanged.
virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) = 0;
// Virtual so sources can be destroyed through a pointer to this interface.
virtual ~LinuxRasSources() = default;
};
// Linux implementation of OsRas for one error type on one (sub)device. It combines
// the counters of the LinuxRasSources it owns in rasSources.
class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass {
public:
// Fills type, onSubdevice and subdeviceId from the constructor arguments.
ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override;
// Sums the counters of all sources; a clear request requires the root user.
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
// Returns the thresholds stored by osRasSetConfig (zero-initialized by default).
ze_result_t osRasGetConfig(zes_ras_config_t *config) override;
// Stores the thresholds in this object; only the root user is allowed to.
ze_result_t osRasSetConfig(const zes_ras_config_t *config) override;
LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
LinuxRasImp() = default;
~LinuxRasImp() override = default;
protected:
zes_ras_error_type_t osRasErrorType = {};
FsAccess *pFsAccess = nullptr;
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
// Owned sources; filled by initSources() from the four-argument constructor.
std::vector<std::unique_ptr<L0::Sysman::LinuxRasSources>> rasSources = {};
private:
// Registers the GT source and, for HBM memory types, the HBM source.
void initSources();
bool isSubdevice = false;
uint32_t subdeviceId = 0;
uint64_t totalThreshold = 0;
uint64_t categoryThreshold[maxRasErrorCategoryCount] = {0};
};
// RAS error source for the GT. Only the declaration is visible here.
// NOTE(review): judging by the members, counters presumably come from PMU events
// plus sysfs boot-up values — confirm against sysman_os_ras_imp_gt.cpp.
class LinuxRasSourceGt : public LinuxRasSources {
public:
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
// Called by OsRas::getSupportedRasErrorTypes with the set it is filling in.
static void getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId);
LinuxRasSourceGt(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId);
LinuxRasSourceGt() = default;
// Declared here, defined out of line.
~LinuxRasSourceGt() override;
protected:
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
zes_ras_error_type_t osRasErrorType = {};
PmuInterface *pPmuInterface = nullptr;
FsAccess *pFsAccess = nullptr;
SysfsAccess *pSysfsAccess = nullptr;
private:
void initRasErrors(ze_bool_t clear);
ze_result_t getPmuConfig(
const std::string &eventDirectory,
const std::vector<std::string> &listOfEvents,
const std::string &errorFileToGetConfig,
std::string &pmuConfig);
ze_result_t getBootUpErrorCountFromSysfs(
std::string nameOfError,
const std::string &errorCounterDir,
uint64_t &errorVal);
void closeFds();
// -1 is the initial value of the group descriptor.
// NOTE(review): presumably means "no descriptor open" — confirm.
int64_t groupFd = -1;
std::vector<int64_t> memberFds = {};
uint64_t initialErrorCount[maxRasErrorCategoryCount] = {0};
std::map<zes_ras_error_cat_t, uint64_t> errorCategoryToEventCount;
uint64_t totalEventCount = 0;
bool isSubdevice = false;
uint32_t subdeviceId = 0;
};
// RAS error source for HBM memory. LinuxRasImp::initSources registers it only when
// the reported memory type is HBM2 or HBM2e.
class LinuxRasSourceHbm : public LinuxRasSources {
  public:
    ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
    // Called by OsRas::getSupportedRasErrorTypes with the set it is filling in.
    static void getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_bool_t isSubDevice, uint32_t subDeviceId);
    LinuxRasSourceHbm(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, uint32_t subdeviceId);
    LinuxRasSourceHbm() = default;
    // Defaulted instead of an empty user-provided body: the destructor performs no
    // cleanup of the pointers held below.
    ~LinuxRasSourceHbm() override = default;

  protected:
    LinuxSysmanImp *pLinuxSysmanImp = nullptr;
    zes_ras_error_type_t osRasErrorType = {};
    FirmwareUtil *pFwInterface = nullptr;
    SysmanDeviceImp *pDevice = nullptr;

  private:
    uint64_t errorBaseline = 0;
    uint32_t subdeviceId = 0;
};
} // namespace Sysman
} // namespace L0

View File

@ -4,26 +4,16 @@
# SPDX-License-Identifier: MIT
#
set(L0_TESTS_TOOLS_SYSMAN_RAS_LINUX
set(L0_TESTS_SYSMAN_RAS_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_ras.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_sysman_ras.h
)
if(NEO_ENABLE_i915_PRELIM_DETECTION)
list(APPEND L0_TESTS_TOOLS_SYSMAN_RAS_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_ras_prelim.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_fs_ras_prelim.h
)
else()
list(APPEND L0_TESTS_TOOLS_SYSMAN_RAS_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_ras.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_fs_ras.h
)
endif()
if(UNIX)
target_sources(${TARGET_NAME}
PRIVATE
${L0_TESTS_TOOLS_SYSMAN_RAS_LINUX}
${L0_TESTS_SYSMAN_RAS_LINUX}
)
endif()

View File

@ -1,30 +0,0 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp.h"
#include "level_zero/sysman/source/api/ras/sysman_ras.h"
#include "level_zero/sysman/source/api/ras/sysman_ras_imp.h"
#include "level_zero/sysman/source/shared/linux/sysman_fs_access.h"
namespace L0 {
namespace Sysman {
namespace ult {
// Test double for FsAccess whose root-user answer is controlled by a flag.
class MockRasFsAccess : public L0::Sysman::FsAccess {
  public:
    MockRasFsAccess() = default;

    // Returns whatever the test stored in mockRootUser.
    bool isRootUser() override { return mockRootUser; }

    // Defaults to true, i.e. the caller is treated as root unless a test changes it.
    bool mockRootUser = true;
};
} // namespace ult
} // namespace Sysman
} // namespace L0

View File

@ -10,7 +10,7 @@
#include "shared/source/os_interface/linux/ioctl_helper.h"
#include "shared/source/os_interface/linux/system_info.h"
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp_prelim.h"
#include "level_zero/sysman/source/api/ras/linux/sysman_os_ras_imp.h"
#include "level_zero/sysman/source/api/ras/sysman_ras.h"
#include "level_zero/sysman/source/api/ras/sysman_ras_imp.h"
#include "level_zero/sysman/source/shared/linux/pmu/sysman_pmu_imp.h"

View File

@ -5,36 +5,70 @@
*
*/
#include "shared/test/common/libult/linux/drm_mock.h"
#include "level_zero/sysman/source/sysman_const.h"
#include "level_zero/sysman/test/unit_tests/sources/linux/mock_sysman_fixture.h"
#include "level_zero/sysman/test/unit_tests/sources/ras/linux/mock_sysman_ras.h"
#include "mock_fs_ras.h"
class OsRas;
namespace L0 {
namespace Sysman {
namespace ult {
constexpr uint32_t mockHandleCount = 0;
constexpr uint32_t mockHandleCount = 2u;
constexpr uint32_t mockHandleCountForSubDevice = 4u;
struct SysmanRasFixture : public SysmanDeviceFixture {
protected:
std::unique_ptr<MockRasFsAccess> pFsAccess;
std::unique_ptr<MockRasSysfsAccess> pSysfsAccess;
std::unique_ptr<MockRasPmuInterfaceImp> pPmuInterface;
std::unique_ptr<MockRasFwInterface> pRasFwUtilInterface;
MockRasNeoDrm *pDrm = nullptr;
L0::Sysman::FsAccess *pFsAccessOriginal = nullptr;
L0::Sysman::SysfsAccess *pSysfsAccessOriginal = nullptr;
L0::Sysman::PmuInterface *pOriginalPmuInterface = nullptr;
L0::Sysman::FirmwareUtil *pFwUtilOriginal = nullptr;
L0::Sysman::SysmanDevice *device = nullptr;
void SetUp() override {
SysmanDeviceFixture::SetUp();
pFsAccess = std::make_unique<MockRasFsAccess>();
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
pFsAccess = std::make_unique<MockRasFsAccess>();
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
pFsAccess->mockRootUser = true;
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
pSysmanDeviceImp->pRasHandleContext->init(pOsSysman->getSubDeviceCount());
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
pSysfsAccess = std::make_unique<MockRasSysfsAccess>();
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
pRasFwUtilInterface = std::make_unique<MockRasFwInterface>();
pDrm = new MockRasNeoDrm(const_cast<NEO::RootDeviceEnvironment &>(pSysmanDeviceImp->getRootDeviceEnvironment()));
pDrm->setupIoctlHelper(pSysmanDeviceImp->getRootDeviceEnvironment().getHardwareInfo()->platform.eProductFamily);
pFwUtilOriginal = pLinuxSysmanImp->pFwUtilInterface;
pLinuxSysmanImp->pFwUtilInterface = pRasFwUtilInterface.get();
pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
pPmuInterface = std::make_unique<MockRasPmuInterfaceImp>(pLinuxSysmanImp);
pLinuxSysmanImp->pPmuInterface = pPmuInterface.get();
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
auto &osInterface = pSysmanDeviceImp->getRootDeviceEnvironment().osInterface;
osInterface->setDriverModel(std::unique_ptr<MockRasNeoDrm>(pDrm));
pSysmanDeviceImp->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = false;
device = pSysmanDevice;
}
void TearDown() override {
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
pLinuxSysmanImp->pFwUtilInterface = pFwUtilOriginal;
SysmanDeviceFixture::TearDown();
}
std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
std::vector<zes_ras_handle_t> handles(count, nullptr);
EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
@ -42,16 +76,7 @@ struct SysmanRasFixture : public SysmanDeviceFixture {
}
};
TEST_F(SysmanRasFixture, GivenValidRasContextWhenRetrievingRasHandlesThenSuccessIsReturned) {
uint32_t count = 0;
L0::Sysman::RasHandleContext *pRasHandleContext = new L0::Sysman::RasHandleContext(pSysmanDeviceImp->pOsSysman);
ze_result_t result = pRasHandleContext->rasGet(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(count, mockHandleCount);
delete pRasHandleContext;
}
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRasErrorSetsThenCorrectCountIsReported) {
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesInThenSuccessReturn) {
uint32_t count = 0;
ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &count, NULL);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@ -61,37 +86,15 @@ TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRasErrorSetsThenCorrectCountI
result = zesDeviceEnumRasErrorSets(device->toHandle(), &testcount, NULL);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(testcount, mockHandleCount);
count = 0;
std::vector<zes_ras_handle_t> handles(count, nullptr);
EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
EXPECT_EQ(count, mockHandleCount);
bool isSubDevice = false;
uint32_t subDeviceId = 0u;
L0::Sysman::RasImp *pTestRasImp = new L0::Sysman::RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, isSubDevice, subDeviceId);
pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp);
EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS);
EXPECT_EQ(count, mockHandleCount + 1);
testcount = count;
handles.resize(testcount);
EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &testcount, handles.data()), ZE_RESULT_SUCCESS);
EXPECT_EQ(testcount, mockHandleCount + 1);
EXPECT_NE(nullptr, handles.data());
pSysmanDeviceImp->pRasHandleContext->handleList.pop_back();
delete pTestRasImp;
auto handles = getRasHandles(mockHandleCount);
for (auto handle : handles) {
EXPECT_NE(handle, nullptr);
}
}
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenGettingRasPropertiesThenSuccessIsReturned) {
bool isSubDevice = false;
uint32_t subDeviceId = 0u;
L0::Sysman::RasImp *pTestRasImp = new L0::Sysman::RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, isSubDevice, subDeviceId);
pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp);
auto handles = getRasHandles(mockHandleCount + 1);
auto handles = getRasHandles(mockHandleCount);
bool correctable = true;
for (auto handle : handles) {
zes_ras_properties_t properties = {};
@ -99,59 +102,329 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenGettingRasPropertiesThenSuccessI
EXPECT_EQ(properties.pNext, nullptr);
EXPECT_EQ(properties.onSubdevice, false);
EXPECT_EQ(properties.subdeviceId, 0u);
EXPECT_EQ(properties.type, ZES_RAS_ERROR_TYPE_CORRECTABLE);
if (correctable == true) {
EXPECT_EQ(properties.type, ZES_RAS_ERROR_TYPE_CORRECTABLE);
correctable = false;
} else {
EXPECT_EQ(properties.type, ZES_RAS_ERROR_TYPE_UNCORRECTABLE);
}
}
pSysmanDeviceImp->pRasHandleContext->handleList.pop_back();
delete pTestRasImp;
}
TEST_F(SysmanRasFixture, GivenValidRasHandleWhileCallingZesRasGetStateThenFailureIsReturned) {
bool isSubDevice = false;
uint32_t subDeviceId = 0u;
L0::Sysman::RasImp *pTestRasImp = new L0::Sysman::RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, isSubDevice, subDeviceId);
pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp);
TEST_F(SysmanRasFixture, GivenValidOsSysmanPointerWhenRetrievingSupportedRasErrorsForGtAndIfReadSymLinkFailsThenNoSupportedErrorTypeIsReturned) {
std::set<zes_ras_error_type_t> errorType = {};
auto handles = getRasHandles(mockHandleCount + 1);
pSysfsAccess->mockReadSymLinkResult = true;
L0::Sysman::LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, false, 0);
EXPECT_EQ(errorType.size(), 0u);
}
// With the mock FsAccess told to fail directory reads, the GT source must not
// report any supported error type.
TEST_F(SysmanRasFixture, GivenValidOsSysmanPointerWhenRetrievingSupportedRasErrorsForGtAndIfListDirectoryFailsThenNoSupportedErrorTypeIsReturned) {
    pFsAccess->mockReadDirectoryFailure = true;

    std::set<zes_ras_error_type_t> supportedTypes;
    L0::Sysman::LinuxRasSourceGt::getSupportedRasErrorTypes(supportedTypes, pOsSysman, false, 0);

    EXPECT_EQ(supportedTypes.size(), 0u);
}
// Without a firmware utility interface, the HBM source must not report any
// supported error type.
TEST_F(SysmanRasFixture, GivenValidOsSysmanPointerWhenRetrievingSupportedRasErrorsForHbmAndFwInterfaceIsAbsentThenNoSupportedErrorTypeIsReturned) {
    pLinuxSysmanImp->pFwUtilInterface = nullptr;

    std::set<zes_ras_error_type_t> supportedTypes;
    L0::Sysman::LinuxRasSourceHbm::getSupportedRasErrorTypes(supportedTypes, pOsSysman, false, 0);

    EXPECT_EQ(supportedTypes.size(), 0u);
}
// When the directory listing has no RAS events and no firmware interface exists,
// re-initializing the RAS context must yield zero handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAreAbsentThenZeroHandlesAreCreated) {
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;
    pLinuxSysmanImp->pFwUtilInterface = nullptr;

    // Rebuild the handle list so it reflects the mock state set above.
    pSysmanDeviceImp->pRasHandleContext->handleList.clear();
    pSysmanDeviceImp->pRasHandleContext->init(pOsSysman->getSubDeviceCount());

    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, 0u);

    // Asking for more handles than exist must still report the real count.
    uint32_t requestedCount = handleCount + 1;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &requestedCount, nullptr));
    EXPECT_EQ(requestedCount, 0u);
}
// With a non-HBM memory type (LPDDR4) and no RAS events in the directory listing,
// re-initializing the RAS context must yield zero handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAndHbmAreAbsentThenZeroHandlesAreCreated) {
    pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_LPDDR4);
    pRasFwUtilInterface->mockMemorySuccess = true;
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;

    // Rebuild the handle list so it reflects the mock state set above.
    pSysmanDeviceImp->pRasHandleContext->handleList.clear();
    pSysmanDeviceImp->pRasHandleContext->init(pOsSysman->getSubDeviceCount());

    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, 0u);
}
// With an HBM2 memory type and the firmware mock reporting success, enumeration
// must succeed and report mockHandleCount handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfHbmAndFwInterfaceArePresentThenSuccessIsReturned) {
    pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2);
    pRasFwUtilInterface->mockMemorySuccess = true;

    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, mockHandleCount);
}
// No RAS events, a queued `true` return value for the DRM system-info query, and no
// firmware interface: enumeration must succeed and report zero handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAreAbsentAndQuerySystemInfoSucceedsButMemSysInfoIsNullThenZeroHandlesAreCreated) {
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;
    pDrm->mockQuerySystemInfoReturnValue.push_back(true);
    pLinuxSysmanImp->pFwUtilInterface = nullptr;

    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, 0u);
}
// Reads the RAS state of every enumerated handle without clearing (clear == 0) and
// checks each category against the expected mock totals.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtThenSuccessIsReturned) {
pPmuInterface->mockPmuReadCorrectable = true;
// The firmware mock is switched off here.
// NOTE(review): presumably so only the GT source contributes — confirm in MockRasFwInterface.
pRasFwUtilInterface->mockMemorySuccess = false;
auto handles = getRasHandles(mockHandleCount);
// The first handle is checked against the correctable expectations; every later
// handle against the uncorrectable ones.
bool correctable = true;
for (auto handle : handles) {
zes_ras_state_t state = {};
EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(handle, 0, &state));
if (correctable == true) {
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], 0u);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], correctableGrfErrorCount + correctableEuErrorCount + initialCorrectableComputeErrors);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
correctable = false;
} else {
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], fatalTlb + initialUncorrectableCacheErrors);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCount + initialEngineReset);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttention + initialProgrammingErrors);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalEuErrorCount + initialUncorrectableComputeErrors);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors);
}
}
}
// Two passes over the enumerated handles with mockPmuReadAfterClear set:
//  1. clear = 0: the first handle must match the correctable expectations and
//     every later handle the uncorrectable ones;
//  2. clear = 1: every category of every handle must read back as zero.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingRasGetStateForGtAfterClearThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadAfterClear = true;
    const auto rasHandles = getRasHandles(mockHandleCount);

    const ze_bool_t keepCounters = 0;
    bool isFirstHandle = true;
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, keepCounters, &rasState));
        if (!isFirstHandle) {
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCount + initialEngineReset);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttention + initialProgrammingErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalEuErrorCount + initialUncorrectableComputeErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], fatalTlb + initialUncorrectableCacheErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            continue;
        }
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], correctableGrfErrorCount + correctableEuErrorCount + initialCorrectableComputeErrors);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        isFirstHandle = false;
    }

    // Second pass: the expectations are the same (all zero) for every handle,
    // so no correctable/uncorrectable distinction is needed.
    const ze_bool_t clearCounters = 1;
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, clearCounters, &rasState));
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
    }
}
// With mockPmuReadResult and the firmware mock's mockMemorySuccess both set,
// the non-compute category must equal hbmCorrectableErrorCount for the first
// handle and hbmUncorrectableErrorCount for every later handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForHbmThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadResult = true;
    pRasFwUtilInterface->mockMemorySuccess = true;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (size_t index = 0; index < rasHandles.size(); ++index) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandles[index], 0, &rasState));
        if (index == 0) {
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmCorrectableErrorCount);
        } else {
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmUncorrectableErrorCount);
        }
    }
}
// Reads the state of every handle twice: first with clear = 0, expecting the
// HBM correctable count on the first handle and the uncorrectable count on the
// rest, then with clear = 1, expecting a zero non-compute count on all handles.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForHbmWithClearThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadResult = true;
    pRasFwUtilInterface->mockMemorySuccess = true;
    const auto rasHandles = getRasHandles(mockHandleCount);

    const ze_bool_t keepCounters = 0;
    bool isFirstHandle = true;
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, keepCounters, &rasState));
        if (isFirstHandle) {
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmCorrectableErrorCount);
            isFirstHandle = false;
        } else {
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmUncorrectableErrorCount);
        }
    }

    // After a clearing read the expectation is zero regardless of handle type.
    const ze_bool_t clearCounters = 1;
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, clearCounters, &rasState));
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
    }
}
// With pFsAccess->mockRootUser set, a clearing read (clear = 1) is expected to
// be rejected with ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS for every handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateWithClearOptionWithoutPermissionsThenFailureIsReturned) {
    pFsAccess->mockRootUser = true;
    const auto rasHandles = getRasHandles(mockHandleCount);
    const ze_bool_t clearCounters = 1;
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        const ze_result_t status = zesRasGetState(rasHandle, clearCounters, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, status);
    }
}
// With pFsAccess->mockReadFileFailure set, zesRasGetState is expected to return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for every enumerated handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndUnableToRetrieveConfigValuesAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pFsAccess->mockReadFileFailure = true;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        const ze_result_t status = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);
    }
}
// With pPmuInterface->mockPerfEvent set, zesRasGetState is expected to return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for every enumerated handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndPerfEventOpenFailsAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pPmuInterface->mockPerfEvent = true;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        const ze_result_t status = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);
    }
}
// With pPmuInterface->mockPmuReadResult set, a non-clearing read is expected to
// return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for every enumerated handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndPmuReadFailsAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pPmuInterface->mockPmuReadResult = true;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        const ze_result_t status = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);
    }
}
// Same setup as the non-clearing PMU-read failure case, but the state is
// requested with clear = 1; ZE_RESULT_ERROR_UNSUPPORTED_FEATURE is expected
// for every enumerated handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceWithClearAndPmuReadFailsAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pPmuInterface->mockPmuReadResult = true;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        const ze_result_t status = zesRasGetState(rasHandle, 1, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);
    }
}
// With pFsAccess->mockReadVal set, zesRasGetState is expected to return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for every enumerated handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesGetRasStateForGtInterfaceAndPMUGetEventTypeFailsAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pFsAccess->mockReadVal = true;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        const ze_result_t status = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);
    }
}
// With pFsAccess->mockReadVal set and the firmware utility interface removed
// from pLinuxSysmanImp, zesRasGetState is expected to return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for every enumerated handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesGetRasStateAndFirmwareInterfaceIsAbsentOtherInterfacesAreAlsoAbsentThenFailureIsReturned) {
    pFsAccess->mockReadVal = true;
    pLinuxSysmanImp->pFwUtilInterface = nullptr;
    auto handles = getRasHandles(mockHandleCount);
    for (auto handle : handles) {
        zes_ras_state_t state = {};
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesRasGetState(handle, 0, &state));
    }
    // No manual cleanup here: this test never pushes an extra handle into
    // handleList and declares no pTestRasImp, so there is nothing to pop or delete.
}
// Round-trips a RAS configuration: for every handle enumerated by the fixture,
// a config is written with zesRasSetConfig, read back with zesRasGetConfig, and
// both the total threshold and the detailed per-category thresholds must match.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetConfigAfterzesRasSetConfigThenSuccessIsReturned) {
    // Use the handles the fixture already exposes, like the neighbouring tests;
    // `handles` is declared exactly once.
    auto handles = getRasHandles(mockHandleCount);
    for (auto handle : handles) {
        zes_ras_config_t setConfig = {};
        zes_ras_config_t getConfig = {};
        setConfig.totalThreshold = 50;
        // Fill every byte of the detailed thresholds with 0x01 so the values
        // read back differ from getConfig's zero-initialised contents.
        memset(setConfig.detailedThresholds.category, 1, maxRasErrorCategoryCount * sizeof(uint64_t));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasSetConfig(handle, &setConfig));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetConfig(handle, &getConfig));
        EXPECT_EQ(setConfig.totalThreshold, getConfig.totalThreshold);
        int compare = std::memcmp(setConfig.detailedThresholds.category, getConfig.detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t));
        EXPECT_EQ(0, compare);
    }
}
// Expects zesRasSetConfig to return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS for
// each handle.
// NOTE(review): this block looks like two revisions of the same test interleaved
// (diff residue) and cannot compile as written:
//   - mockRootUser is assigned false and later true,
//   - `handles` is declared twice in the same scope,
//   - a diff hunk header line sits inside the loop body and may hide statements,
//   - a manually pushed pTestRasImp is combined with releaseRasHandles().
// Reconcile against the real file before relying on it.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasSetConfigWithoutPermissionThenFailureIsReturned) {
pFsAccess->mockRootUser = false;
bool isSubDevice = false;
uint32_t subDeviceId = 0u;
L0::Sysman::RasImp *pTestRasImp = new L0::Sysman::RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, isSubDevice, subDeviceId);
pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp);
auto handles = getRasHandles(mockHandleCount + 1);
pFsAccess->mockRootUser = true;
// NOTE(review): second declaration of `handles` — presumably the other revision's line.
auto handles = getRasHandles(mockHandleCount);
for (auto handle : handles) {
zes_ras_config_t setConfig = {};
@ -159,14 +432,220 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasSetConfigWithoutPer
// Fills every byte of the detailed thresholds with 0x01 before the set attempt.
memset(setConfig.detailedThresholds.category, 1, maxRasErrorCategoryCount * sizeof(uint64_t));
EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, zesRasSetConfig(handle, &setConfig));
}
pSysmanDeviceImp->pRasHandleContext->releaseRasHandles();
}
// Destroying a RasImp whose pOsRas has been reset to nullptr must not throw.
TEST_F(SysmanRasFixture, GivenValidInstanceWhenOsRasImplementationIsNullThenDestructorIsCalledWithoutException) {
    L0::Sysman::RasImp *pTestRasImp = new L0::Sysman::RasImp();
    pTestRasImp->pOsRas = nullptr;
    EXPECT_NO_THROW(delete pTestRasImp;); // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks)
}
// With pSysfsAccess->mockReadSymLinkStatus forced to ZE_RESULT_ERROR_NOT_AVAILABLE,
// zesRasGetState is expected to return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for
// every enumerated handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndReadSymLinkFailsDuringInitAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pSysfsAccess->mockReadSymLinkStatus = ZE_RESULT_ERROR_NOT_AVAILABLE;
    auto handles = getRasHandles(mockHandleCount);
    for (auto handle : handles) {
        zes_ras_state_t state = {};
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesRasGetState(handle, 0, &state));
    }
}
// With pSysfsAccess->mockReadSymLinkStatus forced to ZE_RESULT_ERROR_NOT_AVAILABLE,
// zesRasGetState is expected to return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for
// every enumerated handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndReadSymLinkFailsInsideGetEventOpenAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pSysfsAccess->mockReadSymLinkStatus = ZE_RESULT_ERROR_NOT_AVAILABLE;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        const ze_result_t status = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);
    }
}
// With pFsAccess->mockListDirectoryStatus forced to ZE_RESULT_ERROR_NOT_AVAILABLE,
// zesRasGetState is expected to return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for
// every enumerated handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndListDirectoryFailsDuringInitAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pFsAccess->mockListDirectoryStatus = ZE_RESULT_ERROR_NOT_AVAILABLE;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        const ze_result_t status = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);
    }
}
// Enumerates the RAS error sets, destroys and clears the handle list by hand,
// re-runs init() on the handle context and checks that enumeration reports
// mockHandleCount again.
TEST_F(SysmanRasFixture, GivenValidRasHandleAndHandleCountZeroWhenCallingReInitThenValidCountIsReturnedAndVerifyzesDeviceEnumRasErrorSetsSucceeds) {
    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, mockHandleCount);

    // Release the existing handles before rebuilding the list.
    auto &rasContext = *pSysmanDeviceImp->pRasHandleContext;
    for (const auto &pRas : rasContext.handleList) {
        delete pRas;
    }
    rasContext.handleList.clear();
    rasContext.init(pOsSysman->getSubDeviceCount());

    handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, mockHandleCount);
}
// Test fixture for the multi-device RAS tests. SetUp() swaps the filesystem,
// sysfs, firmware-utility and PMU interfaces of pLinuxSysmanImp for mocks and
// installs a MockRasNeoDrm as the driver model; TearDown() restores the saved
// interface pointers.
struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
protected:
// Mock interfaces owned by the fixture; raw pointers to them are installed into pLinuxSysmanImp.
std::unique_ptr<MockRasFsAccess> pFsAccess;
std::unique_ptr<MockRasSysfsAccess> pSysfsAccess;
std::unique_ptr<MockRasPmuInterfaceImp> pPmuInterface;
std::unique_ptr<MockRasFwInterface> pRasFwUtilInterface;
// Allocated in SetUp(); ownership is handed to the OS interface via setDriverModel().
MockRasNeoDrm *pDrm = nullptr;
// Original interface pointers saved in SetUp() and restored in TearDown().
L0::Sysman::FsAccess *pFsAccessOriginal = nullptr;
L0::Sysman::SysfsAccess *pSysfsAccessOriginal = nullptr;
L0::Sysman::PmuInterface *pOriginalPmuInterface = nullptr;
L0::Sysman::FirmwareUtil *pFwUtilOriginal = nullptr;
// NOTE(review): not referenced anywhere in the visible fixture code.
Drm *pOriginalDrm = nullptr;
// Device under test; set to pSysmanDevice in SetUp().
L0::Sysman::SysmanDevice *device = nullptr;
// Installs the mocks, sets the mock DRM memory type to HBM2e, enables the
// multi-device directory mock and marks the device as non-integrated.
void SetUp() override {
SysmanMultiDeviceFixture::SetUp();
pDrm = new MockRasNeoDrm(const_cast<NEO::RootDeviceEnvironment &>(pSysmanDeviceImp->getRootDeviceEnvironment()));
pDrm->setupIoctlHelper(pSysmanDeviceImp->getRootDeviceEnvironment().getHardwareInfo()->platform.eProductFamily);
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
pFsAccess = std::make_unique<MockRasFsAccess>();
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
pSysfsAccess = std::make_unique<MockRasSysfsAccess>();
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
pFwUtilOriginal = pLinuxSysmanImp->pFwUtilInterface;
pRasFwUtilInterface = std::make_unique<MockRasFwInterface>();
pLinuxSysmanImp->pFwUtilInterface = pRasFwUtilInterface.get();
pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
pPmuInterface = std::make_unique<MockRasPmuInterfaceImp>(pLinuxSysmanImp);
pLinuxSysmanImp->pPmuInterface = pPmuInterface.get();
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
auto &osInterface = pSysmanDeviceImp->getRootDeviceEnvironment().osInterface;
// The raw pDrm pointer is wrapped in a unique_ptr here, so the fixture must not delete it.
osInterface->setDriverModel(std::unique_ptr<MockRasNeoDrm>(pDrm));
device = pSysmanDevice;
pFsAccess->mockReadDirectoryForMultiDevice = true;
pSysmanDeviceImp->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = false;
}
// Puts the original interface pointers back before the base fixture tears down.
void TearDown() override {
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
pLinuxSysmanImp->pFwUtilInterface = pFwUtilOriginal;
SysmanMultiDeviceFixture::TearDown();
}
// Enumerates up to `count` RAS handles through zesDeviceEnumRasErrorSets,
// expecting success. The returned vector always has the requested size;
// entries the driver does not fill stay nullptr.
std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
std::vector<zes_ras_handle_t> handles(count, nullptr);
EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
return handles;
}
};
// Builds a stand-alone RasHandleContext on the fixture's OsSysman and checks
// that rasGet() succeeds and reports at least one handle.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidSysmanHandleWithMultiDeviceWhenRetrievingRasHandlesThenSuccessIsReturned) {
    // Owned by a unique_ptr so the context is released on every exit path.
    auto pRasHandleContext = std::make_unique<L0::Sysman::RasHandleContext>(pSysmanDeviceImp->pOsSysman);
    uint32_t count = 0;
    ze_result_t result = pRasHandleContext->rasGet(&count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    // EXPECT_GT prints both operands on failure, unlike comparing a bool to true.
    EXPECT_GT(count, 0u);
}
// Enumeration must report mockHandleCountForSubDevice handles, both when the
// incoming count is zero and when it is larger than the available number, and
// every handle returned must be non-null.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesThenSuccessIsReturned) {
    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, mockHandleCountForSubDevice);

    // Ask for one more than available; the driver is expected to write back the real count.
    uint32_t oversizedCount = handleCount + 1;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr));
    EXPECT_EQ(oversizedCount, mockHandleCountForSubDevice);

    const auto rasHandles = getRasHandles(mockHandleCountForSubDevice);
    for (const auto &rasHandle : rasHandles) {
        EXPECT_NE(rasHandle, nullptr);
    }
}
// Constructs a PublicLinuxRasImp for sub-device 0 with the correctable error
// type and checks that osRasGetProperties() reports the same sub-device id,
// sub-device flag and error type.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidHandleWhenGettingRasPropertiesThenSuccessIsReturned) {
    zes_ras_properties_t properties = {};
    bool isSubDevice = true;
    uint32_t subDeviceId = 0u;
    // Owned by a unique_ptr so no manual delete is needed at the end of the test.
    auto pLinuxRasImp = std::make_unique<PublicLinuxRasImp>(pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, isSubDevice, subDeviceId);
    EXPECT_EQ(ZE_RESULT_SUCCESS, pLinuxRasImp->osRasGetProperties(properties));
    EXPECT_EQ(properties.subdeviceId, subDeviceId);
    EXPECT_EQ(properties.onSubdevice, isSubDevice);
    EXPECT_EQ(properties.type, ZES_RAS_ERROR_TYPE_CORRECTABLE);
}
// Reads the state of each of the multi-device handles with mockPmuReadTile and
// isMultiTileArch set, and checks the per-category counters by handle position:
// 0 = correctable / subdevice 0, 1 = uncorrectable / subdevice 0,
// 2 = correctable / subdevice 1, 3 = uncorrectable / subdevice 1.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadTile = true;
    pSysfsAccess->isMultiTileArch = true;
    const auto rasHandles = getRasHandles(mockHandleCountForSubDevice);
    uint32_t position = 0u;
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, 0, &rasState));
        switch (position) {
        case 0u:
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], correctablel3Bank + initialCorrectableCacheErrorTile0); // No. of correctable error type for subdevice 0
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], correctableGrfErrorCount + correctableEuErrorCount + initialCorrectableComputeErrorsTile0);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], correctableGscSramEcc + initialCorrectableNonComputeErrorsTile0);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
            break;
        case 1u:
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], fatalTlb + initialUncorrectableCacheErrorsTile0); // No. of uncorrectable error type for subdevice 0
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCount + initialEngineResetTile0);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttention + initialProgrammingErrorsTile0);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalSubslice + fatalEuErrorCount + initialUncorrectableComputeErrorsTile0);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + nonFatalGscAonParity + nonFataGscSelfmBist + initialUncorrectableNonComputeErrorsTile0);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrorsTile0);
            break;
        case 2u:
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u); // No. of correctable error type for subdevice 1
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], correctableSubsliceTile1 + correctableGucErrorCountTile1 + correctableSamplerErrorCountTile1 + initialCorrectableComputeErrorsTile1);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
            break;
        case 3u:
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], fatalL3BankTile1 + fatalIdiParityErrorCountTile1 + initialUncorrectableCacheErrorsTile1); // No. of uncorrectable error type for subdevice 1
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCountTile1 + initialEngineResetTile1);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttentionTile1 + initialProgrammingErrorsTile1);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalGucErrorCountTile1 + initialUncorrectableComputeErrorsTile1);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalMdfiWestCountTile1 + socFatalPunitTile1 + initialUncorrectableNonComputeErrorsTile1);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverEngineOther + initialUncorrectableDriverErrorsTile1);
            break;
        default:
            // Handles past the fourth carry no expectations.
            break;
        }
        ++position;
    }
}
// With mockPmuReadResult and mockMemorySuccess set, the non-compute category of
// each multi-device handle must match the HBM counts by handle position:
// even positions (0, 2) correctable, odd positions (1, 3) uncorrectable.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGetStateForHbmThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadResult = true;
    pRasFwUtilInterface->mockMemorySuccess = true;
    const auto rasHandles = getRasHandles(mockHandleCountForSubDevice);
    uint32_t position = 0u;
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, 0, &rasState));
        switch (position) {
        case 0u:
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmCorrectableErrorCount); // No. of correctable error type for subdevice 0
            break;
        case 1u:
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmUncorrectableErrorCount); // No. of uncorrectable error type for subdevice 0
            break;
        case 2u:
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmCorrectableErrorCount); // No. of correctable error type for subdevice 1
            break;
        case 3u:
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmUncorrectableErrorCount); // No. of uncorrectable error type for subdevice 1
            break;
        default:
            // Handles past the fourth carry no expectations.
            break;
        }
        ++position;
    }
}
} // namespace ult

View File

@ -1,653 +0,0 @@
/*
* Copyright (C) 2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/libult/linux/drm_mock.h"
#include "level_zero/sysman/source/sysman_const.h"
#include "level_zero/sysman/test/unit_tests/sources/linux/mock_sysman_fixture.h"
#include "level_zero/sysman/test/unit_tests/sources/ras/linux/mock_fs_ras_prelim.h"
class OsRas;
namespace L0 {
namespace Sysman {
namespace ult {
// Number of RAS handles the SysmanRasFixture tests expect
// zesDeviceEnumRasErrorSets to report.
constexpr uint32_t mockHandleCount = 2u;
// Handle count used for the multi-sub-device case.
// NOTE(review): presumably two handles (correctable + uncorrectable) per sub-device — confirm.
constexpr uint32_t mockHandleCountForSubDevice = 4u;
// Test fixture for the single-device RAS tests. SetUp() swaps the filesystem,
// sysfs, firmware-utility and PMU interfaces of pLinuxSysmanImp for mocks and
// installs a MockRasNeoDrm as the driver model; TearDown() restores the saved
// interface pointers.
struct SysmanRasFixture : public SysmanDeviceFixture {
protected:
// Mock interfaces owned by the fixture; raw pointers to them are installed into pLinuxSysmanImp.
std::unique_ptr<MockRasFsAccess> pFsAccess;
std::unique_ptr<MockRasSysfsAccess> pSysfsAccess;
std::unique_ptr<MockRasPmuInterfaceImp> pPmuInterface;
std::unique_ptr<MockRasFwInterface> pRasFwUtilInterface;
// Allocated in SetUp(); ownership is handed to the OS interface via setDriverModel().
MockRasNeoDrm *pDrm = nullptr;
// Original interface pointers saved in SetUp() and restored in TearDown().
L0::Sysman::FsAccess *pFsAccessOriginal = nullptr;
L0::Sysman::SysfsAccess *pSysfsAccessOriginal = nullptr;
L0::Sysman::PmuInterface *pOriginalPmuInterface = nullptr;
L0::Sysman::FirmwareUtil *pFwUtilOriginal = nullptr;
// Device under test; set to pSysmanDevice in SetUp().
L0::Sysman::SysmanDevice *device = nullptr;
// Installs the mocks, sets the mock DRM memory type to HBM2e and marks the
// device as non-integrated.
void SetUp() override {
SysmanDeviceFixture::SetUp();
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
pFsAccess = std::make_unique<MockRasFsAccess>();
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
pSysfsAccess = std::make_unique<MockRasSysfsAccess>();
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
pRasFwUtilInterface = std::make_unique<MockRasFwInterface>();
pDrm = new MockRasNeoDrm(const_cast<NEO::RootDeviceEnvironment &>(pSysmanDeviceImp->getRootDeviceEnvironment()));
pDrm->setupIoctlHelper(pSysmanDeviceImp->getRootDeviceEnvironment().getHardwareInfo()->platform.eProductFamily);
pFwUtilOriginal = pLinuxSysmanImp->pFwUtilInterface;
pLinuxSysmanImp->pFwUtilInterface = pRasFwUtilInterface.get();
pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
pPmuInterface = std::make_unique<MockRasPmuInterfaceImp>(pLinuxSysmanImp);
pLinuxSysmanImp->pPmuInterface = pPmuInterface.get();
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
auto &osInterface = pSysmanDeviceImp->getRootDeviceEnvironment().osInterface;
// The raw pDrm pointer is wrapped in a unique_ptr here, so the fixture must not delete it.
osInterface->setDriverModel(std::unique_ptr<MockRasNeoDrm>(pDrm));
pSysmanDeviceImp->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = false;
device = pSysmanDevice;
}
// Puts the original interface pointers back before the base fixture tears down.
void TearDown() override {
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
pLinuxSysmanImp->pFwUtilInterface = pFwUtilOriginal;
SysmanDeviceFixture::TearDown();
}
// Enumerates up to `count` RAS handles through zesDeviceEnumRasErrorSets,
// expecting success. The returned vector always has the requested size;
// entries the driver does not fill stay nullptr.
std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
std::vector<zes_ras_handle_t> handles(count, nullptr);
EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
return handles;
}
};
// Enumeration must report mockHandleCount handles, both when the incoming count
// is zero and when it is larger than the available number, and every handle
// returned must be non-null.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesInThenSuccessReturn) {
    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, mockHandleCount);

    // Ask for one more than available; the driver is expected to write back the real count.
    uint32_t oversizedCount = handleCount + 1;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr));
    EXPECT_EQ(oversizedCount, mockHandleCount);

    const auto rasHandles = getRasHandles(mockHandleCount);
    for (const auto &rasHandle : rasHandles) {
        EXPECT_NE(rasHandle, nullptr);
    }
}
// Every handle must report device-level properties (no sub-device, id 0, null
// pNext). The first handle must be of the correctable type, every later handle
// of the uncorrectable type.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenGettingRasPropertiesThenSuccessIsReturned) {
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (size_t index = 0; index < rasHandles.size(); ++index) {
        zes_ras_properties_t rasProperties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(rasHandles[index], &rasProperties));
        EXPECT_EQ(rasProperties.pNext, nullptr);
        EXPECT_EQ(rasProperties.onSubdevice, false);
        EXPECT_EQ(rasProperties.subdeviceId, 0u);
        if (index == 0) {
            EXPECT_EQ(rasProperties.type, ZES_RAS_ERROR_TYPE_CORRECTABLE);
        } else {
            EXPECT_EQ(rasProperties.type, ZES_RAS_ERROR_TYPE_UNCORRECTABLE);
        }
    }
}
// With pSysfsAccess->mockReadSymLinkResult set, the GT source must not add any
// supported error type to the output set.
TEST_F(SysmanRasFixture, GivenValidOsSysmanPointerWhenRetrievingSupportedRasErrorsForGtAndIfReadSymLinkFailsThenNoSupportedErrorTypeIsReturned) {
    pSysfsAccess->mockReadSymLinkResult = true;
    std::set<zes_ras_error_type_t> supportedTypes = {};
    L0::Sysman::LinuxRasSourceGt::getSupportedRasErrorTypes(supportedTypes, pOsSysman, false, 0);
    EXPECT_EQ(supportedTypes.size(), 0u);
}
// With pFsAccess->mockReadDirectoryFailure set, the GT source must not add any
// supported error type to the output set.
TEST_F(SysmanRasFixture, GivenValidOsSysmanPointerWhenRetrievingSupportedRasErrorsForGtAndIfListDirectoryFailsThenNoSupportedErrorTypeIsReturned) {
    pFsAccess->mockReadDirectoryFailure = true;
    std::set<zes_ras_error_type_t> supportedTypes = {};
    L0::Sysman::LinuxRasSourceGt::getSupportedRasErrorTypes(supportedTypes, pOsSysman, false, 0);
    EXPECT_EQ(supportedTypes.size(), 0u);
}
// With the firmware utility interface removed from pLinuxSysmanImp, the HBM
// source must not add any supported error type to the output set.
TEST_F(SysmanRasFixture, GivenValidOsSysmanPointerWhenRetrievingSupportedRasErrorsForHbmAndFwInterfaceIsAbsentThenNoSupportedErrorTypeIsReturned) {
    pLinuxSysmanImp->pFwUtilInterface = nullptr;
    std::set<zes_ras_error_type_t> supportedTypes = {};
    L0::Sysman::LinuxRasSourceHbm::getSupportedRasErrorTypes(supportedTypes, pOsSysman, false, 0);
    EXPECT_EQ(supportedTypes.size(), 0u);
}
// With the directory mock reporting no RAS events and the firmware utility
// interface removed, re-initialising the handle context must produce zero
// handles, for an incoming count of zero as well as a non-zero one.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAreAbsentThenZeroHandlesAreCreated) {
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;
    pLinuxSysmanImp->pFwUtilInterface = nullptr;

    auto &rasContext = *pSysmanDeviceImp->pRasHandleContext;
    rasContext.handleList.clear();
    rasContext.init(pOsSysman->getSubDeviceCount());

    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, 0u);

    uint32_t oversizedCount = handleCount + 1;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr));
    EXPECT_EQ(oversizedCount, 0u);
}
// With the mock DRM memory type switched to LPDDR4 and the directory mock
// reporting no RAS events, re-initialising the handle context must produce
// zero handles even though the firmware mock reports memory success.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAndHbmAreAbsentThenZeroHandlesAreCreated) {
    pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_LPDDR4);
    pRasFwUtilInterface->mockMemorySuccess = true;
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;

    auto &rasContext = *pSysmanDeviceImp->pRasHandleContext;
    rasContext.handleList.clear();
    rasContext.init(pOsSysman->getSubDeviceCount());

    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, 0u);
}
// With the mock DRM memory type set to HBM2 and the firmware mock reporting
// memory success, enumeration must report mockHandleCount handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfHbmAndFwInterfaceArePresentThenSuccessIsReturned) {
    pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2);
    pRasFwUtilInterface->mockMemorySuccess = true;
    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, mockHandleCount);
}
// With no RAS events in the directory mock, a `true` queued in the DRM mock's
// mockQuerySystemInfoReturnValue and the firmware utility interface removed,
// enumeration must report zero handles.
TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesIfRasEventsAreAbsentAndQuerySystemInfoSucceedsButMemSysInfoIsNullThenZeroHandlesAreCreated) {
    pFsAccess->mockReadDirectoryWithoutRasEvents = true;
    pDrm->mockQuerySystemInfoReturnValue.push_back(true);
    pLinuxSysmanImp->pFwUtilInterface = nullptr;
    uint32_t handleCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr));
    EXPECT_EQ(handleCount, 0u);
}
// Reads the RAS state (clear = 0) of every enumerated handle with
// mockPmuReadCorrectable set and the firmware mock's mockMemorySuccess cleared.
// The first handle is compared against the correctable expectations, every
// later handle against the uncorrectable ones.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadCorrectable = true;
    pRasFwUtilInterface->mockMemorySuccess = false;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (size_t index = 0; index < rasHandles.size(); ++index) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandles[index], 0, &rasState));
        if (index > 0) {
            // Uncorrectable expectations.
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], fatalTlb + initialUncorrectableCacheErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCount + initialEngineReset);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttention + initialProgrammingErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalEuErrorCount + initialUncorrectableComputeErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors);
            continue;
        }
        // Correctable expectations (first handle only).
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], correctableGrfErrorCount + correctableEuErrorCount + initialCorrectableComputeErrors);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
    }
}
// Two passes over the enumerated handles with mockPmuReadAfterClear set:
//  1. clear = 0: the first handle must match the correctable expectations and
//     every later handle the uncorrectable ones;
//  2. clear = 1: every category of every handle must read back as zero.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingRasGetStateForGtAfterClearThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadAfterClear = true;
    const auto rasHandles = getRasHandles(mockHandleCount);

    const ze_bool_t keepCounters = 0;
    bool isFirstHandle = true;
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, keepCounters, &rasState));
        if (!isFirstHandle) {
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCount + initialEngineReset);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttention + initialProgrammingErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalEuErrorCount + initialUncorrectableComputeErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], fatalTlb + initialUncorrectableCacheErrors);
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            continue;
        }
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], correctableGrfErrorCount + correctableEuErrorCount + initialCorrectableComputeErrors);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        isFirstHandle = false;
    }

    // Second pass: the expectations are the same (all zero) for every handle,
    // so no correctable/uncorrectable distinction is needed.
    const ze_bool_t clearCounters = 1;
    for (const auto &rasHandle : rasHandles) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, clearCounters, &rasState));
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
    }
}
// With mockPmuReadResult and the firmware mock's mockMemorySuccess both set,
// the non-compute category must equal hbmCorrectableErrorCount for the first
// handle and hbmUncorrectableErrorCount for every later handle.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForHbmThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadResult = true;
    pRasFwUtilInterface->mockMemorySuccess = true;
    const auto rasHandles = getRasHandles(mockHandleCount);
    for (size_t index = 0; index < rasHandles.size(); ++index) {
        zes_ras_state_t rasState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandles[index], 0, &rasState));
        if (index == 0) {
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmCorrectableErrorCount);
        } else {
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmUncorrectableErrorCount);
        }
    }
}
// HBM counters with the clear option. Pass 1 queries without clearing and checks the HBM counts;
// pass 2 queries with clear == 1 and checks that every handle reports zero NON_COMPUTE_ERRORS.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForHbmWithClearThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadResult = true;
    pRasFwUtilInterface->mockMemorySuccess = true;
    auto handles = getRasHandles(mockHandleCount);

    // Pass 1: the first handle is checked against the correctable count, all later ones against
    // the uncorrectable count.
    bool correctable = true;
    ze_bool_t clear = 0;
    for (auto handle : handles) {
        zes_ras_state_t state = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(handle, clear, &state));
        if (correctable == true) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmCorrectableErrorCount);
            correctable = false;
        } else {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmUncorrectableErrorCount);
        }
    }

    // Pass 2: the expectation is the same for every handle, so no per-handle branching is needed.
    clear = 1;
    for (auto handle : handles) {
        zes_ras_state_t state = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(handle, clear, &state));
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
    }
}
// With the FsAccess mock's mockRootUser switch set, a clearing query on any handle must be
// rejected with ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateWithClearOptionWithoutPermissionsThenFailureIsReturned) {
    pFsAccess->mockRootUser = true;

    const ze_bool_t clearCounters = 1;
    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, clearCounters, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result);
    }
}
// With the FsAccess mock's mockReadFileFailure switch set (per the test name: config values
// cannot be retrieved), a state query on every handle must return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndUnableToRetrieveConfigValuesAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pFsAccess->mockReadFileFailure = true;

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
// With the PMU mock's mockPerfEvent switch set (per the test name: perf event open fails),
// a state query on every handle must return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndPerfEventOpenFailsAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pPmuInterface->mockPerfEvent = true;

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
// With the PMU mock's mockPmuReadResult switch set (per the test name: the PMU read fails),
// a non-clearing state query on every handle must return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndPmuReadFailsAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pPmuInterface->mockPmuReadResult = true;

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
// Same setup as the non-clearing PMU-read-failure case (mockPmuReadResult set on the PMU mock),
// but the query is issued with the clear flag set to 1; every handle must still return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceWithClearAndPmuReadFailsAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pPmuInterface->mockPmuReadResult = true;

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, 1, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
// With the FsAccess mock's mockReadVal switch set (per the test name: the PMU event type cannot
// be obtained), a state query on every handle must return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesGetRasStateForGtInterfaceAndPMUGetEventTypeFailsAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pFsAccess->mockReadVal = true;

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
// Sets the FsAccess mock's mockReadVal switch and removes the firmware utility interface
// (pFwUtilInterface = nullptr) before the handles are enumerated; a state query on every handle
// must then return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesGetRasStateAndFirmwareInterfaceIsAbsentOtherInterfacesAreAlsoAbsentThenFailureIsReturned) {
    pFsAccess->mockReadVal = true;
    pLinuxSysmanImp->pFwUtilInterface = nullptr;

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
// Round-trip check: a configuration written with zesRasSetConfig must be read back unchanged by
// zesRasGetConfig (total threshold and the raw bytes of the per-category thresholds).
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetConfigAfterzesRasSetConfigThenSuccessIsReturned) {
    // Size in bytes of the per-category threshold array that is filled and compared below.
    const auto thresholdBytes = maxRasErrorCategoryCount * sizeof(uint64_t);

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_config_t writtenConfig{};
        zes_ras_config_t readBackConfig{};
        writtenConfig.totalThreshold = 50;
        // Every byte of the category thresholds is set to 0x01 so the values differ from the
        // zero-initialised read-back structure.
        memset(writtenConfig.detailedThresholds.category, 1, thresholdBytes);

        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasSetConfig(rasHandle, &writtenConfig));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetConfig(rasHandle, &readBackConfig));

        EXPECT_EQ(writtenConfig.totalThreshold, readBackConfig.totalThreshold);
        EXPECT_EQ(0, std::memcmp(writtenConfig.detailedThresholds.category, readBackConfig.detailedThresholds.category, thresholdBytes));
    }
}
// With the FsAccess mock's mockRootUser switch set, zesRasSetConfig on any handle must be
// rejected with ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasSetConfigWithoutPermissionThenFailureIsReturned) {
    pFsAccess->mockRootUser = true;

    const auto thresholdBytes = maxRasErrorCategoryCount * sizeof(uint64_t);
    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_config_t requestedConfig{};
        requestedConfig.totalThreshold = 50;
        memset(requestedConfig.detailedThresholds.category, 1, thresholdBytes);

        const ze_result_t result = zesRasSetConfig(rasHandle, &requestedConfig);
        EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result);
    }
}
// With the SysfsAccess mock's mockReadSymLinkStatus set to ZE_RESULT_ERROR_NOT_AVAILABLE before
// the handles are enumerated, a state query on every handle must return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndReadSymLinkFailsDuringInitAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pSysfsAccess->mockReadSymLinkStatus = ZE_RESULT_ERROR_NOT_AVAILABLE;

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
// With the SysfsAccess mock's mockReadSymLinkStatus set to ZE_RESULT_ERROR_NOT_AVAILABLE, a state
// query on every handle must return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
// NOTE(review): this setup is identical to the ...ReadSymLinkFailsDuringInit... test, although
// the name refers to a failure inside getEventOpen - confirm whether a different mock switch was
// intended here.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndReadSymLinkFailsInsideGetEventOpenAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pSysfsAccess->mockReadSymLinkStatus = ZE_RESULT_ERROR_NOT_AVAILABLE;

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
// With the FsAccess mock's mockListDirectoryStatus set to ZE_RESULT_ERROR_NOT_AVAILABLE before
// the handles are enumerated, a state query on every handle must return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtInterfaceAndListDirectoryFailsDuringInitAndOtherInterfacesAreAbsentThenFailureIsReturned) {
    pFsAccess->mockListDirectoryStatus = ZE_RESULT_ERROR_NOT_AVAILABLE;

    for (auto rasHandle : getRasHandles(mockHandleCount)) {
        zes_ras_state_t rasState{};
        const ze_result_t result = zesRasGetState(rasHandle, 0, &rasState);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
// Re-initialisation check: after the existing RAS handles are deleted, the handle list is
// cleared and the handle context is initialised again, enumeration must report mockHandleCount
// handles, exactly as it did before.
TEST_F(SysmanRasFixture, GivenValidRasHandleAndHandleCountZeroWhenCallingReInitThenValidCountIsReturnedAndVerifyzesDeviceEnumRasErrorSetsSucceeds) {
    uint32_t enumeratedCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &enumeratedCount, nullptr));
    EXPECT_EQ(enumeratedCount, mockHandleCount);

    // Tear down the current handles by hand, then rebuild them through init().
    auto &rasHandleList = pSysmanDeviceImp->pRasHandleContext->handleList;
    for (const auto &rasHandle : rasHandleList) {
        delete rasHandle;
    }
    rasHandleList.clear();
    pSysmanDeviceImp->pRasHandleContext->init(pOsSysman->getSubDeviceCount());

    enumeratedCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &enumeratedCount, nullptr));
    EXPECT_EQ(enumeratedCount, mockHandleCount);
}
// Fixture for RAS tests built on SysmanMultiDeviceFixture. SetUp() replaces the FsAccess,
// SysfsAccess, firmware-utility and PMU interfaces of pLinuxSysmanImp with RAS mocks and installs
// a MockRasNeoDrm as the driver model; TearDown() restores the saved interface pointers.
struct SysmanRasMultiDeviceFixture : public SysmanMultiDeviceFixture {
  protected:
    // Mocks owned by the fixture; pLinuxSysmanImp is only handed their raw pointers.
    std::unique_ptr<MockRasFsAccess> pFsAccess;
    std::unique_ptr<MockRasSysfsAccess> pSysfsAccess;
    std::unique_ptr<MockRasPmuInterfaceImp> pPmuInterface;
    std::unique_ptr<MockRasFwInterface> pRasFwUtilInterface;
    // Non-owning after SetUp(): the pointer is wrapped in a unique_ptr given to setDriverModel().
    MockRasNeoDrm *pDrm = nullptr;
    // Interface pointers saved by SetUp() so that TearDown() can put them back.
    L0::Sysman::FsAccess *pFsAccessOriginal = nullptr;
    L0::Sysman::SysfsAccess *pSysfsAccessOriginal = nullptr;
    L0::Sysman::PmuInterface *pOriginalPmuInterface = nullptr;
    L0::Sysman::FirmwareUtil *pFwUtilOriginal = nullptr;
    Drm *pOriginalDrm = nullptr; // neither assigned nor read inside this fixture
    L0::Sysman::SysmanDevice *device = nullptr;

    void SetUp() override {
        SysmanMultiDeviceFixture::SetUp();

        auto &rootEnv = pSysmanDeviceImp->getRootDeviceEnvironment();
        pDrm = new MockRasNeoDrm(const_cast<NEO::RootDeviceEnvironment &>(rootEnv));
        pDrm->setupIoctlHelper(rootEnv.getHardwareInfo()->platform.eProductFamily);

        // Swap in the filesystem, sysfs and firmware mocks, remembering the originals.
        pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
        pFsAccess = std::make_unique<MockRasFsAccess>();
        pLinuxSysmanImp->pFsAccess = pFsAccess.get();

        pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
        pSysfsAccess = std::make_unique<MockRasSysfsAccess>();
        pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();

        pFwUtilOriginal = pLinuxSysmanImp->pFwUtilInterface;
        pRasFwUtilInterface = std::make_unique<MockRasFwInterface>();
        pLinuxSysmanImp->pFwUtilInterface = pRasFwUtilInterface.get();

        // The PMU mock is constructed from pLinuxSysmanImp, so it is created only after the
        // mocks above have been installed (same order as before).
        pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
        pPmuInterface = std::make_unique<MockRasPmuInterfaceImp>(pLinuxSysmanImp);
        pLinuxSysmanImp->pPmuInterface = pPmuInterface.get();

        // Report HBM2e memory, then hand the DRM mock over to the OS interface.
        pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
        rootEnv.osInterface->setDriverModel(std::unique_ptr<MockRasNeoDrm>(pDrm));

        device = pSysmanDevice;
        pFsAccess->mockReadDirectoryForMultiDevice = true;
        rootEnv.getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = false;
    }

    void TearDown() override {
        // Put the original interfaces back before the base fixture tears down.
        pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
        pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
        pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
        pLinuxSysmanImp->pFwUtilInterface = pFwUtilOriginal;
        SysmanMultiDeviceFixture::TearDown();
    }

    // Enumerates RAS handles into a vector sized for `count` entries and expects the
    // enumeration call to succeed.
    std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
        std::vector<zes_ras_handle_t> rasHandles(count, nullptr);
        const ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &count, rasHandles.data());
        EXPECT_EQ(result, ZE_RESULT_SUCCESS);
        return rasHandles;
    }
};
// A freshly constructed RasHandleContext must answer a count-only rasGet() query (null handle
// array) with success and a non-zero handle count.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidSysmanHandleWithMultiDeviceWhenRetrievingRasHandlesThenSuccessIsReturned) {
    // Owned by a unique_ptr so the context is released on every exit path.
    auto pRasHandleContext = std::make_unique<L0::Sysman::RasHandleContext>(pSysmanDeviceImp->pOsSysman);
    uint32_t count = 0;
    ze_result_t result = pRasHandleContext->rasGet(&count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    // EXPECT_GT prints the actual count on failure instead of a bare "false != true".
    EXPECT_GT(count, 0u);
}
// Enumeration checks for the multi-device fixture: a count-only query reports
// mockHandleCountForSubDevice, a count-only query that starts from a larger count is brought
// back to that same value, and every handle returned by getRasHandles() is non-null.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidSysmanHandleWhenRetrievingRasHandlesThenSuccessIsReturned) {
    uint32_t enumeratedCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &enumeratedCount, nullptr));
    EXPECT_EQ(enumeratedCount, mockHandleCountForSubDevice);

    uint32_t oversizedCount = enumeratedCount + 1;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr));
    EXPECT_EQ(oversizedCount, mockHandleCountForSubDevice);

    for (auto rasHandle : getRasHandles(mockHandleCountForSubDevice)) {
        EXPECT_NE(rasHandle, nullptr);
    }
}
// osRasGetProperties() on a LinuxRasImp created for sub-device 0 with the correctable error type
// must succeed and echo back the sub-device id, the on-sub-device flag and the error type.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidHandleWhenGettingRasPropertiesThenSuccessIsReturned) {
    zes_ras_properties_t properties = {};
    bool isSubDevice = true;
    uint32_t subDeviceId = 0u;
    // Owned by a unique_ptr so the object is released on every exit path.
    auto pLinuxRasImp = std::make_unique<PublicLinuxRasImp>(pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, isSubDevice, subDeviceId);
    EXPECT_EQ(ZE_RESULT_SUCCESS, pLinuxRasImp->osRasGetProperties(properties));
    EXPECT_EQ(properties.subdeviceId, subDeviceId);
    EXPECT_EQ(properties.onSubdevice, isSubDevice);
    EXPECT_EQ(properties.type, ZES_RAS_ERROR_TYPE_CORRECTABLE);
}
// GT error counters on the multi-device fixture. With mockPmuReadTile set on the PMU mock and
// isMultiTileArch set on the SysfsAccess mock, the first four handles are checked in enumeration
// order: correctable / uncorrectable for subdevice 0, then correctable / uncorrectable for
// subdevice 1. Handles beyond index 3 are only required to return success.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadTile = true;
    pSysfsAccess->isMultiTileArch = true;

    auto handles = getRasHandles(mockHandleCountForSubDevice);
    for (uint32_t handleIndex = 0u; handleIndex < handles.size(); ++handleIndex) {
        zes_ras_state_t rasState{};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(handles[handleIndex], 0, &rasState));
        const auto &counters = rasState.category;

        switch (handleIndex) {
        case 0u: {
            // No. of correctable error type for subdevice 0
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_CACHE_ERRORS], correctablel3Bank + initialCorrectableCacheErrorTile0);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_RESET], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], correctableGrfErrorCount + correctableEuErrorCount + initialCorrectableComputeErrorsTile0);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], correctableGscSramEcc + initialCorrectableNonComputeErrorsTile0);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
            break;
        }
        case 1u: {
            // No. of uncorrectable error type for subdevice 0
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_CACHE_ERRORS], fatalTlb + initialUncorrectableCacheErrorsTile0);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCount + initialEngineResetTile0);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttention + initialProgrammingErrorsTile0);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalSubslice + fatalEuErrorCount + initialUncorrectableComputeErrorsTile0);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + nonFatalGscAonParity + nonFataGscSelfmBist + initialUncorrectableNonComputeErrorsTile0);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrorsTile0);
            break;
        }
        case 2u: {
            // No. of correctable error type for subdevice 1
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_RESET], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], correctableSubsliceTile1 + correctableGucErrorCountTile1 + correctableSamplerErrorCountTile1 + initialCorrectableComputeErrorsTile1);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);
            break;
        }
        case 3u: {
            // No. of uncorrectable error type for subdevice 1
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_CACHE_ERRORS], fatalL3BankTile1 + fatalIdiParityErrorCountTile1 + initialUncorrectableCacheErrorsTile1);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCountTile1 + initialEngineResetTile1);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttentionTile1 + initialProgrammingErrorsTile1);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalGucErrorCountTile1 + initialUncorrectableComputeErrorsTile1);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalMdfiWestCountTile1 + socFatalPunitTile1 + initialUncorrectableNonComputeErrorsTile1);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
            EXPECT_EQ(counters[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverEngineOther + initialUncorrectableDriverErrorsTile1);
            break;
        }
        default:
            break;
        }
    }
}
// HBM counters on the multi-device fixture. With mockPmuReadResult set on the PMU mock and
// mockMemorySuccess set on the firmware mock, handles 0 and 2 (correctable, subdevice 0 / 1) must
// report hbmCorrectableErrorCount and handles 1 and 3 (uncorrectable, subdevice 0 / 1) must
// report hbmUncorrectableErrorCount. Handles beyond index 3 are only required to return success.
TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGetStateForHbmThenSuccessIsReturned) {
    pPmuInterface->mockPmuReadResult = true;
    pRasFwUtilInterface->mockMemorySuccess = true;

    auto handles = getRasHandles(mockHandleCountForSubDevice);
    for (uint32_t handleIndex = 0u; handleIndex < handles.size(); ++handleIndex) {
        zes_ras_state_t rasState{};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(handles[handleIndex], 0, &rasState));

        switch (handleIndex) {
        case 0u: // correctable error type for subdevice 0
        case 2u: // correctable error type for subdevice 1
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmCorrectableErrorCount);
            break;
        case 1u: // uncorrectable error type for subdevice 0
        case 3u: // uncorrectable error type for subdevice 1
            EXPECT_EQ(rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], hbmUncorrectableErrorCount);
            break;
        default:
            break;
        }
    }
}
} // namespace ult
} // namespace Sysman
} // namespace L0