mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
Add support for sysman zesFabricPortGetFabricErrorCounters API
Related-To: LOCI-3398
Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
48ed9f9c92
commit
07d3353b1f
@@ -760,7 +760,7 @@ ze_result_t zesOverclockSetVFPointValues(
|
||||
// Sysman API entry point for querying a fabric port's error counters.
//
// hPort   - handle of the fabric port; resolved through L0::FabricPort::fromHandle().
// pErrors - output structure handed to the port's fabricPortGetErrorCounters().
//
// Returns whatever the port implementation returns. The stale early
// `return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;` that made the dispatch below
// unreachable has been dropped.
ze_result_t zesFabricPortGetFabricErrorCounters(
    zes_fabric_port_handle_t hPort,
    zes_fabric_port_error_counters_t *pErrors) {
    return L0::FabricPort::fromHandle(hPort)->fabricPortGetErrorCounters(pErrors);
}
|
||||
|
||||
ze_result_t zesInit(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -38,6 +38,7 @@ class FabricPort : _zes_fabric_port_handle_t {
|
||||
virtual ze_result_t fabricPortSetConfig(const zes_fabric_port_config_t *pConfig) = 0;
|
||||
virtual ze_result_t fabricPortGetState(zes_fabric_port_state_t *pState) = 0;
|
||||
virtual ze_result_t fabricPortGetThroughput(zes_fabric_port_throughput_t *pThroughput) = 0;
|
||||
virtual ze_result_t fabricPortGetErrorCounters(zes_fabric_port_error_counters_t *pErrors) = 0;
|
||||
|
||||
inline zes_fabric_port_handle_t toZesHandle() { return this; }
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -53,6 +53,10 @@ ze_result_t FabricPortImp::fabricPortGetState(zes_fabric_port_state_t *pState) {
|
||||
return pOsFabricPort->getState(pState);
|
||||
}
|
||||
|
||||
// Forwards the error-counter query to the OS-specific fabric port object.
// pErrors is passed through untouched and the backend's status is returned as-is.
ze_result_t FabricPortImp::fabricPortGetErrorCounters(zes_fabric_port_error_counters_t *pErrors) {
    const ze_result_t status = pOsFabricPort->getErrorCounters(pErrors);
    return status;
}
|
||||
|
||||
ze_result_t FabricPortImp::fabricPortGetThroughput(zes_fabric_port_throughput_t *pThroughput) {
|
||||
fabricPortGetTimestamp(pThroughput->timestamp);
|
||||
return pOsFabricPort->getThroughput(pThroughput);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -35,6 +35,7 @@ class FabricPortImp : public FabricPort, NEO::NonCopyableOrMovableClass {
|
||||
ze_result_t fabricPortSetConfig(const zes_fabric_port_config_t *pConfig) override;
|
||||
ze_result_t fabricPortGetState(zes_fabric_port_state_t *pState) override;
|
||||
ze_result_t fabricPortGetThroughput(zes_fabric_port_throughput_t *pThroughput) override;
|
||||
ze_result_t fabricPortGetErrorCounters(zes_fabric_port_error_counters_t *pErrors) override;
|
||||
|
||||
FabricPortImp() = delete;
|
||||
FabricPortImp(FabricDevice *pFabricDevice, uint32_t portNum);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -56,6 +56,10 @@ ze_result_t LinuxFabricPortImp::getThroughput(zes_fabric_port_throughput_t *pThr
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// This Linux fabric port variant has no error-counter support: pErrors is left
// untouched and the call always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t LinuxFabricPortImp::getErrorCounters(zes_fabric_port_error_counters_t *pErrors) {
    const ze_result_t notSupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return notSupported;
}
|
||||
|
||||
ze_result_t LinuxFabricPortImp::getProperties(zes_fabric_port_properties_t *pProperties) {
|
||||
::snprintf(pProperties->model, ZES_MAX_FABRIC_PORT_MODEL_SIZE, "%s", this->model.c_str());
|
||||
pProperties->onSubdevice = false;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -34,6 +34,7 @@ class LinuxFabricPortImp : public OsFabricPort, NEO::NonCopyableOrMovableClass {
|
||||
ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) override;
|
||||
ze_result_t getState(zes_fabric_port_state_t *pState) override;
|
||||
ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) override;
|
||||
ze_result_t getErrorCounters(zes_fabric_port_error_counters_t *pErrors) override;
|
||||
|
||||
LinuxFabricPortImp() = delete;
|
||||
LinuxFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,8 +7,11 @@
|
||||
|
||||
#include "os_fabric_port_imp_prelim.h"
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
|
||||
#include "sysman/linux/os_sysman_imp.h"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
namespace L0 {
|
||||
@@ -36,6 +39,78 @@ ze_result_t LinuxFabricDeviceImp::getThroughput(const zes_fabric_port_id_t portI
|
||||
return pFabricDeviceAccess->getThroughput(portId, *pThroughput);
|
||||
}
|
||||
|
||||
// Reads the fabric error counters of one port from files below the device's
// "iaf" directory.
//
// portId  - attachId selects the "sd.<attachId>" directory, portNumber the
//           "port.<portNumber>" directory beneath it.
// pErrors - receives linkFailureCount, linkDegradeCount, fwErrorCount and
//           fwCommErrorCount.
//
// Returns the failing status when the device path cannot be resolved or listed,
// ZE_RESULT_ERROR_NOT_AVAILABLE when no directory entry containing "iaf." is
// found, and ZE_RESULT_SUCCESS otherwise. The individual counter files are read
// best-effort: a counter whose file cannot be read is logged and reported as 0.
ze_result_t LinuxFabricDeviceImp::getErrorCounters(const zes_fabric_port_id_t portId, zes_fabric_port_error_counters_t *pErrors) {
    FsAccess *pFsAccess = &pLinuxSysmanImp->getFsAccess();
    SysfsAccess *pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess();

    std::string devicePciPath("");
    ze_result_t result = pSysfsAccess->getRealPath("device/", devicePciPath);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "error@<%s> <failed to get device path> <result: 0x%x>\n", __func__, result);
        return result;
    }

    std::vector<std::string> list;
    result = pFsAccess->listDirectory(devicePciPath, list);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "error@<%s> <failed to get list of files in device directory> <result: 0x%x>\n", __func__, result);
        return result;
    }

    std::string path("");
    for (const auto &entry : list) {
        // "iaf." is a substring of "i915.iaf.", so a single search matches both
        // directory naming schemes.
        if (entry.find("iaf.") != std::string::npos) {
            path = devicePciPath + "/" + entry;
            break;
        }
    }
    if (path.empty()) {
        // This device does not have a fabric
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "error@<%s> <Device does not have fabric>\n", __func__);
        return ZE_RESULT_ERROR_NOT_AVAILABLE;
    }

    const std::string fabricFwErrorPath = path + "/sd." + std::to_string(portId.attachId);
    const std::string fabricLinkErrorPath = fabricFwErrorPath + "/port." + std::to_string(portId.portNumber);

    // __func__ inside the lambda would name the closure's call operator, so the
    // enclosing function's name is captured to keep the log text unchanged.
    const char *callerName = __func__;
    auto readCounter = [pFsAccess, callerName](const std::string &file) -> uint64_t {
        uint64_t value = 0;
        const ze_result_t status = pFsAccess->read(file, value);
        if (status != ZE_RESULT_SUCCESS) {
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                                  "error@<%s> <failed to read file %s> <result: 0x%x>\n", callerName, file.c_str(), status);
            value = 0; // discard anything a failed read may have written
        }
        return value;
    };

    // Same read order as before: link failures, link degrades, fw errors, fw comm errors.
    pErrors->linkFailureCount = readCounter(fabricLinkErrorPath + "/link_failures");
    pErrors->linkDegradeCount = readCounter(fabricLinkErrorPath + "/link_degrades");
    pErrors->fwErrorCount = readCounter(fabricFwErrorPath + "/fw_error");
    pErrors->fwCommErrorCount = readCounter(fabricFwErrorPath + "/fw_comm_errors");
    return ZE_RESULT_SUCCESS;
}
|
||||
ze_result_t LinuxFabricDeviceImp::performSweep() {
|
||||
uint32_t start = 0U;
|
||||
uint32_t end = 0U;
|
||||
@@ -143,6 +218,7 @@ ze_result_t LinuxFabricDeviceImp::routingQuery(uint32_t &start, uint32_t &end) {
|
||||
// Creates the fabric device access object for this device (a null result is
// treated as unrecoverable) and stores pOsSysman as a LinuxSysmanImp pointer.
LinuxFabricDeviceImp::LinuxFabricDeviceImp(OsSysman *pOsSysman) {
    this->pFabricDeviceAccess = FabricDeviceAccess::create(pOsSysman);
    UNRECOVERABLE_IF(this->pFabricDeviceAccess == nullptr);
    this->pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
}
|
||||
|
||||
LinuxFabricDeviceImp::~LinuxFabricDeviceImp() {
|
||||
@@ -206,6 +282,10 @@ ze_result_t LinuxFabricPortImp::getThroughput(zes_fabric_port_throughput_t *pThr
|
||||
return pLinuxFabricDeviceImp->getThroughput(portId, pThroughput);
|
||||
}
|
||||
|
||||
// Asks the owning fabric device for the error counters of this port, identified
// by the port's own portId, and returns the device's status unchanged.
ze_result_t LinuxFabricPortImp::getErrorCounters(zes_fabric_port_error_counters_t *pErrors) {
    const ze_result_t status = pLinuxFabricDeviceImp->getErrorCounters(portId, pErrors);
    return status;
}
|
||||
|
||||
ze_result_t LinuxFabricPortImp::getProperties(zes_fabric_port_properties_t *pProperties) {
|
||||
::snprintf(pProperties->model, ZES_MAX_FABRIC_PORT_MODEL_SIZE, "%s", this->model.c_str());
|
||||
pProperties->onSubdevice = this->onSubdevice;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -15,7 +15,7 @@
|
||||
#include <vector>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
class LinuxSysmanImp;
|
||||
class LinuxFabricDeviceImp : public OsFabricDevice, NEO::NonCopyableOrMovableClass {
|
||||
public:
|
||||
uint32_t getNumPorts() override;
|
||||
@@ -29,6 +29,7 @@ class LinuxFabricDeviceImp : public OsFabricDevice, NEO::NonCopyableOrMovableCla
|
||||
ze_result_t disablePortBeaconing(const zes_fabric_port_id_t portId);
|
||||
ze_result_t getState(const zes_fabric_port_id_t portId, zes_fabric_port_state_t *pState);
|
||||
ze_result_t getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t *pThroughput);
|
||||
ze_result_t getErrorCounters(const zes_fabric_port_id_t portId, zes_fabric_port_error_counters_t *pErrors);
|
||||
|
||||
void getPortId(const uint32_t portNumber, zes_fabric_port_id_t &portId);
|
||||
void getProperties(const zes_fabric_port_id_t portId, std::string &model, bool &onSubdevice,
|
||||
@@ -49,6 +50,7 @@ class LinuxFabricDeviceImp : public OsFabricDevice, NEO::NonCopyableOrMovableCla
|
||||
ze_result_t disableUsage(const zes_fabric_port_id_t portId);
|
||||
|
||||
protected:
|
||||
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
|
||||
FabricDeviceAccess *pFabricDeviceAccess = nullptr;
|
||||
};
|
||||
|
||||
@@ -60,6 +62,7 @@ class LinuxFabricPortImp : public OsFabricPort, NEO::NonCopyableOrMovableClass {
|
||||
ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) override;
|
||||
ze_result_t getState(zes_fabric_port_state_t *pState) override;
|
||||
ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) override;
|
||||
ze_result_t getErrorCounters(zes_fabric_port_error_counters_t *pErrors) override;
|
||||
|
||||
LinuxFabricPortImp() = delete;
|
||||
LinuxFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -29,6 +29,7 @@ class OsFabricPort {
|
||||
virtual ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) = 0;
|
||||
virtual ze_result_t getState(zes_fabric_port_state_t *pState) = 0;
|
||||
virtual ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) = 0;
|
||||
virtual ze_result_t getErrorCounters(zes_fabric_port_error_counters_t *pErrors) = 0;
|
||||
|
||||
static OsFabricPort *create(OsFabricDevice *pOsFabricDevice, uint32_t portNum);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -41,6 +41,10 @@ ze_result_t WddmFabricPortImp::getThroughput(zes_fabric_port_throughput_t *pThro
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
// The WDDM fabric port has no error-counter support: pErrors is left untouched
// and the call always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t WddmFabricPortImp::getErrorCounters(zes_fabric_port_error_counters_t *pErrors) {
    const ze_result_t notSupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return notSupported;
}
|
||||
|
||||
ze_result_t WddmFabricPortImp::getProperties(zes_fabric_port_properties_t *pProperties) {
|
||||
::memset(pProperties->model, '\0', ZES_MAX_FABRIC_PORT_MODEL_SIZE);
|
||||
pProperties->onSubdevice = false;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -35,6 +35,7 @@ class WddmFabricPortImp : public OsFabricPort, NEO::NonCopyableOrMovableClass {
|
||||
ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) override;
|
||||
ze_result_t getState(zes_fabric_port_state_t *pState) override;
|
||||
ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) override;
|
||||
ze_result_t getErrorCounters(zes_fabric_port_error_counters_t *pErrors) override;
|
||||
|
||||
WddmFabricPortImp() = delete;
|
||||
WddmFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# Copyright (C) 2020-2023 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -13,7 +13,6 @@ if(NEO_ENABLE_i915_PRELIM_DETECTION)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_prelim.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_prelim.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_gt.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_fabric.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_hbm.cpp
|
||||
)
|
||||
else()
|
||||
|
||||
@@ -1,127 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/device/sub_device.h"
|
||||
|
||||
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
|
||||
#include "level_zero/tools/source/sysman/sysman_imp.h"
|
||||
|
||||
#include "sysman/linux/fs_access.h"
|
||||
#include "sysman/linux/os_sysman_imp.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <regex>
|
||||
namespace L0 {
|
||||
|
||||
void LinuxRasSourceFabric::getNodes(std::vector<std::string> &nodes, uint32_t subdeviceId, LinuxSysmanImp *pSysmanImp, const zes_ras_error_type_t &type) {
|
||||
const uint32_t minBoardStrappedNumber = 0;
|
||||
const uint32_t maxBoardStrappedNumber = 31;
|
||||
const uint32_t minPortId = 1;
|
||||
const uint32_t maxPortId = 8;
|
||||
nodes.clear();
|
||||
|
||||
const std::string iafPathStringMfd("/sys/module/iaf/drivers/platform:iaf/");
|
||||
const std::string iafPathStringAuxillary("/sys/module/iaf/drivers/auxiliary:iaf/");
|
||||
std::string iafPathString("");
|
||||
|
||||
if (pSysmanImp->getSysfsAccess().getRealPath("device/", iafPathString) != ZE_RESULT_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto &fsAccess = pSysmanImp->getFsAccess();
|
||||
if (fsAccess.directoryExists(iafPathStringMfd)) {
|
||||
iafPathString = iafPathString + "/iaf.";
|
||||
} else if (fsAccess.directoryExists(iafPathStringAuxillary)) {
|
||||
iafPathString = iafPathString + "/i915.iaf.";
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto boardStrappedNumber = minBoardStrappedNumber; boardStrappedNumber <= maxBoardStrappedNumber; boardStrappedNumber++) {
|
||||
|
||||
const auto boardStrappedString(iafPathString + std::to_string(boardStrappedNumber));
|
||||
if (!fsAccess.directoryExists(boardStrappedString)) {
|
||||
continue;
|
||||
}
|
||||
const auto subDeviceString(boardStrappedString + "/sd." + std::to_string(subdeviceId));
|
||||
std::vector<std::string> subDeviceErrorNodes;
|
||||
|
||||
if (type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
|
||||
subDeviceErrorNodes.push_back(subDeviceString + "/fw_comm_errors");
|
||||
for (auto portId = minPortId; portId <= maxPortId; portId++) {
|
||||
subDeviceErrorNodes.push_back(subDeviceString + "/port." + std::to_string(portId) + "/link_degrades");
|
||||
}
|
||||
} else {
|
||||
subDeviceErrorNodes.push_back(subDeviceString + "/sd_failure");
|
||||
subDeviceErrorNodes.push_back(subDeviceString + "/fw_error");
|
||||
for (auto portId = minPortId; portId <= maxPortId; portId++) {
|
||||
subDeviceErrorNodes.push_back(subDeviceString + "/port." + std::to_string(portId) + "/link_failures");
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &subDeviceErrorNode : subDeviceErrorNodes) {
|
||||
if (ZE_RESULT_SUCCESS == fsAccess.canRead(subDeviceErrorNode)) {
|
||||
nodes.push_back(subDeviceErrorNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Adds to errorType every RAS error type for which getNodes() finds at least
// one readable fabric error file on the sub-device behind deviceHandle.
// Uncorrectable is probed before correctable. Always returns ZE_RESULT_SUCCESS.
ze_result_t LinuxRasSourceFabric::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType,
                                                            OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {
    auto *pSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);

    uint32_t subDeviceIndex = 0;
    ze_bool_t onSubDevice = false;
    SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subDeviceIndex, onSubDevice, true);

    // getNodes() clears the vector on entry, so it can be reused across probes.
    std::vector<std::string> nodes;
    const zes_ras_error_type_t probeOrder[] = {ZES_RAS_ERROR_TYPE_UNCORRECTABLE, ZES_RAS_ERROR_TYPE_CORRECTABLE};
    for (const auto candidateType : probeOrder) {
        getNodes(nodes, subDeviceIndex, pSysmanImp, candidateType);
        if (!nodes.empty()) {
            errorType.insert(candidateType);
        }
    }
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
// Stores the Linux sysman implementation and gathers, once at construction,
// the error files for the given error type and sub-device into errorNodes.
LinuxRasSourceFabric::LinuxRasSourceFabric(OsSysman *pOsSysman, zes_ras_error_type_t type, uint32_t subDeviceId)
    : pLinuxSysmanImp(static_cast<LinuxSysmanImp *>(pOsSysman)) {
    getNodes(errorNodes, subDeviceId, pLinuxSysmanImp, type);
}
|
||||
|
||||
uint64_t LinuxRasSourceFabric::getComputeErrorCount() {
|
||||
uint64_t currentErrorCount = 0;
|
||||
auto &fsAccess = pLinuxSysmanImp->getFsAccess();
|
||||
for (const auto &node : errorNodes) {
|
||||
uint64_t errorCount = 0;
|
||||
fsAccess.read(node, errorCount);
|
||||
currentErrorCount += errorCount;
|
||||
}
|
||||
return currentErrorCount;
|
||||
}
|
||||
|
||||
// Reports the fabric error count in the compute-errors category of state.
//
// state - all categories are zeroed, then ZES_RAS_ERROR_CAT_COMPUTE_ERRORS is set
//         to the current total minus the stored baseline.
// clear - when set, the baseline is moved to the current total and the total is
//         read again before the difference is taken.
//
// Returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE when no error files were collected,
// ZE_RESULT_SUCCESS otherwise.
ze_result_t LinuxRasSourceFabric::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
    if (errorNodes.empty()) {
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    std::memset(state.category, 0, sizeof(zes_ras_state_t::category));

    uint64_t latestCount = getComputeErrorCount();
    if (clear) {
        baseComputeErrorCount = latestCount;
        latestCount = getComputeErrorCount();
    }

    state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS] = latestCount - baseComputeErrorCount;
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
} // namespace L0
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -18,10 +18,7 @@ void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType,
|
||||
constexpr auto maxErrorTypes = 2;
|
||||
LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
|
||||
if (errorType.size() < maxErrorTypes) {
|
||||
LinuxRasSourceFabric::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
|
||||
if (errorType.size() < maxErrorTypes) {
|
||||
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
|
||||
}
|
||||
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +69,6 @@ ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear)
|
||||
|
||||
// Registers the RAS sources for this handle in a fixed order: GT, fabric, HBM.
void LinuxRasImp::initSources() {
    rasSources.emplace_back(std::make_unique<L0::LinuxRasSourceGt>(pLinuxSysmanImp, osRasErrorType, isSubdevice, subdeviceId));
    rasSources.emplace_back(std::make_unique<L0::LinuxRasSourceFabric>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
    rasSources.emplace_back(std::make_unique<L0::LinuxRasSourceHbm>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -90,22 +90,6 @@ class LinuxRasSourceGt : public LinuxRasSources {
|
||||
uint32_t subdeviceId = 0;
|
||||
};
|
||||
|
||||
class LinuxRasSourceFabric : public LinuxRasSources {
|
||||
public:
|
||||
static ze_result_t getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle);
|
||||
LinuxRasSourceFabric(OsSysman *pOsSysman, zes_ras_error_type_t type, uint32_t subDeviceId);
|
||||
~LinuxRasSourceFabric() override = default;
|
||||
|
||||
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
|
||||
|
||||
private:
|
||||
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
|
||||
std::vector<std::string> errorNodes = {};
|
||||
uint64_t baseComputeErrorCount = 0;
|
||||
uint64_t getComputeErrorCount();
|
||||
static void getNodes(std::vector<std::string> &nodes, uint32_t subdeviceId, LinuxSysmanImp *pSysmanImp, const zes_ras_error_type_t &type);
|
||||
};
|
||||
|
||||
class LinuxRasSourceHbm : public LinuxRasSources {
|
||||
public:
|
||||
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;
|
||||
|
||||
@@ -1124,6 +1124,7 @@ void testSysmanFabricPort(ze_device_handle_t &device) {
|
||||
zes_fabric_port_config_t fabricPortConfig = {};
|
||||
zes_fabric_port_state_t fabricPortState = {};
|
||||
zes_fabric_port_throughput_t fabricPortThroughput = {};
|
||||
zes_fabric_port_error_counters_t fabricPortErrorCounters = {};
|
||||
|
||||
VALIDATECALL(zesFabricPortGetProperties(handle, &fabricPortProperties));
|
||||
if (verbose) {
|
||||
@@ -1172,6 +1173,14 @@ void testSysmanFabricPort(ze_device_handle_t &device) {
|
||||
std::cout << "RX Counter = " << fabricPortThroughput.rxCounter << std::endl;
|
||||
std::cout << "TX Counter = " << fabricPortThroughput.txCounter << std::endl;
|
||||
}
|
||||
|
||||
VALIDATECALL(zesFabricPortGetFabricErrorCounters(handle, &fabricPortErrorCounters));
|
||||
if (verbose) {
|
||||
std::cout << "Link Failures = " << fabricPortErrorCounters.linkFailureCount << std::endl;
|
||||
std::cout << "Link Degrades = " << fabricPortErrorCounters.linkDegradeCount << std::endl;
|
||||
std::cout << "Fw Errors = " << fabricPortErrorCounters.fwErrorCount << std::endl;
|
||||
std::cout << "Fw comm Errors = " << fabricPortErrorCounters.fwCommErrorCount << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -10,6 +10,8 @@
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
#include "sysman/fabric_port/fabric_port.h"
|
||||
#include "sysman/linux/fs_access.h"
|
||||
#include "sysman/linux/os_sysman_imp.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
@@ -24,5 +26,53 @@ struct MockFabricDevice : public FabricDevice {
|
||||
MockFabricDevice() = default;
|
||||
};
|
||||
|
||||
class MockFabricFsAccess : public FsAccess {
|
||||
public:
|
||||
ze_result_t mockListDirectory = ZE_RESULT_SUCCESS;
|
||||
ze_result_t canRead(const std::string file) override {
|
||||
if (accessibleNodes.find(file) != accessibleNodes.end()) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
ze_result_t listDirectory(const std::string path, std::vector<std::string> &list) override {
|
||||
list = accessibleDirectories;
|
||||
return mockListDirectory;
|
||||
}
|
||||
|
||||
~MockFabricFsAccess() override = default;
|
||||
|
||||
ze_result_t read(const std::string file, uint64_t &val) override {
|
||||
if (canRead(file) == ZE_RESULT_SUCCESS) {
|
||||
val = accessibleNodes[file];
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
void setAccessibleNodes(std::map<std::string, uint64_t> &nodes) {
|
||||
accessibleNodes = nodes;
|
||||
}
|
||||
|
||||
void setAccessibleDirectories(std::vector<std::string> &dirs) {
|
||||
accessibleDirectories = dirs;
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<std::string, uint64_t> accessibleNodes = {};
|
||||
std::vector<std::string> accessibleDirectories = {};
|
||||
};
|
||||
|
||||
class MockFabricSysFsAccess : public SysfsAccess {
|
||||
public:
|
||||
ze_result_t getRealPath(const std::string path, std::string &buf) override {
|
||||
buf.append("/mockRealPath");
|
||||
return mockRealPathStatus;
|
||||
}
|
||||
|
||||
ze_result_t mockRealPathStatus = ZE_RESULT_SUCCESS;
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/tools/test/unit_tests/sources/sysman/fabric_port/linux/mock_fabric_device.h"
|
||||
#include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h"
|
||||
#include "level_zero/tools/test/unit_tests/sources/sysman/linux/nl_api/mock_iaf_nl_api_prelim.h"
|
||||
|
||||
@@ -58,7 +59,6 @@ class ZesFabricPortFixture : public SysmanDeviceFixture {
|
||||
delete pFabricPortHandleContext->pFabricDevice;
|
||||
pFabricPortHandleContext->pFabricDevice = nullptr;
|
||||
}
|
||||
|
||||
pFabricPortHandleContext->pFabricDevice = new FabricDeviceImp(pOsSysman);
|
||||
|
||||
PublicLinuxFabricDeviceImp *pPublicLinuxFabricDeviceImp = reinterpret_cast<PublicLinuxFabricDeviceImp *>(pFabricPortHandleContext->pFabricDevice->getOsFabricDevice());
|
||||
@@ -568,5 +568,398 @@ TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingZesFabricPortG
|
||||
EXPECT_EQ(pMockIafNlApi->txCounter, throughput.txCounter);
|
||||
}
|
||||
|
||||
// Legacy layout: the fabric directory is named "iaf.<n>". All four counter files
// are present, so every counter must be returned with its mocked value.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersWithLegacyPathAndCallSucceeds) {
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U] = {};
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    // The expected file paths are derived from the port id, so the properties
    // query has to succeed for the rest of the test to be meaningful.
    zes_fabric_port_properties_t properties = {};
    result = zesFabricPortGetProperties(hPorts[0], &properties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    const std::string fabricFwErrorPath = "/mockRealPath/iaf.5/sd." + std::to_string(properties.portId.attachId);
    const std::string fabricLinkErrorPath = fabricFwErrorPath + "/port." + std::to_string(properties.portId.portNumber);
    std::vector<std::string> dirs = {"driver", "drm", "iaf.5"};
    const uint64_t mockLinkFailures = 401;
    const uint64_t mockLinkDegrades = 501;
    const uint64_t mockFwErrors = 301;
    const uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    zes_fabric_port_error_counters_t errors = {};
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    delete pFsAccess;
    delete pSysfsAccess;
}
|
||||
|
||||
// Current layout: the fabric directory is named "i915.iaf.<n>". All four counter
// files are present, so every counter must be returned with its mocked value.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersCallSucceeds) {
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U] = {};
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    // The expected file paths are derived from the port id, so the properties
    // query has to succeed for the rest of the test to be meaningful.
    zes_fabric_port_properties_t properties = {};
    result = zesFabricPortGetProperties(hPorts[0], &properties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    const std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    const std::string fabricLinkErrorPath = fabricFwErrorPath + "/port." + std::to_string(properties.portId.portNumber);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    const uint64_t mockLinkFailures = 401;
    const uint64_t mockLinkDegrades = 501;
    const uint64_t mockFwErrors = 301;
    const uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    zes_fabric_port_error_counters_t errors = {};
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    delete pFsAccess;
    delete pSysfsAccess;
}
|
||||
|
||||
// When resolving the device path fails, that failure status must be returned
// to the caller unchanged.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndGetRealPathFailsThenFailureIsReturned) {
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U] = {};
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    // Inject the failure into the sysfs mock.
    pSysfsAccess->mockRealPathStatus = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

    zes_fabric_port_error_counters_t errors = {};
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);

    delete pFsAccess;
    delete pSysfsAccess;
}
|
||||
|
||||
// Expects zesFabricPortGetFabricErrorCounters to return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE once the mocked filesystem accessor's
// mockListDirectory has been set to that error.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndListDirectoryFailsThenFailureIsReturned) {
    // NOTE(review): VariableBackup presumably restores the original accessors on scope exit - confirm.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    MockFabricFsAccess *pMockFs = new MockFabricFsAccess;
    MockFabricSysFsAccess *pMockSysfs = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pMockFs;
    pLinuxSysmanImp->pSysfsAccess = pMockSysfs;

    uint32_t portCount = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t status = zesDeviceEnumFabricPorts(device, &portCount, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(portCount, 1U);

    // Inject the failure only after enumeration has completed.
    pMockFs->mockListDirectory = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

    zes_fabric_port_error_counters_t errors;
    status = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);

    // The mocks are owned by this test and released manually.
    delete pMockFs;
    delete pMockSysfs;
}
|
||||
|
||||
// Expects zesFabricPortGetFabricErrorCounters to return
// ZE_RESULT_ERROR_NOT_AVAILABLE when the directory list given to the mock
// filesystem holds only "driver" and "drm" (no "i915.iaf.5" entry), even
// though all four counter nodes are registered.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndIafDriverIsNotLoadedThenFailureIsReturned) {
    // NOTE(review): VariableBackup presumably restores the original accessors on scope exit - confirm.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    // Value-initialized so that no indeterminate field is handed to the API
    // or used below if the query fails.
    zes_fabric_port_properties_t properties = {};

    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // The node paths below are built from the returned port id, so a failed
    // query must be reported instead of silently producing bogus paths.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    // Deliberately omits "i915.iaf.5".
    std::vector<std::string> dirs = {"driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);
    zes_fabric_port_error_counters_t errors;

    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);

    EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, result);

    // The mocks are owned by this test and released manually.
    delete pFsAccess;
    delete pSysfsAccess;
}
|
||||
|
||||
// Registers the link_failures node under a path built from attachId + 1
// while the other three counters use the port's own attachId, then expects
// success with linkFailureCount == 0 and the remaining counters equal to the
// mocked values.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndLinkFailureSysfsNodeIsAbsentThenZeroLinkFailuresAreReturned) {
    // NOTE(review): VariableBackup presumably restores the original accessors on scope exit - confirm.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    // Value-initialized so that no indeterminate field is handed to the API
    // or used below if the query fails.
    zes_fabric_port_properties_t properties = {};

    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // The node paths below are built from the returned port id, so a failed
    // query must be reported instead of silently producing bogus paths.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorIncorrectPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId + 1) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        // Only this node is placed under the mismatching path.
        {fabricLinkErrorIncorrectPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);
    zes_fabric_port_error_counters_t errors;

    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, 0u);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    // The mocks are owned by this test and released manually.
    delete pFsAccess;
    delete pSysfsAccess;
}
|
||||
|
||||
// Registers the link_degrades node under a path built from attachId + 1
// while the other three counters use the port's own attachId, then expects
// success with linkDegradeCount == 0 and the remaining counters equal to the
// mocked values.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndLinkDegradesSysfsNodeIsAbsentThenZeroLinkDegradesAreReturned) {
    // NOTE(review): VariableBackup presumably restores the original accessors on scope exit - confirm.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    // Value-initialized so that no indeterminate field is handed to the API
    // or used below if the query fails.
    zes_fabric_port_properties_t properties = {};

    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // The node paths below are built from the returned port id, so a failed
    // query must be reported instead of silently producing bogus paths.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricLinkErrorIncorrectPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId + 1) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        // Only this node is placed under the mismatching path.
        {fabricLinkErrorIncorrectPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };

    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);
    zes_fabric_port_error_counters_t errors;

    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, 0u);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    // The mocks are owned by this test and released manually.
    delete pFsAccess;
    delete pSysfsAccess;
}
|
||||
|
||||
// Registers the fw_error node under a path built from attachId + 1 while the
// other three counters use the port's own attachId, then expects success
// with fwErrorCount == 0 and the remaining counters equal to the mocked
// values.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndFwErrorSysfsNodeIsAbsentThenZeroFwErrorsAreReturned) {
    // NOTE(review): VariableBackup presumably restores the original accessors on scope exit - confirm.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    // Value-initialized so that no indeterminate field is handed to the API
    // or used below if the query fails.
    zes_fabric_port_properties_t properties = {};

    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // The node paths below are built from the returned port id, so a failed
    // query must be reported instead of silently producing bogus paths.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::string fabricFwErrorIncorrectPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId + 1);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        // Only this node is placed under the mismatching path.
        {fabricFwErrorIncorrectPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };

    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);
    zes_fabric_port_error_counters_t errors;

    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, 0u);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    // The mocks are owned by this test and released manually.
    delete pFsAccess;
    delete pSysfsAccess;
}
|
||||
|
||||
// Registers the fw_comm_errors node under a path built from attachId + 1
// while the other three counters use the port's own attachId, then expects
// success with fwCommErrorCount == 0 and the remaining counters equal to the
// mocked values.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndFwCommErrorSysfsNodeIsAbsentThenZeroFwCommErrorsAreReturned) {
    // NOTE(review): VariableBackup presumably restores the original accessors on scope exit - confirm.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    // Value-initialized so that no indeterminate field is handed to the API
    // or used below if the query fails.
    zes_fabric_port_properties_t properties = {};

    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // The node paths below are built from the returned port id, so a failed
    // query must be reported instead of silently producing bogus paths.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::string fabricFwErrorIncorrectPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId + 1);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        // Only this node is placed under the mismatching path.
        {fabricFwErrorIncorrectPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);
    zes_fabric_port_error_counters_t errors;

    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, 0u);

    // The mocks are owned by this test and released manually.
    delete pFsAccess;
    delete pSysfsAccess;
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# Copyright (C) 2020-2023 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -10,9 +10,7 @@ set(L0_TESTS_TOOLS_SYSMAN_RAS_LINUX
|
||||
|
||||
if(NEO_ENABLE_i915_PRELIM_DETECTION)
|
||||
list(APPEND L0_TESTS_TOOLS_SYSMAN_RAS_LINUX
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_ras_fabric_prelim.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_ras_prelim.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_fs_ras_fabric_prelim.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_fs_ras_prelim.h
|
||||
)
|
||||
else()
|
||||
|
||||
@@ -1,86 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h"
|
||||
#include "level_zero/tools/source/sysman/linux/pmu/pmu_imp.h"
|
||||
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
|
||||
|
||||
#include "sysman/linux/fs_access.h"
|
||||
#include "sysman/linux/os_sysman_imp.h"
|
||||
#include "sysman/ras/ras.h"
|
||||
#include "sysman/ras/ras_imp.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
class MockRasFabricFsAccess : public FsAccess {
|
||||
public:
|
||||
ze_result_t canRead(const std::string file) override {
|
||||
if (accessibleNodes.find(file) != accessibleNodes.end()) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
~MockRasFabricFsAccess() override = default;
|
||||
|
||||
bool isRootUser() override {
|
||||
return true;
|
||||
}
|
||||
|
||||
ze_result_t read(const std::string file, uint64_t &val) override {
|
||||
if (canRead(file) == ZE_RESULT_SUCCESS) {
|
||||
val = accessibleNodes[file];
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
void setAccessibleNodes(std::map<std::string, uint64_t> &nodes) {
|
||||
accessibleNodes = nodes;
|
||||
}
|
||||
|
||||
void setAccessibleDirectories(std::vector<std::string> &dirs) {
|
||||
accessibleDirectories = dirs;
|
||||
}
|
||||
|
||||
bool directoryExists(const std::string path) override {
|
||||
if (std::find(accessibleDirectories.begin(), accessibleDirectories.end(), path) != accessibleDirectories.end()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<std::string, uint64_t> accessibleNodes = {};
|
||||
std::vector<std::string> accessibleDirectories = {};
|
||||
};
|
||||
|
||||
class MockRasFabricSysFsAccess : public SysfsAccess {
|
||||
public:
|
||||
ze_result_t readSymLink(const std::string path, std::string &buf) override {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
ze_result_t getRealPath(const std::string path, std::string &buf) override {
|
||||
buf.append("/mockRealPath");
|
||||
return mockRealPathStatus;
|
||||
}
|
||||
|
||||
ze_result_t mockRealPathStatus = ZE_RESULT_SUCCESS;
|
||||
};
|
||||
|
||||
struct MockMemoryManagerInRasSysman : public MemoryManagerMock {
|
||||
MockMemoryManagerInRasSysman(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment)) {}
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
@@ -1,529 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h"
|
||||
#include "level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras_fabric_prelim.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
constexpr uint32_t mockHandleCount = 2u;
|
||||
class TestRasFabricFixture : public SysmanDeviceFixture {
|
||||
protected:
|
||||
std::unique_ptr<MockRasFabricFsAccess> pFsAccess;
|
||||
std::unique_ptr<MockRasFabricSysFsAccess> pSysfsAccess;
|
||||
MemoryManager *pMemoryManagerOriginal = nullptr;
|
||||
std::unique_ptr<MockMemoryManagerInRasSysman> pMemoryManager;
|
||||
FsAccess *pFsAccessOriginal = nullptr;
|
||||
SysfsAccess *pSysfsAccessOriginal = nullptr;
|
||||
PmuInterface *pOriginalPmuInterface = nullptr;
|
||||
FirmwareUtil *pOriginalFwUtilInterface = nullptr;
|
||||
std::vector<ze_device_handle_t> deviceHandles;
|
||||
|
||||
void SetUp() override {
|
||||
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
SysmanDeviceFixture::SetUp();
|
||||
pMemoryManagerOriginal = device->getDriverHandle()->getMemoryManager();
|
||||
pMemoryManager = std::make_unique<MockMemoryManagerInRasSysman>(*neoDevice->getExecutionEnvironment());
|
||||
pMemoryManager->localMemorySupported[0] = true;
|
||||
device->getDriverHandle()->setMemoryManager(pMemoryManager.get());
|
||||
pFsAccess = std::make_unique<MockRasFabricFsAccess>();
|
||||
pSysfsAccess = std::make_unique<MockRasFabricSysFsAccess>();
|
||||
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
|
||||
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
|
||||
pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
|
||||
pOriginalFwUtilInterface = pLinuxSysmanImp->pFwUtilInterface;
|
||||
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
|
||||
pLinuxSysmanImp->pPmuInterface = nullptr;
|
||||
pLinuxSysmanImp->pFwUtilInterface = nullptr;
|
||||
for (const auto &handle : pSysmanDeviceImp->pRasHandleContext->handleList) {
|
||||
delete handle;
|
||||
}
|
||||
|
||||
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
|
||||
uint32_t subDeviceCount = 0;
|
||||
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr);
|
||||
if (subDeviceCount == 0) {
|
||||
deviceHandles.resize(1, device->toHandle());
|
||||
} else {
|
||||
deviceHandles.resize(subDeviceCount, nullptr);
|
||||
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data());
|
||||
}
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
device->getDriverHandle()->setMemoryManager(pMemoryManagerOriginal);
|
||||
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
|
||||
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
|
||||
pLinuxSysmanImp->pFwUtilInterface = pOriginalFwUtilInterface;
|
||||
SysmanDeviceFixture::TearDown();
|
||||
}
|
||||
|
||||
std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
|
||||
std::vector<zes_ras_handle_t> handles(count, nullptr);
|
||||
EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
|
||||
return handles;
|
||||
}
|
||||
};
|
||||
|
||||
// Registers the "iaf.0" directory with firmware counters and link counters
// for ports 1-8, then checks that both RAS handles report the expected
// compute-error totals and zero for every other category.
TEST_F(TestRasFabricFixture, GivenValidRasFabricNodesThenGetStateIsSuccessful) {
    std::vector<std::string> dirs = {"/mockRealPath/iaf.0",
                                     "/sys/module/iaf/drivers/platform:iaf/"};
    std::map<std::string, uint64_t> nodes = {
        {"/mockRealPath/iaf.0/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/iaf.0/sd.0/sd_failure", 201},
        {"/mockRealPath/iaf.0/sd.0/fw_error", 301},
        {"/mockRealPath/iaf.0/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/iaf.0/sd.0/port.1/link_degrades", 501},
        {"/mockRealPath/iaf.0/sd.0/port.2/link_failures", 601},
        {"/mockRealPath/iaf.0/sd.0/port.2/link_degrades", 701},
        {"/mockRealPath/iaf.0/sd.0/port.3/link_failures", 801},
        {"/mockRealPath/iaf.0/sd.0/port.3/link_degrades", 901},
        {"/mockRealPath/iaf.0/sd.0/port.4/link_failures", 1001},
        {"/mockRealPath/iaf.0/sd.0/port.4/link_degrades", 1101},
        {"/mockRealPath/iaf.0/sd.0/port.5/link_failures", 2101},
        {"/mockRealPath/iaf.0/sd.0/port.5/link_degrades", 3101},
        {"/mockRealPath/iaf.0/sd.0/port.6/link_failures", 4101},
        {"/mockRealPath/iaf.0/sd.0/port.6/link_degrades", 5101},
        {"/mockRealPath/iaf.0/sd.0/port.7/link_failures", 6101},
        {"/mockRealPath/iaf.0/sd.0/port.7/link_degrades", 7101},
        {"/mockRealPath/iaf.0/sd.0/port.8/link_failures", 8101},
        {"/mockRealPath/iaf.0/sd.0/port.8/link_degrades", 9101},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    // Count-only query.
    uint32_t handleCount = 0;
    ze_result_t status = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(handleCount, mockHandleCount);

    // A count larger than the real one is expected to come back as the real count.
    uint32_t oversizedCount = handleCount + 1;
    status = zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(oversizedCount, mockHandleCount);

    auto rasHandles = getRasHandles(mockHandleCount);
    for (auto rasHandle : rasHandles) {
        EXPECT_NE(rasHandle, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(rasHandle, &properties));

        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);

        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 27709u);
        } else if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 23710u);
        }
    }
}
|
||||
|
||||
// With the mocked getRealPath status set to ZE_RESULT_ERROR_UNKNOWN,
// enumeration is expected to succeed but report zero RAS handles.
TEST_F(TestRasFabricFixture, GivenInValidRasFabricNodesThenEnumerationDoesNotReturnAnyHandles) {
    pSysfsAccess->mockRealPathStatus = ZE_RESULT_ERROR_UNKNOWN;

    uint32_t handleCount = 0;
    EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(handleCount, 0u);
}
|
||||
|
||||
// Same flow as the platform-driver case, but the directories are
// "i915.iaf.0" and the "auxiliary:iaf" driver path, with counters for a
// single port; checks the resulting compute-error totals per error type.
TEST_F(TestRasFabricFixture, GivenValidRasFabricAuxiliaryNodesThenGetStateIsSuccessful) {
    std::vector<std::string> dirs = {"/mockRealPath/i915.iaf.0",
                                     "/sys/module/iaf/drivers/auxiliary:iaf/"};
    std::map<std::string, uint64_t> nodes = {
        {"/mockRealPath/i915.iaf.0/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/i915.iaf.0/sd.0/sd_failure", 201},
        {"/mockRealPath/i915.iaf.0/sd.0/fw_error", 301},
        {"/mockRealPath/i915.iaf.0/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/i915.iaf.0/sd.0/port.1/link_degrades", 501},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    // Count-only query.
    uint32_t handleCount = 0;
    ze_result_t status = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(handleCount, mockHandleCount);

    // A count larger than the real one is expected to come back as the real count.
    uint32_t oversizedCount = handleCount + 1;
    status = zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(oversizedCount, mockHandleCount);

    auto rasHandles = getRasHandles(mockHandleCount);
    for (auto rasHandle : rasHandles) {
        EXPECT_NE(rasHandle, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(rasHandle, &properties));

        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);

        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 602u);
        } else if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 903u);
        }
    }
}
|
||||
|
||||
// Registers the "iaf.31" nodes with port.1/link_degrades and
// port.8/link_failures left out, then checks that the compute-error totals
// equal the full-set totals minus those two values (501 and 8101).
TEST_F(TestRasFabricFixture, GivenSomeRasFabricNodesThenGetStateIsSuccessful) {
    std::vector<std::string> dirs = {"/mockRealPath/iaf.31",
                                     "/sys/module/iaf/drivers/platform:iaf/"};
    std::map<std::string, uint64_t> nodes = {
        {"/mockRealPath/iaf.31/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/iaf.31/sd.0/sd_failure", 201},
        {"/mockRealPath/iaf.31/sd.0/fw_error", 301},
        {"/mockRealPath/iaf.31/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/iaf.31/sd.0/port.2/link_failures", 601},
        {"/mockRealPath/iaf.31/sd.0/port.2/link_degrades", 701},
        {"/mockRealPath/iaf.31/sd.0/port.3/link_failures", 801},
        {"/mockRealPath/iaf.31/sd.0/port.3/link_degrades", 901},
        {"/mockRealPath/iaf.31/sd.0/port.4/link_failures", 1001},
        {"/mockRealPath/iaf.31/sd.0/port.4/link_degrades", 1101},
        {"/mockRealPath/iaf.31/sd.0/port.5/link_failures", 2101},
        {"/mockRealPath/iaf.31/sd.0/port.5/link_degrades", 3101},
        {"/mockRealPath/iaf.31/sd.0/port.6/link_failures", 4101},
        {"/mockRealPath/iaf.31/sd.0/port.6/link_degrades", 5101},
        {"/mockRealPath/iaf.31/sd.0/port.7/link_failures", 6101},
        {"/mockRealPath/iaf.31/sd.0/port.7/link_degrades", 7101},
        {"/mockRealPath/iaf.31/sd.0/port.8/link_degrades", 9101},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    // Count-only query.
    uint32_t handleCount = 0;
    ze_result_t status = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(handleCount, mockHandleCount);

    // A count larger than the real one is expected to come back as the real count.
    uint32_t oversizedCount = handleCount + 1;
    status = zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(oversizedCount, mockHandleCount);

    auto rasHandles = getRasHandles(mockHandleCount);
    for (auto rasHandle : rasHandles) {
        EXPECT_NE(rasHandle, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(rasHandle, &properties));

        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);

        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 27709u - 501u);
        } else if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 23710u - 8101u);
        }
    }
}
|
||||
|
||||
// Reads the RAS state once with the initial "iaf.27" counter values, swaps
// in a node table where every counter is doubled, and checks that the second
// read reports twice the single-read compute-error totals.
TEST_F(TestRasFabricFixture, GivenValidRasFabricNodesWhenGetStateIsCalledTwiceThenRasErrorCountIsDoubled) {
    std::vector<std::string> dirs = {"/mockRealPath/iaf.27",
                                     "/sys/module/iaf/drivers/platform:iaf/"};
    std::map<std::string, uint64_t> nodes = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 201},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 301},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 501},
    };
    std::map<std::string, uint64_t> nodesSecondRead = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 101 * 2},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 201 * 2},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 301 * 2},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 401 * 2},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 501 * 2},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    // Count-only query.
    uint32_t handleCount = 0;
    ze_result_t status = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(handleCount, mockHandleCount);

    // A count larger than the real one is expected to come back as the real count.
    uint32_t oversizedCount = handleCount + 1;
    status = zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(oversizedCount, mockHandleCount);

    auto rasHandles = getRasHandles(mockHandleCount);

    // First read with the initial counter values; only success is checked.
    for (auto rasHandle : rasHandles) {
        EXPECT_NE(rasHandle, nullptr);
        zes_ras_state_t firstState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, 0, &firstState));
    }

    pFsAccess->setAccessibleNodes(nodesSecondRead);

    // Second read against the doubled counter values.
    for (auto rasHandle : rasHandles) {
        EXPECT_NE(rasHandle, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(rasHandle, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(rasHandle, &properties));
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);

        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 602u * 2);
        } else if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 903u * 2);
        }
    }
}
|
||||
|
||||
// Reads the RAS state once with clear == 1, then again with clear == 0 after the
// mocked fabric nodes have been doubled. The second read is expected to report
// only the totals of the original node set (602 correctable / 903 uncorrectable).
TEST_F(TestRasFabricFixture, GivenValidRasFabricNodesWhenGetStateIsCalledTwiceWithClearThenNewRasErrorCountIsRetrieved) {

    std::vector<std::string> dirs = {"/mockRealPath/iaf.27",
                                     "/sys/module/iaf/drivers/platform:iaf/"};
    // Node values served to the first (clearing) zesRasGetState() call.
    std::map<std::string, uint64_t> nodes = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 201},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 301},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 501},
    };
    // Same nodes with every value doubled, served to the second call.
    std::map<std::string, uint64_t> nodesSecondRead = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 101 * 2},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 201 * 2},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 301 * 2},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 401 * 2},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 501 * 2},
    };
    auto pMockFsAccess = static_cast<MockRasFabricFsAccess *>(pFsAccess.get());
    pMockFsAccess->setAccessibleDirectories(dirs);
    pMockFsAccess->setAccessibleNodes(nodes);

    uint32_t count = 0;
    ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, mockHandleCount);

    // Asking for more handles than exist must clamp the count back down.
    uint32_t testcount = count + 1;
    result = zesDeviceEnumRasErrorSets(device->toHandle(), &testcount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(testcount, mockHandleCount);
    auto handles = getRasHandles(mockHandleCount);

    // First read with clear == 1.
    for (auto handle : handles) {
        // Fatal check: a null handle must not be handed to the API calls below.
        ASSERT_NE(handle, nullptr);
        zes_ras_state_t state = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(handle, 1, &state));
    }
    pMockFsAccess->setAccessibleNodes(nodesSecondRead);

    // Second read, clear == 0, against the doubled node values.
    for (auto handle : handles) {
        ASSERT_NE(handle, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(handle, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(handle, &properties));
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
        EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], 0u);

        // 602 == 101 + 501 (presumably fw_comm_errors + link_degrades).
        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 602u);
        }
        // 903 == 201 + 301 + 401 (presumably sd_failure + fw_error + link_failures).
        if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 903u);
        }
    }
}
|
||||
|
||||
class SysmanRasFabricMultiDeviceFixture : public MultiDeviceFixture, public ::testing::Test {
|
||||
public:
|
||||
void SetUp() override {
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
MultiDeviceFixture::setUp();
|
||||
for (auto &device : driverHandle->devices) {
|
||||
auto neoDevice = device->getNEODevice();
|
||||
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique<NEO::OSInterface>();
|
||||
auto &osInterface = device->getOsInterface();
|
||||
osInterface.setDriverModel(std::make_unique<SysmanMockDrm>(const_cast<NEO::RootDeviceEnvironment &>(neoDevice->getRootDeviceEnvironment())));
|
||||
setenv("ZES_ENABLE_SYSMAN", "1", 1);
|
||||
delete device->getSysmanHandle();
|
||||
device->setSysmanHandle(new SysmanDeviceImp(device->toHandle()));
|
||||
auto pSysmanDevice = device->getSysmanHandle();
|
||||
for (auto &subDevice : static_cast<DeviceImp *>(device)->subDevices) {
|
||||
static_cast<DeviceImp *>(subDevice)->setSysmanHandle(pSysmanDevice);
|
||||
}
|
||||
|
||||
auto pSysmanDeviceImp = static_cast<SysmanDeviceImp *>(pSysmanDevice);
|
||||
auto pOsSysman = pSysmanDeviceImp->pOsSysman;
|
||||
auto pLinuxSysmanImp = static_cast<PublicLinuxSysmanImp *>(pOsSysman);
|
||||
|
||||
pSysmanDeviceImp->init();
|
||||
|
||||
delete pLinuxSysmanImp->pFwUtilInterface;
|
||||
delete pLinuxSysmanImp->pSysfsAccess;
|
||||
delete pLinuxSysmanImp->pProcfsAccess;
|
||||
delete pLinuxSysmanImp->pFsAccess;
|
||||
|
||||
auto pProcfsAccess = new MockLinuxProcfsAccess();
|
||||
auto pFsAccess = new MockRasFabricFsAccess();
|
||||
auto pSysfsAccess = new MockRasFabricSysFsAccess();
|
||||
|
||||
pLinuxSysmanImp->pFwUtilInterface = nullptr;
|
||||
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;
|
||||
pLinuxSysmanImp->pProcfsAccess = pProcfsAccess;
|
||||
pLinuxSysmanImp->pFsAccess = pFsAccess;
|
||||
}
|
||||
}
|
||||
void TearDown() override {
|
||||
if (!sysmanUltsEnable) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
for (auto &device : driverHandle->devices) {
|
||||
auto pSysmanDevice = device->getSysmanHandle();
|
||||
auto pSysmanDeviceImp = static_cast<SysmanDeviceImp *>(pSysmanDevice);
|
||||
auto pOsSysman = pSysmanDeviceImp->pOsSysman;
|
||||
auto pLinuxSysmanImp = static_cast<PublicLinuxSysmanImp *>(pOsSysman);
|
||||
|
||||
delete pLinuxSysmanImp->pSysfsAccess;
|
||||
delete pLinuxSysmanImp->pProcfsAccess;
|
||||
delete pLinuxSysmanImp->pFsAccess;
|
||||
|
||||
pLinuxSysmanImp->pFwUtilInterface = nullptr;
|
||||
pLinuxSysmanImp->pSysfsAccess = nullptr;
|
||||
pLinuxSysmanImp->pProcfsAccess = nullptr;
|
||||
pLinuxSysmanImp->pFsAccess = nullptr;
|
||||
|
||||
delete pSysmanDevice;
|
||||
device->setSysmanHandle(nullptr);
|
||||
}
|
||||
|
||||
unsetenv("ZES_ENABLE_SYSMAN");
|
||||
MultiDeviceFixture::tearDown();
|
||||
}
|
||||
};
|
||||
|
||||
// Feeds different fabric error counts to the mock filesystem accessors of two
// root devices (two sub-devices each) and checks that each RAS handle reports the
// totals belonging to its own root device / sub-device.
TEST_F(SysmanRasFabricMultiDeviceFixture, GivenValidRasFabricNodesForMultipleDevicesThenGetStateReturnsErrorCountSpecificToEachOfDevice) {

    // Root devices and sub-devices are counted separately; the expectation table
    // below holds testUseRootDeviceCount * testUseSubDeviceCount entries.
    const uint32_t testUseRootDeviceCount = 2u;
    const uint32_t testUseSubDeviceCount = 2u;
    ASSERT_GE(numRootDevices, testUseRootDeviceCount);
    ASSERT_GE(numSubDevices, testUseSubDeviceCount);

    std::vector<std::string> dirs = {"/mockRealPath/iaf.27",
                                     "/sys/module/iaf/drivers/platform:iaf/"};
    {
        // Root device 0: every sd.0 node reads 1, every sd.1 node reads 2.
        std::map<std::string, uint64_t> nodes = {
            {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 1},
            {"/mockRealPath/iaf.27/sd.0/sd_failure", 1},
            {"/mockRealPath/iaf.27/sd.0/fw_error", 1},
            {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 1},
            {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 1},

            {"/mockRealPath/iaf.27/sd.1/fw_comm_errors", 2},
            {"/mockRealPath/iaf.27/sd.1/sd_failure", 2},
            {"/mockRealPath/iaf.27/sd.1/fw_error", 2},
            {"/mockRealPath/iaf.27/sd.1/port.1/link_failures", 2},
            {"/mockRealPath/iaf.27/sd.1/port.1/link_degrades", 2},
        };

        auto pOsSysman = static_cast<SysmanDeviceImp *>(driverHandle->devices[0]->getSysmanHandle())->pOsSysman;
        auto pLinuxSysmanImp = static_cast<PublicLinuxSysmanImp *>(pOsSysman);
        auto pMockFsAccess = static_cast<MockRasFabricFsAccess *>(pLinuxSysmanImp->pFsAccess);

        pMockFsAccess->setAccessibleDirectories(dirs);
        pMockFsAccess->setAccessibleNodes(nodes);
    }

    {
        // Root device 1: every sd.0 node reads 3, every sd.1 node reads 4.
        std::map<std::string, uint64_t> nodes = {
            {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 3},
            {"/mockRealPath/iaf.27/sd.0/sd_failure", 3},
            {"/mockRealPath/iaf.27/sd.0/fw_error", 3},
            {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 3},
            {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 3},

            {"/mockRealPath/iaf.27/sd.1/fw_comm_errors", 4},
            {"/mockRealPath/iaf.27/sd.1/sd_failure", 4},
            {"/mockRealPath/iaf.27/sd.1/fw_error", 4},
            {"/mockRealPath/iaf.27/sd.1/port.1/link_failures", 4},
            {"/mockRealPath/iaf.27/sd.1/port.1/link_degrades", 4},
        };

        auto pOsSysman = static_cast<SysmanDeviceImp *>(driverHandle->devices[1]->getSysmanHandle())->pOsSysman;
        auto pLinuxSysmanImp = static_cast<PublicLinuxSysmanImp *>(pOsSysman);
        auto pMockFsAccess = static_cast<MockRasFabricFsAccess *>(pLinuxSysmanImp->pFsAccess);

        pMockFsAccess->setAccessibleDirectories(dirs);
        pMockFsAccess->setAccessibleNodes(nodes);
    }

    // Expected {correctable, uncorrectable} compute-error totals, indexed by
    // deviceIndex * testUseSubDeviceCount + subdeviceId.
    const std::vector<std::pair<uint32_t, uint32_t>> errorCounts{
        {2, 3},  // Device 0, subdevice 0
        {4, 6},  // Device 0, subdevice 1
        {6, 9},  // Device 1, subdevice 0
        {8, 12}, // Device 1, subdevice 1
    };

    for (uint32_t deviceIndex = 0; deviceIndex < testUseRootDeviceCount; deviceIndex++) {
        uint32_t count = 0;
        auto hDevice = driverHandle->devices[deviceIndex]->toHandle();
        EXPECT_EQ(zesDeviceEnumRasErrorSets(hDevice, &count, nullptr), ZE_RESULT_SUCCESS);
        EXPECT_GT(count, 0u);
        std::vector<zes_ras_handle_t> handles(count, nullptr);
        EXPECT_EQ(zesDeviceEnumRasErrorSets(hDevice, &count, handles.data()), ZE_RESULT_SUCCESS);
        for (auto handle : handles) {
            // Fatal check: a slot left unfilled must not be handed to the API.
            ASSERT_NE(handle, nullptr);
            zes_ras_state_t state = {};
            zes_ras_properties_t properties = {};
            EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(handle, 0, &state));
            EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(handle, &properties));

            // Only sub-devices 0 and 1 have mocked nodes and table entries; stop
            // instead of indexing errorCounts out of range or into another device.
            ASSERT_LT(properties.subdeviceId, testUseSubDeviceCount);
            const auto accessIndex = deviceIndex * testUseSubDeviceCount + properties.subdeviceId;
            if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
                EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], errorCounts[accessIndex].first);
            }
            if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
                EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], errorCounts[accessIndex].second);
            }
        }
    }
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
Reference in New Issue
Block a user