Add support for sysman zesFabricPortGetFabricErrorCounters API

Related-To: LOCI-3398

Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
Mayank Raghuwanshi
2023-01-30 16:07:51 +00:00
committed by Compute-Runtime-Automation
parent 48ed9f9c92
commit 07d3353b1f
21 changed files with 571 additions and 784 deletions

View File

@@ -760,7 +760,7 @@ ze_result_t zesOverclockSetVFPointValues(
// Sysman API entry point for querying the error counters of a fabric port.
// Resolves hPort to its L0::FabricPort object and forwards the request to it;
// the returned status is whatever fabricPortGetErrorCounters() reports.
// (The earlier unconditional UNSUPPORTED_FEATURE return made this call unreachable.)
ze_result_t zesFabricPortGetFabricErrorCounters(
    zes_fabric_port_handle_t hPort,
    zes_fabric_port_error_counters_t *pErrors) {
    return L0::FabricPort::fromHandle(hPort)->fabricPortGetErrorCounters(pErrors);
}
ze_result_t zesInit(

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -38,6 +38,7 @@ class FabricPort : _zes_fabric_port_handle_t {
virtual ze_result_t fabricPortSetConfig(const zes_fabric_port_config_t *pConfig) = 0;
virtual ze_result_t fabricPortGetState(zes_fabric_port_state_t *pState) = 0;
virtual ze_result_t fabricPortGetThroughput(zes_fabric_port_throughput_t *pThroughput) = 0;
virtual ze_result_t fabricPortGetErrorCounters(zes_fabric_port_error_counters_t *pErrors) = 0;
inline zes_fabric_port_handle_t toZesHandle() { return this; }

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -53,6 +53,10 @@ ze_result_t FabricPortImp::fabricPortGetState(zes_fabric_port_state_t *pState) {
return pOsFabricPort->getState(pState);
}
// Hands the error-counter query to the OS-specific port object and returns its status unchanged.
ze_result_t FabricPortImp::fabricPortGetErrorCounters(zes_fabric_port_error_counters_t *pErrors) {
    const ze_result_t status = pOsFabricPort->getErrorCounters(pErrors);
    return status;
}
ze_result_t FabricPortImp::fabricPortGetThroughput(zes_fabric_port_throughput_t *pThroughput) {
fabricPortGetTimestamp(pThroughput->timestamp);
return pOsFabricPort->getThroughput(pThroughput);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -35,6 +35,7 @@ class FabricPortImp : public FabricPort, NEO::NonCopyableOrMovableClass {
ze_result_t fabricPortSetConfig(const zes_fabric_port_config_t *pConfig) override;
ze_result_t fabricPortGetState(zes_fabric_port_state_t *pState) override;
ze_result_t fabricPortGetThroughput(zes_fabric_port_throughput_t *pThroughput) override;
ze_result_t fabricPortGetErrorCounters(zes_fabric_port_error_counters_t *pErrors) override;
FabricPortImp() = delete;
FabricPortImp(FabricDevice *pFabricDevice, uint32_t portNum);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -56,6 +56,10 @@ ze_result_t LinuxFabricPortImp::getThroughput(zes_fabric_port_throughput_t *pThr
return ZE_RESULT_SUCCESS;
}
// This implementation does not provide fabric error counters: pErrors is left untouched
// and the call always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t LinuxFabricPortImp::getErrorCounters(zes_fabric_port_error_counters_t *pErrors) {
    const ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return unsupported;
}
ze_result_t LinuxFabricPortImp::getProperties(zes_fabric_port_properties_t *pProperties) {
::snprintf(pProperties->model, ZES_MAX_FABRIC_PORT_MODEL_SIZE, "%s", this->model.c_str());
pProperties->onSubdevice = false;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -34,6 +34,7 @@ class LinuxFabricPortImp : public OsFabricPort, NEO::NonCopyableOrMovableClass {
ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) override;
ze_result_t getState(zes_fabric_port_state_t *pState) override;
ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) override;
ze_result_t getErrorCounters(zes_fabric_port_error_counters_t *pErrors) override;
LinuxFabricPortImp() = delete;
LinuxFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,8 +7,11 @@
#include "os_fabric_port_imp_prelim.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/debug_helpers.h"
#include "sysman/linux/os_sysman_imp.h"
#include <cstdio>
namespace L0 {
@@ -36,6 +39,78 @@ ze_result_t LinuxFabricDeviceImp::getThroughput(const zes_fabric_port_id_t portI
return pFabricDeviceAccess->getThroughput(portId, *pThroughput);
}
// Reads the error counters of one fabric port from sysfs nodes under the device directory.
//
// The real path of "device/" is resolved first and its directory listing is searched for
// the first entry whose name contains "iaf." (this also covers names containing "i915.iaf.").
// Firmware counters are read from <iaf entry>/sd.<attachId>/ and link counters from
// <iaf entry>/sd.<attachId>/port.<portNumber>/.
//
// Returns the failure code of getRealPath()/listDirectory() when either fails,
// ZE_RESULT_ERROR_NOT_AVAILABLE when no matching entry is listed, and ZE_RESULT_SUCCESS
// otherwise. A counter whose node cannot be read is reported as 0 instead of failing the call.
ze_result_t LinuxFabricDeviceImp::getErrorCounters(const zes_fabric_port_id_t portId, zes_fabric_port_error_counters_t *pErrors) {
    FsAccess *pFsAccess = &pLinuxSysmanImp->getFsAccess();
    SysfsAccess *pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess();
    // __func__ evaluated inside the lambda below would name the lambda, not this function,
    // so the name is captured here to keep the debug messages unchanged.
    const char *functionName = __func__;

    std::string devicePciPath;
    ze_result_t result = pSysfsAccess->getRealPath("device/", devicePciPath);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "error@<%s> <failed to get device path> <result: 0x%x>\n", functionName, result);
        return result;
    }

    std::vector<std::string> list;
    result = pFsAccess->listDirectory(devicePciPath, list);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "error@<%s> <failed to get list of files in device directory> <result: 0x%x>\n", functionName, result);
        return result;
    }

    std::string path;
    for (const auto &entry : list) {
        // "iaf." is a substring of "i915.iaf.", so one search matches both naming schemes.
        if (entry.find("iaf.") != std::string::npos) {
            path = devicePciPath + "/" + entry;
            break;
        }
    }
    if (path.empty()) {
        // This device does not have a fabric
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "error@<%s> <Device does not have fabric>\n", functionName);
        return ZE_RESULT_ERROR_NOT_AVAILABLE;
    }

    // Reads a single counter node; an unreadable node is logged and counted as 0.
    auto readCounterOrZero = [pFsAccess, functionName](const std::string &file) -> uint64_t {
        uint64_t count = 0;
        const ze_result_t status = pFsAccess->read(file, count);
        if (status != ZE_RESULT_SUCCESS) {
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                                  "error@<%s> <failed to read file %s> <result: 0x%x>\n", functionName, file.c_str(), status);
            count = 0;
        }
        return count;
    };

    const std::string fabricFwErrorPath = path + "/sd." + std::to_string(portId.attachId);
    const std::string fabricLinkErrorPath = fabricFwErrorPath + "/port." + std::to_string(portId.portNumber);

    // Same read order as before: link failures, link degrades, fw errors, fw comm errors.
    const uint64_t linkErrorCount = readCounterOrZero(fabricLinkErrorPath + "/link_failures");
    const uint64_t linkDegradeCount = readCounterOrZero(fabricLinkErrorPath + "/link_degrades");
    const uint64_t fwErrorCount = readCounterOrZero(fabricFwErrorPath + "/fw_error");
    const uint64_t fwCommErrorCount = readCounterOrZero(fabricFwErrorPath + "/fw_comm_errors");

    pErrors->linkFailureCount = linkErrorCount;
    pErrors->linkDegradeCount = linkDegradeCount;
    pErrors->fwErrorCount = fwErrorCount;
    pErrors->fwCommErrorCount = fwCommErrorCount;
    return ZE_RESULT_SUCCESS;
}
ze_result_t LinuxFabricDeviceImp::performSweep() {
uint32_t start = 0U;
uint32_t end = 0U;
@@ -143,6 +218,7 @@ ze_result_t LinuxFabricDeviceImp::routingQuery(uint32_t &start, uint32_t &end) {
// Keeps the OS sysman object (downcast to LinuxSysmanImp) and creates the fabric device
// access object; UNRECOVERABLE_IF guards against a null access object.
LinuxFabricDeviceImp::LinuxFabricDeviceImp(OsSysman *pOsSysman) {
    pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
    pFabricDeviceAccess = FabricDeviceAccess::create(pOsSysman);
    UNRECOVERABLE_IF(pFabricDeviceAccess == nullptr);
}
LinuxFabricDeviceImp::~LinuxFabricDeviceImp() {
@@ -206,6 +282,10 @@ ze_result_t LinuxFabricPortImp::getThroughput(zes_fabric_port_throughput_t *pThr
return pLinuxFabricDeviceImp->getThroughput(portId, pThroughput);
}
// Asks the owning fabric device for the error counters of this port (identified by portId)
// and returns the device's status unchanged.
ze_result_t LinuxFabricPortImp::getErrorCounters(zes_fabric_port_error_counters_t *pErrors) {
    const ze_result_t status = pLinuxFabricDeviceImp->getErrorCounters(portId, pErrors);
    return status;
}
ze_result_t LinuxFabricPortImp::getProperties(zes_fabric_port_properties_t *pProperties) {
::snprintf(pProperties->model, ZES_MAX_FABRIC_PORT_MODEL_SIZE, "%s", this->model.c_str());
pProperties->onSubdevice = this->onSubdevice;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -15,7 +15,7 @@
#include <vector>
namespace L0 {
class LinuxSysmanImp;
class LinuxFabricDeviceImp : public OsFabricDevice, NEO::NonCopyableOrMovableClass {
public:
uint32_t getNumPorts() override;
@@ -29,6 +29,7 @@ class LinuxFabricDeviceImp : public OsFabricDevice, NEO::NonCopyableOrMovableCla
ze_result_t disablePortBeaconing(const zes_fabric_port_id_t portId);
ze_result_t getState(const zes_fabric_port_id_t portId, zes_fabric_port_state_t *pState);
ze_result_t getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t *pThroughput);
ze_result_t getErrorCounters(const zes_fabric_port_id_t portId, zes_fabric_port_error_counters_t *pErrors);
void getPortId(const uint32_t portNumber, zes_fabric_port_id_t &portId);
void getProperties(const zes_fabric_port_id_t portId, std::string &model, bool &onSubdevice,
@@ -49,6 +50,7 @@ class LinuxFabricDeviceImp : public OsFabricDevice, NEO::NonCopyableOrMovableCla
ze_result_t disableUsage(const zes_fabric_port_id_t portId);
protected:
LinuxSysmanImp *pLinuxSysmanImp = nullptr;
FabricDeviceAccess *pFabricDeviceAccess = nullptr;
};
@@ -60,6 +62,7 @@ class LinuxFabricPortImp : public OsFabricPort, NEO::NonCopyableOrMovableClass {
ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) override;
ze_result_t getState(zes_fabric_port_state_t *pState) override;
ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) override;
ze_result_t getErrorCounters(zes_fabric_port_error_counters_t *pErrors) override;
LinuxFabricPortImp() = delete;
LinuxFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,7 @@ class OsFabricPort {
virtual ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) = 0;
virtual ze_result_t getState(zes_fabric_port_state_t *pState) = 0;
virtual ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) = 0;
virtual ze_result_t getErrorCounters(zes_fabric_port_error_counters_t *pErrors) = 0;
static OsFabricPort *create(OsFabricDevice *pOsFabricDevice, uint32_t portNum);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -41,6 +41,10 @@ ze_result_t WddmFabricPortImp::getThroughput(zes_fabric_port_throughput_t *pThro
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// This implementation does not provide fabric error counters: pErrors is left untouched
// and the call always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t WddmFabricPortImp::getErrorCounters(zes_fabric_port_error_counters_t *pErrors) {
    const ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return unsupported;
}
ze_result_t WddmFabricPortImp::getProperties(zes_fabric_port_properties_t *pProperties) {
::memset(pProperties->model, '\0', ZES_MAX_FABRIC_PORT_MODEL_SIZE);
pProperties->onSubdevice = false;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -35,6 +35,7 @@ class WddmFabricPortImp : public OsFabricPort, NEO::NonCopyableOrMovableClass {
ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) override;
ze_result_t getState(zes_fabric_port_state_t *pState) override;
ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) override;
ze_result_t getErrorCounters(zes_fabric_port_error_counters_t *pErrors) override;
WddmFabricPortImp() = delete;
WddmFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum);

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -13,7 +13,6 @@ if(NEO_ENABLE_i915_PRELIM_DETECTION)
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_prelim.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_prelim.h
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_gt.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_fabric.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_hbm.cpp
)
else()

View File

@@ -1,127 +0,0 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/device/sub_device.h"
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
#include "sysman/linux/fs_access.h"
#include "sysman/linux/os_sysman_imp.h"
#include <cstring>
#include <regex>
namespace L0 {
// Fills `nodes` with the readable IAF sysfs error nodes of one sub-device for the requested
// RAS error type. `nodes` is cleared first and stays empty when the device path cannot be
// resolved or when neither IAF driver directory exists.
//
// Correctable errors:   fw_comm_errors plus port.<1..8>/link_degrades
// Uncorrectable errors: sd_failure, fw_error plus port.<1..8>/link_failures
void LinuxRasSourceFabric::getNodes(std::vector<std::string> &nodes, uint32_t subdeviceId, LinuxSysmanImp *pSysmanImp, const zes_ras_error_type_t &type) {
    const uint32_t firstBoardStrappedNumber = 0;
    const uint32_t lastBoardStrappedNumber = 31;
    const uint32_t firstPortId = 1;
    const uint32_t lastPortId = 8;

    nodes.clear();

    const std::string mfdDriverDir("/sys/module/iaf/drivers/platform:iaf/");
    const std::string auxiliaryDriverDir("/sys/module/iaf/drivers/auxiliary:iaf/");

    std::string iafPrefix("");
    if (ZE_RESULT_SUCCESS != pSysmanImp->getSysfsAccess().getRealPath("device/", iafPrefix)) {
        return;
    }

    // The directory name prefix depends on which driver directory is present.
    auto &fsAccess = pSysmanImp->getFsAccess();
    if (fsAccess.directoryExists(mfdDriverDir)) {
        iafPrefix += "/iaf.";
    } else if (fsAccess.directoryExists(auxiliaryDriverDir)) {
        iafPrefix += "/i915.iaf.";
    } else {
        return;
    }

    const bool correctable = (type == ZES_RAS_ERROR_TYPE_CORRECTABLE);
    const char *perPortNode = correctable ? "/link_degrades" : "/link_failures";

    for (uint32_t board = firstBoardStrappedNumber; board <= lastBoardStrappedNumber; board++) {
        const std::string boardDir(iafPrefix + std::to_string(board));
        if (!fsAccess.directoryExists(boardDir)) {
            continue;
        }

        const std::string subDeviceDir(boardDir + "/sd." + std::to_string(subdeviceId));

        // Candidate nodes in probing order: sub-device level first, then one per port.
        std::vector<std::string> candidates;
        if (correctable) {
            candidates.push_back(subDeviceDir + "/fw_comm_errors");
        } else {
            candidates.push_back(subDeviceDir + "/sd_failure");
            candidates.push_back(subDeviceDir + "/fw_error");
        }
        for (uint32_t port = firstPortId; port <= lastPortId; port++) {
            candidates.push_back(subDeviceDir + "/port." + std::to_string(port) + perPortNode);
        }

        // Only nodes that can actually be read are reported.
        for (auto &candidate : candidates) {
            if (fsAccess.canRead(candidate) == ZE_RESULT_SUCCESS) {
                nodes.push_back(candidate);
            }
        }
    }
}
// Adds to `errorType` every RAS error type for which at least one readable fabric error
// node is found on the sub-device behind `deviceHandle`. Always returns ZE_RESULT_SUCCESS.
ze_result_t LinuxRasSourceFabric::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType,
                                                            OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {
    auto *pSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);

    uint32_t subDeviceIndex = 0;
    ze_bool_t onSubDevice = false;
    SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subDeviceIndex, onSubDevice, true);

    // Probe uncorrectable first, then correctable; getNodes() clears the vector on each call.
    const zes_ras_error_type_t probedTypes[] = {ZES_RAS_ERROR_TYPE_UNCORRECTABLE, ZES_RAS_ERROR_TYPE_CORRECTABLE};
    std::vector<std::string> nodes;
    for (const auto probedType : probedTypes) {
        getNodes(nodes, subDeviceIndex, pSysmanImp, probedType);
        if (!nodes.empty()) {
            errorType.insert(probedType);
        }
    }
    return ZE_RESULT_SUCCESS;
}
// Stores the Linux sysman object and collects, once at construction, the readable error
// nodes for the given error type and sub-device.
LinuxRasSourceFabric::LinuxRasSourceFabric(OsSysman *pOsSysman, zes_ras_error_type_t type, uint32_t subDeviceId)
    : pLinuxSysmanImp(static_cast<LinuxSysmanImp *>(pOsSysman)) {
    getNodes(errorNodes, subDeviceId, pLinuxSysmanImp, type);
}
uint64_t LinuxRasSourceFabric::getComputeErrorCount() {
uint64_t currentErrorCount = 0;
auto &fsAccess = pLinuxSysmanImp->getFsAccess();
for (const auto &node : errorNodes) {
uint64_t errorCount = 0;
fsAccess.read(node, errorCount);
currentErrorCount += errorCount;
}
return currentErrorCount;
}
// Reports the fabric error count, relative to the stored baseline, in the compute-errors
// category; all categories are zeroed first. Returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE
// when no error nodes were collected. With `clear` set, the baseline becomes the count
// just read and the count is then sampled again before the difference is reported.
ze_result_t LinuxRasSourceFabric::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
    if (errorNodes.empty()) {
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    std::memset(state.category, 0, sizeof(state.category));

    uint64_t latestCount = getComputeErrorCount();
    if (clear) {
        baseComputeErrorCount = latestCount;
        latestCount = getComputeErrorCount();
    }

    state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS] = latestCount - baseComputeErrorCount;
    return ZE_RESULT_SUCCESS;
}
} // namespace L0

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -18,10 +18,7 @@ void OsRas::getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType,
constexpr auto maxErrorTypes = 2;
LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
if (errorType.size() < maxErrorTypes) {
LinuxRasSourceFabric::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
if (errorType.size() < maxErrorTypes) {
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
}
LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle);
}
}
@@ -72,7 +69,6 @@ ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear)
// Registers the RAS sources in a fixed order: GT, fabric, then HBM.
void LinuxRasImp::initSources() {
    rasSources.emplace_back(std::make_unique<L0::LinuxRasSourceGt>(pLinuxSysmanImp, osRasErrorType, isSubdevice, subdeviceId));
    rasSources.emplace_back(std::make_unique<L0::LinuxRasSourceFabric>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
    rasSources.emplace_back(std::make_unique<L0::LinuxRasSourceHbm>(pLinuxSysmanImp, osRasErrorType, subdeviceId));
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -90,22 +90,6 @@ class LinuxRasSourceGt : public LinuxRasSources {
uint32_t subdeviceId = 0;
};
// RAS source that reports fabric errors read from sysfs error nodes.
class LinuxRasSourceFabric : public LinuxRasSources {
  public:
    LinuxRasSourceFabric(OsSysman *pOsSysman, zes_ras_error_type_t type, uint32_t subDeviceId);
    ~LinuxRasSourceFabric() override = default;

    // Inserts into errorType the error types for which readable fabric nodes exist.
    static ze_result_t getSupportedRasErrorTypes(std::set<zes_ras_error_type_t> &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle);

    ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;

  private:
    // Collects the readable error nodes for one sub-device and error type.
    static void getNodes(std::vector<std::string> &nodes, uint32_t subdeviceId, LinuxSysmanImp *pSysmanImp, const zes_ras_error_type_t &type);
    // Sums the values of all nodes in errorNodes.
    uint64_t getComputeErrorCount();

    LinuxSysmanImp *pLinuxSysmanImp = nullptr;
    std::vector<std::string> errorNodes{};
    // Baseline subtracted from the current count when reporting state.
    uint64_t baseComputeErrorCount = 0;
};
class LinuxRasSourceHbm : public LinuxRasSources {
public:
ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override;

View File

@@ -1124,6 +1124,7 @@ void testSysmanFabricPort(ze_device_handle_t &device) {
zes_fabric_port_config_t fabricPortConfig = {};
zes_fabric_port_state_t fabricPortState = {};
zes_fabric_port_throughput_t fabricPortThroughput = {};
zes_fabric_port_error_counters_t fabricPortErrorCounters = {};
VALIDATECALL(zesFabricPortGetProperties(handle, &fabricPortProperties));
if (verbose) {
@@ -1172,6 +1173,14 @@ void testSysmanFabricPort(ze_device_handle_t &device) {
std::cout << "RX Counter = " << fabricPortThroughput.rxCounter << std::endl;
std::cout << "TX Counter = " << fabricPortThroughput.txCounter << std::endl;
}
VALIDATECALL(zesFabricPortGetFabricErrorCounters(handle, &fabricPortErrorCounters));
if (verbose) {
std::cout << "Link Failures = " << fabricPortErrorCounters.linkFailureCount << std::endl;
std::cout << "Link Degrades = " << fabricPortErrorCounters.linkDegradeCount << std::endl;
std::cout << "Fw Errors = " << fabricPortErrorCounters.fwErrorCount << std::endl;
std::cout << "Fw comm Errors = " << fabricPortErrorCounters.fwCommErrorCount << std::endl;
}
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -10,6 +10,8 @@
#include "gmock/gmock.h"
#include "sysman/fabric_port/fabric_port.h"
#include "sysman/linux/fs_access.h"
#include "sysman/linux/os_sysman_imp.h"
namespace L0 {
namespace ult {
@@ -24,5 +26,53 @@ struct MockFabricDevice : public FabricDevice {
MockFabricDevice() = default;
};
// Test double for FsAccess backed by in-memory tables instead of the real filesystem.
// A file is "readable" exactly when it is a key of the node table; its value is the number
// returned by read(). listDirectory() ignores the requested path and always yields the
// configured directory list together with the configurable status mockListDirectory.
class MockFabricFsAccess : public FsAccess {
  public:
    // Status returned by listDirectory(); tests change it to simulate a listing failure.
    ze_result_t mockListDirectory = ZE_RESULT_SUCCESS;

    ze_result_t canRead(const std::string file) override {
        if (accessibleNodes.find(file) != accessibleNodes.end()) {
            return ZE_RESULT_SUCCESS;
        }
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    ze_result_t listDirectory(const std::string path, std::vector<std::string> &list) override {
        list = accessibleDirectories;
        return mockListDirectory;
    }

    ~MockFabricFsAccess() override = default;

    // Single lookup: val is written only when the node exists, otherwise it is left untouched.
    ze_result_t read(const std::string file, uint64_t &val) override {
        const auto node = accessibleNodes.find(file);
        if (node == accessibleNodes.end()) {
            return ZE_RESULT_ERROR_UNKNOWN;
        }
        val = node->second;
        return ZE_RESULT_SUCCESS;
    }

    // Setters copy their argument, so const objects and temporaries are accepted as well.
    void setAccessibleNodes(const std::map<std::string, uint64_t> &nodes) {
        accessibleNodes = nodes;
    }

    void setAccessibleDirectories(const std::vector<std::string> &dirs) {
        accessibleDirectories = dirs;
    }

  private:
    std::map<std::string, uint64_t> accessibleNodes = {};
    std::vector<std::string> accessibleDirectories = {};
};
// Test double for SysfsAccess whose getRealPath() appends a fixed fake path to the
// caller's buffer and returns a status that tests can override.
class MockFabricSysFsAccess : public SysfsAccess {
  public:
    // Status returned by getRealPath(); defaults to success.
    ze_result_t mockRealPathStatus = ZE_RESULT_SUCCESS;

    // Note: the fake path is appended (not assigned), and it is appended even when the
    // configured status is a failure.
    ze_result_t getRealPath(const std::string path, std::string &buf) override {
        buf += "/mockRealPath";
        return mockRealPathStatus;
    }
};
} // namespace ult
} // namespace L0

View File

@@ -5,6 +5,7 @@
*
*/
#include "level_zero/tools/test/unit_tests/sources/sysman/fabric_port/linux/mock_fabric_device.h"
#include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h"
#include "level_zero/tools/test/unit_tests/sources/sysman/linux/nl_api/mock_iaf_nl_api_prelim.h"
@@ -58,7 +59,6 @@ class ZesFabricPortFixture : public SysmanDeviceFixture {
delete pFabricPortHandleContext->pFabricDevice;
pFabricPortHandleContext->pFabricDevice = nullptr;
}
pFabricPortHandleContext->pFabricDevice = new FabricDeviceImp(pOsSysman);
PublicLinuxFabricDeviceImp *pPublicLinuxFabricDeviceImp = reinterpret_cast<PublicLinuxFabricDeviceImp *>(pFabricPortHandleContext->pFabricDevice->getOsFabricDevice());
@@ -568,5 +568,398 @@ TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingZesFabricPortG
EXPECT_EQ(pMockIafNlApi->txCounter, throughput.txCounter);
}
// Legacy layout: the device directory lists "iaf.5" and all four counter nodes exist,
// so every counter must be returned exactly as stored in the mocked nodes.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersWithLegacyPathAndCallSucceeds) {
    // The backups restore the original accessors when the test ends.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    zes_fabric_port_properties_t properties = {};
    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // The expected sysfs paths below are derived from the port id, so this query must succeed.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::vector<std::string> dirs = {"driver", "drm", "iaf.5"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    zes_fabric_port_error_counters_t errors = {};
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    delete pFsAccess;
    delete pSysfsAccess;
}
// "i915.iaf.5" layout: the device directory lists the i915-prefixed entry and all four
// counter nodes exist, so every counter must be returned exactly as stored in the mocked nodes.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersCallSucceeds) {
    // The backups restore the original accessors when the test ends.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    zes_fabric_port_properties_t properties = {};
    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // The expected sysfs paths below are derived from the port id, so this query must succeed.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    zes_fabric_port_error_counters_t errors = {};
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    delete pFsAccess;
    delete pSysfsAccess;
}
// When resolving the device's real path fails, the failure code of getRealPath() must be
// propagated unchanged by zesFabricPortGetFabricErrorCounters.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndGetRealPathFailsThenFailureIsReturned) {
    // The backups restore the original accessors when the test ends.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    MockFabricFsAccess *pMockFs = new MockFabricFsAccess;
    MockFabricSysFsAccess *pMockSysfs = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pMockFs;
    pLinuxSysmanImp->pSysfsAccess = pMockSysfs;

    uint32_t portCount = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t status = zesDeviceEnumFabricPorts(device, &portCount, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(portCount, 1U);

    // From here on the mocked getRealPath() reports a failure.
    pMockSysfs->mockRealPathStatus = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

    zes_fabric_port_error_counters_t errors;
    status = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);

    delete pMockFs;
    delete pMockSysfs;
}
// When listing the device directory fails, the failure code of listDirectory() must be
// propagated unchanged by zesFabricPortGetFabricErrorCounters.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndListDirectoryFailsThenFailureIsReturned) {
    // The backups restore the original accessors when the test ends.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    MockFabricFsAccess *pMockFs = new MockFabricFsAccess;
    MockFabricSysFsAccess *pMockSysfs = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pMockFs;
    pLinuxSysmanImp->pSysfsAccess = pMockSysfs;

    uint32_t portCount = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t status = zesDeviceEnumFabricPorts(device, &portCount, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_EQ(portCount, 1U);

    // From here on the mocked listDirectory() reports a failure.
    pMockFs->mockListDirectory = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

    zes_fabric_port_error_counters_t errors;
    status = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, status);

    delete pMockFs;
    delete pMockSysfs;
}
// The device directory lists no entry containing "iaf.", so the call must report
// ZE_RESULT_ERROR_NOT_AVAILABLE even though counter nodes are registered in the mock.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndIafDriverIsNotLoadedThenFailureIsReturned) {
    // The backups restore the original accessors when the test ends.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    zes_fabric_port_properties_t properties = {};
    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // The sysfs paths below are derived from the port id, so this query must succeed.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    // No IAF entry in the listing.
    std::vector<std::string> dirs = {"driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    zes_fabric_port_error_counters_t errors = {};
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, result);

    delete pFsAccess;
    delete pSysfsAccess;
}
// The link_failures node is registered under a different attach id, so it is absent for the
// queried port: the call must still succeed and report 0 link failures, while the other
// three counters are returned as stored in the mocked nodes.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndLinkFailureSysfsNodeIsAbsentThenZeroLinkFailuresAreReturned) {
    // The backups restore the original accessors when the test ends.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    zes_fabric_port_properties_t properties = {};
    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // The expected sysfs paths below are derived from the port id, so this query must succeed.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Path with attachId + 1: a node stored here is never looked up for the queried port.
    std::string fabricLinkErrorIncorrectPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId + 1) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorIncorrectPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    zes_fabric_port_error_counters_t errors = {};
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, 0u);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    delete pFsAccess;
    delete pSysfsAccess;
}
// Registers link_failures, fw_error and fw_comm_errors under the port's own sysfs paths but
// places link_degrades under a different sub-device (attachId + 1). The query is expected to
// succeed and report a link-degrade count of zero alongside the other mocked counters.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndLinkDegradesSysfsNodeIsAbsentThenZeroLinkDegradesAreReturned) {
    // The backups restore the original accessors when the test body goes out of scope.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    zes_fabric_port_properties_t properties;
    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // Every path below is derived from these properties, so the query itself has to succeed.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricLinkErrorIncorrectPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId + 1) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorIncorrectPath + "/link_degrades", mockLinkDegrades}, // deliberately misplaced
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    zes_fabric_port_error_counters_t errors;
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, 0u);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    delete pFsAccess;
    delete pSysfsAccess;
}
// Registers link_failures, link_degrades and fw_comm_errors under the port's own sysfs paths
// but places fw_error under a different sub-device (attachId + 1). The query is expected to
// succeed and report a firmware-error count of zero alongside the other mocked counters.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndFwErrorSysfsNodeIsAbsentThenZeroFwErrorsAreReturned) {
    // The backups restore the original accessors when the test body goes out of scope.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    zes_fabric_port_properties_t properties;
    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // Every path below is derived from these properties, so the query itself has to succeed.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::string fabricFwErrorIncorrectPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId + 1);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorIncorrectPath + "/fw_error", mockFwErrors}, // deliberately misplaced
        {fabricFwErrorPath + "/fw_comm_errors", mockFwCommErrors},
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    zes_fabric_port_error_counters_t errors;
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, 0u);
    EXPECT_EQ(errors.fwCommErrorCount, mockFwCommErrors);

    delete pFsAccess;
    delete pSysfsAccess;
}
// Registers link_failures, link_degrades and fw_error under the port's own sysfs paths but
// places fw_comm_errors under a different sub-device (attachId + 1). The query is expected to
// succeed and report a firmware-communication-error count of zero alongside the other counters.
TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingzesFabricPortGetFabricErrorCountersAndFwCommErrorSysfsNodeIsAbsentThenZeroFwCommErrorsAreReturned) {
    // The backups restore the original accessors when the test body goes out of scope.
    VariableBackup<FsAccess *> backupFsAccess(&pLinuxSysmanImp->pFsAccess);
    VariableBackup<SysfsAccess *> backupSysfsAccess(&pLinuxSysmanImp->pSysfsAccess);
    auto pFsAccess = new MockFabricFsAccess;
    auto pSysfsAccess = new MockFabricSysFsAccess;
    pLinuxSysmanImp->pFsAccess = pFsAccess;
    pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;

    uint32_t count = 1U;
    zes_fabric_port_handle_t hPorts[1U];
    ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(count, 1U);

    zes_fabric_port_properties_t properties;
    result = zesFabricPortGetProperties(hPorts[0], &properties);
    // Every path below is derived from these properties, so the query itself has to succeed.
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    std::string fabricLinkErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId) + "/port." + std::to_string(properties.portId.portNumber);
    std::string fabricFwErrorPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId);
    std::string fabricFwErrorIncorrectPath = "/mockRealPath/i915.iaf.5/sd." + std::to_string(properties.portId.attachId + 1);
    std::vector<std::string> dirs = {"i915.iaf.5",
                                     "driver", "drm"};
    uint64_t mockLinkFailures = 401;
    uint64_t mockLinkDegrades = 501;
    uint64_t mockFwErrors = 301;
    uint64_t mockFwCommErrors = 201;
    std::map<std::string, uint64_t> nodes = {
        {fabricLinkErrorPath + "/link_failures", mockLinkFailures},
        {fabricLinkErrorPath + "/link_degrades", mockLinkDegrades},
        {fabricFwErrorPath + "/fw_error", mockFwErrors},
        {fabricFwErrorIncorrectPath + "/fw_comm_errors", mockFwCommErrors}, // deliberately misplaced
    };
    pFsAccess->setAccessibleDirectories(dirs);
    pFsAccess->setAccessibleNodes(nodes);

    zes_fabric_port_error_counters_t errors;
    result = zesFabricPortGetFabricErrorCounters(hPorts[0], &errors);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(errors.linkFailureCount, mockLinkFailures);
    EXPECT_EQ(errors.linkDegradeCount, mockLinkDegrades);
    EXPECT_EQ(errors.fwErrorCount, mockFwErrors);
    EXPECT_EQ(errors.fwCommErrorCount, 0u);

    delete pFsAccess;
    delete pSysfsAccess;
}
} // namespace ult
} // namespace L0

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -10,9 +10,7 @@ set(L0_TESTS_TOOLS_SYSMAN_RAS_LINUX
if(NEO_ENABLE_i915_PRELIM_DETECTION)
list(APPEND L0_TESTS_TOOLS_SYSMAN_RAS_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_ras_fabric_prelim.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_ras_prelim.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_fs_ras_fabric_prelim.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_fs_ras_prelim.h
)
else()

View File

@@ -1,86 +0,0 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h"
#include "level_zero/tools/source/sysman/linux/pmu/pmu_imp.h"
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
#include "sysman/linux/fs_access.h"
#include "sysman/linux/os_sysman_imp.h"
#include "sysman/ras/ras.h"
#include "sysman/ras/ras_imp.h"
#include <map>
namespace L0 {
namespace ult {
// FsAccess test double backed by two in-memory tables: a map of readable nodes (path -> value)
// and a list of directory paths that are reported as existing.
class MockRasFabricFsAccess : public FsAccess {
  public:
    ~MockRasFabricFsAccess() override = default;

    // Replaces the table of readable nodes with a copy of `nodes`.
    void setAccessibleNodes(std::map<std::string, uint64_t> &nodes) {
        accessibleNodes = nodes;
    }

    // Replaces the list of existing directories with a copy of `dirs`.
    void setAccessibleDirectories(std::vector<std::string> &dirs) {
        accessibleDirectories = dirs;
    }

    bool isRootUser() override {
        return true;
    }

    // Succeeds only for paths present in the node table.
    ze_result_t canRead(const std::string file) override {
        const bool isKnownNode = accessibleNodes.count(file) != 0;
        return isKnownNode ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
    }

    // Writes the stored value for `file` into `val`; leaves `val` untouched for unknown paths.
    ze_result_t read(const std::string file, uint64_t &val) override {
        if (canRead(file) != ZE_RESULT_SUCCESS) {
            return ZE_RESULT_ERROR_UNKNOWN;
        }
        val = accessibleNodes[file];
        return ZE_RESULT_SUCCESS;
    }

    // True when `path` exactly matches one of the registered directories.
    bool directoryExists(const std::string path) override {
        const auto match = std::find(accessibleDirectories.begin(), accessibleDirectories.end(), path);
        return match != accessibleDirectories.end();
    }

  private:
    std::map<std::string, uint64_t> accessibleNodes = {};
    std::vector<std::string> accessibleDirectories = {};
};
// SysfsAccess test double: symlink reads always fail, and real-path resolution appends a fixed
// mock prefix and returns a status the test can override.
class MockRasFabricSysFsAccess : public SysfsAccess {
  public:
    // Status handed back by getRealPath(); defaults to success.
    ze_result_t mockRealPathStatus = ZE_RESULT_SUCCESS;

    ze_result_t readSymLink(const std::string path, std::string &buf) override {
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    // Appends "/mockRealPath" to `buf` regardless of `path` and of the returned status.
    ze_result_t getRealPath(const std::string path, std::string &buf) override {
        buf += "/mockRealPath";
        return mockRealPathStatus;
    }
};
// Thin MemoryManagerMock subclass used by the RAS fabric fixture; it only forwards the
// execution environment to the base-class constructor.
struct MockMemoryManagerInRasSysman : public MemoryManagerMock {
    MockMemoryManagerInRasSysman(NEO::ExecutionEnvironment &executionEnvironment)
        : MemoryManagerMock(executionEnvironment) {}
};
} // namespace ult
} // namespace L0

View File

@@ -1,529 +0,0 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h"
#include "level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras_fabric_prelim.h"
namespace L0 {
namespace ult {
// Number of RAS error-set handles the tests in this file expect zesDeviceEnumRasErrorSets to report.
constexpr uint32_t mockHandleCount = 2u;
class TestRasFabricFixture : public SysmanDeviceFixture {
protected:
std::unique_ptr<MockRasFabricFsAccess> pFsAccess;
std::unique_ptr<MockRasFabricSysFsAccess> pSysfsAccess;
MemoryManager *pMemoryManagerOriginal = nullptr;
std::unique_ptr<MockMemoryManagerInRasSysman> pMemoryManager;
FsAccess *pFsAccessOriginal = nullptr;
SysfsAccess *pSysfsAccessOriginal = nullptr;
PmuInterface *pOriginalPmuInterface = nullptr;
FirmwareUtil *pOriginalFwUtilInterface = nullptr;
std::vector<ze_device_handle_t> deviceHandles;
void SetUp() override {
if (!sysmanUltsEnable) {
GTEST_SKIP();
}
SysmanDeviceFixture::SetUp();
pMemoryManagerOriginal = device->getDriverHandle()->getMemoryManager();
pMemoryManager = std::make_unique<MockMemoryManagerInRasSysman>(*neoDevice->getExecutionEnvironment());
pMemoryManager->localMemorySupported[0] = true;
device->getDriverHandle()->setMemoryManager(pMemoryManager.get());
pFsAccess = std::make_unique<MockRasFabricFsAccess>();
pSysfsAccess = std::make_unique<MockRasFabricSysFsAccess>();
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess;
pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface;
pOriginalFwUtilInterface = pLinuxSysmanImp->pFwUtilInterface;
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get();
pLinuxSysmanImp->pPmuInterface = nullptr;
pLinuxSysmanImp->pFwUtilInterface = nullptr;
for (const auto &handle : pSysmanDeviceImp->pRasHandleContext->handleList) {
delete handle;
}
pSysmanDeviceImp->pRasHandleContext->handleList.clear();
uint32_t subDeviceCount = 0;
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr);
if (subDeviceCount == 0) {
deviceHandles.resize(1, device->toHandle());
} else {
deviceHandles.resize(subDeviceCount, nullptr);
Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data());
}
}
void TearDown() override {
if (!sysmanUltsEnable) {
GTEST_SKIP();
}
device->getDriverHandle()->setMemoryManager(pMemoryManagerOriginal);
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal;
pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface;
pLinuxSysmanImp->pFwUtilInterface = pOriginalFwUtilInterface;
SysmanDeviceFixture::TearDown();
}
std::vector<zes_ras_handle_t> getRasHandles(uint32_t count) {
std::vector<zes_ras_handle_t> handles(count, nullptr);
EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
return handles;
}
};
// Registers firmware, sub-device and per-port link counters for ports 1-8 of iaf.0 (platform
// driver directory) and checks the compute-error totals reported for each RAS error type.
TEST_F(TestRasFabricFixture, GivenValidRasFabricNodesThenGetStateIsSuccessful) {
    std::vector<std::string> accessibleDirs = {"/mockRealPath/iaf.0",
                                               "/sys/module/iaf/drivers/platform:iaf/"};
    std::map<std::string, uint64_t> counterNodes = {
        {"/mockRealPath/iaf.0/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/iaf.0/sd.0/sd_failure", 201},
        {"/mockRealPath/iaf.0/sd.0/fw_error", 301},
        {"/mockRealPath/iaf.0/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/iaf.0/sd.0/port.1/link_degrades", 501},
        {"/mockRealPath/iaf.0/sd.0/port.2/link_failures", 601},
        {"/mockRealPath/iaf.0/sd.0/port.2/link_degrades", 701},
        {"/mockRealPath/iaf.0/sd.0/port.3/link_failures", 801},
        {"/mockRealPath/iaf.0/sd.0/port.3/link_degrades", 901},
        {"/mockRealPath/iaf.0/sd.0/port.4/link_failures", 1001},
        {"/mockRealPath/iaf.0/sd.0/port.4/link_degrades", 1101},
        {"/mockRealPath/iaf.0/sd.0/port.5/link_failures", 2101},
        {"/mockRealPath/iaf.0/sd.0/port.5/link_degrades", 3101},
        {"/mockRealPath/iaf.0/sd.0/port.6/link_failures", 4101},
        {"/mockRealPath/iaf.0/sd.0/port.6/link_degrades", 5101},
        {"/mockRealPath/iaf.0/sd.0/port.7/link_failures", 6101},
        {"/mockRealPath/iaf.0/sd.0/port.7/link_degrades", 7101},
        {"/mockRealPath/iaf.0/sd.0/port.8/link_failures", 8101},
        {"/mockRealPath/iaf.0/sd.0/port.8/link_degrades", 9101},
    };
    pFsAccess->setAccessibleDirectories(accessibleDirs);
    pFsAccess->setAccessibleNodes(counterNodes);

    uint32_t handleCount = 0;
    ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(handleCount, mockHandleCount);

    // Requesting one more handle than exists is expected to bring the count back down.
    uint32_t oversizedCount = handleCount + 1;
    result = zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(oversizedCount, mockHandleCount);

    for (auto hRas : getRasHandles(mockHandleCount)) {
        EXPECT_NE(hRas, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(hRas, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(hRas, &properties));

        // Every category other than compute errors is expected to stay at zero.
        for (auto category : {ZES_RAS_ERROR_CAT_CACHE_ERRORS, ZES_RAS_ERROR_CAT_RESET,
                              ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS, ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS,
                              ZES_RAS_ERROR_CAT_DISPLAY_ERRORS, ZES_RAS_ERROR_CAT_DRIVER_ERRORS}) {
            EXPECT_EQ(state.category[category], 0u);
        }

        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 27709u);
        } else if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 23710u);
        }
    }
}
// With real-path resolution forced to fail and no nodes registered, enumeration is expected
// to succeed while reporting zero RAS handles.
TEST_F(TestRasFabricFixture, GivenInValidRasFabricNodesThenEnumerationDoesNotReturnAnyHandles) {
    pSysfsAccess->mockRealPathStatus = ZE_RESULT_ERROR_UNKNOWN;

    uint32_t handleCount = 0;
    const ze_result_t enumResult = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(enumResult, ZE_RESULT_SUCCESS);
    EXPECT_EQ(handleCount, 0u);
}
// Same flow as the platform-driver test but with the device exposed as i915.iaf.0 under the
// auxiliary driver directory and a single port's counters registered.
TEST_F(TestRasFabricFixture, GivenValidRasFabricAuxiliaryNodesThenGetStateIsSuccessful) {
    std::vector<std::string> accessibleDirs = {"/mockRealPath/i915.iaf.0",
                                               "/sys/module/iaf/drivers/auxiliary:iaf/"};
    std::map<std::string, uint64_t> counterNodes = {
        {"/mockRealPath/i915.iaf.0/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/i915.iaf.0/sd.0/sd_failure", 201},
        {"/mockRealPath/i915.iaf.0/sd.0/fw_error", 301},
        {"/mockRealPath/i915.iaf.0/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/i915.iaf.0/sd.0/port.1/link_degrades", 501},
    };
    pFsAccess->setAccessibleDirectories(accessibleDirs);
    pFsAccess->setAccessibleNodes(counterNodes);

    uint32_t handleCount = 0;
    ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(handleCount, mockHandleCount);

    // Requesting one more handle than exists is expected to bring the count back down.
    uint32_t oversizedCount = handleCount + 1;
    result = zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(oversizedCount, mockHandleCount);

    for (auto hRas : getRasHandles(mockHandleCount)) {
        EXPECT_NE(hRas, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(hRas, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(hRas, &properties));

        // Every category other than compute errors is expected to stay at zero.
        for (auto category : {ZES_RAS_ERROR_CAT_CACHE_ERRORS, ZES_RAS_ERROR_CAT_RESET,
                              ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS, ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS,
                              ZES_RAS_ERROR_CAT_DISPLAY_ERRORS, ZES_RAS_ERROR_CAT_DRIVER_ERRORS}) {
            EXPECT_EQ(state.category[category], 0u);
        }

        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 602u);
        } else if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 903u);
        }
    }
}
// Registers the eight-port counter set for iaf.31 but leaves out port.1/link_degrades (501)
// and port.8/link_failures (8101); the expected totals are reduced by exactly those values.
TEST_F(TestRasFabricFixture, GivenSomeRasFabricNodesThenGetStateIsSuccessful) {
    std::vector<std::string> accessibleDirs = {"/mockRealPath/iaf.31",
                                               "/sys/module/iaf/drivers/platform:iaf/"};
    std::map<std::string, uint64_t> counterNodes = {
        {"/mockRealPath/iaf.31/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/iaf.31/sd.0/sd_failure", 201},
        {"/mockRealPath/iaf.31/sd.0/fw_error", 301},
        {"/mockRealPath/iaf.31/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/iaf.31/sd.0/port.2/link_failures", 601},
        {"/mockRealPath/iaf.31/sd.0/port.2/link_degrades", 701},
        {"/mockRealPath/iaf.31/sd.0/port.3/link_failures", 801},
        {"/mockRealPath/iaf.31/sd.0/port.3/link_degrades", 901},
        {"/mockRealPath/iaf.31/sd.0/port.4/link_failures", 1001},
        {"/mockRealPath/iaf.31/sd.0/port.4/link_degrades", 1101},
        {"/mockRealPath/iaf.31/sd.0/port.5/link_failures", 2101},
        {"/mockRealPath/iaf.31/sd.0/port.5/link_degrades", 3101},
        {"/mockRealPath/iaf.31/sd.0/port.6/link_failures", 4101},
        {"/mockRealPath/iaf.31/sd.0/port.6/link_degrades", 5101},
        {"/mockRealPath/iaf.31/sd.0/port.7/link_failures", 6101},
        {"/mockRealPath/iaf.31/sd.0/port.7/link_degrades", 7101},
        {"/mockRealPath/iaf.31/sd.0/port.8/link_degrades", 9101},
    };
    pFsAccess->setAccessibleDirectories(accessibleDirs);
    pFsAccess->setAccessibleNodes(counterNodes);

    uint32_t handleCount = 0;
    ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(handleCount, mockHandleCount);

    // Requesting one more handle than exists is expected to bring the count back down.
    uint32_t oversizedCount = handleCount + 1;
    result = zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(oversizedCount, mockHandleCount);

    for (auto hRas : getRasHandles(mockHandleCount)) {
        EXPECT_NE(hRas, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(hRas, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(hRas, &properties));

        // Every category other than compute errors is expected to stay at zero.
        for (auto category : {ZES_RAS_ERROR_CAT_CACHE_ERRORS, ZES_RAS_ERROR_CAT_RESET,
                              ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS, ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS,
                              ZES_RAS_ERROR_CAT_DISPLAY_ERRORS, ZES_RAS_ERROR_CAT_DRIVER_ERRORS}) {
            EXPECT_EQ(state.category[category], 0u);
        }

        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 27709u - 501u);
        } else if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 23710u - 8101u);
        }
    }
}
// Reads the RAS state once without clearing, doubles every mocked counter, then reads again
// and expects the reported compute-error totals to be doubled as well.
TEST_F(TestRasFabricFixture, GivenValidRasFabricNodesWhenGetStateIsCalledTwiceThenRasErrorCountIsDoubled) {
    std::vector<std::string> accessibleDirs = {"/mockRealPath/iaf.27",
                                               "/sys/module/iaf/drivers/platform:iaf/"};
    std::map<std::string, uint64_t> firstReadNodes = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 201},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 301},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 501},
    };
    std::map<std::string, uint64_t> secondReadNodes = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 101 * 2},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 201 * 2},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 301 * 2},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 401 * 2},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 501 * 2},
    };
    pFsAccess->setAccessibleDirectories(accessibleDirs);
    pFsAccess->setAccessibleNodes(firstReadNodes);

    uint32_t handleCount = 0;
    ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(handleCount, mockHandleCount);

    // Requesting one more handle than exists is expected to bring the count back down.
    uint32_t oversizedCount = handleCount + 1;
    result = zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(oversizedCount, mockHandleCount);

    auto rasHandles = getRasHandles(mockHandleCount);

    // First read: clear argument is 0.
    for (auto hRas : rasHandles) {
        EXPECT_NE(hRas, nullptr);
        zes_ras_state_t firstState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(hRas, 0, &firstState));
    }

    pFsAccess->setAccessibleNodes(secondReadNodes);

    // Second read against the doubled counters.
    for (auto hRas : rasHandles) {
        EXPECT_NE(hRas, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(hRas, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(hRas, &properties));

        // Every category other than compute errors is expected to stay at zero.
        for (auto category : {ZES_RAS_ERROR_CAT_CACHE_ERRORS, ZES_RAS_ERROR_CAT_RESET,
                              ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS, ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS,
                              ZES_RAS_ERROR_CAT_DISPLAY_ERRORS, ZES_RAS_ERROR_CAT_DRIVER_ERRORS}) {
            EXPECT_EQ(state.category[category], 0u);
        }

        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 602u * 2);
        } else if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 903u * 2);
        }
    }
}
// Reads the RAS state once with the clear argument set to 1, doubles every mocked counter,
// then reads again without clearing and expects the original (undoubled) totals.
TEST_F(TestRasFabricFixture, GivenValidRasFabricNodesWhenGetStateIsCalledTwiceWithClearThenNewRasErrorCountIsRetrieved) {
    std::vector<std::string> accessibleDirs = {"/mockRealPath/iaf.27",
                                               "/sys/module/iaf/drivers/platform:iaf/"};
    std::map<std::string, uint64_t> firstReadNodes = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 101},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 201},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 301},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 401},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 501},
    };
    std::map<std::string, uint64_t> secondReadNodes = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 101 * 2},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 201 * 2},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 301 * 2},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 401 * 2},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 501 * 2},
    };
    pFsAccess->setAccessibleDirectories(accessibleDirs);
    pFsAccess->setAccessibleNodes(firstReadNodes);

    uint32_t handleCount = 0;
    ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &handleCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(handleCount, mockHandleCount);

    // Requesting one more handle than exists is expected to bring the count back down.
    uint32_t oversizedCount = handleCount + 1;
    result = zesDeviceEnumRasErrorSets(device->toHandle(), &oversizedCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(oversizedCount, mockHandleCount);

    auto rasHandles = getRasHandles(mockHandleCount);

    // First read: clear argument is 1.
    for (auto hRas : rasHandles) {
        EXPECT_NE(hRas, nullptr);
        zes_ras_state_t firstState = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(hRas, 1, &firstState));
    }

    pFsAccess->setAccessibleNodes(secondReadNodes);

    // Second read against the doubled counters, this time without clearing.
    for (auto hRas : rasHandles) {
        EXPECT_NE(hRas, nullptr);
        zes_ras_state_t state = {};
        zes_ras_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(hRas, 0, &state));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(hRas, &properties));

        // Every category other than compute errors is expected to stay at zero.
        for (auto category : {ZES_RAS_ERROR_CAT_CACHE_ERRORS, ZES_RAS_ERROR_CAT_RESET,
                              ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS, ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS,
                              ZES_RAS_ERROR_CAT_DISPLAY_ERRORS, ZES_RAS_ERROR_CAT_DRIVER_ERRORS}) {
            EXPECT_EQ(state.category[category], 0u);
        }

        if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 602u);
        } else if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
            EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], 903u);
        }
    }
}
// Multi-root-device fixture for the RAS fabric tests. For every device in the driver handle,
// SetUp installs a mock DRM driver model, recreates the sysman device and replaces its
// firmware-util, sysfs, procfs and fs accessors with mocks; TearDown frees those mocks and the
// sysman device again. Statement order matters here because ownership of raw pointers is
// handed over by deleting the previous object before assigning its replacement.
class SysmanRasFabricMultiDeviceFixture : public MultiDeviceFixture, public ::testing::Test {
public:
void SetUp() override {
if (!sysmanUltsEnable) {
GTEST_SKIP();
}
MultiDeviceFixture::setUp();
for (auto &device : driverHandle->devices) {
auto neoDevice = device->getNEODevice();
// Give the root device a fresh OS interface backed by the mock DRM driver model.
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique<NEO::OSInterface>();
auto &osInterface = device->getOsInterface();
osInterface.setDriverModel(std::make_unique<SysmanMockDrm>(const_cast<NEO::RootDeviceEnvironment &>(neoDevice->getRootDeviceEnvironment())));
setenv("ZES_ENABLE_SYSMAN", "1", 1);
// Replace the existing sysman handle with a newly constructed SysmanDeviceImp.
delete device->getSysmanHandle();
device->setSysmanHandle(new SysmanDeviceImp(device->toHandle()));
auto pSysmanDevice = device->getSysmanHandle();
// Sub-devices share the root device's sysman handle.
for (auto &subDevice : static_cast<DeviceImp *>(device)->subDevices) {
static_cast<DeviceImp *>(subDevice)->setSysmanHandle(pSysmanDevice);
}
auto pSysmanDeviceImp = static_cast<SysmanDeviceImp *>(pSysmanDevice);
auto pOsSysman = pSysmanDeviceImp->pOsSysman;
auto pLinuxSysmanImp = static_cast<PublicLinuxSysmanImp *>(pOsSysman);
pSysmanDeviceImp->init();
// Drop the accessors that exist after init() and install mocks in their place.
delete pLinuxSysmanImp->pFwUtilInterface;
delete pLinuxSysmanImp->pSysfsAccess;
delete pLinuxSysmanImp->pProcfsAccess;
delete pLinuxSysmanImp->pFsAccess;
auto pProcfsAccess = new MockLinuxProcfsAccess();
auto pFsAccess = new MockRasFabricFsAccess();
auto pSysfsAccess = new MockRasFabricSysFsAccess();
pLinuxSysmanImp->pFwUtilInterface = nullptr;
pLinuxSysmanImp->pSysfsAccess = pSysfsAccess;
pLinuxSysmanImp->pProcfsAccess = pProcfsAccess;
pLinuxSysmanImp->pFsAccess = pFsAccess;
}
}
void TearDown() override {
if (!sysmanUltsEnable) {
GTEST_SKIP();
}
for (auto &device : driverHandle->devices) {
auto pSysmanDevice = device->getSysmanHandle();
auto pSysmanDeviceImp = static_cast<SysmanDeviceImp *>(pSysmanDevice);
auto pOsSysman = pSysmanDeviceImp->pOsSysman;
auto pLinuxSysmanImp = static_cast<PublicLinuxSysmanImp *>(pOsSysman);
// Free the mocks installed in SetUp and null the pointers before the sysman device is
// deleted (presumably so its teardown does not free them a second time - TODO confirm).
delete pLinuxSysmanImp->pSysfsAccess;
delete pLinuxSysmanImp->pProcfsAccess;
delete pLinuxSysmanImp->pFsAccess;
pLinuxSysmanImp->pFwUtilInterface = nullptr;
pLinuxSysmanImp->pSysfsAccess = nullptr;
pLinuxSysmanImp->pProcfsAccess = nullptr;
pLinuxSysmanImp->pFsAccess = nullptr;
delete pSysmanDevice;
device->setSysmanHandle(nullptr);
}
unsetenv("ZES_ENABLE_SYSMAN");
MultiDeviceFixture::tearDown();
}
};
// Gives each of two root devices its own set of mocked fabric counters (two sub-devices each)
// and checks that the RAS state of every handle reflects the counters of the root device and
// sub-device it belongs to.
TEST_F(SysmanRasFabricMultiDeviceFixture, GivenValidRasFabricNodesForMultipleDevicesThenGetStateReturnsErrorCountSpecificToEachOfDevice) {
    // Root devices and sub-devices are counted separately; the original loop reused the
    // sub-device constant as the root-device bound, which only worked because both are 2.
    const uint32_t testUseRootDeviceCount = 2u;
    const uint32_t testUseSubDeviceCount = 2u;
    ASSERT_GE(numRootDevices, testUseRootDeviceCount);
    ASSERT_GE(numSubDevices, testUseSubDeviceCount);

    std::vector<std::string> dirs = {"/mockRealPath/iaf.27",
                                     "/sys/module/iaf/drivers/platform:iaf/"};

    // Installs the shared directory list and the given counter nodes on one root device's mock.
    auto installMockNodes = [&](uint32_t rootDeviceIndex, std::map<std::string, uint64_t> &nodes) {
        auto pOsSysman = static_cast<SysmanDeviceImp *>(driverHandle->devices[rootDeviceIndex]->getSysmanHandle())->pOsSysman;
        auto pLinuxSysmanImp = static_cast<PublicLinuxSysmanImp *>(pOsSysman);
        auto pMockFsAccess = static_cast<MockRasFabricFsAccess *>(pLinuxSysmanImp->pFsAccess);
        pMockFsAccess->setAccessibleDirectories(dirs);
        pMockFsAccess->setAccessibleNodes(nodes);
    };

    std::map<std::string, uint64_t> nodesDevice0 = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 1},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 1},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 1},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 1},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 1},
        {"/mockRealPath/iaf.27/sd.1/fw_comm_errors", 2},
        {"/mockRealPath/iaf.27/sd.1/sd_failure", 2},
        {"/mockRealPath/iaf.27/sd.1/fw_error", 2},
        {"/mockRealPath/iaf.27/sd.1/port.1/link_failures", 2},
        {"/mockRealPath/iaf.27/sd.1/port.1/link_degrades", 2},
    };
    installMockNodes(0u, nodesDevice0);

    std::map<std::string, uint64_t> nodesDevice1 = {
        {"/mockRealPath/iaf.27/sd.0/fw_comm_errors", 3},
        {"/mockRealPath/iaf.27/sd.0/sd_failure", 3},
        {"/mockRealPath/iaf.27/sd.0/fw_error", 3},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_failures", 3},
        {"/mockRealPath/iaf.27/sd.0/port.1/link_degrades", 3},
        {"/mockRealPath/iaf.27/sd.1/fw_comm_errors", 4},
        {"/mockRealPath/iaf.27/sd.1/sd_failure", 4},
        {"/mockRealPath/iaf.27/sd.1/fw_error", 4},
        {"/mockRealPath/iaf.27/sd.1/port.1/link_failures", 4},
        {"/mockRealPath/iaf.27/sd.1/port.1/link_degrades", 4},
    };
    installMockNodes(1u, nodesDevice1);

    // Expected {correctable, uncorrectable} compute-error counts, indexed by
    // rootDeviceIndex * testUseSubDeviceCount + subdeviceId.
    const std::vector<std::pair<uint32_t, uint32_t>> errorCounts{
        {2, 3},  // Device 0, subdevice 0
        {4, 6},  // Device 0, subdevice 1
        {6, 9},  // Device 1, subdevice 0
        {8, 12}, // Device 1, subdevice 1
    };

    for (uint32_t deviceIndex = 0; deviceIndex < testUseRootDeviceCount; deviceIndex++) {
        uint32_t count = 0;
        auto hDevice = driverHandle->devices[deviceIndex]->toHandle();
        EXPECT_EQ(zesDeviceEnumRasErrorSets(hDevice, &count, nullptr), ZE_RESULT_SUCCESS);
        EXPECT_GT(count, 0u);
        std::vector<zes_ras_handle_t> handles(count, nullptr);
        EXPECT_EQ(zesDeviceEnumRasErrorSets(hDevice, &count, handles.data()), ZE_RESULT_SUCCESS);

        for (auto handle : handles) {
            zes_ras_state_t state = {};
            zes_ras_properties_t properties = {};
            EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetState(handle, 0, &state));
            EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(handle, &properties));

            const auto accessIndex = deviceIndex * testUseSubDeviceCount + properties.subdeviceId;
            if (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE) {
                EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], errorCounts[accessIndex].first);
            }
            if (properties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) {
                EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], errorCounts[accessIndex].second);
            }
        }
    }
}
} // namespace ult
} // namespace L0