feature(sysman): Added L3 Fabric Errors
Related-To: NEO-8560
Signed-off-by: Bari, Pratik <pratik.bari@intel.com>
This commit is contained in:
parent
fb9d225495
commit
00d36b5cee
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -95,7 +95,7 @@ class LinuxRasSourceGt : public LinuxRasSources {
|
|||
}
|
||||
int64_t groupFd = -1;
|
||||
std::vector<int64_t> memberFds = {};
|
||||
uint64_t initialErrorCount[maxRasErrorCategoryCount] = {0};
|
||||
uint64_t initialErrorCount[maxRasErrorCategoryExpCount] = {0};
|
||||
uint32_t clearStatus = 0;
|
||||
std::map<zes_ras_error_category_exp_t, uint64_t> errorCategoryToEventCount;
|
||||
bool isSubdevice = false;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -12,6 +12,7 @@
|
|||
#include "level_zero/tools/source/sysman/sysman_imp.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
static const std::map<zes_ras_error_category_exp_t, std::vector<std::string>> categoryToListOfEventsUncorrectable = {
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS,
|
||||
{"fatal-array-bist", "fatal-idi-parity", "fatal-l3-double",
|
||||
|
@ -27,16 +28,18 @@ static const std::map<zes_ras_error_category_exp_t, std::vector<std::string>> ca
|
|||
"sgunit-fatal", "soc-nonfatal-punit", "sgunit-fatal", "sgunit-nonfatal", "gsc-nonfatal-mia-shutdown",
|
||||
"gsc-nonfatal-aon-parity", "gsc-nonfatal-rom-parity", "gsc-nonfatal-fuse-crc-check",
|
||||
"gsc-nonfatal-selfmbist", "gsc-nonfatal-fuse-pull", "gsc-nonfatal-sram-ecc", "gsc-nonfatal-glitch-det",
|
||||
"gsc-nonfatal-ucode-parity", "gsc-nonfatal-mia-int", "gsc-nonfatal-wdg-timeout", "soc-fatal-mdfi-east",
|
||||
"soc-fatal-mdfi-south", "soc-nonfatal-mdfi-east", "soc-nonfatal-mdfi-south", "soc-fatal-mdfi-west",
|
||||
"soc-fatal-cd0-mdfi", "soc-nonfatal-cd0-mdfi"}},
|
||||
"gsc-nonfatal-ucode-parity", "gsc-nonfatal-mia-int", "gsc-nonfatal-wdg-timeout",
|
||||
"soc-nonfatal-mdfi-east", "soc-nonfatal-mdfi-south",
|
||||
"soc-nonfatal-cd0-mdfi"}},
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS,
|
||||
{"fatal-fpu", "fatal-eu-grf", "fatal-sampler", "fatal-slm",
|
||||
"fatal-guc", "fatal-eu-ic", "fatal-subslice", "fatal-l3-fabric"}},
|
||||
"fatal-guc", "fatal-eu-ic", "fatal-subslice"}},
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS,
|
||||
{"driver-object-migration", "driver-engine-other", "driver-ggtt",
|
||||
"driver-gt-interrupt", "driver-gt-other", "driver-guc-communication",
|
||||
"driver-rps"}}};
|
||||
"driver-rps"}},
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS,
|
||||
{"soc-fatal-mdfi-east", "soc-fatal-mdfi-south", "soc-fatal-mdfi-west", "fatal-l3-fabric", "soc-fatal-cd0-mdfi"}}};
|
||||
|
||||
static const std::map<zes_ras_error_category_exp_t, std::vector<std::string>> categoryToListOfEventsCorrectable = {
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS,
|
||||
|
@ -53,6 +56,14 @@ static void closeFd(int64_t &fd) {
|
|||
}
|
||||
}
|
||||
|
||||
// Lookup table from an experimental RAS error category (zes_ras_error_category_exp_t)
// to the matching zes_ras_error_cat_t value.
// Categories with no entry here (for example ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS)
// are skipped by the lookup in LinuxRasSourceGt::osRasGetState.
// NOTE(review): presumably this keeps state.category from being indexed by a category
// outside the zes_ras_error_cat_t range - confirm against the full function body.
static const std::map<zes_ras_error_category_exp_t, zes_ras_error_cat_t> rasErrorCatExpToErrorCat = {
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS, ZES_RAS_ERROR_CAT_CACHE_ERRORS},
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_RESET, ZES_RAS_ERROR_CAT_RESET},
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS, ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS},
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS, ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS},
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS, ZES_RAS_ERROR_CAT_COMPUTE_ERRORS},
|
||||
{ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS, ZES_RAS_ERROR_CAT_DRIVER_ERRORS}};
|
||||
|
||||
static ze_result_t readI915EventsDirectory(LinuxSysmanImp *pLinuxSysmanImp, std::vector<std::string> &listOfEvents, std::string *eventDirectory) {
|
||||
// To know how many errors are supported on a platform, scan
|
||||
// /sys/devices/i915_0000_01_00.0/events/
|
||||
|
@ -150,7 +161,7 @@ ze_result_t LinuxRasSourceGt::osRasGetState(zes_ras_state_t &state, ze_bool_t cl
|
|||
if (clear == true) {
|
||||
closeFds();
|
||||
memset(state.category, 0, maxRasErrorCategoryCount * sizeof(uint64_t));
|
||||
memset(initialErrorCount, 0, maxRasErrorCategoryCount * sizeof(uint64_t));
|
||||
memset(initialErrorCount, 0, maxRasErrorCategoryExpCount * sizeof(uint64_t));
|
||||
}
|
||||
initRasErrors(clear);
|
||||
// Iterate over all the file descriptor values present in the vector that is mapped to the given RAS error category
|
||||
|
@ -167,6 +178,10 @@ ze_result_t LinuxRasSourceGt::osRasGetState(zes_ras_state_t &state, ze_bool_t cl
|
|||
/* The data buffer retrieved after reading pmu counters is parsed to get the error count for each suberror category */
|
||||
uint64_t initialIndex = 2; // Index in the buffer at which the data to be parsed begins
|
||||
for (auto errorCat = errorCategoryToEventCount.begin(); errorCat != errorCategoryToEventCount.end(); errorCat++) {
|
||||
auto errorCategory = rasErrorCatExpToErrorCat.find(errorCat->first);
|
||||
if (errorCategory == rasErrorCatExpToErrorCat.end()) {
|
||||
continue;
|
||||
}
|
||||
uint64_t errorCount = 0;
|
||||
uint64_t j = 0;
|
||||
for (; j < errorCat->second; j++) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -15,6 +15,7 @@ const std::string guid64BitMemoryCounters("0xb15a0ede");
|
|||
constexpr uint32_t mbpsToBytesPerSecond = 125000;
|
||||
constexpr double milliVoltsFactor = 1000.0;
|
||||
constexpr uint32_t maxRasErrorCategoryCount = 7;
|
||||
constexpr uint32_t maxRasErrorCategoryExpCount = 10;
|
||||
namespace L0 {
|
||||
struct SteadyClock {
|
||||
typedef std::chrono::duration<uint64_t, std::milli> duration;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -60,9 +60,9 @@ constexpr uint64_t driverEngineOther = 3u;
|
|||
constexpr uint64_t initialUncorrectableCacheErrors = 2u;
|
||||
constexpr uint64_t initialEngineReset = 2u;
|
||||
constexpr uint64_t initialProgrammingErrors = 7u;
|
||||
constexpr uint64_t initialUncorrectableNonComputeErrors = 8u;
|
||||
constexpr uint64_t initialUncorrectableNonComputeErrors = 3u;
|
||||
constexpr uint64_t initialUncorrectableFabricErrors = 8u;
|
||||
constexpr uint64_t initialUncorrectableComputeErrors = 10u;
|
||||
constexpr uint64_t initialUncorrectableComputeErrors = 7u;
|
||||
constexpr uint64_t initialCorrectableComputeErrors = 6u;
|
||||
constexpr uint64_t initialUncorrectableDriverErrors = 5u;
|
||||
|
||||
|
@ -70,9 +70,9 @@ constexpr uint64_t initialUncorrectableCacheErrorsTile0 = 2u;
|
|||
constexpr uint64_t initialCorrectableCacheErrorTile0 = 2u;
|
||||
constexpr uint64_t initialEngineResetTile0 = 2u;
|
||||
constexpr uint64_t initialProgrammingErrorsTile0 = 7u;
|
||||
constexpr uint64_t initialUncorrectableNonComputeErrorsTile0 = 15u;
|
||||
constexpr uint64_t initialUncorrectableNonComputeErrorsTile0 = 10u;
|
||||
constexpr uint64_t initialCorrectableNonComputeErrorsTile0 = 2u;
|
||||
constexpr uint64_t initialUncorrectableComputeErrorsTile0 = 11u;
|
||||
constexpr uint64_t initialUncorrectableComputeErrorsTile0 = 8u;
|
||||
constexpr uint64_t initialUncorrectableFabricErrorsTile0 = 8u;
|
||||
constexpr uint64_t initialCorrectableComputeErrorsTile0 = 6u;
|
||||
constexpr uint64_t initialUncorrectableDriverErrorsTile0 = 5u;
|
||||
|
@ -80,7 +80,7 @@ constexpr uint64_t initialUncorrectableCacheErrorsTile1 = 1u;
|
|||
constexpr uint64_t initialEngineResetTile1 = 4u;
|
||||
constexpr uint64_t initialProgrammingErrorsTile1 = 5u;
|
||||
constexpr uint64_t initialCorrectableComputeErrorsTile1 = 7u;
|
||||
constexpr uint64_t initialUncorrectableNonComputeErrorsTile1 = 5u;
|
||||
constexpr uint64_t initialUncorrectableNonComputeErrorsTile1 = 3u;
|
||||
constexpr uint64_t initialUncorrectableFabricErrorsTile1 = 2u;
|
||||
constexpr uint64_t initialUncorrectableComputeErrorsTile1 = 6u;
|
||||
constexpr uint64_t initialUncorrectableDriverErrorsTile1 = 4u;
|
||||
|
@ -137,11 +137,11 @@ struct MockRasPmuInterfaceImp : public PmuInterfaceImp {
|
|||
data[5] = driverGgtt;
|
||||
data[6] = driverRps;
|
||||
data[7] = 0;
|
||||
data[8] = 0;
|
||||
data[9] = fatalEuErrorCount;
|
||||
data[10] = socFatalPsfCsc0Count;
|
||||
data[11] = socFatalMdfiEastCount;
|
||||
data[12] = fatalTlb;
|
||||
data[8] = fatalEuErrorCount;
|
||||
data[9] = socFatalPsfCsc0Count;
|
||||
data[10] = fatalTlb;
|
||||
data[11] = 0;
|
||||
data[12] = socFatalMdfiEastCount;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -164,14 +164,14 @@ struct MockRasPmuInterfaceImp : public PmuInterfaceImp {
|
|||
data[5] = driverGgtt;
|
||||
data[6] = driverRps;
|
||||
data[7] = 0;
|
||||
data[8] = 0;
|
||||
data[9] = fatalSubslice;
|
||||
data[10] = fatalEuErrorCount;
|
||||
data[11] = socFatalPsfCsc0Count;
|
||||
data[12] = socFatalMdfiEastCount;
|
||||
data[13] = nonFatalGscAonParity;
|
||||
data[14] = nonFataGscSelfmBist;
|
||||
data[15] = fatalTlb;
|
||||
data[8] = fatalSubslice;
|
||||
data[9] = fatalEuErrorCount;
|
||||
data[10] = socFatalPsfCsc0Count;
|
||||
data[11] = nonFatalGscAonParity;
|
||||
data[12] = nonFataGscSelfmBist;
|
||||
data[13] = fatalTlb;
|
||||
data[14] = 0;
|
||||
data[15] = socFatalMdfiEastCount;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -192,10 +192,10 @@ struct MockRasPmuInterfaceImp : public PmuInterfaceImp {
|
|||
data[4] = driverMigration;
|
||||
data[5] = driverEngineOther;
|
||||
data[6] = fatalGucErrorCountTile1;
|
||||
data[7] = socFatalMdfiWestCountTile1;
|
||||
data[8] = socFatalPunitTile1;
|
||||
data[9] = fatalIdiParityErrorCountTile1;
|
||||
data[10] = fatalL3BankTile1;
|
||||
data[7] = socFatalPunitTile1;
|
||||
data[8] = fatalIdiParityErrorCountTile1;
|
||||
data[9] = fatalL3BankTile1;
|
||||
data[10] = socFatalMdfiWestCountTile1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -244,7 +244,7 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetStateForGtThenSu
|
|||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCount + initialEngineReset);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttention + initialProgrammingErrors);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalEuErrorCount + initialUncorrectableComputeErrors);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + socFatalMdfiEastCount + initialUncorrectableNonComputeErrors);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors);
|
||||
}
|
||||
|
@ -281,7 +281,7 @@ TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGeStateForGtAfterCl
|
|||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCount + initialEngineReset);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttention + initialProgrammingErrors);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalEuErrorCount + initialUncorrectableComputeErrors);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalMdfiEastCount + socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors);
|
||||
}
|
||||
|
@ -720,7 +720,7 @@ TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGeStateF
|
|||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCount + initialEngineResetTile0);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttention + initialProgrammingErrorsTile0);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalSubslice + fatalEuErrorCount + initialUncorrectableComputeErrorsTile0);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalMdfiEastCount + socFatalPsfCsc0Count + nonFatalGscAonParity + nonFataGscSelfmBist + initialUncorrectableNonComputeErrorsTile0);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPsfCsc0Count + nonFatalGscAonParity + nonFataGscSelfmBist + initialUncorrectableNonComputeErrorsTile0);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrorsTile0);
|
||||
} else if (handleIndex == 2u) {
|
||||
|
@ -736,7 +736,7 @@ TEST_F(SysmanRasMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasGeStateF
|
|||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_RESET], fatalEngineResetCountTile1 + initialEngineResetTile1);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS], euAttentionTile1 + initialProgrammingErrorsTile1);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS], fatalGucErrorCountTile1 + initialUncorrectableComputeErrorsTile1);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalMdfiWestCountTile1 + socFatalPunitTile1 + initialUncorrectableNonComputeErrorsTile1);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS], socFatalPunitTile1 + initialUncorrectableNonComputeErrorsTile1);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS], 0u);
|
||||
EXPECT_EQ(state.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS], driverMigration + driverEngineOther + initialUncorrectableDriverErrorsTile1);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
* Copyright (C) 2023-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -125,7 +125,7 @@ TEST_F(SysmanRasExpFixture, GivenValidRasHandleWhenCallingZesRasGetStateExpForGt
|
|||
expectedErrCount = fatalEuErrorCount + initialUncorrectableComputeErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS) {
|
||||
expectedErrCount = socFatalPsfCsc0Count + socFatalMdfiEastCount + initialUncorrectableNonComputeErrors;
|
||||
expectedErrCount = socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS) {
|
||||
expectedErrCount = euAttention + initialProgrammingErrors;
|
||||
|
@ -133,6 +133,9 @@ TEST_F(SysmanRasExpFixture, GivenValidRasHandleWhenCallingZesRasGetStateExpForGt
|
|||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS) {
|
||||
expectedErrCount = driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS) {
|
||||
expectedErrCount = socFatalMdfiEastCount + initialUncorrectableFabricErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -194,7 +197,7 @@ TEST_F(SysmanRasExpFixture, GivenValidRasHandleWhenCallingZesRasGetStateExpForGt
|
|||
expectedErrCount = fatalEuErrorCount + initialUncorrectableComputeErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS) {
|
||||
expectedErrCount = socFatalPsfCsc0Count + socFatalMdfiEastCount + initialUncorrectableNonComputeErrors;
|
||||
expectedErrCount = socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS) {
|
||||
expectedErrCount = euAttention + initialProgrammingErrors;
|
||||
|
@ -202,6 +205,9 @@ TEST_F(SysmanRasExpFixture, GivenValidRasHandleWhenCallingZesRasGetStateExpForGt
|
|||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS) {
|
||||
expectedErrCount = driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS) {
|
||||
expectedErrCount = socFatalMdfiEastCount + initialUncorrectableFabricErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(numCategoriesRetrieved, requestedCount);
|
||||
|
@ -501,7 +507,7 @@ TEST_F(SysmanRasExpFixture, GivenValidRasHandleWhenCallingzesRasClearStateExpAnd
|
|||
expectedErrCount = fatalEuErrorCount + initialUncorrectableComputeErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS) {
|
||||
expectedErrCount = socFatalPsfCsc0Count + socFatalMdfiEastCount + initialUncorrectableNonComputeErrors;
|
||||
expectedErrCount = socFatalPsfCsc0Count + initialUncorrectableNonComputeErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS) {
|
||||
expectedErrCount = euAttention + initialProgrammingErrors;
|
||||
|
@ -509,6 +515,9 @@ TEST_F(SysmanRasExpFixture, GivenValidRasHandleWhenCallingzesRasClearStateExpAnd
|
|||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS) {
|
||||
expectedErrCount = driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS) {
|
||||
expectedErrCount = socFatalMdfiEastCount + initialUncorrectableFabricErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasClearStateExp(handle, ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS));
|
||||
|
@ -797,7 +806,7 @@ TEST_F(SysmanRasExpMultiDeviceFixture, GivenValidRasHandleWhenCallingZesRasGetSt
|
|||
expectedErrCount = fatalSubslice + fatalEuErrorCount + initialUncorrectableComputeErrorsTile0;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS) {
|
||||
expectedErrCount = socFatalPsfCsc0Count + socFatalMdfiEastCount + nonFatalGscAonParity + nonFataGscSelfmBist + initialUncorrectableNonComputeErrorsTile0;
|
||||
expectedErrCount = socFatalPsfCsc0Count + nonFatalGscAonParity + nonFataGscSelfmBist + initialUncorrectableNonComputeErrorsTile0;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS) {
|
||||
expectedErrCount = euAttention + initialProgrammingErrors;
|
||||
|
@ -805,6 +814,9 @@ TEST_F(SysmanRasExpMultiDeviceFixture, GivenValidRasHandleWhenCallingZesRasGetSt
|
|||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS) {
|
||||
expectedErrCount = driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS) {
|
||||
expectedErrCount = socFatalMdfiEastCount + initialUncorrectableFabricErrorsTile0;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
}
|
||||
}
|
||||
} else if (handleIndex == 2u) {
|
||||
|
@ -827,7 +839,7 @@ TEST_F(SysmanRasExpMultiDeviceFixture, GivenValidRasHandleWhenCallingZesRasGetSt
|
|||
expectedErrCount = fatalGucErrorCountTile1 + initialUncorrectableComputeErrorsTile1;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS) {
|
||||
expectedErrCount = socFatalMdfiWestCountTile1 + socFatalPunitTile1 + initialUncorrectableNonComputeErrorsTile1;
|
||||
expectedErrCount = socFatalPunitTile1 + initialUncorrectableNonComputeErrorsTile1;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS) {
|
||||
expectedErrCount = euAttentionTile1 + initialProgrammingErrorsTile1;
|
||||
|
@ -835,6 +847,9 @@ TEST_F(SysmanRasExpMultiDeviceFixture, GivenValidRasHandleWhenCallingZesRasGetSt
|
|||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS) {
|
||||
expectedErrCount = driverMigration + driverEngineOther + initialUncorrectableDriverErrorsTile1;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS) {
|
||||
expectedErrCount = socFatalMdfiWestCountTile1 + initialUncorrectableFabricErrorsTile1;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -960,7 +975,7 @@ TEST_F(SysmanRasExpMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasClear
|
|||
expectedErrCount = fatalSubslice + fatalEuErrorCount + initialUncorrectableComputeErrorsTile0;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS) {
|
||||
expectedErrCount = socFatalPsfCsc0Count + nonFatalGscAonParity + socFatalMdfiEastCount + nonFataGscSelfmBist + initialUncorrectableNonComputeErrorsTile0;
|
||||
expectedErrCount = socFatalPsfCsc0Count + nonFatalGscAonParity + nonFataGscSelfmBist + initialUncorrectableNonComputeErrorsTile0;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS) {
|
||||
expectedErrCount = euAttention + initialProgrammingErrors;
|
||||
|
@ -968,6 +983,9 @@ TEST_F(SysmanRasExpMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasClear
|
|||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS) {
|
||||
expectedErrCount = driverMigration + driverGgtt + driverRps + initialUncorrectableDriverErrors;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS) {
|
||||
expectedErrCount = socFatalMdfiEastCount + initialUncorrectableFabricErrorsTile0;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasClearStateExp(handle, ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS));
|
||||
|
@ -995,7 +1013,7 @@ TEST_F(SysmanRasExpMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasClear
|
|||
expectedErrCount = fatalGucErrorCountTile1 + initialUncorrectableComputeErrorsTile1;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS) {
|
||||
expectedErrCount = socFatalMdfiWestCountTile1 + socFatalPunitTile1 + initialUncorrectableNonComputeErrorsTile1;
|
||||
expectedErrCount = socFatalPunitTile1 + initialUncorrectableNonComputeErrorsTile1;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS) {
|
||||
expectedErrCount = euAttentionTile1 + initialProgrammingErrorsTile1;
|
||||
|
@ -1003,6 +1021,9 @@ TEST_F(SysmanRasExpMultiDeviceFixture, GivenValidRasHandleWhenCallingzesRasClear
|
|||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS) {
|
||||
expectedErrCount = driverMigration + driverEngineOther + initialUncorrectableDriverErrorsTile1;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
} else if (rasStates[i].category == ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS) {
|
||||
expectedErrCount = socFatalMdfiWestCountTile1 + initialUncorrectableFabricErrorsTile1;
|
||||
EXPECT_EQ(rasStates[i].errorCounter, expectedErrCount);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasClearStateExp(handle, ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS));
|
||||
|
|
Loading…
Reference in New Issue