From c03867b55cec952aeefa15cc0a0362a9a72d559a Mon Sep 17 00:00:00 2001 From: "Devarinti, Puneeth Kumar Reddy" Date: Tue, 25 Apr 2023 20:19:10 +0000 Subject: [PATCH] feature: Add debug logs for RAS module Related-To: LOCI-3880 Signed-off-by: Devarinti, Puneeth Kumar Reddy --- level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp | 3 +++ level_zero/tools/source/sysman/ras/linux/os_ras_imp_gt.cpp | 5 +++++ level_zero/tools/source/sysman/ras/linux/os_ras_imp_hbm.cpp | 4 ++++ .../tools/source/sysman/ras/linux/os_ras_imp_prelim.cpp | 3 +++ 4 files changed, 15 insertions(+) diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp index 987308b880..c069e12523 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp @@ -7,6 +7,8 @@ #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp.h" +#include "shared/source/debug_settings/debug_settings_manager.h" + #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" namespace L0 { @@ -29,6 +31,7 @@ ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) { memcpy(categoryThreshold, config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t)); return ZE_RESULT_SUCCESS; } + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS); return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp_gt.cpp b/level_zero/tools/source/sysman/ras/linux/os_ras_imp_gt.cpp index 40e0825635..0eb99b13d1 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp_gt.cpp +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp_gt.cpp @@ -5,6 +5,8 @@ * */ +#include "shared/source/debug_settings/debug_settings_manager.h" + #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h" #include "level_zero/tools/source/sysman/sysman_imp.h" @@ -60,6 +62,7 @@ static ze_result_t readI915EventsDirectory(LinuxSysmanImp *pLinuxSysmanImp, std: std::string bdfDir; ze_result_t result = pSysfsAccess->readSymLink(deviceDir, bdfDir); if (ZE_RESULT_SUCCESS != result) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to read Symlink from %s and returning error:0x%x \n", __FUNCTION__, deviceDir.c_str(), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } const auto loc = bdfDir.find_last_of('/'); @@ -73,6 +76,7 @@ static ze_result_t readI915EventsDirectory(LinuxSysmanImp *pLinuxSysmanImp, std: FsAccess *pFsAccess = &pLinuxSysmanImp->getFsAccess(); result = pFsAccess->listDirectory(sysfsNode, listOfEvents); if (ZE_RESULT_SUCCESS != result) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to list directories from %s and returning error:0x%x \n", __FUNCTION__, sysfsNode.c_str(), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return ZE_RESULT_SUCCESS; @@ -187,6 +191,7 @@ ze_result_t LinuxRasSourceGt::getPmuConfig( std::string &pmuConfig) { auto findErrorInList = std::find(listOfEvents.begin(), listOfEvents.end(), errorFileToGetConfig); if (findErrorInList == listOfEvents.end()) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to find %s from list of events and returning error:0x%x \n", __FUNCTION__, errorFileToGetConfig.c_str(), ZE_RESULT_ERROR_UNKNOWN); return ZE_RESULT_ERROR_UNKNOWN; } return pFsAccess->read(eventDirectory + "/" + errorFileToGetConfig, pmuConfig); diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp_hbm.cpp b/level_zero/tools/source/sysman/ras/linux/os_ras_imp_hbm.cpp index 9590193236..a618912f4e 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp_hbm.cpp +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp_hbm.cpp @@ -5,6 +5,8 @@ * */ +#include "shared/source/debug_settings/debug_settings_manager.h" + #include "level_zero/tools/source/sysman/firmware_util/firmware_util.h" #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h" @@ -30,6 +32,7 @@ ze_result_t LinuxRasSourceHbm::osRasGetState(zes_ras_state_t &state, ze_bool_t c uint64_t errorCount = 0; ze_result_t result = pFwInterface->fwGetMemoryErrorCount(osRasErrorType, subDeviceCount, subdeviceId, errorCount); if (result != ZE_RESULT_SUCCESS) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed while getting fwGetMemoryErrorCount() for RasErrorType:%d, SubDeviceCount:%d, SubdeviceId:%d, errorBaseline update:%d and returning error:0x%x \n", __FUNCTION__, osRasErrorType, subDeviceCount, subdeviceId, clear, result); return result; } errorBaseline = errorCount; // during clear update the error baseline value @@ -37,6 +40,7 @@ ze_result_t LinuxRasSourceHbm::osRasGetState(zes_ras_state_t &state, ze_bool_t c uint64_t errorCount = 0; ze_result_t result = pFwInterface->fwGetMemoryErrorCount(osRasErrorType, subDeviceCount, subdeviceId, errorCount); if (result != ZE_RESULT_SUCCESS) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed while getting fwGetMemoryErrorCount() for RasErrorType:%d, SubDeviceCount:%d, SubdeviceId:%d, errorBaseline update:%d and returning error:0x%x \n", __FUNCTION__, osRasErrorType, subDeviceCount, subdeviceId, clear, result); return result; } state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS] = errorCount - errorBaseline; diff --git a/level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.cpp b/level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.cpp index 5e7d3f731b..af9e6debfe 100644 --- a/level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.cpp +++ b/level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.cpp @@ -7,6 +7,7 @@ #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h" +#include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/linux/system_info.h" @@ -48,6 +49,7 @@ ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) { memcpy_s(categoryThreshold, maxRasErrorCategoryCount * sizeof(uint64_t), config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t)); return ZE_RESULT_SUCCESS; } + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS); return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } @@ -62,6 +64,7 @@ ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) { ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) { if (clear == true) { if (pFsAccess->isRootUser() == false) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS); return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } }