feature: Add debug logs for RAS module

Related-To: LOCI-3880

Signed-off-by: Devarinti, Puneeth Kumar Reddy <puneeth.kumar.reddy.devarinti@intel.com>
This commit is contained in:
Devarinti, Puneeth Kumar Reddy 2023-04-25 20:19:10 +00:00 committed by Compute-Runtime-Automation
parent 909bb00f22
commit c03867b55c
4 changed files with 15 additions and 0 deletions

View File

@@ -7,6 +7,8 @@
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "level_zero/tools/source/sysman/linux/os_sysman_imp.h"
namespace L0 {
@@ -29,6 +31,7 @@ ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) {
memcpy(categoryThreshold, config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t));
return ZE_RESULT_SUCCESS;
}
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
}

View File

@@ -5,6 +5,8 @@
*
*/
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "level_zero/tools/source/sysman/linux/os_sysman_imp.h"
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
@@ -60,6 +62,7 @@ static ze_result_t readI915EventsDirectory(LinuxSysmanImp *pLinuxSysmanImp, std:
std::string bdfDir;
ze_result_t result = pSysfsAccess->readSymLink(deviceDir, bdfDir);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to read Symlink from %s and returning error:0x%x \n", __FUNCTION__, deviceDir.c_str(), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
const auto loc = bdfDir.find_last_of('/');
@@ -73,6 +76,7 @@ static ze_result_t readI915EventsDirectory(LinuxSysmanImp *pLinuxSysmanImp, std:
FsAccess *pFsAccess = &pLinuxSysmanImp->getFsAccess();
result = pFsAccess->listDirectory(sysfsNode, listOfEvents);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to list directories from %s and returning error:0x%x \n", __FUNCTION__, sysfsNode.c_str(), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
return ZE_RESULT_SUCCESS;
@@ -187,6 +191,7 @@ ze_result_t LinuxRasSourceGt::getPmuConfig(
std::string &pmuConfig) {
auto findErrorInList = std::find(listOfEvents.begin(), listOfEvents.end(), errorFileToGetConfig);
if (findErrorInList == listOfEvents.end()) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to find %s from list of events and returning error:0x%x \n", __FUNCTION__, errorFileToGetConfig.c_str(), ZE_RESULT_ERROR_UNKNOWN);
return ZE_RESULT_ERROR_UNKNOWN;
}
return pFsAccess->read(eventDirectory + "/" + errorFileToGetConfig, pmuConfig);

View File

@@ -5,6 +5,8 @@
*
*/
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "level_zero/tools/source/sysman/firmware_util/firmware_util.h"
#include "level_zero/tools/source/sysman/linux/os_sysman_imp.h"
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
@@ -30,6 +32,7 @@ ze_result_t LinuxRasSourceHbm::osRasGetState(zes_ras_state_t &state, ze_bool_t c
uint64_t errorCount = 0;
ze_result_t result = pFwInterface->fwGetMemoryErrorCount(osRasErrorType, subDeviceCount, subdeviceId, errorCount);
if (result != ZE_RESULT_SUCCESS) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed while getting fwGetMemoryErrorCount() for RasErrorType:%d, SubDeviceCount:%d, SubdeviceId:%d, errorBaseline update:%d and returning error:0x%x \n", __FUNCTION__, osRasErrorType, subDeviceCount, subdeviceId, clear, result);
return result;
}
errorBaseline = errorCount; // during clear update the error baseline value
@@ -37,6 +40,7 @@ ze_result_t LinuxRasSourceHbm::osRasGetState(zes_ras_state_t &state, ze_bool_t c
uint64_t errorCount = 0;
ze_result_t result = pFwInterface->fwGetMemoryErrorCount(osRasErrorType, subDeviceCount, subdeviceId, errorCount);
if (result != ZE_RESULT_SUCCESS) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed while getting fwGetMemoryErrorCount() for RasErrorType:%d, SubDeviceCount:%d, SubdeviceId:%d, errorBaseline update:%d and returning error:0x%x \n", __FUNCTION__, osRasErrorType, subDeviceCount, subdeviceId, clear, result);
return result;
}
state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS] = errorCount - errorBaseline;

View File

@@ -7,6 +7,7 @@
#include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/linux/system_info.h"
@@ -48,6 +49,7 @@ ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) {
memcpy_s(categoryThreshold, maxRasErrorCategoryCount * sizeof(uint64_t), config->detailedThresholds.category, maxRasErrorCategoryCount * sizeof(uint64_t));
return ZE_RESULT_SUCCESS;
}
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
}
@@ -62,6 +64,7 @@ ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) {
ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) {
if (clear == true) {
if (pFsAccess->isRootUser() == false) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Insufficient permissions and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
}
}