Debug: Add debug logs for temperature module
Related-To: LOCI-3881
Signed-off-by: Devarinti, Puneeth Kumar Reddy <puneeth.kumar.reddy.devarinti@intel.com>
This commit is contained in:
parent
996d73b768
commit
535debff2d
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -7,6 +7,8 @@
|
|||
|
||||
#include "level_zero/tools/source/sysman/temperature/linux/os_temperature_imp.h"
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
|
||||
#include "level_zero/tools/source/sysman/linux/pmt/pmt.h"
|
||||
|
||||
#include "sysman/linux/os_sysman_imp.h"
|
||||
|
@ -33,6 +35,7 @@ ze_result_t LinuxTemperatureImp::getProperties(zes_temp_properties_t *pPropertie
|
|||
ze_result_t LinuxTemperatureImp::getGlobalMaxTemperatureNoSubDevice(double *pTemperature) {
|
||||
auto isValidTemperature = [](auto temperature) {
|
||||
if ((temperature > invalidMaxTemperature) || (temperature < invalidMinTemperature)) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): temperature:%f is not in valid limits \n", __FUNCTION__, temperature);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -60,6 +63,7 @@ ze_result_t LinuxTemperatureImp::getGlobalMaxTemperatureNoSubDevice(double *pTem
|
|||
key = "COMPUTE_TEMPERATURES";
|
||||
result = pPmt->readValue(key, computeTemperature);
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for COMPUTE_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
// Check max temperature among IA, GT and LLC sensors across COMPUTE_TEMPERATURES
|
||||
|
@ -69,6 +73,7 @@ ze_result_t LinuxTemperatureImp::getGlobalMaxTemperatureNoSubDevice(double *pTem
|
|||
key = "CORE_TEMPERATURES";
|
||||
result = pPmt->readValue(key, coreTemperature);
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for CORE_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
// Check max temperature among CORE0, CORE1, CORE2, CORE3 sensors across CORE_TEMPERATURES
|
||||
|
@ -80,6 +85,7 @@ ze_result_t LinuxTemperatureImp::getGlobalMaxTemperatureNoSubDevice(double *pTem
|
|||
key = "SOC_TEMPERATURES";
|
||||
result = pPmt->readValue(key, socTemperature);
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for SOC_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
// Check max temperature among possible sensors like PCH or GT_TEMP, DRAM, SA, PSF, DE, PCIE, TYPEC across SOC_TEMPERATURES
|
||||
|
@ -100,6 +106,7 @@ ze_result_t LinuxTemperatureImp::getGlobalMaxTemperature(double *pTemperature) {
|
|||
std::string key("TileMaxTemperature");
|
||||
ze_result_t result = pPmt->readValue(key, globalMaxTemperature);
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for TileMaxTemperature is returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
*pTemperature = static_cast<double>(globalMaxTemperature);
|
||||
|
@ -113,6 +120,7 @@ ze_result_t LinuxTemperatureImp::getGpuMaxTemperatureNoSubDevice(double *pTemper
|
|||
std::string key = "SOC_TEMPERATURES";
|
||||
auto result = pPmt->readValue(key, socTemperature);
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for SOC_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
gpuMaxTemperature = static_cast<double>(socTemperature & 0xff);
|
||||
|
@ -123,6 +131,7 @@ ze_result_t LinuxTemperatureImp::getGpuMaxTemperatureNoSubDevice(double *pTemper
|
|||
std::string key("COMPUTE_TEMPERATURES");
|
||||
ze_result_t result = pPmt->readValue(key, computeTemperature);
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for COMPUTE_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -142,6 +151,7 @@ ze_result_t LinuxTemperatureImp::getGpuMaxTemperature(double *pTemperature) {
|
|||
std::string key("GTMaxTemperature");
|
||||
ze_result_t result = pPmt->readValue(key, gpuMaxTemperature);
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for GTMaxTemperature is returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
*pTemperature = static_cast<double>(gpuMaxTemperature);
|
||||
|
@ -156,6 +166,7 @@ ze_result_t LinuxTemperatureImp::getMemoryMaxTemperature(double *pTemperature) {
|
|||
} else if (productFamily == IGFX_PVC) {
|
||||
numHbmModules = 4u;
|
||||
} else {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s() returning UNSUPPORTED_FEATURE \n", __FUNCTION__);
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
|
@ -166,6 +177,7 @@ ze_result_t LinuxTemperatureImp::getMemoryMaxTemperature(double *pTemperature) {
|
|||
std::string key = "HBM" + std::to_string(hbmModuleIndex) + "MaxDeviceTemperature";
|
||||
result = pPmt->readValue(key, maxDeviceTemperature);
|
||||
if (result != ZE_RESULT_SUCCESS) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for %s is returning error:0x%x \n", __FUNCTION__, key.c_str(), result);
|
||||
return result;
|
||||
}
|
||||
maxDeviceTemperatureList.push_back(maxDeviceTemperature);
|
||||
|
|
Loading…
Reference in New Issue