feature(sysman): Added support for Temperature APIs

Added support for the Temperature APIs in the new sysman design.
Added ULTs for the Temperature APIs in the new sysman design.

Related-To: LOCI-4293

Signed-off-by: Bari, Pratik <pratik.bari@intel.com>
This commit is contained in:
Bari, Pratik
2023-04-14 15:41:53 +00:00
committed by Compute-Runtime-Automation
parent 3ead7f6ac9
commit 4647564cc9
23 changed files with 1254 additions and 9 deletions

View File

@@ -119,5 +119,10 @@ ze_result_t SysmanDevice::deviceSetEccState(zes_device_handle_t hDevice, const z
return pSysmanDevice->deviceSetEccState(newState, pState);
}
// Static entry point: resolves the device handle to its SysmanDevice object and
// forwards the temperature handle enumeration request to it.
ze_result_t SysmanDevice::temperatureGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_temp_handle_t *phTemperature) {
    return L0::Sysman::SysmanDevice::fromHandle(hDevice)->temperatureGet(pCount, phTemperature);
}
} // namespace Sysman
} // namespace L0

View File

@@ -21,6 +21,7 @@
#include "level_zero/sysman/source/ras/ras.h"
#include "level_zero/sysman/source/scheduler/scheduler.h"
#include "level_zero/sysman/source/standby/standby.h"
#include "level_zero/sysman/source/temperature/temperature.h"
#include <level_zero/ze_api.h>
#include <level_zero/zes_api.h>
@@ -90,6 +91,9 @@ struct SysmanDevice : _ze_device_handle_t {
static ze_result_t deviceSetEccState(zes_device_handle_t hDevice, const zes_device_ecc_desc_t *newState, zes_device_ecc_properties_t *pState);
virtual ze_result_t deviceSetEccState(const zes_device_ecc_desc_t *newState, zes_device_ecc_properties_t *pState) = 0;
static ze_result_t temperatureGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_temp_handle_t *phTemperature);
virtual ze_result_t temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature) = 0;
};
} // namespace Sysman

View File

@@ -35,6 +35,7 @@ SysmanDeviceImp::SysmanDeviceImp(NEO::ExecutionEnvironment *executionEnvironment
pGlobalOperations = new GlobalOperationsImp(pOsSysman);
pStandbyHandleContext = new StandbyHandleContext(pOsSysman);
pEcc = new EccImp(pOsSysman);
pTempHandleContext = new TemperatureHandleContext(pOsSysman);
}
SysmanDeviceImp::~SysmanDeviceImp() {
@@ -50,6 +51,7 @@ SysmanDeviceImp::~SysmanDeviceImp() {
freeResource(pFabricPortHandleContext);
freeResource(pStandbyHandleContext);
freeResource(pEcc);
freeResource(pTempHandleContext);
freeResource(pOsSysman);
executionEnvironment->decRefInternal();
}
@@ -117,15 +119,19 @@ ze_result_t SysmanDeviceImp::firmwareGet(uint32_t *pCount, zes_firmware_handle_t
// Forwards the diagnostics handle enumeration to the diagnostics handle context.
ze_result_t SysmanDeviceImp::diagnosticsGet(uint32_t *pCount, zes_diag_handle_t *phDiagnostics) {
return pDiagnosticsHandleContext->diagnosticsGet(pCount, phDiagnostics);
}
// Forwards the ECC availability query to the ECC object.
ze_result_t SysmanDeviceImp::deviceEccAvailable(ze_bool_t *pAvailable) {
return pEcc->deviceEccAvailable(pAvailable);
}
// Forwards the ECC configurability query to the ECC object.
ze_result_t SysmanDeviceImp::deviceEccConfigurable(ze_bool_t *pConfigurable) {
return pEcc->deviceEccConfigurable(pConfigurable);
}
// Forwards the ECC state query to the ECC object.
ze_result_t SysmanDeviceImp::deviceGetEccState(zes_device_ecc_properties_t *pState) {
return pEcc->getEccState(pState);
}
// Forwards the ECC state change request (and the output state) to the ECC object.
ze_result_t SysmanDeviceImp::deviceSetEccState(const zes_device_ecc_desc_t *newState, zes_device_ecc_properties_t *pState) {
return pEcc->setEccState(newState, pState);
}
@@ -134,5 +140,9 @@ ze_result_t SysmanDeviceImp::standbyGet(uint32_t *pCount, zes_standby_handle_t *
return pStandbyHandleContext->standbyGet(pCount, phStandby);
}
// Forwards the temperature handle enumeration to the temperature handle context.
ze_result_t SysmanDeviceImp::temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature) {
return pTempHandleContext->temperatureGet(pCount, phTemperature);
}
} // namespace Sysman
} // namespace L0

View File

@@ -49,6 +49,7 @@ struct SysmanDeviceImp : SysmanDevice, NEO::NonCopyableOrMovableClass {
FrequencyHandleContext *pFrequencyHandleContext = nullptr;
StandbyHandleContext *pStandbyHandleContext = nullptr;
Ecc *pEcc = nullptr;
TemperatureHandleContext *pTempHandleContext = nullptr;
ze_result_t powerGet(uint32_t *pCount, zes_pwr_handle_t *phPower) override;
ze_result_t powerGetCardDomain(zes_pwr_handle_t *phPower) override;
@@ -69,6 +70,7 @@ struct SysmanDeviceImp : SysmanDevice, NEO::NonCopyableOrMovableClass {
ze_result_t deviceEccConfigurable(ze_bool_t *pConfigurable) override;
ze_result_t deviceGetEccState(zes_device_ecc_properties_t *pState) override;
ze_result_t deviceSetEccState(const zes_device_ecc_desc_t *newState, zes_device_ecc_properties_t *pState) override;
ze_result_t temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature) override;
private:
NEO::ExecutionEnvironment *executionEnvironment = nullptr;

View File

@@ -0,0 +1,17 @@
#
# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Add the OS-independent temperature sources to the ${L0_STATIC_LIB_NAME} target.
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/os_temperature.h
${CMAKE_CURRENT_SOURCE_DIR}/temperature.cpp
${CMAKE_CURRENT_SOURCE_DIR}/temperature.h
${CMAKE_CURRENT_SOURCE_DIR}/temperature_imp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/temperature_imp.h
)
# Project-provided macro; presumably descends into the child directories - TODO confirm.
add_subdirectories()

View File

@@ -0,0 +1,14 @@
#
# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# The Linux temperature backend is only added to the target on UNIX builds.
if(UNIX)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/os_temperature_imp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_temperature_imp.h
)
endif()

View File

@@ -0,0 +1,247 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/sysman/source/temperature/linux/os_temperature_imp.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "level_zero/sysman/source/linux/os_sysman_imp.h"
namespace L0 {
namespace Sysman {
// Number of 8-bit sensor fields decoded from each PMT temperature value.
constexpr uint32_t numSocTemperatureEntries = 7; // entries would be PCH or GT_TEMP, DRAM, SA, PSF, DE, PCIE, TYPEC
constexpr uint32_t numCoreTemperatureEntries = 4; // entries would be CORE0, CORE1, CORE2, CORE3
constexpr uint32_t numComputeTemperatureEntries = 3; // entries would be IA, GT and LLC
// Validity bounds: a sensor reading above invalidMaxTemperature or below
// invalidMinTemperature is ignored when the global maximum is computed.
constexpr uint32_t invalidMaxTemperature = 125;
constexpr uint32_t invalidMinTemperature = 10;
// Fills in the sensor type and sub-device placement of this temperature sensor.
// The sub-device fields are zero when the sensor does not live on a sub-device.
ze_result_t LinuxTemperatureImp::getProperties(zes_temp_properties_t *pProperties) {
    pProperties->type = type;
    if (isSubdevice) {
        pProperties->onSubdevice = isSubdevice;
        pProperties->subdeviceId = subdeviceId;
    } else {
        pProperties->onSubdevice = 0;
        pProperties->subdeviceId = 0;
    }
    return ZE_RESULT_SUCCESS;
}
// Computes the global maximum temperature for a device that is read through the
// root-device telemetry node (no sub-device).
//
// The PMT values are treated as packed 8-bit sensor readings. SOC_TEMPERATURES is
// always read; on DG1 the COMPUTE_TEMPERATURES and CORE_TEMPERATURES values are read
// as well. Readings outside [invalidMinTemperature, invalidMaxTemperature] are skipped.
//
// pTemperature receives the largest valid reading (0 when no reading is valid).
// Returns ZE_RESULT_SUCCESS, or the error of the first PMT read that fails, in which
// case *pTemperature is left untouched.
ze_result_t LinuxTemperatureImp::getGlobalMaxTemperatureNoSubDevice(double *pTemperature) {
    auto isValidTemperature = [](uint32_t temperature) {
        if ((temperature > invalidMaxTemperature) || (temperature < invalidMinTemperature)) {
            // The reading is an unsigned integer, so it has to be printed with %u;
            // handing it to a %f conversion is undefined behaviour.
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): temperature:%u is not in valid limits \n", __FUNCTION__, temperature);
            return false;
        }
        return true;
    };

    // Extracts the first numTemperatureEntries bytes of the packed value and returns the largest valid one.
    auto getMaxTemperature = [&](auto temperature, auto numTemperatureEntries) {
        uint32_t maxTemperature = 0;
        for (uint32_t count = 0; count < numTemperatureEntries; count++) {
            uint32_t localTemperatureVal = (temperature >> (8 * count)) & 0xff;
            if (isValidTemperature(localTemperatureVal)) {
                if (localTemperatureVal > maxTemperature) {
                    maxTemperature = localTemperatureVal;
                }
            }
        }
        return maxTemperature;
    };

    ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    uint32_t maxComputeTemperature = 0;
    uint32_t maxCoreTemperature = 0;
    std::string key;
    if (productFamily == IGFX_DG1) {
        uint32_t computeTemperature = 0;
        key = "COMPUTE_TEMPERATURES";
        result = pPmt->readValue(key, computeTemperature);
        if (result != ZE_RESULT_SUCCESS) {
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for COMPUTE_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
            return result;
        }
        // Check max temperature among IA, GT and LLC sensors across COMPUTE_TEMPERATURES
        maxComputeTemperature = getMaxTemperature(computeTemperature, numComputeTemperatureEntries);

        uint32_t coreTemperature = 0;
        key = "CORE_TEMPERATURES";
        result = pPmt->readValue(key, coreTemperature);
        if (result != ZE_RESULT_SUCCESS) {
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for CORE_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
            return result;
        }
        // Check max temperature among CORE0, CORE1, CORE2, CORE3 sensors across CORE_TEMPERATURES
        maxCoreTemperature = getMaxTemperature(coreTemperature, numCoreTemperatureEntries);
    }

    // SOC_TEMPERATURES is present in all product families
    uint64_t socTemperature = 0;
    key = "SOC_TEMPERATURES";
    result = pPmt->readValue(key, socTemperature);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for SOC_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
        return result;
    }
    // Check max temperature among possible sensors like PCH or GT_TEMP, DRAM, SA, PSF, DE, PCIE, TYPEC across SOC_TEMPERATURES
    uint32_t maxSocTemperature = getMaxTemperature(socTemperature, numSocTemperatureEntries);

    *pTemperature = static_cast<double>(std::max({maxComputeTemperature, maxCoreTemperature, maxSocTemperature}));
    return result;
}
// Reports the global maximum temperature of this sensor's device or tile.
// Returns the PMT read error unchanged when the telemetry key cannot be read.
ze_result_t LinuxTemperatureImp::getGlobalMaxTemperature(double *pTemperature) {
    // For XE_HP_SDV and PVC single tile devices, telemetry info is retrieved from
    // the tile's telem node rather than from the root device telem node.
    const bool readsTileTelemetry = isSubdevice || (productFamily == IGFX_PVC) || (productFamily == IGFX_XE_HP_SDV);
    if (!readsTileTelemetry) {
        return getGlobalMaxTemperatureNoSubDevice(pTemperature);
    }

    const std::string telemetryKey = "TileMaxTemperature";
    uint32_t tileMaxValue = 0;
    const ze_result_t status = pPmt->readValue(telemetryKey, tileMaxValue);
    if (status == ZE_RESULT_SUCCESS) {
        *pTemperature = static_cast<double>(tileMaxValue);
        return status;
    }
    NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for TileMaxTemperature is returning error:0x%x \n", __FUNCTION__, status);
    return status;
}
// Reports the GPU temperature for a device that is read through the root-device
// telemetry node (no sub-device).
//
// The value comes from bits 0-7 (GT_TEMP) of SOC_TEMPERATURES. On DG1 it is then
// replaced by bits 8-15 of COMPUTE_TEMPERATURES. SOC_TEMPERATURES is still read first
// on DG1, so a failure of that read is reported there as well.
//
// Returns ZE_RESULT_SUCCESS, or the error of the first PMT read that fails, in which
// case *pTemperature is left untouched.
ze_result_t LinuxTemperatureImp::getGpuMaxTemperatureNoSubDevice(double *pTemperature) {
    uint64_t socTemperature = 0;
    // Gpu temperature is obtained from GT_TEMP in SOC_TEMPERATURE's bit 0 to 7.
    std::string key = "SOC_TEMPERATURES";
    ze_result_t result = pPmt->readValue(key, socTemperature);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for SOC_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
        return result;
    }
    double gpuMaxTemperature = static_cast<double>(socTemperature & 0xff);

    if (productFamily == IGFX_DG1) {
        // In DG1 platform, Gpu Max Temperature is obtained from COMPUTE_TEMPERATURE only.
        // The outer key/result locals are reused here instead of being shadowed.
        uint32_t computeTemperature = 0;
        key = "COMPUTE_TEMPERATURES";
        result = pPmt->readValue(key, computeTemperature);
        if (result != ZE_RESULT_SUCCESS) {
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for COMPUTE_TEMPERATURES is returning error:0x%x \n", __FUNCTION__, result);
            return result;
        }
        // GT temperature could be read via 8th to 15th bit in the value read in temperature
        computeTemperature = (computeTemperature >> 8) & 0xff;
        gpuMaxTemperature = static_cast<double>(computeTemperature);
    }

    *pTemperature = gpuMaxTemperature;
    return ZE_RESULT_SUCCESS;
}
// Reports the GPU maximum temperature of this sensor's device or tile.
// Devices that are neither a sub-device nor PVC/XE_HP_SDV take the root-device path;
// all others read the GTMaxTemperature telemetry key.
ze_result_t LinuxTemperatureImp::getGpuMaxTemperature(double *pTemperature) {
    const bool readsTileTelemetry = isSubdevice || (productFamily == IGFX_PVC) || (productFamily == IGFX_XE_HP_SDV);
    if (!readsTileTelemetry) {
        return getGpuMaxTemperatureNoSubDevice(pTemperature);
    }

    const std::string telemetryKey = "GTMaxTemperature";
    uint32_t gtMaxValue = 0;
    const ze_result_t status = pPmt->readValue(telemetryKey, gtMaxValue);
    if (status == ZE_RESULT_SUCCESS) {
        *pTemperature = static_cast<double>(gtMaxValue);
        return status;
    }
    NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for GTMaxTemperature is returning error:0x%x \n", __FUNCTION__, status);
    return status;
}
// Reports the highest HBM module temperature.
//
// Only XE_HP_SDV (2 HBM modules) and PVC (4 HBM modules) are handled; every other
// product family yields ZE_RESULT_ERROR_UNSUPPORTED_FEATURE. Each module is read
// through the "HBM<index>MaxDeviceTemperature" telemetry key and the largest value
// is written to *pTemperature. A failing read aborts with that error and leaves
// *pTemperature untouched.
ze_result_t LinuxTemperatureImp::getMemoryMaxTemperature(double *pTemperature) {
    ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    uint32_t numHbmModules = 0u;
    if (productFamily == IGFX_XE_HP_SDV) {
        numHbmModules = 2u;
    } else if (productFamily == IGFX_PVC) {
        numHbmModules = 4u;
    } else {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s() returning UNSUPPORTED_FEATURE \n", __FUNCTION__);
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    // Track the running maximum directly; no temporary container is needed.
    uint32_t maxHbmTemperature = 0;
    for (auto hbmModuleIndex = 0u; hbmModuleIndex < numHbmModules; hbmModuleIndex++) {
        uint32_t maxDeviceTemperature = 0;
        // To read HBM 0's max device temperature key would be HBM0MaxDeviceTemperature
        std::string key = "HBM" + std::to_string(hbmModuleIndex) + "MaxDeviceTemperature";
        result = pPmt->readValue(key, maxDeviceTemperature);
        if (result != ZE_RESULT_SUCCESS) {
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Pmt->readvalue() for %s is returning error:0x%x \n", __FUNCTION__, key.c_str(), result);
            return result;
        }
        if (maxDeviceTemperature > maxHbmTemperature) {
            maxHbmTemperature = maxDeviceTemperature;
        }
    }

    *pTemperature = static_cast<double>(maxHbmTemperature);
    return result;
}
// Dispatches the temperature query to the reader that matches this sensor's type.
// Unknown sensor types write 0 to *pTemperature and report an unsupported feature.
ze_result_t LinuxTemperatureImp::getSensorTemperature(double *pTemperature) {
    switch (type) {
    case ZES_TEMP_SENSORS_GLOBAL:
        return getGlobalMaxTemperature(pTemperature);
    case ZES_TEMP_SENSORS_GPU:
        return getGpuMaxTemperature(pTemperature);
    case ZES_TEMP_SENSORS_MEMORY:
        return getMemoryMaxTemperature(pTemperature);
    default:
        *pTemperature = 0;
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }
}
// A sensor is supported when a PMT object is available for it. Memory sensors are
// additionally rejected on devices that are neither a sub-device nor PVC/XE_HP_SDV.
bool LinuxTemperatureImp::isTempModuleSupported() {
    const bool readsTileTelemetry = isSubdevice || (productFamily == IGFX_PVC) || (productFamily == IGFX_XE_HP_SDV);
    if (!readsTileTelemetry && (type == ZES_TEMP_SENSORS_MEMORY)) {
        return false;
    }
    return pPmt != nullptr;
}
// Records which sensor type (global, GPU, memory, ...) this object reports.
void LinuxTemperatureImp::setSensorType(zes_temp_sensors_t sensorType) {
type = sensorType;
}
// Binds the sensor to its (sub)device: stores the sub-device placement, then looks up
// the PMT accessor for that sub-device id and the product family from the Linux
// sysman object.
LinuxTemperatureImp::LinuxTemperatureImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId)
    : subdeviceId(subdeviceId), isSubdevice(onSubdevice) {
    auto *pLinuxSysmanImp = static_cast<LinuxSysmanImp *>(pOsSysman);
    pPmt = pLinuxSysmanImp->getPlatformMonitoringTechAccess(subdeviceId);
    productFamily = pLinuxSysmanImp->getProductFamily();
}
// Linux factory: builds a LinuxTemperatureImp for the given (sub)device and tags it
// with the requested sensor type before handing ownership to the caller.
std::unique_ptr<OsTemperature> OsTemperature::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_temp_sensors_t sensorType) {
    auto pLinuxTemperatureImp = std::make_unique<LinuxTemperatureImp>(pOsSysman, onSubdevice, subdeviceId);
    pLinuxTemperatureImp->setSensorType(sensorType);
    return pLinuxTemperatureImp;
}
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,52 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "level_zero/sysman/source/linux/fs_access.h"
#include "level_zero/sysman/source/linux/pmt/pmt.h"
#include "level_zero/sysman/source/temperature/os_temperature.h"
#include "igfxfmid.h"
#include <memory>
namespace L0 {
namespace Sysman {
class SysfsAccess;
class PlatformMonitoringTech;
// Linux implementation of OsTemperature that reads sensor values through the
// PlatformMonitoringTech (PMT) accessor.
class LinuxTemperatureImp : public OsTemperature, NEO::NonCopyableOrMovableClass {
  public:
    // Reports the sensor type and sub-device placement.
    ze_result_t getProperties(zes_temp_properties_t *pProperties) override;
    // Reads the current temperature for the configured sensor type.
    ze_result_t getSensorTemperature(double *pTemperature) override;
    // Tells whether this sensor can be exposed on the current device.
    bool isTempModuleSupported() override;
    // Selects which sensor type this object reports.
    void setSensorType(zes_temp_sensors_t sensorType);
    LinuxTemperatureImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId);
    LinuxTemperatureImp() = default;
    ~LinuxTemperatureImp() override = default;

  protected:
    // PMT accessor used for all reads; protected so tests can substitute it.
    PlatformMonitoringTech *pPmt = nullptr;
    zes_temp_sensors_t type = ZES_TEMP_SENSORS_GLOBAL;

  private:
    // Per-sensor readers. The *NoSubDevice variants read through the root-device
    // telemetry node. The previously declared getGlobalMinTemperature and
    // getGpuMinTemperature had no definition and were removed.
    ze_result_t getGlobalMaxTemperature(double *pTemperature);
    ze_result_t getGpuMaxTemperature(double *pTemperature);
    ze_result_t getMemoryMaxTemperature(double *pTemperature);
    ze_result_t getGlobalMaxTemperatureNoSubDevice(double *pTemperature);
    ze_result_t getGpuMaxTemperatureNoSubDevice(double *pTemperature);
    uint32_t subdeviceId = 0;
    ze_bool_t isSubdevice = 0;
    PRODUCT_FAMILY productFamily = IGFX_UNKNOWN;
};
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,27 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <level_zero/zes_api.h>
#include <memory>
namespace L0 {
namespace Sysman {
struct OsSysman;
// OS abstraction for a single temperature sensor. Each supported OS provides an
// implementation and its own definition of create().
class OsTemperature {
public:
// Fills pProperties with the sensor's properties.
virtual ze_result_t getProperties(zes_temp_properties_t *pProperties) = 0;
// Writes the current sensor reading to pTemperature.
virtual ze_result_t getSensorTemperature(double *pTemperature) = 0;
// Returns true when the sensor can be exposed on this device.
virtual bool isTempModuleSupported() = 0;
// Factory for the OS-specific implementation; the caller owns the returned object.
static std::unique_ptr<OsTemperature> create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_temp_sensors_t sensorType);
virtual ~OsTemperature() = default;
};
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,60 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/basic_math.h"
#include "level_zero/sysman/source/os_sysman.h"
#include "level_zero/sysman/source/temperature/temperature_imp.h"
namespace L0 {
namespace Sysman {
// Nothing to release by hand: handleList owns its entries through unique_ptr.
TemperatureHandleContext::~TemperatureHandleContext() = default;
void TemperatureHandleContext::createHandle(bool onSubdevice, uint32_t subDeviceId, zes_temp_sensors_t type) {
std::unique_ptr<Temperature> pTemperature = std::make_unique<TemperatureImp>(pOsSysman, onSubdevice, subDeviceId, type);
if (pTemperature->initSuccess == true) {
handleList.push_back(std::move(pTemperature));
}
}
// Attempts to create global, GPU and memory temperature handles (in that order) for
// every sub-device, or once for the device itself when there are no sub-devices.
ze_result_t TemperatureHandleContext::init(uint32_t subDeviceCount) {
    const zes_temp_sensors_t sensorTypes[] = {ZES_TEMP_SENSORS_GLOBAL, ZES_TEMP_SENSORS_GPU, ZES_TEMP_SENSORS_MEMORY};
    const bool onSubdevice = subDeviceCount > 0;
    // Without sub-devices a single pass with id 0 is made for the device itself.
    const uint32_t deviceCount = onSubdevice ? subDeviceCount : 1u;
    for (uint32_t deviceId = 0; deviceId < deviceCount; deviceId++) {
        for (const auto sensorType : sensorTypes) {
            createHandle(onSubdevice, deviceId, sensorType);
        }
    }
    return ZE_RESULT_SUCCESS;
}
// Enumerates the temperature handles.
// The handle list is built once, on the first call. When *pCount is 0 or larger than
// the number of handles it is overwritten with that number. When phTemperature is
// non-null, min(original *pCount, number of handles) handles are copied into it.
ze_result_t TemperatureHandleContext::temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature) {
    std::call_once(initTemperatureOnce, [this]() {
        this->init(pOsSysman->getSubDeviceCount());
    });

    const auto availableHandles = static_cast<uint32_t>(handleList.size());
    const uint32_t requestedHandles = *pCount;
    const uint32_t handlesToCopy = (availableHandles < requestedHandles) ? availableHandles : requestedHandles;
    if ((requestedHandles == 0) || (requestedHandles > availableHandles)) {
        *pCount = availableHandles;
    }
    if (phTemperature == nullptr) {
        return ZE_RESULT_SUCCESS;
    }
    for (uint32_t index = 0; index < handlesToCopy; index++) {
        phTemperature[index] = handleList[index]->toHandle();
    }
    return ZE_RESULT_SUCCESS;
}
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,52 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "level_zero/api/sysman/zes_handles_struct.h"
#include <level_zero/zes_api.h>
#include <memory>
#include <mutex>
#include <vector>
namespace L0 {
namespace Sysman {
struct OsSysman;
// Abstract temperature sensor object behind a zes_temp_handle_t.
class Temperature : _zes_temp_handle_t {
public:
virtual ze_result_t temperatureGetProperties(zes_temp_properties_t *pProperties) = 0;
virtual ze_result_t temperatureGetConfig(zes_temp_config_t *pConfig) = 0;
virtual ze_result_t temperatureSetConfig(const zes_temp_config_t *pConfig) = 0;
virtual ze_result_t temperatureGetState(double *pTemperature) = 0;
// Converts an API handle back to the object it refers to.
static Temperature *fromHandle(zes_temp_handle_t handle) {
return static_cast<Temperature *>(handle);
}
// Returns this object as an API handle.
inline zes_temp_handle_t toHandle() { return this; }
// Set by the implementation when the sensor initialized successfully; handles with
// initSuccess == false are not added to the handle list.
bool initSuccess = false;
// Properties cached for this sensor.
zes_temp_properties_t tempProperties = {};
};
// Owns the temperature handles of one sysman device and creates them on first use.
struct TemperatureHandleContext {
TemperatureHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){};
~TemperatureHandleContext();
// Creates the handles for subDeviceCount sub-devices (or for the device itself when 0).
ze_result_t init(uint32_t subDeviceCount);
// Enumerates the handles; runs init() once on the first call.
ze_result_t temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature);
OsSysman *pOsSysman = nullptr;
std::vector<std::unique_ptr<Temperature>> handleList = {};
private:
void createHandle(bool onSubdevice, uint32_t subDeviceId, zes_temp_sensors_t type);
// Guards the one-time init() call made from temperatureGet().
std::once_flag initTemperatureOnce;
};
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,48 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/sysman/source/temperature/temperature_imp.h"
#include "level_zero/sysman/source/sysman_device_imp.h"
namespace L0 {
namespace Sysman {
// Returns a copy of the properties cached in tempProperties.
ze_result_t TemperatureImp::temperatureGetProperties(zes_temp_properties_t *pProperties) {
*pProperties = tempProperties;
return ZE_RESULT_SUCCESS;
}
// Not implemented: always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t TemperatureImp::temperatureGetConfig(zes_temp_config_t *pConfig) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Not implemented: always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t TemperatureImp::temperatureSetConfig(const zes_temp_config_t *pConfig) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Forwards the temperature read to the OS-specific implementation.
ze_result_t TemperatureImp::temperatureGetState(double *pTemperature) {
return pOsTemperature->getSensorTemperature(pTemperature);
}
// Caches the sensor properties and marks the handle as usable, but only when the
// OS layer reports the sensor as supported.
void TemperatureImp::init() {
    if (!pOsTemperature->isTempModuleSupported()) {
        return;
    }
    pOsTemperature->getProperties(&tempProperties);
    this->initSuccess = true;
}
// Creates the OS-specific sensor object for the given (sub)device and sensor type,
// then runs init() to determine whether the handle is usable.
TemperatureImp::TemperatureImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_temp_sensors_t type) {
pOsTemperature = OsTemperature::create(pOsSysman, onSubdevice, subdeviceId, type);
init();
}
// pOsTemperature is a unique_ptr, so no manual cleanup is needed.
TemperatureImp::~TemperatureImp() = default;
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "level_zero/sysman/source/temperature/os_temperature.h"
#include "level_zero/sysman/source/temperature/temperature.h"
namespace L0 {
namespace Sysman {
// Concrete Temperature handle that delegates sensor access to an OsTemperature object.
class TemperatureImp : public Temperature, NEO::NonCopyableOrMovableClass {
public:
ze_result_t temperatureGetProperties(zes_temp_properties_t *pProperties) override;
ze_result_t temperatureGetConfig(zes_temp_config_t *pConfig) override;
ze_result_t temperatureSetConfig(const zes_temp_config_t *pConfig) override;
ze_result_t temperatureGetState(double *pTemperature) override;
TemperatureImp() = default;
TemperatureImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_temp_sensors_t type);
~TemperatureImp() override;
// OS-specific backend; created by the four-argument constructor.
std::unique_ptr<OsTemperature> pOsTemperature = nullptr;
// Queries the backend for support and caches the properties on success.
void init();
};
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,14 @@
#
# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# The Windows temperature backend is only added to the target on WIN32 builds.
if(WIN32)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/os_temperature_imp.h
${CMAKE_CURRENT_SOURCE_DIR}/os_temperature_imp.cpp
)
endif()

View File

@@ -0,0 +1,33 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/sysman/source/temperature/windows/os_temperature_imp.h"
namespace L0 {
namespace Sysman {
// Not implemented on Windows: always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t WddmTemperatureImp::getProperties(zes_temp_properties_t *pProperties) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Not implemented on Windows: always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t WddmTemperatureImp::getSensorTemperature(double *pTemperature) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Always false, so no temperature handle is exposed through this backend.
bool WddmTemperatureImp::isTempModuleSupported() {
return false;
}
// The OsSysman pointer is accepted for interface symmetry but not used.
WddmTemperatureImp::WddmTemperatureImp(OsSysman *pOsSysman) {}
// Windows factory: builds a WddmTemperatureImp. The sub-device and sensor type
// arguments are not used by this backend.
std::unique_ptr<OsTemperature> OsTemperature::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_temp_sensors_t sensorType) {
    // Return the make_unique result directly; wrapping a local in std::move on return is redundant.
    return std::make_unique<WddmTemperatureImp>(pOsSysman);
}
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "level_zero/sysman/source/temperature/os_temperature.h"
#include "level_zero/sysman/source/windows/os_sysman_imp.h"
namespace L0 {
namespace Sysman {
class KmdSysManager;
// Windows implementation of OsTemperature.
class WddmTemperatureImp : public OsTemperature, NEO::NonCopyableOrMovableClass {
public:
ze_result_t getProperties(zes_temp_properties_t *pProperties) override;
ze_result_t getSensorTemperature(double *pTemperature) override;
bool isTempModuleSupported() override;
WddmTemperatureImp(OsSysman *pOsSysman);
WddmTemperatureImp() = default;
~WddmTemperatureImp() override = default;
};
} // namespace Sysman
} // namespace L0

View File

@@ -0,0 +1,10 @@
#
# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Register this CMakeLists.txt with the test target.
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
)
# Project-provided macro; presumably descends into the child directories - TODO confirm.
add_subdirectories()

View File

@@ -0,0 +1,18 @@
#
# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Linux temperature unit-test sources; added to the test target on UNIX builds only.
set(L0_TESTS_SYSMAN_TEMPERATURE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_zes_temperature.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_temperature.h
)
if(UNIX)
target_sources(${TARGET_NAME}
PRIVATE
${L0_TESTS_SYSMAN_TEMPERATURE_LINUX}
)
endif()

View File

@@ -0,0 +1,174 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "level_zero/sysman/source/linux/fs_access.h"
#include "level_zero/sysman/source/linux/pmt/pmt.h"
#include "level_zero/sysman/source/temperature/linux/os_temperature_imp.h"
#include "level_zero/sysman/source/temperature/temperature_imp.h"
namespace L0 {
namespace ult {
// Values returned by MockTemperaturePmt for the HBM<n>MaxDeviceTemperature keys.
constexpr uint8_t memory0MaxTemperature = 0x12;
constexpr uint8_t memory1MaxTemperature = 0x45;
constexpr uint8_t memory2MaxTemperature = 0x32;
constexpr uint8_t memory3MaxTemperature = 0x36;
// Values returned for the GTMaxTemperature and TileMaxTemperature keys.
constexpr uint32_t gtMaxTemperature = 0x1d;
constexpr uint32_t tileMaxTemperature = 0x34;
// Start offsets into tempArrForNoSubDevices for the packed COMPUTE_TEMPERATURES,
// CORE_TEMPERATURES and SOC_TEMPERATURES mock values.
constexpr uint8_t computeTempIndex = 8;
constexpr uint8_t coreTempIndex = 12;
constexpr uint8_t socTempIndex = 0;
// Raw sensor bytes: the mock packs 8 bytes from socTempIndex for SOC_TEMPERATURES and
// 4 bytes each from computeTempIndex and coreTempIndex for the other two keys.
constexpr uint8_t tempArrForNoSubDevices[19] = {0x12, 0x23, 0x43, 0xde, 0xa3, 0xce, 0x23, 0x11, 0x45, 0x32, 0x67, 0x47, 0xac, 0x21, 0x03, 0x90, 0, 0, 0};
// NOTE(review): presumably the array positions of the expected GPU readings used by
// the tests - confirm against test_zes_temperature.cpp.
constexpr uint8_t computeIndexForNoSubDevices = 9;
constexpr uint8_t gtTempIndexForNoSubDevices = 0;
// Mocked sysfs layout: telemetry class directory, GPU upstream port path, and the
// real paths that the telem1..telem5 class entries resolve to.
const std::string baseTelemSysFS("/sys/class/intel_pmt");
std::string gpuUpstreamPortPathInTemperature = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0";
const std::string realPathTelem1 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem1";
const std::string realPathTelem2 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem2";
const std::string realPathTelem3 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem3";
const std::string realPathTelem4 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem4";
const std::string realPathTelem5 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem5";
const std::string sysfsPahTelem1 = "/sys/class/intel_pmt/telem1";
const std::string sysfsPahTelem2 = "/sys/class/intel_pmt/telem2";
const std::string sysfsPahTelem3 = "/sys/class/intel_pmt/telem3";
const std::string sysfsPahTelem4 = "/sys/class/intel_pmt/telem4";
const std::string sysfsPahTelem5 = "/sys/class/intel_pmt/telem5";
// PlatformMonitoringTech mock that serves canned temperature values per telemetry key.
// The mock*Result members let a test force a failure for every read, or only for the
// CORE_TEMPERATURES / COMPUTE_TEMPERATURES reads.
struct MockTemperaturePmt : public L0::Sysman::PlatformMonitoringTech {
    MockTemperaturePmt(L0::Sysman::FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId) : L0::Sysman::PlatformMonitoringTech(pFsAccess, onSubdevice, subdeviceId) {}
    using L0::Sysman::PlatformMonitoringTech::closeFunction;
    using L0::Sysman::PlatformMonitoringTech::keyOffsetMap;
    using L0::Sysman::PlatformMonitoringTech::openFunction;
    using L0::Sysman::PlatformMonitoringTech::preadFunction;
    using L0::Sysman::PlatformMonitoringTech::telemetryDeviceEntry;

    ze_result_t mockReadValueResult = ZE_RESULT_SUCCESS;
    ze_result_t mockReadCoreTempResult = ZE_RESULT_SUCCESS;
    ze_result_t mockReadComputeTempResult = ZE_RESULT_SUCCESS;

    ~MockTemperaturePmt() override {
        rootDeviceTelemNodeIndex = 0;
    }

    // Sets the telemetry device entry once the root telem index was enumerated successfully.
    void mockedInit(L0::Sysman::FsAccess *pFsAccess) {
        if (ZE_RESULT_SUCCESS == PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess, gpuUpstreamPortPathInTemperature)) {
            telemetryDeviceEntry = "/sys/class/intel_pmt/telem2/telem";
        }
    }

    // 32-bit reads: HBM/GT/tile maxima and the packed COMPUTE/CORE temperature values.
    ze_result_t readValue(const std::string key, uint32_t &val) override {
        if (mockReadValueResult != ZE_RESULT_SUCCESS) {
            return mockReadValueResult;
        }
        if (key == "HBM0MaxDeviceTemperature") {
            val = memory0MaxTemperature;
            return ZE_RESULT_SUCCESS;
        }
        if (key == "HBM1MaxDeviceTemperature") {
            val = memory1MaxTemperature;
            return ZE_RESULT_SUCCESS;
        }
        if (key == "HBM2MaxDeviceTemperature") {
            val = memory2MaxTemperature;
            return ZE_RESULT_SUCCESS;
        }
        if (key == "HBM3MaxDeviceTemperature") {
            val = memory3MaxTemperature;
            return ZE_RESULT_SUCCESS;
        }
        if (key == "GTMaxTemperature") {
            val = gtMaxTemperature;
            return ZE_RESULT_SUCCESS;
        }
        if (key == "TileMaxTemperature") {
            val = tileMaxTemperature;
            return ZE_RESULT_SUCCESS;
        }
        if (key == "COMPUTE_TEMPERATURES") {
            if (mockReadComputeTempResult != ZE_RESULT_SUCCESS) {
                return mockReadComputeTempResult;
            }
            // Pack four consecutive bytes, least significant first.
            val = 0;
            for (uint32_t byteIndex = 0; byteIndex < sizeof(uint32_t); byteIndex++) {
                val |= static_cast<uint32_t>(tempArrForNoSubDevices[computeTempIndex + byteIndex]) << (byteIndex * 8);
            }
            return ZE_RESULT_SUCCESS;
        }
        if (key == "CORE_TEMPERATURES") {
            if (mockReadCoreTempResult != ZE_RESULT_SUCCESS) {
                return mockReadCoreTempResult;
            }
            val = 0;
            for (uint32_t byteIndex = 0; byteIndex < sizeof(uint32_t); byteIndex++) {
                val |= static_cast<uint32_t>(tempArrForNoSubDevices[coreTempIndex + byteIndex]) << (byteIndex * 8);
            }
            return ZE_RESULT_SUCCESS;
        }
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    // 64-bit reads: only the packed SOC_TEMPERATURES value is served.
    ze_result_t readValue(const std::string key, uint64_t &val) override {
        if (mockReadValueResult != ZE_RESULT_SUCCESS) {
            return mockReadValueResult;
        }
        if (key != "SOC_TEMPERATURES") {
            return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
        }
        val = 0;
        for (uint32_t byteIndex = 0; byteIndex < sizeof(uint64_t); byteIndex++) {
            val |= static_cast<uint64_t>(tempArrForNoSubDevices[socTempIndex + byteIndex]) << (byteIndex * 8);
        }
        return ZE_RESULT_SUCCESS;
    }
};
// FsAccess mock exposing five telemetry nodes under the intel_pmt class directory.
// Each operation can be forced to fail through its mockError* member.
struct MockTemperatureFsAccess : public L0::Sysman::FsAccess {
    ze_result_t mockErrorListDirectory = ZE_RESULT_SUCCESS;
    ze_result_t mockErrorGetRealPath = ZE_RESULT_SUCCESS;

    // Appends telem1..telem5 when asked for the telemetry class directory.
    ze_result_t listDirectory(const std::string directory, std::vector<std::string> &listOfTelemNodes) override {
        if (mockErrorListDirectory != ZE_RESULT_SUCCESS) {
            return mockErrorListDirectory;
        }
        if (directory != baseTelemSysFS) {
            return ZE_RESULT_ERROR_NOT_AVAILABLE;
        }
        const char *telemNodes[] = {"telem1", "telem2", "telem3", "telem4", "telem5"};
        for (const char *node : telemNodes) {
            listOfTelemNodes.push_back(node);
        }
        return ZE_RESULT_SUCCESS;
    }

    // Resolves a telemetry class path to its mocked real path.
    ze_result_t getRealPath(const std::string path, std::string &buf) override {
        if (mockErrorGetRealPath != ZE_RESULT_SUCCESS) {
            return mockErrorGetRealPath;
        }
        const std::string *resolvedPath = nullptr;
        if (path == sysfsPahTelem1) {
            resolvedPath = &realPathTelem1;
        } else if (path == sysfsPahTelem2) {
            resolvedPath = &realPathTelem2;
        } else if (path == sysfsPahTelem3) {
            resolvedPath = &realPathTelem3;
        } else if (path == sysfsPahTelem4) {
            resolvedPath = &realPathTelem4;
        } else if (path == sysfsPahTelem5) {
            resolvedPath = &realPathTelem5;
        }
        if (resolvedPath == nullptr) {
            return ZE_RESULT_ERROR_NOT_AVAILABLE;
        }
        buf = *resolvedPath;
        return ZE_RESULT_SUCCESS;
    }

    MockTemperatureFsAccess() = default;
};
// Test-only subclass that makes the protected pPmt and type members of
// LinuxTemperatureImp publicly accessible.
class PublicLinuxTemperatureImp : public L0::Sysman::LinuxTemperatureImp {
public:
PublicLinuxTemperatureImp(L0::Sysman::OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : LinuxTemperatureImp(pOsSysman, onSubdevice, subdeviceId) {}
using L0::Sysman::LinuxTemperatureImp::pPmt;
using L0::Sysman::LinuxTemperatureImp::type;
};
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,376 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/sysman/source/linux/pmt/pmt_xml_offsets.h"
#include "level_zero/sysman/test/unit_tests/sources/linux/mock_sysman_fixture.h"
#include "level_zero/sysman/test/unit_tests/sources/temperature/linux/mock_sysfs_temperature.h"
namespace L0 {
namespace ult {
// Number of temperature sensor handles the enumeration tests below expect for
// the multi-device fixture, the single-device PVC case and the DG1/DG2 cases.
constexpr uint32_t handleComponentCountForTwoTileDevices = 6u;
constexpr uint32_t handleComponentCountForSingleTileDevice = 3u;
constexpr uint32_t handleComponentCountForNoSubDevices = 2u;
// Bounds used by the DG1/DG2 tests when computing the expected global maximum:
// readings above invalidMaxTemperature or below invalidMinTemperature are skipped.
constexpr uint32_t invalidMaxTemperature = 125;
constexpr uint32_t invalidMinTemperature = 10;
// GUID keys looked up in L0::Sysman::guidToKeyOffsetMap: sampleGuid1 by the
// multi-device fixture, sampleGuid2 by the single-device fixture and tests.
const std::string sampleGuid1 = "0xb15a0edc";
const std::string sampleGuid2 = "0x490e01";
class SysmanMultiDeviceTemperatureFixture : public SysmanMultiDeviceFixture {
protected:
std::unique_ptr<PublicLinuxTemperatureImp> pPublicLinuxTemperatureImp;
std::unique_ptr<MockTemperatureFsAccess> pFsAccess;
L0::Sysman::FsAccess *pFsAccessOriginal = nullptr;
std::map<uint32_t, L0::Sysman::PlatformMonitoringTech *> mapOriginal;
L0::Sysman::SysmanDevice *device = nullptr;
void SetUp() override {
SysmanMultiDeviceFixture::SetUp();
device = pSysmanDevice;
pSysmanDeviceImp->pTempHandleContext->handleList.clear();
pFsAccess = std::make_unique<MockTemperatureFsAccess>();
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
mapOriginal = pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject;
pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.clear();
auto subDeviceCount = pLinuxSysmanImp->getSubDeviceCount();
uint32_t subdeviceId = 0;
do {
ze_bool_t onSubdevice = (subDeviceCount == 0) ? false : true;
auto pPmt = new MockTemperaturePmt(pFsAccess.get(), onSubdevice, subdeviceId);
pPmt->mockedInit(pFsAccess.get());
auto keyOffsetMapEntry = L0::Sysman::guidToKeyOffsetMap.find(sampleGuid1);
pPmt->keyOffsetMap = keyOffsetMapEntry->second;
pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.emplace(subdeviceId, pPmt);
} while (++subdeviceId < subDeviceCount);
getTempHandles(0);
}
void TearDown() override {
for (const auto &pmtMapElement : pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject) {
delete pmtMapElement.second;
}
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject = mapOriginal;
SysmanMultiDeviceFixture::TearDown();
}
std::vector<zes_temp_handle_t> getTempHandles(uint32_t count) {
std::vector<zes_temp_handle_t> handles(count, nullptr);
EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
return handles;
}
};
TEST_F(SysmanMultiDeviceTemperatureFixture, GivenComponentCountZeroWhenCallingZetSysmanTemperatureGetThenZeroCountIsReturnedAndVerifySysmanTemperatureGetCallSucceeds) {
    // Zero count and no output buffer: the call is expected to report the sensor count.
    uint32_t count = 0;
    const ze_result_t queryResult = zesDeviceEnumTemperatureSensors(device->toHandle(), &count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, queryResult);
    EXPECT_EQ(count, handleComponentCountForTwoTileDevices);

    // A count larger than what is available is expected to come back as the real count.
    uint32_t testcount = count + 1;
    const ze_result_t oversizedResult = zesDeviceEnumTemperatureSensors(device->toHandle(), &testcount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, oversizedResult);
    EXPECT_EQ(testcount, handleComponentCountForTwoTileDevices);

    // Zero count again, this time passing the data pointer of an empty vector.
    count = 0;
    std::vector<zes_temp_handle_t> handles(count, nullptr);
    EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
    EXPECT_EQ(count, handleComponentCountForTwoTileDevices);
}
HWTEST2_F(SysmanMultiDeviceTemperatureFixture, GivenValidTempHandleWhenGettingTemperatureThenValidTemperatureReadingsRetrieved, IsPVC) {
    const auto handles = getTempHandles(handleComponentCountForTwoTileDevices);
    for (const auto &handle : handles) {
        zes_temp_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetProperties(handle, &properties));

        // Only global, GPU and memory sensors are checked; other types are skipped.
        double temperature = 0.0;
        if (properties.type == ZES_TEMP_SENSORS_GLOBAL) {
            ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature));
            EXPECT_EQ(temperature, static_cast<double>(tileMaxTemperature));
        } else if (properties.type == ZES_TEMP_SENSORS_GPU) {
            ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature));
            EXPECT_EQ(temperature, static_cast<double>(gtMaxTemperature));
        } else if (properties.type == ZES_TEMP_SENSORS_MEMORY) {
            // The memory sensor is expected to report the hottest of the four memory readings.
            ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature));
            EXPECT_EQ(temperature, static_cast<double>(std::max({memory0MaxTemperature, memory1MaxTemperature, memory2MaxTemperature, memory3MaxTemperature})));
        }
    }
}
TEST_F(SysmanMultiDeviceTemperatureFixture, GivenValidTempHandleWhenGettingTemperatureConfigThenUnsupportedIsReturned) {
    // Reading the configuration is expected to be unsupported on every enumerated handle.
    const auto handles = getTempHandles(handleComponentCountForTwoTileDevices);
    for (const auto &handle : handles) {
        zes_temp_config_t config = {};
        const ze_result_t result = zesTemperatureGetConfig(handle, &config);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
TEST_F(SysmanMultiDeviceTemperatureFixture, GivenValidTempHandleWhenSettingTemperatureConfigThenUnsupportedIsReturned) {
    // Writing a configuration is expected to be unsupported on every enumerated handle.
    const auto handles = getTempHandles(handleComponentCountForTwoTileDevices);
    for (const auto &handle : handles) {
        zes_temp_config_t config = {};
        const ze_result_t result = zesTemperatureSetConfig(handle, &config);
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
    }
}
TEST_F(SysmanMultiDeviceTemperatureFixture, GivenCreatePmtObjectsWhenRootTileIndexEnumeratesSuccessfulThenValidatePmtObjectsReceivedAndBranches) {
    std::map<uint32_t, L0::Sysman::PlatformMonitoringTech *> mapOfSubDeviceIdToPmtObject;
    L0::Sysman::PlatformMonitoringTech::create(pLinuxSysmanImp, gpuUpstreamPortPathInTemperature, mapOfSubDeviceIdToPmtObject);

    // Keys are expected to run 0, 1, 2, ... in map order, each with a non-null PMT object.
    uint32_t expectedSubdeviceId = 0;
    for (auto &subDeviceIdToPmtEntry : mapOfSubDeviceIdToPmtObject) {
        auto *pPmt = subDeviceIdToPmtEntry.second;
        EXPECT_NE(pPmt, nullptr);
        EXPECT_EQ(subDeviceIdToPmtEntry.first, expectedSubdeviceId);
        ++expectedSubdeviceId;
        // Validation of this entry is done; free the object here so the test does not leak it.
        delete pPmt;
    }
}
TEST_F(SysmanMultiDeviceTemperatureFixture, GivenValidTempHandleAndPmtReadValueFailsWhenGettingTemperatureThenFailureReturned) {
    auto handles = getTempHandles(handleComponentCountForTwoTileDevices);

    // Make readValue fail on the PMT mock of every sub-device.
    const auto subDeviceCount = pLinuxSysmanImp->getSubDeviceCount();
    for (uint32_t subdeviceId = 0; subdeviceId < subDeviceCount; ++subdeviceId) {
        auto *pPmt = static_cast<MockTemperaturePmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(subdeviceId));
        pPmt->mockReadValueResult = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    // Properties remain readable, but every state query must surface the failure.
    for (auto &handle : handles) {
        zes_temp_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetProperties(handle, &properties));
        double temperature = 0.0;
        ASSERT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureGetState(handle, &temperature));
    }
}
class SysmanDeviceTemperatureFixture : public SysmanDeviceFixture {
protected:
std::unique_ptr<PublicLinuxTemperatureImp> pPublicLinuxTemperatureImp;
std::unique_ptr<MockTemperatureFsAccess> pFsAccess;
L0::Sysman::FsAccess *pFsAccessOriginal = nullptr;
std::map<uint32_t, L0::Sysman::PlatformMonitoringTech *> pmtMapOriginal;
L0::Sysman::SysmanDevice *device = nullptr;
void SetUp() override {
SysmanDeviceFixture::SetUp();
device = pSysmanDevice;
pFsAccess = std::make_unique<MockTemperatureFsAccess>();
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
auto subDeviceCount = pLinuxSysmanImp->getSubDeviceCount();
uint32_t subdeviceId = 0;
pmtMapOriginal = pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject;
pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.clear();
do {
ze_bool_t onSubdevice = (subDeviceCount == 0) ? false : true;
auto pPmt = new MockTemperaturePmt(pFsAccess.get(), onSubdevice, subdeviceId);
pPmt->mockedInit(pFsAccess.get());
auto keyOffsetMapEntry = L0::Sysman::guidToKeyOffsetMap.find(sampleGuid2);
pPmt->keyOffsetMap = keyOffsetMapEntry->second;
pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.emplace(subdeviceId, pPmt);
} while (++subdeviceId < subDeviceCount);
getTempHandles(0);
}
void TearDown() override {
pLinuxSysmanImp->releasePmtObject();
pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject = pmtMapOriginal;
pLinuxSysmanImp->pFsAccess = pFsAccessOriginal;
SysmanDeviceFixture::TearDown();
}
std::vector<zes_temp_handle_t> getTempHandles(uint32_t count) {
std::vector<zes_temp_handle_t> handles(count, nullptr);
EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
return handles;
}
};
HWTEST2_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingGPUAndGlobalTemperatureThenValidTemperatureReadingsRetrieved, IsDG1) {
    auto handles = getTempHandles(handleComponentCountForNoSubDevices);
    for (auto &handle : handles) {
        zes_temp_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetProperties(handle, &properties));
        double temperature = 0.0;
        ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature));

        if (properties.type == ZES_TEMP_SENSORS_GLOBAL) {
            // Expected global value: the largest entry of the whole array that lies
            // within [invalidMinTemperature, invalidMaxTemperature].
            uint8_t maxTemp = 0;
            const uint64_t entryCount = sizeof(tempArrForNoSubDevices) / sizeof(uint8_t);
            for (uint64_t idx = 0; idx < entryCount; ++idx) {
                const auto reading = tempArrForNoSubDevices[idx];
                const bool withinValidRange = (reading <= invalidMaxTemperature) && (reading >= invalidMinTemperature);
                if (withinValidRange && (maxTemp <= reading)) {
                    maxTemp = reading;
                }
            }
            EXPECT_EQ(temperature, static_cast<double>(maxTemp));
        }
        if (properties.type == ZES_TEMP_SENSORS_GPU) {
            EXPECT_EQ(temperature, static_cast<double>(tempArrForNoSubDevices[computeIndexForNoSubDevices]));
        }
    }
}
HWTEST2_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleAndReadCoreTemperatureFailsWhenGettingGpuAndGlobalTempThenValidGpuTempAndFailureForGlobalTempAreReturned, IsDG1) {
    auto handles = getTempHandles(handleComponentCountForNoSubDevices);

    // Force only the core-temperature read of the PMT mock to fail.
    const uint32_t subdeviceId = 0;
    auto *pPmt = static_cast<MockTemperaturePmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(subdeviceId));
    pPmt->mockReadCoreTempResult = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

    for (auto &handle : handles) {
        zes_temp_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetProperties(handle, &properties));
        double temperature = 0.0;
        if (properties.type == ZES_TEMP_SENSORS_GLOBAL) {
            // The global sensor is expected to surface the core read failure.
            EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureGetState(handle, &temperature));
        } else if (properties.type == ZES_TEMP_SENSORS_GPU) {
            // The GPU sensor is expected to stay readable.
            ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature));
            EXPECT_EQ(temperature, static_cast<double>(tempArrForNoSubDevices[computeIndexForNoSubDevices]));
        }
    }
}
HWTEST2_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleAndReadComputeTemperatureFailsWhenGettingGPUAndGlobalTemperatureThenFailureReturned, IsDG1) {
    auto handles = getTempHandles(handleComponentCountForNoSubDevices);

    // Force the compute-temperature read of the PMT mock to fail.
    const uint32_t subdeviceId = 0;
    auto *pPmt = static_cast<MockTemperaturePmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(subdeviceId));
    pPmt->mockReadComputeTempResult = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

    // Every enumerated sensor is expected to report the failure.
    for (auto &handle : handles) {
        double temperature = 0.0;
        ASSERT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureGetState(handle, &temperature));
    }
}
HWTEST2_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingGPUAndGlobalTemperatureThenValidTemperatureReadingsRetrieved, IsDG2) {
    auto handles = getTempHandles(handleComponentCountForNoSubDevices);
    for (auto &handle : handles) {
        zes_temp_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetProperties(handle, &properties));
        double temperature = 0.0;
        ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature));

        if (properties.type == ZES_TEMP_SENSORS_GLOBAL) {
            // For DG2, Global Max temperature will be Maximum of SOC_TEMPERATURES.
            // Only the first sizeof(uint64_t) entries are scanned, and readings outside
            // [invalidMinTemperature, invalidMaxTemperature] are ignored.
            uint8_t maxTemp = 0;
            for (uint64_t idx = 0; idx < sizeof(uint64_t); ++idx) {
                const auto reading = tempArrForNoSubDevices[idx];
                const bool withinValidRange = (reading <= invalidMaxTemperature) && (reading >= invalidMinTemperature);
                if (withinValidRange && (maxTemp <= reading)) {
                    maxTemp = reading;
                }
            }
            EXPECT_EQ(temperature, static_cast<double>(maxTemp));
        }
        if (properties.type == ZES_TEMP_SENSORS_GPU) {
            EXPECT_EQ(temperature, static_cast<double>(tempArrForNoSubDevices[gtTempIndexForNoSubDevices]));
        }
    }
}
TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleAndPmtReadValueFailsWhenGettingTemperatureThenFailureReturned) {
    auto handles = getTempHandles(handleComponentCountForNoSubDevices);

    // Make readValue fail on the PMT mock registered for sub-device id 0.
    const uint32_t subdeviceId = 0;
    auto *pPmt = static_cast<MockTemperaturePmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(subdeviceId));
    pPmt->mockReadValueResult = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

    // Properties remain readable, but every state query must surface the failure.
    for (auto &handle : handles) {
        zes_temp_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetProperties(handle, &properties));
        double temperature = 0.0;
        ASSERT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureGetState(handle, &temperature));
    }
}
TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingUnsupportedSensorsTemperatureThenUnsupportedReturned) {
    const uint32_t subdeviceId = 0;
    const ze_bool_t onSubdevice = (pLinuxSysmanImp->getSubDeviceCount() != 0);

    // Drive the OS-level implementation directly with a sensor type that is
    // expected to be rejected as unsupported.
    auto pLinuxTemperatureImp = std::make_unique<L0::Sysman::LinuxTemperatureImp>(pOsSysman, onSubdevice, subdeviceId);
    pLinuxTemperatureImp->setSensorType(ZES_TEMP_SENSORS_MEMORY_MIN);

    double temperature = 0.0;
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pLinuxTemperatureImp->getSensorTemperature(&temperature));
}
TEST_F(SysmanDeviceTemperatureFixture, GivenValidateEnumerateRootTelemIndexWhengetRealPathFailsThenFailureReturned) {
    // Make the mocked directory listing fail; enumeration is expected to propagate that error.
    pFsAccess->mockErrorListDirectory = ZE_RESULT_ERROR_NOT_AVAILABLE;
    const ze_result_t enumerateResult = L0::Sysman::PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess.get(), gpuUpstreamPortPathInTemperature);
    EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, enumerateResult);

    // With the listing still failing, create() is expected to leave the map empty.
    std::map<uint32_t, L0::Sysman::PlatformMonitoringTech *> mapOfSubDeviceIdToPmtObject;
    L0::Sysman::PlatformMonitoringTech::create(pLinuxSysmanImp, gpuUpstreamPortPathInTemperature, mapOfSubDeviceIdToPmtObject);
    EXPECT_TRUE(mapOfSubDeviceIdToPmtObject.empty());
}
TEST_F(SysmanDeviceTemperatureFixture, GivenValidatePmtReadValueWhenkeyOffsetMapIsNotThereThenFailureReturned) {
    // Get keyOffsetMap; abort the test cleanly if the GUID is unknown rather than
    // dereferencing the end iterator.
    const auto keyOffsetMapEntry = L0::Sysman::guidToKeyOffsetMap.find(sampleGuid2);
    ASSERT_NE(keyOffsetMapEntry, L0::Sysman::guidToKeyOffsetMap.end());

    auto pPmt = std::make_unique<MockTemperaturePmt>(pFsAccess.get(), 0, 0);
    pPmt->mockedInit(pFsAccess.get());
    pPmt->keyOffsetMap = keyOffsetMapEntry->second;

    // Reading through the 32-bit overload with the key "SOMETHING" is expected to
    // be reported as unsupported.
    uint32_t val = 0;
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pPmt->readValue("SOMETHING", val));
}
TEST_F(SysmanDeviceTemperatureFixture, GivenCreatePmtObjectsWhenRootTileIndexEnumeratesSuccessfulThenValidatePmtObjectsReceivedAndBranches) {
    std::map<uint32_t, L0::Sysman::PlatformMonitoringTech *> mapOfSubDeviceIdToPmtObject;
    L0::Sysman::PlatformMonitoringTech::create(pLinuxSysmanImp, gpuUpstreamPortPathInTemperature, mapOfSubDeviceIdToPmtObject);

    // Every created entry is expected to be keyed by sub-device id 0 and to hold a non-null object.
    for (auto &subDeviceIdToPmtEntry : mapOfSubDeviceIdToPmtObject) {
        auto *pPmt = subDeviceIdToPmtEntry.second;
        EXPECT_NE(pPmt, nullptr);
        EXPECT_EQ(subDeviceIdToPmtEntry.first, 0u);
        // The test owns the created objects, so free each one once it has been checked.
        delete pPmt;
    }
}
HWTEST2_F(SysmanDeviceTemperatureFixture, GivenComponentCountZeroWhenCallingZetSysmanTemperatureGetThenZeroCountIsReturnedAndVerifySysmanTemperatureGetCallSucceeds, IsPVC) {
    // Zero count and no output buffer: the call is expected to report the sensor count.
    uint32_t count = 0;
    const ze_result_t queryResult = zesDeviceEnumTemperatureSensors(device->toHandle(), &count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, queryResult);
    EXPECT_EQ(count, handleComponentCountForSingleTileDevice);

    // A count larger than what is available is expected to come back as the real count.
    uint32_t testcount = count + 1;
    const ze_result_t oversizedResult = zesDeviceEnumTemperatureSensors(device->toHandle(), &testcount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, oversizedResult);
    EXPECT_EQ(testcount, handleComponentCountForSingleTileDevice);

    // Zero count again, this time passing the data pointer of an empty vector.
    count = 0;
    std::vector<zes_temp_handle_t> handles(count, nullptr);
    EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
    EXPECT_EQ(count, handleComponentCountForSingleTileDevice);
}
HWTEST2_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingTemperatureThenValidTemperatureReadingsRetrieved, IsPVC) {
    const auto handles = getTempHandles(handleComponentCountForSingleTileDevice);
    for (const auto &handle : handles) {
        zes_temp_properties_t properties = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetProperties(handle, &properties));

        // Only global, GPU and memory sensors are checked; other types are skipped.
        double temperature = 0.0;
        if (properties.type == ZES_TEMP_SENSORS_GLOBAL) {
            ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature));
            EXPECT_EQ(temperature, static_cast<double>(tileMaxTemperature));
        } else if (properties.type == ZES_TEMP_SENSORS_GPU) {
            ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature));
            EXPECT_EQ(temperature, static_cast<double>(gtMaxTemperature));
        } else if (properties.type == ZES_TEMP_SENSORS_MEMORY) {
            // The memory sensor is expected to report the hottest of the four memory readings.
            ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature));
            EXPECT_EQ(temperature, static_cast<double>(std::max({memory0MaxTemperature, memory1MaxTemperature, memory2MaxTemperature, memory3MaxTemperature})));
        }
    }
}
} // namespace ult
} // namespace L0