Update Temperature APIs to get correct temperature

This change updates the Temperature APIs to get the correct current
temperature based on the updated PMT interface.

Signed-off-by: Jitendra Sharma <jitendra.sharma@intel.com>
This commit is contained in:
Jitendra Sharma
2021-01-20 23:52:47 +05:30
committed by Compute-Runtime-Automation
parent a6d898a026
commit 3597093758
37 changed files with 988 additions and 548 deletions

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019-2020 Intel Corporation
# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -9,8 +9,6 @@ set(L0_SRCS_TOOLS_SYSMAN_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.h
${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fs_access.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pmt.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pmt.h
)
if(UNIX)

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -34,9 +34,14 @@ ze_result_t LinuxSysmanImp::init() {
pSysfsAccess = SysfsAccess::create(myDeviceName);
UNRECOVERABLE_IF(nullptr == pSysfsAccess);
pPmt = new PlatformMonitoringTech();
UNRECOVERABLE_IF(nullptr == pPmt);
pPmt->init(myDeviceName, pFsAccess);
std::string realRootPath;
result = pSysfsAccess->getRealPath("device", realRootPath);
if (ZE_RESULT_SUCCESS != result) {
return result;
}
auto rootPciPathOfGpuDevice = getPciRootPortDirectoryPath(realRootPath);
PlatformMonitoringTech::create(pParentSysmanDeviceImp->deviceHandles, pFsAccess, rootPciPathOfGpuDevice, mapOfSubDeviceIdToPmtObject);
pPmuInterface = PmuInterface::create(this);
UNRECOVERABLE_IF(nullptr == pPmuInterface);
@@ -83,9 +88,29 @@ SysmanDeviceImp *LinuxSysmanImp::getSysmanDeviceImp() {
return pParentSysmanDeviceImp;
}
PlatformMonitoringTech &LinuxSysmanImp::getPlatformMonitoringTechAccess() {
UNRECOVERABLE_IF(nullptr == pPmt);
return *pPmt;
// Derives the PCI root port directory of the discrete card from the device's
// real sysfs path by dropping the last two '/'-separated components.
// If the path runs out of separators first, whatever has been trimmed so far
// is returned.
std::string LinuxSysmanImp::getPciRootPortDirectoryPath(std::string realPciPath) {
    // The root port is always at a fixed distance above the device, as defined in HW.
    constexpr uint8_t levelsToRootPort = 2;
    for (uint8_t level = 0; level < levelsToRootPort; ++level) {
        const size_t lastSeparator = realPciPath.rfind('/');
        if (std::string::npos == lastSeparator) {
            break;
        }
        realPciPath.erase(lastSeparator);
    }
    return realPciPath;
}
// Returns the PMT object registered for subDeviceId, or nullptr when the map
// holds no entry for that id.
PlatformMonitoringTech *LinuxSysmanImp::getPlatformMonitoringTechAccess(uint32_t subDeviceId) {
    const auto pmtEntry = mapOfSubDeviceIdToPmtObject.find(subDeviceId);
    return (mapOfSubDeviceIdToPmtObject.end() != pmtEntry) ? pmtEntry->second : nullptr;
}
LinuxSysmanImp::LinuxSysmanImp(SysmanDeviceImp *pParentSysmanDeviceImp) {
@@ -113,14 +138,13 @@ LinuxSysmanImp::~LinuxSysmanImp() {
delete pFwUtilInterface;
pFwUtilInterface = nullptr;
}
if (nullptr != pPmt) {
delete pPmt;
pPmt = nullptr;
}
if (nullptr != pPmuInterface) {
delete pPmuInterface;
pPmuInterface = nullptr;
}
for (auto &subDeviceIdToPmtEntry : mapOfSubDeviceIdToPmtObject) {
delete subDeviceIdToPmtEntry.second;
}
}
OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -13,11 +13,13 @@
#include "level_zero/core/source/device/device.h"
#include "level_zero/tools/source/sysman/linux/firmware_util/firmware_util.h"
#include "level_zero/tools/source/sysman/linux/fs_access.h"
#include "level_zero/tools/source/sysman/linux/pmt.h"
#include "level_zero/tools/source/sysman/linux/pmt/pmt.h"
#include "level_zero/tools/source/sysman/linux/pmu/pmu_imp.h"
#include "level_zero/tools/source/sysman/linux/xml_parser/xml_parser.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
#include <map>
namespace L0 {
class PmuInterface;
@@ -35,20 +37,21 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
ProcfsAccess &getProcfsAccess();
SysfsAccess &getSysfsAccess();
NEO::Drm &getDrm();
PlatformMonitoringTech &getPlatformMonitoringTechAccess();
PlatformMonitoringTech *getPlatformMonitoringTechAccess(uint32_t subDeviceId);
Device *getDeviceHandle();
SysmanDeviceImp *getSysmanDeviceImp();
std::string getPciRootPortDirectoryPath(std::string realPciPath);
protected:
XmlParser *pXmlParser = nullptr;
FsAccess *pFsAccess = nullptr;
ProcfsAccess *pProcfsAccess = nullptr;
SysfsAccess *pSysfsAccess = nullptr;
PlatformMonitoringTech *pPmt = nullptr;
NEO::Drm *pDrm = nullptr;
Device *pDevice = nullptr;
PmuInterface *pPmuInterface = nullptr;
FirmwareUtil *pFwUtilInterface = nullptr;
std::map<uint32_t, L0::PlatformMonitoringTech *> mapOfSubDeviceIdToPmtObject;
private:
LinuxSysmanImp() = delete;

View File

@@ -1,89 +0,0 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/tools/source/sysman/linux/pmt.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
namespace L0 {
// Path prefix of the telemetry device node; init() appends the device number to it.
const std::string PlatformMonitoringTech::baseTelemDevice("/dev/telem");
// Path prefix of the telemetry sysfs directory; init() appends the device number
// followed by the entry name (guid, size or offset).
const std::string PlatformMonitoringTech::baseTelemSysFS("/sys/class/pmt_telemetry/telem");
// Maps the telemetry device into memory so that readValue() can serve requests.
//
// Reads the guid, size and offset sysfs entries of the telemetry node, maps
// `size` bytes of the telemetry device read-only and advances mappedMemory by
// baseOffset. pmtSupported is set to true only when every step succeeds; on
// any failure the function returns early, pmtSupported stays false and
// mappedMemory is left as nullptr so the destructor never unmaps an invalid
// address.
//
// deviceName is currently unused: the telemetry device number is hardcoded.
void PlatformMonitoringTech::init(const std::string &deviceName, FsAccess *pFsAccess) {
    pmtSupported = false;
    std::string deviceNumber("1"); // Temporarily hardcoded
    std::string telemetryDeviceEntry = baseTelemDevice + deviceNumber;
    if (!pFsAccess->fileExists(telemetryDeviceEntry)) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Telemetry support not available. No file %s\n", telemetryDeviceEntry.c_str());
        return;
    }

    // The guid value itself is not used; the read only verifies the entry is accessible.
    std::string guid;
    std::string guidPath = baseTelemSysFS + deviceNumber + std::string("/guid");
    ze_result_t result = pFsAccess->read(guidPath, guid);
    if (ZE_RESULT_SUCCESS != result) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Telemetry sysfs entry not available %s\n", guidPath.c_str());
        return;
    }

    std::string sizePath = baseTelemSysFS + deviceNumber + std::string("/size");
    result = pFsAccess->read(sizePath, size);
    if (ZE_RESULT_SUCCESS != result) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Telemetry sysfs entry not available %s\n", sizePath.c_str());
        return;
    }

    std::string offsetPath = baseTelemSysFS + deviceNumber + std::string("/offset");
    result = pFsAccess->read(offsetPath, baseOffset);
    if (ZE_RESULT_SUCCESS != result) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Telemetry sysfs entry not available %s\n", offsetPath.c_str());
        return;
    }

    int fd = open(static_cast<const char *>(telemetryDeviceEntry.c_str()), O_RDONLY);
    if (fd == -1) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Failure opening telemetry file %s : %s \n", telemetryDeviceEntry.c_str(), strerror(errno));
        return;
    }

    mappedMemory = static_cast<char *>(mmap(nullptr, static_cast<size_t>(size), PROT_READ, MAP_SHARED, fd, 0));
    if (mappedMemory == MAP_FAILED) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Failure mapping telemetry file %s : %s \n", telemetryDeviceEntry.c_str(), strerror(errno));
        close(fd);
        // MAP_FAILED is not nullptr; reset so the destructor does not try to unmap it.
        mappedMemory = nullptr;
        return;
    }

    if (close(fd) == -1) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Failure closing telemetry file %s : %s \n", telemetryDeviceEntry.c_str(), strerror(errno));
        munmap(mappedMemory, size);
        // The mapping is gone; forget the pointer so the destructor does not unmap it again.
        mappedMemory = nullptr;
        return;
    }

    mappedMemory += baseOffset;
    pmtSupported = true;
}
// Releases the telemetry mapping, if one is held. mappedMemory was advanced by
// baseOffset in init(), so the offset is subtracted again to unmap from the
// start of the mapping.
PlatformMonitoringTech::~PlatformMonitoringTech() {
    if (nullptr == mappedMemory) {
        return;
    }
    munmap(mappedMemory - baseOffset, size);
}
} // namespace L0

View File

@@ -1,57 +0,0 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "level_zero/tools/source/sysman/linux/fs_access.h"
#include <map>
namespace L0 {
// Gives typed read access to telemetry values through a read-only memory
// mapping of the telemetry device.
class PlatformMonitoringTech : NEO::NonCopyableOrMovableClass {
public:
PlatformMonitoringTech() = default;
// Unmaps the telemetry mapping if mappedMemory is set.
virtual ~PlatformMonitoringTech();
// Reads the telemetry sysfs entries and maps the telemetry device;
// isPmtSupported() reports whether every step succeeded.
MOCKABLE_VIRTUAL void init(const std::string &deviceName, FsAccess *pFsAccess);
// Reads the value located at the offset registered for key in keyOffsetMap.
// Returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE when PMT is not supported or
// the key is unknown, ZE_RESULT_SUCCESS otherwise.
template <typename ReadType>
ze_result_t readValue(const std::string key, ReadType &value);
bool isPmtSupported() { return pmtSupported; }
protected:
// True only after init() completed successfully.
bool pmtSupported = false;
// Start of the mapped telemetry data, already advanced by baseOffset.
char *mappedMemory = nullptr;
private:
static const std::string baseTelemDevice;
static const std::string baseTelemSysFS;
// Values read from the telemetry node's "size" and "offset" sysfs entries.
uint64_t size = 0;
uint64_t baseOffset = 0;
};
// Telemetry key name -> byte offset from mappedMemory at which readValue()
// reads the corresponding value.
const std::map<std::string, uint64_t> keyOffsetMap = {
{"PACKAGE_ENERGY", 0x400},
{"COMPUTE_TEMPERATURES", 0x68},
{"SOC_TEMPERATURES", 0x60},
{"CORE_TEMPERATURES", 0x6c}};
// Copies the telemetry value registered under key into value.
// Returns ZE_RESULT_SUCCESS on success, or ZE_RESULT_ERROR_UNSUPPORTED_FEATURE
// when PMT is not supported or key is not present in keyOffsetMap.
template <typename ReadType>
ze_result_t PlatformMonitoringTech::readValue(const std::string key, ReadType &value) {
    if (pmtSupported) {
        const auto keyEntry = keyOffsetMap.find(key);
        if (keyOffsetMap.end() != keyEntry) {
            const char *valueAddress = mappedMemory + keyEntry->second;
            value = *reinterpret_cast<const ReadType *>(valueAddress);
            return ZE_RESULT_SUCCESS;
        }
    }
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
} // namespace L0

View File

@@ -0,0 +1,18 @@
#
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Sources of the Linux sysman PMT component.
set(L0_SRCS_TOOLS_SYSMAN_LINUX_PMT
    ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/pmt.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/pmt.h
)

if(UNIX)
  target_sources(${L0_STATIC_LIB_NAME}
                 PRIVATE
                 ${L0_SRCS_TOOLS_SYSMAN_LINUX_PMT}
  )
endif()

# Make our source files visible to parent.
# Note that the property name (..._PMT_LINUX) differs from the list variable
# (..._LINUX_PMT); the value must be expanded from the variable set above,
# otherwise the property is published empty.
set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PMT_LINUX ${L0_SRCS_TOOLS_SYSMAN_LINUX_PMT})

View File

@@ -0,0 +1,175 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/tools/source/sysman/linux/pmt/pmt.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include <algorithm>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
namespace L0 {
// Directory holding the telemetry device nodes; init() appends the telem node name.
const std::string PlatformMonitoringTech::devfs("/dev/");
// sysfs directory listing the telemetry nodes (telem1, telem2, ...).
const std::string PlatformMonitoringTech::baseTelemSysFS("/sys/class/pmt_telemetry");
// Name prefix of a telemetry node; the numeric index follows it.
const std::string PlatformMonitoringTech::telem("telem");
// Index N of the root device's telemN node, set by enumerateRootTelemIndex().
uint32_t PlatformMonitoringTech::rootDeviceTelemNodeIndex = 0;
// Telemetry key name -> byte offset from mappedMemory; copied into each
// object's keyOffsetMap by init().
const std::map<std::string, uint64_t> deviceKeyOffsetMap = {
{"PACKAGE_ENERGY", 0x400},
{"COMPUTE_TEMPERATURES", 0x68},
{"SOC_TEMPERATURES", 0x60},
{"CORE_TEMPERATURES", 0x6c}};
// Finds the telemetry node that belongs to the GPU's root device and stores
// its index in rootDeviceTelemNodeIndex.
//
// pFsAccess              - file system accessor used to list and resolve the telemetry nodes.
// rootPciPathOfGpuDevice - real sysfs path of the PCI root port of the GPU device.
//
// Returns ZE_RESULT_SUCCESS when a telemetry node under the same PCI root port
// was found, the error reported by pFsAccess when listing or resolving fails,
// and ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE when no telemetry node belongs to
// this GPU (rootDeviceTelemNodeIndex is left untouched in that case).
ze_result_t PlatformMonitoringTech::enumerateRootTelemIndex(FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice) {
    std::vector<std::string> listOfTelemNodes;
    auto result = pFsAccess->listDirectory(baseTelemSysFS, listOfTelemNodes);
    if (ZE_RESULT_SUCCESS != result) {
        return result;
    }

    // Example: For below directory
    // # /sys/class/pmt_telemetry$ ls
    // telem1  telem2  telem3
    // Then listOfTelemNodes would contain telem1, telem2, telem3
    //
    // Arrange the telem nodes in ascending order of their index. Comparing the
    // length first keeps the order numeric (telem2 before telem10), which a
    // plain lexicographic sort would not.
    std::sort(listOfTelemNodes.begin(), listOfTelemNodes.end(),
              [](const std::string &lhs, const std::string &rhs) {
                  if (lhs.size() != rhs.size()) {
                      return lhs.size() < rhs.size();
                  }
                  return lhs < rhs;
              });

    for (const auto &telemNode : listOfTelemNodes) {
        std::string realPathOfTelemNode;
        result = pFsAccess->getRealPath(baseTelemSysFS + "/" + telemNode, realPathOfTelemNode);
        if (result != ZE_RESULT_SUCCESS) {
            return result;
        }

        // Check if Telemetry node(say telem1) and rootPciPathOfGpuDevice share same PCI Root port
        if (realPathOfTelemNode.compare(0, rootPciPathOfGpuDevice.size(), rootPciPathOfGpuDevice) == 0) {
            // Example: If
            // rootPciPathOfGpuDevice = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0";
            // realPathOfTelemNode = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry/telem1";
            // As realPathOfTelemNode starts with rootPciPathOfGpuDevice, both telemNode and GPU device share the same PCI Root.
            const auto indexString = telemNode.substr(telem.size());
            rootDeviceTelemNodeIndex = std::stoi(indexString); // if telemNode is telemN, then rootDeviceTelemNodeIndex = N
            return ZE_RESULT_SUCCESS;
        }
    }

    // No telemetry node sits below this GPU's root port. Falling through with
    // the last (successful) result would make the caller use a stale index.
    return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
}
// Maps the telemetry device of this (sub)device so readValue() can serve requests.
//
// Selects the telem node (root node, or root index + subdeviceId + 1 for a
// subdevice), reads its guid, size and offset sysfs entries, maps `size` bytes
// of the device read-only and advances mappedMemory by baseOffset.
//
// On any failure the function returns early with mappedMemory == nullptr and
// the reason stored in retVal, which readValue() hands back to its caller.
void PlatformMonitoringTech::init(FsAccess *pFsAccess) {
    // Translates an errno value into the ze_result_t reported to the user.
    auto getErrorVal = [](auto err) {
        if ((EPERM == err) || (EACCES == err)) {
            return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
        } else if (ENOENT == err) {
            return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
        } else {
            return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
        }
    };

    std::string telemNode = telem + std::to_string(rootDeviceTelemNodeIndex);
    if (isSubdevice) {
        uint32_t telemNodeIndex = 0;
        // If rootDeviceTelemNode is telem1, then rootDeviceTelemNodeIndex = 1
        // And thus for subdevice0 --> telem node will be telem2,
        // for subdevice1 --> telem node will be telem3 etc
        telemNodeIndex = rootDeviceTelemNodeIndex + subdeviceId + 1;
        telemNode = telem + std::to_string(telemNodeIndex);
    }
    std::string baseTelemSysFSNode = baseTelemSysFS + "/" + telemNode;
    std::string telemetryDeviceEntry = devfs + telemNode;
    if (!pFsAccess->fileExists(telemetryDeviceEntry)) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Telemetry support not available. No file %s\n", telemetryDeviceEntry.c_str());
        retVal = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
        return;
    }

    // The guid value itself is not used; the read only verifies the entry is accessible.
    std::string guid;
    std::string guidPath = baseTelemSysFSNode + std::string("/guid");
    ze_result_t result = pFsAccess->read(guidPath, guid);
    if (ZE_RESULT_SUCCESS != result) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Telemetry sysfs entry not available %s\n", guidPath.c_str());
        retVal = result;
        return;
    }

    keyOffsetMap = deviceKeyOffsetMap;

    std::string sizePath = baseTelemSysFSNode + std::string("/size");
    result = pFsAccess->read(sizePath, size);
    if (ZE_RESULT_SUCCESS != result) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Telemetry sysfs entry not available %s\n", sizePath.c_str());
        retVal = result;
        return;
    }

    std::string offsetPath = baseTelemSysFSNode + std::string("/offset");
    result = pFsAccess->read(offsetPath, baseOffset);
    if (ZE_RESULT_SUCCESS != result) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Telemetry sysfs entry not available %s\n", offsetPath.c_str());
        retVal = result;
        return;
    }

    int fd = open(static_cast<const char *>(telemetryDeviceEntry.c_str()), O_RDONLY);
    if (fd == -1) {
        // Save errno first: the logging below may overwrite it.
        const int openErrno = errno;
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Failure opening telemetry file %s : %s \n", telemetryDeviceEntry.c_str(), strerror(openErrno));
        retVal = getErrorVal(openErrno);
        return;
    }

    mappedMemory = static_cast<char *>(mmap(nullptr, static_cast<size_t>(size), PROT_READ, MAP_SHARED, fd, 0));
    if (mappedMemory == MAP_FAILED) {
        // Save errno first: logging and close() below may overwrite it.
        const int mmapErrno = errno;
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Failure mapping telemetry file %s : %s \n", telemetryDeviceEntry.c_str(), strerror(mmapErrno));
        close(fd);
        // MAP_FAILED is not nullptr; reset the pointer so readValue() reports
        // retVal instead of dereferencing it and the destructor skips munmap.
        mappedMemory = nullptr;
        retVal = getErrorVal(mmapErrno);
        return;
    }

    if (close(fd) == -1) {
        // Save errno first: logging and munmap() below may overwrite it.
        const int closeErrno = errno;
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                              "Failure closing telemetry file %s : %s \n", telemetryDeviceEntry.c_str(), strerror(closeErrno));
        munmap(mappedMemory, size);
        mappedMemory = nullptr;
        retVal = getErrorVal(closeErrno);
        return;
    }

    mappedMemory += baseOffset;
}
PlatformMonitoringTech::PlatformMonitoringTech(FsAccess *pFsAccess, ze_bool_t onSubdevice,
uint32_t subdeviceId) : subdeviceId(subdeviceId), isSubdevice(onSubdevice) {
init(pFsAccess);
}
void PlatformMonitoringTech::create(const std::vector<ze_device_handle_t> &deviceHandles,
FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice,
std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject) {
if (ZE_RESULT_SUCCESS == PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess, rootPciPathOfGpuDevice)) {
for (const auto &deviceHandle : deviceHandles) {
ze_device_properties_t deviceProperties = {};
Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
auto pPmt = new PlatformMonitoringTech(pFsAccess, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE,
deviceProperties.subdeviceId);
UNRECOVERABLE_IF(nullptr == pPmt);
mapOfSubDeviceIdToPmtObject.emplace(deviceProperties.subdeviceId, pPmt);
}
}
}
// Releases the telemetry mapping, if one is held. mappedMemory was advanced by
// baseOffset in init(), so the offset is subtracted again to unmap from the
// start of the mapping.
PlatformMonitoringTech::~PlatformMonitoringTech() {
    if (nullptr == mappedMemory) {
        return;
    }
    munmap(mappedMemory - baseOffset, size);
}
} // namespace L0

View File

@@ -0,0 +1,62 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/tools/source/sysman/linux/fs_access.h"
#include <map>
namespace L0 {
// Gives typed read access to the telemetry values of one device or subdevice
// through a read-only memory mapping of its telemetry device node.
class PlatformMonitoringTech : NEO::NonCopyableOrMovableClass {
public:
PlatformMonitoringTech() = delete;
// Stores the (sub)device identity and runs init() to map the telemetry node.
PlatformMonitoringTech(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId);
// Unmaps the telemetry mapping if mappedMemory is set.
virtual ~PlatformMonitoringTech();
// Reads the value located at the offset registered for key in keyOffsetMap.
// Returns ZE_RESULT_SUCCESS on success, otherwise retVal (when nothing is
// mapped or the key is unknown).
template <typename ReadType>
ze_result_t readValue(const std::string key, ReadType &value);
// Determines the index of the telemetry node that shares the GPU's PCI root
// port and stores it in rootDeviceTelemNodeIndex.
static ze_result_t enumerateRootTelemIndex(FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice);
// Creates one object per device handle and stores it in
// mapOfSubDeviceIdToPmtObject keyed by the handle's subdeviceId.
static void create(const std::vector<ze_device_handle_t> &deviceHandles,
FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice,
std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject);
protected:
// Start of the mapped telemetry data, already advanced by baseOffset.
char *mappedMemory = nullptr;
// Shared by all objects: index N of the root device's telemN node.
static uint32_t rootDeviceTelemNodeIndex;
// Telemetry key name -> byte offset from mappedMemory.
std::map<std::string, uint64_t> keyOffsetMap;
private:
void init(FsAccess *pFsAccess);
static const std::string devfs;
static const std::string baseTelemSysFS;
static const std::string telem;
// Values read from the telemetry node's "size" and "offset" sysfs entries.
uint64_t size = 0;
uint64_t baseOffset = 0;
uint32_t subdeviceId = 0;
ze_bool_t isSubdevice = 0;
// Result handed out by readValue() when it cannot serve a request; init()
// overwrites it with the specific failure reason.
ze_result_t retVal = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
};
// Copies the telemetry value registered under key into value.
// Returns ZE_RESULT_SUCCESS on success; when no telemetry memory is mapped or
// key is not present in keyOffsetMap, returns retVal and leaves value untouched.
template <typename ReadType>
ze_result_t PlatformMonitoringTech::readValue(const std::string key, ReadType &value) {
    if (nullptr != mappedMemory) {
        const auto keyEntry = keyOffsetMap.find(key);
        if (keyOffsetMap.end() != keyEntry) {
            const char *valueAddress = mappedMemory + keyEntry->second;
            value = *reinterpret_cast<const ReadType *>(valueAddress);
            return ZE_RESULT_SUCCESS;
        }
    }
    return retVal;
}
} // namespace L0