mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
feature: Add support for 64 bit memory counters for zesInit
Related-To: LOCI-4613, LOCI-4612
Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c3ca3ff119
commit
af6798f257
@@ -23,6 +23,10 @@ const std::string PlatformMonitoringTech::baseTelemSysFS("/sys/class/intel_pmt")
|
||||
const std::string PlatformMonitoringTech::telem("telem");
|
||||
uint32_t PlatformMonitoringTech::rootDeviceTelemNodeIndex = 0;
|
||||
|
||||
std::string PlatformMonitoringTech::getGuid() {
|
||||
return guid;
|
||||
}
|
||||
|
||||
ze_result_t PlatformMonitoringTech::readValue(const std::string key, uint32_t &value) {
|
||||
auto offset = keyOffsetMap.find(key);
|
||||
if (offset == keyOffsetMap.end()) {
|
||||
|
||||
@@ -30,6 +30,7 @@ class PlatformMonitoringTech : NEO::NonCopyableOrMovableClass {
|
||||
|
||||
// Reads the telemetry value identified by `key` into a 32-bit output.
// The definition looks `key` up in keyOffsetMap; the remainder of the body is
// not visible in this view.
virtual ze_result_t readValue(const std::string key, uint32_t &value);
|
||||
// Overload with a 64-bit output. NOTE(review): definition not visible here —
// presumably mirrors the 32-bit overload; confirm against the implementation.
virtual ze_result_t readValue(const std::string key, uint64_t &value);
|
||||
// Returns a copy of the `guid` member.
std::string getGuid();
|
||||
static ze_result_t enumerateRootTelemIndex(FsAccess *pFsAccess, std::string &gpuUpstreamPortPath);
|
||||
static void create(LinuxSysmanImp *pLinuxSysmanImp, std::string &gpuUpstreamPortPath,
|
||||
std::map<uint32_t, L0::Sysman::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject);
|
||||
@@ -39,6 +40,7 @@ class PlatformMonitoringTech : NEO::NonCopyableOrMovableClass {
|
||||
static uint32_t rootDeviceTelemNodeIndex;
|
||||
std::string telemetryDeviceEntry{};
|
||||
std::map<std::string, uint64_t> keyOffsetMap;
|
||||
std::string guid;
|
||||
ze_result_t init(FsAccess *pFsAccess, const std::string &gpuUpstreamPortPath, PRODUCT_FAMILY productFamily);
|
||||
static void doInitPmtObject(FsAccess *pFsAccess, uint32_t subdeviceId, PlatformMonitoringTech *pPmt, const std::string &gpuUpstreamPortPath,
|
||||
std::map<uint32_t, L0::Sysman::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject, PRODUCT_FAMILY productFamily);
|
||||
|
||||
@@ -240,6 +240,45 @@ const std::map<std::string, std::map<std::string, uint64_t>> guidToKeyOffsetMap
|
||||
{"VF1_HBM2_WRITE", 348},
|
||||
{"VF1_HBM3_READ", 360},
|
||||
{"VF1_HBM3_WRITE", 364}}},
|
||||
{"0xb15a0ede", // For PVC device
|
||||
{{"HBM0MaxDeviceTemperature", 28},
|
||||
{"HBM1MaxDeviceTemperature", 36},
|
||||
{"TileMinTemperature", 40},
|
||||
{"TileMaxTemperature", 44},
|
||||
{"GTMinTemperature", 48},
|
||||
{"GTMaxTemperature", 52},
|
||||
{"VF0_VFID", 88},
|
||||
{"VF0_HBM0_READ", 92},
|
||||
{"VF0_HBM0_WRITE", 96},
|
||||
{"VF0_HBM1_READ", 104},
|
||||
{"VF0_HBM1_WRITE", 108},
|
||||
{"VF0_TIMESTAMP_L", 168},
|
||||
{"VF0_TIMESTAMP_H", 172},
|
||||
{"VF1_VFID", 176},
|
||||
{"VF1_HBM0_READ", 180},
|
||||
{"VF1_HBM0_WRITE", 184},
|
||||
{"VF1_HBM1_READ", 192},
|
||||
{"VF1_HBM1_WRITE", 196},
|
||||
{"VF1_TIMESTAMP_L", 256},
|
||||
{"VF1_TIMESTAMP_H", 260},
|
||||
{"HBM2MaxDeviceTemperature", 300},
|
||||
{"HBM3MaxDeviceTemperature", 308},
|
||||
{"VF0_HBM2_READ", 312},
|
||||
{"VF0_HBM2_WRITE", 316},
|
||||
{"VF0_HBM3_READ", 328},
|
||||
{"VF0_HBM3_WRITE", 332},
|
||||
{"VF1_HBM2_READ", 344},
|
||||
{"VF1_HBM2_WRITE", 348},
|
||||
{"VF1_HBM3_READ", 360},
|
||||
{"VF1_HBM3_WRITE", 364},
|
||||
{"VF0_HBM_READ_L", 384},
|
||||
{"VF0_HBM_READ_H", 388},
|
||||
{"VF0_HBM_WRITE_L", 392},
|
||||
{"VF0_HBM_WRITE_H", 396},
|
||||
{"VF1_HBM_READ_L", 400},
|
||||
{"VF1_HBM_READ_H", 404},
|
||||
{"VF1_HBM_WRITE_L", 408},
|
||||
{"VF1_HBM_WRITE_H", 412}}},
|
||||
{"0x41fe79a5", // For PVC root device
|
||||
{{"PPIN", 152},
|
||||
{"BoardNumber", 72}}}};
|
||||
|
||||
@@ -197,6 +197,79 @@ ze_result_t LinuxMemoryImp::getBandwidthForDg2(zes_mem_bandwidth_t *pBandwidth)
|
||||
return result;
|
||||
}
|
||||
|
||||
// Fills pBandwidth for PVC devices.
//
// When the GUID reported by pPmt differs from guid64BitMemoryCounters the call is
// forwarded unchanged to getHbmBandwidth(). Otherwise the read and write counters are
// each assembled from two 32-bit telemetry values (keys <vfId>_HBM_READ_L/_H and
// <vfId>_HBM_WRITE_L/_H) and multiplied by the 32-byte transaction size.
//
// numHbmModules: number of HBM modules; only scales maxBandwidth.
// pBandwidth:    output; all four fields are zeroed before the first telemetry read.
//
// Returns the first non-success result from getVFIDString() or readValue(), otherwise
// ZE_RESULT_SUCCESS. The results of memoryGetTimeStamp() and getHbmFrequency() are not
// consulted. When a write-counter read fails, readCounter has already been populated
// and is left as is.
ze_result_t LinuxMemoryImp::getHbmBandwidthPVC(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth) {
    if (pPmt->getGuid() != guid64BitMemoryCounters) {
        return getHbmBandwidth(numHbmModules, pBandwidth);
    }

    pBandwidth->readCounter = 0;
    pBandwidth->writeCounter = 0;
    pBandwidth->timestamp = 0;
    pBandwidth->maxBandwidth = 0;

    std::string vfId = "";
    ze_result_t result = getVFIDString(vfId);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():getVFIDString returning error:0x%x while retriving VFID string \n", __FUNCTION__, result);
        return result;
    }

    auto &hwInfo = pDevice->getHardwareInfo();
    auto productFamily = hwInfo.platform.eProductFamily;
    auto &productHelper = pDevice->getRootDeviceEnvironment().getHelper<NEO::ProductHelper>();
    auto stepping = productHelper.getSteppingFromHwRevId(hwInfo);

    constexpr uint64_t transactionSize = 32;

    // Inside the lambda __FUNCTION__ would name the closure's call operator, so the
    // enclosing function's name is captured here to keep the debug output unchanged.
    const char *const functionName = __FUNCTION__;

    // Reads the low and high 32-bit halves stored under vfId + keyStem + "_L" / "_H"
    // (low first, then high) and, only when both reads succeed, stores
    // ((high << 32) | low) * transactionSize in `counter`.
    // `counterName` is used solely to label the debug message.
    auto readCounter64 = [&](const char *keyStem, const char *counterName, uint64_t &counter) -> ze_result_t {
        uint32_t lowHalf = 0;
        ze_result_t status = pPmt->readValue(vfId + keyStem + "_L", lowHalf);
        if (status != ZE_RESULT_SUCCESS) {
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():readValue for %sL returning error:0x%x \n", functionName, counterName, status);
            return status;
        }

        uint32_t highHalf = 0;
        status = pPmt->readValue(vfId + keyStem + "_H", highHalf);
        if (status != ZE_RESULT_SUCCESS) {
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():readValue for %sH returning error:0x%x \n", functionName, counterName, status);
            return status;
        }

        counter = ((static_cast<uint64_t>(highHalf) << 32) | static_cast<uint64_t>(lowHalf)) * transactionSize;
        return ZE_RESULT_SUCCESS;
    };

    result = readCounter64("_HBM_READ", "readCounter", pBandwidth->readCounter);
    if (result != ZE_RESULT_SUCCESS) {
        return result;
    }

    result = readCounter64("_HBM_WRITE", "writeCounter", pBandwidth->writeCounter);
    if (result != ZE_RESULT_SUCCESS) {
        return result;
    }

    uint64_t timeStampVal = 0;
    memoryGetTimeStamp(timeStampVal);
    pBandwidth->timestamp = timeStampVal;

    uint64_t hbmFrequency = 0;
    getHbmFrequency(productFamily, stepping, hbmFrequency);

    pBandwidth->maxBandwidth = memoryBusWidth * hbmFrequency * numHbmModules; // Value in bytes/secs
    return result;
}
|
||||
|
||||
ze_result_t LinuxMemoryImp::getHbmBandwidth(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth) {
|
||||
pBandwidth->readCounter = 0;
|
||||
pBandwidth->writeCounter = 0;
|
||||
@@ -235,6 +308,10 @@ ze_result_t LinuxMemoryImp::getHbmBandwidth(uint32_t numHbmModules, zes_mem_band
|
||||
pBandwidth->writeCounter += counterValue;
|
||||
}
|
||||
|
||||
constexpr uint64_t transactionSize = 32;
|
||||
pBandwidth->readCounter = pBandwidth->readCounter * transactionSize;
|
||||
pBandwidth->writeCounter = pBandwidth->writeCounter * transactionSize;
|
||||
|
||||
uint32_t timeStampL = 0;
|
||||
std::string timeStamp = vfId + "_TIMESTAMP_L";
|
||||
result = pPmt->readValue(timeStamp, timeStampL);
|
||||
@@ -257,7 +334,6 @@ ze_result_t LinuxMemoryImp::getHbmBandwidth(uint32_t numHbmModules, zes_mem_band
|
||||
getHbmFrequency(productFamily, stepping, hbmFrequency);
|
||||
|
||||
pBandwidth->maxBandwidth = memoryBusWidth * hbmFrequency * numHbmModules;
|
||||
pBandwidth->maxBandwidth /= 8; // Divide by 8 to get bandwidth in bytes/sec
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -275,7 +351,7 @@ ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
|
||||
break;
|
||||
case IGFX_PVC:
|
||||
numHbmModules = 4u;
|
||||
result = getHbmBandwidth(numHbmModules, pBandwidth);
|
||||
result = getHbmBandwidthPVC(numHbmModules, pBandwidth);
|
||||
break;
|
||||
default:
|
||||
result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
|
||||
@@ -45,6 +45,7 @@ class LinuxMemoryImp : public OsMemory, NEO::NonCopyableOrMovableClass {
|
||||
ze_result_t readMcChannelCounters(uint64_t &readCounters, uint64_t &writeCounters);
|
||||
ze_result_t getVFIDString(std::string &vfID);
|
||||
ze_result_t getBandwidthForDg2(zes_mem_bandwidth_t *pBandwidth);
|
||||
// PVC bandwidth query. When the PMT GUID equals guid64BitMemoryCounters the counters
// are built from the <vfId>_HBM_READ_L/_H and <vfId>_HBM_WRITE_L/_H telemetry keys;
// for any other GUID the call is forwarded to getHbmBandwidth().
ze_result_t getHbmBandwidthPVC(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth);
|
||||
// HBM bandwidth query that getHbmBandwidthPVC() falls back to when the GUID does not
// match. Its definition is only partially visible in this view.
ze_result_t getHbmBandwidth(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth);
|
||||
static const std::string deviceMemoryHealth;
|
||||
bool isSubdevice = false;
|
||||
|
||||
@@ -23,7 +23,6 @@ class Memory : _zes_mem_handle_t {
|
||||
virtual ze_result_t memoryGetProperties(zes_mem_properties_t *pProperties) = 0;
|
||||
virtual ze_result_t memoryGetBandwidth(zes_mem_bandwidth_t *pBandwidth) = 0;
|
||||
virtual ze_result_t memoryGetState(zes_mem_state_t *pState) = 0;
|
||||
virtual ze_result_t memoryGetBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) = 0;
|
||||
|
||||
static Memory *fromHandle(zes_mem_handle_t handle) {
|
||||
return static_cast<Memory *>(handle);
|
||||
|
||||
@@ -23,10 +23,6 @@ ze_result_t MemoryImp::memoryGetProperties(zes_mem_properties_t *pProperties) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Extended bandwidth query stub: no argument is examined or written, and every call
// reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t MemoryImp::memoryGetBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) {
    const ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    return unsupported;
}
|
||||
|
||||
void MemoryImp::init() {
|
||||
this->initSuccess = pOsMemory->isMemoryModuleSupported();
|
||||
if (this->initSuccess == true) {
|
||||
|
||||
@@ -20,7 +20,6 @@ class MemoryImp : public Memory, NEO::NonCopyableOrMovableClass {
|
||||
ze_result_t memoryGetProperties(zes_mem_properties_t *pProperties) override;
|
||||
ze_result_t memoryGetBandwidth(zes_mem_bandwidth_t *pBandwidth) override;
|
||||
ze_result_t memoryGetState(zes_mem_state_t *pState) override;
|
||||
ze_result_t memoryGetBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) override;
|
||||
|
||||
MemoryImp(OsSysman *pOsSysman, bool onSubdevice, uint32_t subDeviceId);
|
||||
~MemoryImp() override;
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
// Vendor name string.
const std::string vendorIntel("Intel(R) Corporation");
|
||||
// Literal "unknown". NOTE(review): presumably a placeholder for values that cannot be
// determined — usage is not visible here.
const std::string unknown("unknown");
|
||||
// "0x8086" as a hexadecimal string. NOTE(review): presumably compared against a PCI
// vendor id read as text — confirm against callers.
const std::string intelPciId("0x8086");
|
||||
// PMT GUID for which LinuxMemoryImp::getHbmBandwidthPVC uses the *_HBM_READ_L/_H and
// *_HBM_WRITE_L/_H counter keys instead of delegating to getHbmBandwidth().
const std::string guid64BitMemoryCounters("0xb15a0ede");
|
||||
// 1 megabit per second expressed in bytes per second (1,000,000 / 8).
constexpr uint32_t MbpsToBytesPerSecond = 125000;
|
||||
// NOTE(review): presumably the volts <-> millivolts scale factor; usage not visible here.
constexpr double milliVoltsFactor = 1000.0;
|
||||
// NOTE(review): presumably the number of RAS error categories; usage not visible here.
constexpr uint32_t maxRasErrorCategoryCount = 7;
|
||||
|
||||
Reference in New Issue
Block a user