Add zesMemoryGetBandwidth support for DG2 and ATS-M

Related-To: LOCI-2574
Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
Mayank Raghuwanshi
2022-04-13 11:18:41 +00:00
committed by Compute-Runtime-Automation
parent f35107d3b0
commit bcc543f949
3 changed files with 184 additions and 21 deletions

View File

@@ -31,10 +31,108 @@ const std::map<std::string, std::map<std::string, uint64_t>> guidToKeyOffsetMap
{"SOC_TEMPERATURES", 56}}}, // SOC_TEMPERATURE contains GT_TEMP, DRAM_TEMP, SA_TEMP, DE_TEMP, PCIE_TEMP, TYPEC_TEMP
{"0x4f9302", // For DG2 512EU / ATS-M1
{{"PACKAGE_ENERGY", 1032},
{"SOC_TEMPERATURES", 56}}},
{"SOC_TEMPERATURES", 56},
{"MC_CAPTURE_TIMESTAMP", 1088},
{"IDI_READS[0]", 1096},
{"IDI_READS[1]", 1104},
{"IDI_READS[2]", 1112},
{"IDI_READS[3]", 1120},
{"IDI_READS[4]", 1128},
{"IDI_READS[5]", 1136},
{"IDI_READS[6]", 1144},
{"IDI_READS[7]", 1152},
{"IDI_READS[8]", 1160},
{"IDI_READS[9]", 1168},
{"IDI_READS[10]", 1176},
{"IDI_READS[11]", 1184},
{"IDI_READS[12]", 1192},
{"IDI_READS[13]", 1200},
{"IDI_READS[14]", 1208},
{"IDI_READS[15]", 1216},
{"IDI_WRITES[0]", 1224},
{"IDI_WRITES[1]", 1232},
{"IDI_WRITES[2]", 1240},
{"IDI_WRITES[3]", 1248},
{"IDI_WRITES[4]", 1256},
{"IDI_WRITES[5]", 1264},
{"IDI_WRITES[6]", 1272},
{"IDI_WRITES[7]", 1280},
{"IDI_WRITES[8]", 1288},
{"IDI_WRITES[9]", 1296},
{"IDI_WRITES[10]", 1304},
{"IDI_WRITES[11]", 1312},
{"IDI_WRITES[12]", 1320},
{"IDI_WRITES[13]", 1328},
{"IDI_WRITES[14]", 1336},
{"IDI_WRITES[15]", 1344},
{"DISPLAY_VC1_READS[0]", 1352},
{"DISPLAY_VC1_READS[1]", 1360},
{"DISPLAY_VC1_READS[2]", 1368},
{"DISPLAY_VC1_READS[3]", 1376},
{"DISPLAY_VC1_READS[4]", 1384},
{"DISPLAY_VC1_READS[5]", 1392},
{"DISPLAY_VC1_READS[6]", 1400},
{"DISPLAY_VC1_READS[7]", 1408},
{"DISPLAY_VC1_READS[8]", 1416},
{"DISPLAY_VC1_READS[9]", 1424},
{"DISPLAY_VC1_READS[10]", 1432},
{"DISPLAY_VC1_READS[11]", 1440},
{"DISPLAY_VC1_READS[12]", 1448},
{"DISPLAY_VC1_READS[13]", 1456},
{"DISPLAY_VC1_READS[14]", 1464},
{"DISPLAY_VC1_READS[15]", 1472}}},
{"0x4f9502", // For DG2 128EU / ATS-M3
{{"PACKAGE_ENERGY", 1032},
{"SOC_TEMPERATURES", 56}}},
{"SOC_TEMPERATURES", 56},
{"MC_CAPTURE_TIMESTAMP", 1088},
{"IDI_READS[0]", 1096},
{"IDI_READS[1]", 1104},
{"IDI_READS[2]", 1112},
{"IDI_READS[3]", 1120},
{"IDI_READS[4]", 1128},
{"IDI_READS[5]", 1136},
{"IDI_READS[6]", 1144},
{"IDI_READS[7]", 1152},
{"IDI_READS[8]", 1160},
{"IDI_READS[9]", 1168},
{"IDI_READS[10]", 1176},
{"IDI_READS[11]", 1184},
{"IDI_READS[12]", 1192},
{"IDI_READS[13]", 1200},
{"IDI_READS[14]", 1208},
{"IDI_READS[15]", 1216},
{"IDI_WRITES[0]", 1224},
{"IDI_WRITES[1]", 1232},
{"IDI_WRITES[2]", 1240},
{"IDI_WRITES[3]", 1248},
{"IDI_WRITES[4]", 1256},
{"IDI_WRITES[5]", 1264},
{"IDI_WRITES[6]", 1272},
{"IDI_WRITES[7]", 1280},
{"IDI_WRITES[8]", 1288},
{"IDI_WRITES[9]", 1296},
{"IDI_WRITES[10]", 1304},
{"IDI_WRITES[11]", 1312},
{"IDI_WRITES[12]", 1320},
{"IDI_WRITES[13]", 1328},
{"IDI_WRITES[14]", 1336},
{"IDI_WRITES[15]", 1344},
{"DISPLAY_VC1_READS[0]", 1352},
{"DISPLAY_VC1_READS[1]", 1360},
{"DISPLAY_VC1_READS[2]", 1368},
{"DISPLAY_VC1_READS[3]", 1376},
{"DISPLAY_VC1_READS[4]", 1384},
{"DISPLAY_VC1_READS[5]", 1392},
{"DISPLAY_VC1_READS[6]", 1400},
{"DISPLAY_VC1_READS[7]", 1408},
{"DISPLAY_VC1_READS[8]", 1416},
{"DISPLAY_VC1_READS[9]", 1424},
{"DISPLAY_VC1_READS[10]", 1432},
{"DISPLAY_VC1_READS[11]", 1440},
{"DISPLAY_VC1_READS[12]", 1448},
{"DISPLAY_VC1_READS[13]", 1456},
{"DISPLAY_VC1_READS[14]", 1464},
{"DISPLAY_VC1_READS[15]", 1472}}},
{"0xfdc76194", // For XeHP_SDV device
{{"HBM0MaxDeviceTemperature", 28},
{"HBM1MaxDeviceTemperature", 36},

View File

@@ -117,6 +117,31 @@ ze_result_t LinuxMemoryImp::getVFIDString(std::string &vfID) {
return result;
}
// Reads the per-channel memory controller counters through PMT and reports the
// aggregated read and write traffic in bytes.
//
// readCounters  [out] (sum of IDI_READS + sum of DISPLAY_VC1_READS) * 64
// writeCounters [out] (sum of IDI_WRITES) * 64
//
// Returns ZE_RESULT_SUCCESS when every counter was read; otherwise the first
// failing status from pPmt->readValue is returned and the output arguments are
// left untouched.
ze_result_t LinuxMemoryImp::readMcChannelCounters(uint64_t &readCounters, uint64_t &writeCounters) {
    // Each PMT counter holds a number of transactions and every transaction moves 64 bytes,
    // so transactions * 64 yields the traffic in bytes.
    constexpr uint64_t bytesPerTransaction = 64;

    // Adds up one named counter over all MC channels. Keys have the form "<name>[<channel>]".
    auto sumOverChannels = [this](const std::string &counterName, uint64_t &total) -> ze_result_t {
        // DG2 has 8 memory instances with 2 channels each, i.e. 16 MC channels in total.
        constexpr uint32_t mcChannelCount = 16u;
        ze_result_t status = ZE_RESULT_ERROR_UNKNOWN;
        for (uint32_t channel = 0; channel < mcChannelCount; ++channel) {
            std::string key = counterName + "[" + std::to_string(channel) + "]";
            uint64_t channelValue = 0;
            status = pPmt->readValue(key, channelValue);
            if (status != ZE_RESULT_SUCCESS) {
                return status;
            }
            total += channelValue;
        }
        return status;
    };

    uint64_t idiReads = 0;
    uint64_t idiWrites = 0;
    uint64_t displayVc1Reads = 0;

    ze_result_t result = sumOverChannels("IDI_READS", idiReads);
    if (result != ZE_RESULT_SUCCESS) {
        return result;
    }
    result = sumOverChannels("IDI_WRITES", idiWrites);
    if (result != ZE_RESULT_SUCCESS) {
        return result;
    }
    result = sumOverChannels("DISPLAY_VC1_READS", displayVc1Reads);
    if (result != ZE_RESULT_SUCCESS) {
        return result;
    }

    // Reads combine IDI_READS and DISPLAY_VC1_READS; writes come from IDI_WRITES only.
    readCounters = (idiReads + displayVc1Reads) * bytesPerTransaction;
    writeCounters = idiWrites * bytesPerTransaction;
    return result;
}
void LinuxMemoryImp::getHbmFrequency(PRODUCT_FAMILY productFamily, unsigned short stepping, uint64_t &hbmFrequency) {
hbmFrequency = 0;
if (productFamily == IGFX_XE_HP_SDV) {
@@ -140,29 +165,40 @@ void LinuxMemoryImp::getHbmFrequency(PRODUCT_FAMILY productFamily, unsigned shor
}
}
ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
if (pPmt == nullptr) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
std::string vfId = "";
auto result = getVFIDString(vfId);
if (result != ZE_RESULT_SUCCESS) {
return result;
}
uint32_t numHbmModules = 0u;
auto &hwInfo = pDevice->getNEODevice()->getHardwareInfo();
auto productFamily = hwInfo.platform.eProductFamily;
auto stepping = NEO::HwInfoConfig::get(productFamily)->getSteppingFromHwRevId(hwInfo);
if (productFamily == IGFX_XE_HP_SDV) {
numHbmModules = 2u;
} else if (productFamily == IGFX_PVC) {
numHbmModules = 4u;
}
// Fills pBandwidth for DG2 from the PMT memory controller counters.
//
// All four fields are zeroed first. readCounter/writeCounter are then populated by
// readMcChannelCounters and timestamp from the MC_CAPTURE_TIMESTAMP key.
// maxBandwidth stays 0 on this path.
//
// Returns the first failing status of readMcChannelCounters or pPmt->readValue,
// otherwise ZE_RESULT_SUCCESS.
ze_result_t LinuxMemoryImp::getBandwidthForDg2(zes_mem_bandwidth_t *pBandwidth) {
    pBandwidth->maxBandwidth = 0;
    pBandwidth->timestamp = 0;
    pBandwidth->writeCounter = 0;
    pBandwidth->readCounter = 0;

    const ze_result_t countersStatus = readMcChannelCounters(pBandwidth->readCounter, pBandwidth->writeCounter);
    if (countersStatus != ZE_RESULT_SUCCESS) {
        return countersStatus;
    }

    std::string timestampKey = "MC_CAPTURE_TIMESTAMP";
    uint64_t rawTimestamp = 0;
    const ze_result_t timestampStatus = pPmt->readValue(timestampKey, rawTimestamp);
    if (timestampStatus != ZE_RESULT_SUCCESS) {
        return timestampStatus;
    }

    // Scale the raw capture timestamp to seconds.
    // NOTE(review): if timestamp is an integer field the product is truncated on
    // assignment, leaving only whole seconds - confirm this unit/resolution is intended.
    constexpr double secondsPerTick = 1e-8;
    pBandwidth->timestamp = rawTimestamp * secondsPerTick;
    return timestampStatus;
}
ze_result_t LinuxMemoryImp::getHbmBandwidth(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth) {
pBandwidth->readCounter = 0;
pBandwidth->writeCounter = 0;
pBandwidth->timestamp = 0;
pBandwidth->maxBandwidth = 0;
ze_result_t result = ZE_RESULT_ERROR_UNKNOWN;
std::string vfId = "";
result = getVFIDString(vfId);
if (result != ZE_RESULT_SUCCESS) {
return result;
}
auto &hwInfo = pDevice->getNEODevice()->getHardwareInfo();
auto productFamily = hwInfo.platform.eProductFamily;
auto stepping = NEO::HwInfoConfig::get(productFamily)->getSteppingFromHwRevId(hwInfo);
for (auto hbmModuleIndex = 0u; hbmModuleIndex < numHbmModules; hbmModuleIndex++) {
uint32_t counterValue = 0;
// To read counters from VFID 0 and HBM module 0, key would be: VF0_HBM0_READ
@@ -204,7 +240,33 @@ ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
pBandwidth->maxBandwidth = memoryBusWidth * hbmFrequency * numHbmModules;
pBandwidth->maxBandwidth /= 8; // Divide by 8 to get bandwidth in bytes/sec
return result;
}
// Entry point for memory bandwidth queries: picks the product-specific reader
// based on the device's product family.
//
// Returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE when no PMT handle is available or
// the product family is not handled here; otherwise the status of the selected reader.
ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
    // Every supported path reads its counters through PMT.
    if (pPmt == nullptr) {
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    const auto family = pDevice->getNEODevice()->getHardwareInfo().platform.eProductFamily;

    if (family == IGFX_DG2) {
        return getBandwidthForDg2(pBandwidth);
    }
    if (family == IGFX_XE_HP_SDV) {
        // XE_HP_SDV is queried with 2 HBM modules.
        return getHbmBandwidth(2u, pBandwidth);
    }
    if (family == IGFX_PVC) {
        // PVC is queried with 4 HBM modules.
        return getHbmBandwidth(4u, pBandwidth);
    }
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

View File

@@ -36,7 +36,10 @@ class LinuxMemoryImp : public OsMemory, NEO::NonCopyableOrMovableClass {
void getHbmFrequency(PRODUCT_FAMILY productFamily, unsigned short stepping, uint64_t &hbmFrequency);
private:
ze_result_t readMcChannelCounters(uint64_t &readCounters, uint64_t &writeCounters);
ze_result_t getVFIDString(std::string &vfID);
ze_result_t getBandwidthForDg2(zes_mem_bandwidth_t *pBandwidth);
ze_result_t getHbmBandwidth(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth);
static const std::string deviceMemoryHealth;
bool isSubdevice = false;
uint32_t subdeviceId = 0;