fix: Add guid check while calculating Sysman memory Bandwidth

Related-To: LOCI-4597

Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
Mayank Raghuwanshi
2023-07-03 17:14:25 +00:00
committed by Compute-Runtime-Automation
parent e52e4f28f2
commit c3e2e145c5
6 changed files with 43 additions and 7 deletions

View File

@ -22,6 +22,9 @@ const std::string PlatformMonitoringTech::baseTelemSysFS("/sys/class/intel_pmt")
const std::string PlatformMonitoringTech::telem("telem");
uint32_t PlatformMonitoringTech::rootDeviceTelemNodeIndex = 0;
std::string PlatformMonitoringTech::getGuid() {
return guid;
}
ze_result_t PlatformMonitoringTech::readValue(const std::string key, uint32_t &value) {
auto offset = keyOffsetMap.find(key);
if (offset == keyOffsetMap.end()) {
@ -146,7 +149,6 @@ ze_result_t PlatformMonitoringTech::init(FsAccess *pFsAccess, const std::string
return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
}
std::string guid;
std::string guidPath = baseTelemSysFSNode + std::string("/guid");
ze_result_t result = pFsAccess->read(guidPath, guid);
if (ZE_RESULT_SUCCESS != result) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -28,6 +28,7 @@ class PlatformMonitoringTech : NEO::NonCopyableOrMovableClass {
virtual ze_result_t readValue(const std::string key, uint32_t &value);
virtual ze_result_t readValue(const std::string key, uint64_t &value);
static ze_result_t enumerateRootTelemIndex(FsAccess *pFsAccess, std::string &gpuUpstreamPortPath);
std::string getGuid();
static void create(const std::vector<ze_device_handle_t> &deviceHandles,
FsAccess *pFsAccess, std::string &gpuUpstreamPortPath,
std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject);
@ -37,6 +38,7 @@ class PlatformMonitoringTech : NEO::NonCopyableOrMovableClass {
static uint32_t rootDeviceTelemNodeIndex;
std::string telemetryDeviceEntry{};
std::map<std::string, uint64_t> keyOffsetMap;
std::string guid;
ze_result_t init(FsAccess *pFsAccess, const std::string &gpuUpstreamPortPath, PRODUCT_FAMILY productFamily);
static void doInitPmtObject(FsAccess *pFsAccess, uint32_t subdeviceId, PlatformMonitoringTech *pPmt, const std::string &gpuUpstreamPortPath,
std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject, PRODUCT_FAMILY productFamily);

View File

@ -260,6 +260,10 @@ ze_result_t LinuxMemoryImp::getHbmBandwidth(uint32_t numHbmModules, zes_mem_band
}
ze_result_t LinuxMemoryImp::getHbmBandwidthPVC(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth) {
std::string guid = pPmt->getGuid();
if (guid != guid64BitMemoryCounters) {
return getHbmBandwidth(numHbmModules, pBandwidth);
}
pBandwidth->readCounter = 0;
pBandwidth->writeCounter = 0;
pBandwidth->timestamp = 0;

View File

@ -11,6 +11,7 @@
// Vendor name string for Intel.
const std::string vendorIntel("Intel(R) Corporation");
// Placeholder string for a value that is not known.
const std::string unknown("unknown");
// Intel's PCI vendor id, as a hexadecimal string.
const std::string intelPciId("0x8086");
// PMT telemetry GUID that LinuxMemoryImp::getHbmBandwidthPVC compares against:
// when the device GUID differs, that function defers to getHbmBandwidth instead.
// NOTE(review): per the name, this GUID presumably marks telemetry layouts that
// expose 64-bit memory counters - confirm against the PMT layout definition.
const std::string guid64BitMemoryCounters("0xb15a0ede");
// Multiplier converting megabits per second to bytes per second (10^6 / 8).
constexpr uint32_t MbpsToBytesPerSecond = 125000;
// Scale factor of 1000 between volts and millivolts.
// NOTE(review): conversion direction depends on the call site - confirm there.
constexpr double milliVoltsFactor = 1000.0;
// Upper bound on the number of RAS error categories (presumably sizes
// per-category arrays - verify against users of this constant).
constexpr uint32_t maxRasErrorCategoryCount = 7;

View File

@ -243,6 +243,7 @@ struct MockMemoryNeoDrm : public Drm {
struct MockMemoryPmt : public PlatformMonitoringTech {
using PlatformMonitoringTech::guid;
using PlatformMonitoringTech::keyOffsetMap;
std::vector<ze_result_t> mockReadValueReturnStatus{};
std::vector<uint32_t> mockReadArgumentValue{};
@ -253,7 +254,9 @@ struct MockMemoryPmt : public PlatformMonitoringTech {
bool mockVfid0Status = false;
bool mockVfid1Status = false;
bool isRepeated = false;
// Test hook: replaces the GUID stored in the PlatformMonitoringTech base
// (made accessible through the using-declaration above) with a copy of `guid`.
void setGuid(std::string guid) {
    this->guid.assign(guid);
}
// Forwards all three arguments unchanged to the PlatformMonitoringTech
// constructor; the body is empty, so no mock-specific setup happens here.
MockMemoryPmt(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId) : PlatformMonitoringTech(pFsAccess, onSubdevice, subdeviceId) {}
ze_result_t readValue(const std::string key, uint32_t &val) override {
ze_result_t result = ZE_RESULT_SUCCESS;

View File

@ -407,7 +407,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockVfid0Status = true;
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
@ -441,7 +441,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockVfid1Status = true;
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
@ -471,6 +471,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
zes_mem_bandwidth_t bandwidth;
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockReadArgumentValue.push_back(1);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
pPmt->mockReadArgumentValue.push_back(0);
@ -481,6 +482,24 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
}
}
// Checks the bandwidth query when the PMT reports a GUID other than
// guid64BitMemoryCounters: the failure status queued on the mock PMT must be
// what zesMemoryGetBandwidth returns.
HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanMemoryGetBandwidthAndVF0_VFIDFailsForOldGuidThenFailureIsReturned, IsPVC) {
    setLocalSupportedAndReinit(true);
    auto memoryHandles = getMemoryHandles(memoryHandleComponentCount);
    for (auto &hMemory : memoryHandles) {
        // Look up which sub-device this memory module belongs to so the
        // matching mock PMT object can be fetched.
        zes_mem_properties_t memProperties = {};
        zesMemoryGetProperties(hMemory, &memProperties);
        auto pMockPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(memProperties.subdeviceId));

        // "0xb15a0edd" deliberately differs from guid64BitMemoryCounters.
        pMockPmt->setGuid("0xb15a0edd");

        // Queue one mocked PMT read that reports the feature as unsupported.
        pMockPmt->mockReadArgumentValue.push_back(1);
        pMockPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);

        zes_mem_bandwidth_t memBandwidth;
        EXPECT_EQ(zesMemoryGetBandwidth(hMemory, &memBandwidth), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
    }
}
HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanMemoryGetBandwidthAndVF0_HBM_READ_HFailsThenFailureIsReturned, IsPVC) {
setLocalSupportedAndReinit(true);
auto handles = getMemoryHandles(memoryHandleComponentCount);
@ -492,6 +511,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
zes_mem_bandwidth_t bandwidth;
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockReadArgumentValue.push_back(1);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
pPmt->mockReadArgumentValue.push_back(0);
@ -515,6 +535,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
zes_mem_bandwidth_t bandwidth;
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockReadArgumentValue.push_back(1);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
pPmt->mockReadArgumentValue.push_back(0);
@ -540,6 +561,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
zes_mem_bandwidth_t bandwidth;
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockReadArgumentValue.push_back(1);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
pPmt->mockReadArgumentValue.push_back(0);
@ -733,7 +755,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingGetBandwid
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(0));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockVfid1Status = true;
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
@ -755,7 +777,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingGetBandwid
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(0));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockVfid0Status = true;
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
@ -814,6 +836,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidUsRevIdForRevisionBWhenCallingzes
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->mockVfid1Status = true;
pPmt->setGuid(guid64BitMemoryCounters);
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
@ -1028,6 +1051,7 @@ TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenBothVfid0AndVfid1Are
zes_mem_bandwidth_t bandwidth;
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockReadArgumentValue.push_back(0);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
pPmt->mockReadArgumentValue.push_back(0);