feature: Add support for 64 bit memory counters for zesInit

Related-To: LOCI-4613, LOCI-4612

Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
This commit is contained in:
Mayank Raghuwanshi
2023-07-14 11:56:00 +00:00
committed by Compute-Runtime-Automation
parent c3ca3ff119
commit af6798f257
29 changed files with 195 additions and 378 deletions

View File

@@ -41,10 +41,6 @@ ze_result_t LinuxMemoryImp::getState(zes_mem_state_t *pState) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Stub: this variant of LinuxMemoryImp does not implement the extended bandwidth query.
// It unconditionally returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE and never writes to
// pReadCounters, pWriteCounters or pMaxBandwidth; timeout is ignored.
ze_result_t LinuxMemoryImp::getBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
std::unique_ptr<OsMemory> OsMemory::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) {
std::unique_ptr<LinuxMemoryImp> pLinuxMemoryImp = std::make_unique<LinuxMemoryImp>(pOsSysman, onSubdevice, subdeviceId);
return pLinuxMemoryImp;

View File

@@ -21,7 +21,6 @@ class LinuxMemoryImp : public OsMemory, NEO::NonCopyableOrMovableClass {
ze_result_t getProperties(zes_mem_properties_t *pProperties) override;
ze_result_t getBandwidth(zes_mem_bandwidth_t *pBandwidth) override;
ze_result_t getState(zes_mem_state_t *pState) override;
ze_result_t getBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) override;
bool isMemoryModuleSupported() override;
LinuxMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId);

View File

@@ -41,10 +41,6 @@ ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Stub: this variant of LinuxMemoryImp does not implement the extended bandwidth query.
// It unconditionally returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE and never writes to
// pReadCounters, pWriteCounters or pMaxBandwidth; timeout is ignored.
ze_result_t LinuxMemoryImp::getBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
ze_result_t LinuxMemoryImp::getState(zes_mem_state_t *pState) {
std::vector<NEO::MemoryRegion> deviceRegions;
if (pDrm->queryMemoryInfo() == false) {

View File

@@ -354,92 +354,6 @@ ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
return result;
}
// Returns how many ticks a wrapping counter advanced between two samples.
//
// counterMaxValue is the largest value the counter can hold before it rolls over to 0.
// prevValue is the earlier sample and currentValue the later one.
//
// If currentValue is smaller than prevValue the counter is assumed to have wrapped
// exactly once. The distance is then:
//   (counterMaxValue - prevValue) ticks to reach the maximum,
//   + 1 tick for the maximum -> 0 rollover itself,
//   + currentValue ticks after the rollover.
// Omitting the rollover tick under-counts by one on every wrap (e.g. prev == max and
// current == 0 is an increment of 1, not 0).
uint64_t getCounterIncrement(uint32_t counterMaxValue, uint64_t prevValue, uint64_t currentValue) {
    if (currentValue >= prevValue) {
        return currentValue - prevValue;
    }
    return (static_cast<uint64_t>(counterMaxValue) - prevValue) + currentValue + 1u;
}
// Polls the per-module HBM read and write counters through PMT for at least one pass and
// until `timeout` milliseconds have elapsed, summing how far each counter advanced.
//
// numHbmModules    number of HBM modules to poll (keys <vfId>_HBM<i>_READ / _WRITE).
// counterMaxValue  wrap-around limit forwarded to getCounterIncrement().
// pReadCounters    out: accumulated read increments multiplied by the transaction size (32).
// pWriteCounters   out: accumulated write increments multiplied by the transaction size (32).
// pMaxBandwidth    out: memoryBusWidth * HBM frequency * numHbmModules.
// timeout          polling window, interpreted as milliseconds.
//
// Returns the failing status as soon as getVFIDString() or any readValue() call fails; in
// that case none of the output parameters are written. Otherwise returns the status of the
// last successful call. The outcome of getHbmFrequency() is not checked.
ze_result_t LinuxMemoryImp::getHbmBandwidthEx(uint32_t numHbmModules, uint32_t counterMaxValue, uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) {
    std::string vfId = "";
    ze_result_t result = getVFIDString(vfId);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():getVFIDString returning error:0x%x while retriving VFID string \n", __FUNCTION__, result);
        return result;
    }

    auto &hwInfo = pDevice->getNEODevice()->getHardwareInfo();
    auto &productHelper = pDevice->getNEODevice()->getProductHelper();
    auto productFamily = hwInfo.platform.eProductFamily;
    auto stepping = productHelper.getSteppingFromHwRevId(hwInfo);

    // Last sample seen for every module, plus the running sums of the increments.
    std::vector<uint64_t> lastReadSample(numHbmModules, 0);
    std::vector<uint64_t> lastWriteSample(numHbmModules, 0);
    uint64_t readIncrementSum = 0;
    uint64_t writeIncrementSum = 0;
    // The first pass only records a baseline; increments are accumulated from the second pass on.
    bool haveBaseline = false;

    auto deadline = std::chrono::steady_clock::now() + std::chrono::duration<uint64_t, std::milli>(timeout);
    while (true) {
        for (uint32_t moduleIdx = 0u; moduleIdx < numHbmModules; ++moduleIdx) {
            std::string keyPrefix = vfId + "_HBM" + std::to_string(moduleIdx);

            // Read counter of this module, e.g. VF0_HBM0_READ for VFID 0 and HBM module 0.
            uint32_t readSample = 0;
            std::string readKey = keyPrefix + "_READ";
            result = pPmt->readValue(readKey, readSample);
            if (result != ZE_RESULT_SUCCESS) {
                NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():readValue for readCounterKey returning error:0x%x \n", __FUNCTION__, result);
                return result;
            }
            if (haveBaseline) {
                readIncrementSum += getCounterIncrement(counterMaxValue, lastReadSample[moduleIdx], readSample);
            }
            lastReadSample[moduleIdx] = readSample;

            // Write counter of this module, e.g. VF0_HBM0_WRITE for VFID 0 and HBM module 0.
            uint32_t writeSample = 0;
            std::string writeKey = keyPrefix + "_WRITE";
            result = pPmt->readValue(writeKey, writeSample);
            if (result != ZE_RESULT_SUCCESS) {
                NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():readValue for writeCounterKey returning error:0x%x \n", __FUNCTION__, result);
                return result;
            }
            if (haveBaseline) {
                writeIncrementSum += getCounterIncrement(counterMaxValue, lastWriteSample[moduleIdx], writeSample);
            }
            lastWriteSample[moduleIdx] = writeSample;
        }
        haveBaseline = true;

        if (std::chrono::steady_clock::now() > deadline) {
            break;
        }
    }

    // Each counter tick is scaled by a fixed transaction size.
    constexpr uint64_t transactionSize = 32;
    *pReadCounters = readIncrementSum * transactionSize;
    *pWriteCounters = writeIncrementSum * transactionSize;

    uint64_t hbmFrequency = 0;
    getHbmFrequency(productFamily, stepping, hbmFrequency);
    *pMaxBandwidth = memoryBusWidth * hbmFrequency * numHbmModules; // Value in bytes/secs
    return result;
}
// Entry point for the extended bandwidth query.
//
// Returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE when no PMT object is available or when the
// device's product family is anything other than IGFX_PVC. For PVC the request is delegated
// to getHbmBandwidthEx() with 4 HBM modules and a counter wrap limit of UINT32_MAX, and that
// call's result is returned as-is.
ze_result_t LinuxMemoryImp::getBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBw, uint64_t timeout) {
    if (pPmt == nullptr) {
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    auto &hwInfo = pDevice->getNEODevice()->getHardwareInfo();
    if (hwInfo.platform.eProductFamily != IGFX_PVC) {
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    constexpr uint32_t pvcHbmModuleCount = 4u;
    constexpr uint32_t pvcCounterMaxValue = UINT32_MAX;
    return getHbmBandwidthEx(pvcHbmModuleCount, pvcCounterMaxValue, pReadCounters, pWriteCounters, pMaxBw, timeout);
}
ze_result_t LinuxMemoryImp::getState(zes_mem_state_t *pState) {
pState->health = ZES_MEM_HEALTH_UNKNOWN;
FirmwareUtil *pFwInterface = pLinuxSysmanImp->getFwUtilInterface();

View File

@@ -24,7 +24,6 @@ class LinuxMemoryImp : public OsMemory, NEO::NonCopyableOrMovableClass {
ze_result_t getProperties(zes_mem_properties_t *pProperties) override;
ze_result_t getBandwidth(zes_mem_bandwidth_t *pBandwidth) override;
ze_result_t getState(zes_mem_state_t *pState) override;
ze_result_t getBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) override;
bool isMemoryModuleSupported() override;
LinuxMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId);

View File

@@ -22,8 +22,6 @@ class Memory : _zes_mem_handle_t {
virtual ze_result_t memoryGetProperties(zes_mem_properties_t *pProperties) = 0;
virtual ze_result_t memoryGetBandwidth(zes_mem_bandwidth_t *pBandwidth) = 0;
virtual ze_result_t memoryGetState(zes_mem_state_t *pState) = 0;
virtual ze_result_t memoryGetBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) = 0;
// Converts an API-level zes_mem_handle_t back into the Memory object it designates.
// This is a plain static_cast; the handle is not validated here.
static Memory *fromHandle(zes_mem_handle_t handle) {
return static_cast<Memory *>(handle);
}

View File

@@ -24,10 +24,6 @@ ze_result_t MemoryImp::memoryGetProperties(zes_mem_properties_t *pProperties) {
return ZE_RESULT_SUCCESS;
}
// Forwards the extended bandwidth query, with all arguments unchanged, to the OS-specific
// memory implementation held in pOsMemory and returns that call's result.
ze_result_t MemoryImp::memoryGetBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) {
return pOsMemory->getBandwidthEx(pReadCounters, pWriteCounters, pMaxBandwidth, timeout);
}
void MemoryImp::init() {
this->initSuccess = pOsMemory->isMemoryModuleSupported();
if (this->initSuccess == true) {

View File

@@ -19,7 +19,6 @@ class MemoryImp : public Memory, NEO::NonCopyableOrMovableClass {
ze_result_t memoryGetProperties(zes_mem_properties_t *pProperties) override;
ze_result_t memoryGetBandwidth(zes_mem_bandwidth_t *pBandwidth) override;
ze_result_t memoryGetState(zes_mem_state_t *pState) override;
ze_result_t memoryGetBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) override;
MemoryImp(OsSysman *pOsSysman, ze_device_handle_t handle);
~MemoryImp() override;

View File

@@ -19,7 +19,6 @@ class OsMemory {
virtual ze_result_t getProperties(zes_mem_properties_t *pProperties) = 0;
virtual ze_result_t getBandwidth(zes_mem_bandwidth_t *pBandwidth) = 0;
virtual ze_result_t getState(zes_mem_state_t *pState) = 0;
virtual ze_result_t getBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) = 0;
virtual bool isMemoryModuleSupported() = 0;
static std::unique_ptr<OsMemory> create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId);
virtual ~OsMemory() {}

View File

@@ -168,10 +168,6 @@ ze_result_t WddmMemoryImp::getProperties(zes_mem_properties_t *pProperties) {
return ZE_RESULT_SUCCESS;
}
// Stub: WddmMemoryImp does not implement the extended bandwidth query.
// It unconditionally returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE and never writes to
// pReadCounters, pWriteCounters or pMaxBandwidth; timeout is ignored.
ze_result_t WddmMemoryImp::getBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
ze_result_t WddmMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
uint32_t retValu32 = 0;
uint64_t retValu64 = 0;

View File

@@ -28,7 +28,6 @@ class WddmMemoryImp : public OsMemory, NEO::NonCopyableOrMovableClass {
ze_result_t getProperties(zes_mem_properties_t *pProperties) override;
ze_result_t getBandwidth(zes_mem_bandwidth_t *pBandwidth) override;
ze_result_t getState(zes_mem_state_t *pState) override;
ze_result_t getBandwidthEx(uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout) override;
bool isMemoryModuleSupported() override;
WddmMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId);
WddmMemoryImp() = default;

View File

@@ -500,6 +500,26 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
}
}
// For every memory handle, configures the mock PMT with GUID "0xb15a0edd" and queues two
// mocked reads (the first succeeds, the second fails), then checks that
// zesMemoryGetBandwidth() reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
// Per the test name the failing read is the VF1_VFID lookup; the queued entries are
// presumably consumed in order by successive readValue() calls - confirm against MockMemoryPmt.
HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanMemoryGetBandwidthAndVF1_VFIDFailsForOldGuidThenFailureIsReturned, IsPVC) {
setLocalSupportedAndReinit(true);
auto handles = getMemoryHandles(memoryHandleComponentCount);
for (auto &handle : handles) {
zes_mem_properties_t properties = {};
zesMemoryGetProperties(handle, &properties);
zes_mem_bandwidth_t bandwidth;
// The PMT mock is looked up by the sub-device id reported in the handle's properties.
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->setGuid("0xb15a0edd");
// First queued read: value 1, reported as success.
pPmt->mockReadArgumentValue.push_back(1);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS);
// Second queued read: value 0, reported as unsupported - this is the failure under test.
pPmt->mockReadArgumentValue.push_back(0);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
EXPECT_EQ(zesMemoryGetBandwidth(handle, &bandwidth), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
}
}
HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanMemoryGetBandwidthAndVF0_HBM_READ_HFailsThenFailureIsReturned, IsPVC) {
setLocalSupportedAndReinit(true);
auto handles = getMemoryHandles(memoryHandleComponentCount);
@@ -537,9 +557,9 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockReadArgumentValue.push_back(1);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS);
pPmt->mockReadArgumentValue.push_back(0);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF1_VFID
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS);
pPmt->mockReadArgumentValue.push_back(4);
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS);
pPmt->mockReadArgumentValue.push_back(4);
@@ -645,79 +665,6 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
}
}
// Calls getBandwidthEx() directly on a PublicLinuxMemoryImp with the mock PMT flagged as
// VFID 1 active (mockVfid1Status) and expects success, zero read/write counters and a
// maximum bandwidth of 128 * hbmRP0Frequency * 1000 * 1000 * 4.
HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingGetBandwidthExWhenVFID1IsActiveThenSuccessIsReturnedAndBandwidthIsValid, IsPVC) {
setLocalSupportedAndReinit(true);
// Patch the device's hardware info so its revision id corresponds to stepping REVISION_B.
auto hwInfo = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(0));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockVfid1Status = true;
// Queue one successful sysfs read returning hbmRP0Frequency (presumably the HBM frequency query - confirm).
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
uint64_t readCounters = 0;
uint64_t writeCounters = 0;
uint64_t maxBandwidth = 0;
uint64_t timeout = 1;
std::unique_ptr<PublicLinuxMemoryImp> pLinuxMemoryImp = std::make_unique<PublicLinuxMemoryImp>(pOsSysman, true, 0);
EXPECT_EQ(pLinuxMemoryImp->getBandwidthEx(&readCounters, &writeCounters, &maxBandwidth, timeout), ZE_RESULT_SUCCESS);
EXPECT_EQ(readCounters, 0u);
EXPECT_EQ(writeCounters, 0u);
// NOTE(review): looks like bus width (128) * frequency scaled by 1000 * 1000 * 4 HBM modules - confirm the units.
uint64_t expectedBandwidth = 128 * hbmRP0Frequency * 1000 * 1000 * 4;
EXPECT_EQ(maxBandwidth, expectedBandwidth);
}
// Calls getBandwidthEx() directly on a PublicLinuxMemoryImp with the mock PMT flagged as
// VFID 0 active (mockVfid0Status) and expects success, zero read/write counters and a
// maximum bandwidth of 128 * hbmRP0Frequency * 1000 * 1000 * 4.
HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingGetBandwidthExWhenVFID0IsActiveThenSuccessIsReturnedAndBandwidthIsValid, IsPVC) {
setLocalSupportedAndReinit(true);
// Patch the device's hardware info so its revision id corresponds to stepping REVISION_B.
auto hwInfo = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(0));
pPmt->setGuid(guid64BitMemoryCounters);
pPmt->mockVfid0Status = true;
// Queue one successful sysfs read returning hbmRP0Frequency (presumably the HBM frequency query - confirm).
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
uint64_t readCounters = 0;
uint64_t writeCounters = 0;
uint64_t maxBandwidth = 0;
uint64_t timeout = 1;
std::unique_ptr<PublicLinuxMemoryImp> pLinuxMemoryImp = std::make_unique<PublicLinuxMemoryImp>(pOsSysman, true, 0);
EXPECT_EQ(pLinuxMemoryImp->getBandwidthEx(&readCounters, &writeCounters, &maxBandwidth, timeout), ZE_RESULT_SUCCESS);
EXPECT_EQ(readCounters, 0u);
EXPECT_EQ(writeCounters, 0u);
// NOTE(review): looks like bus width (128) * frequency scaled by 1000 * 1000 * 4 HBM modules - confirm the units.
uint64_t expectedBandwidth = 128 * hbmRP0Frequency * 1000 * 1000 * 4;
EXPECT_EQ(maxBandwidth, expectedBandwidth);
}
// Overrides the device's product family with IGFX_UNKNOWN and checks that getBandwidthEx()
// reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for it.
TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingGetBandwidthExForUnknownPlatformThenFailureIsReturned) {
setLocalSupportedAndReinit(true);
// Work on a local copy of the default hardware info; only the product family is changed.
auto hwInfo = *NEO::defaultHwInfo.get();
hwInfo.platform.eProductFamily = IGFX_UNKNOWN;
pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getRootDeviceEnvironmentRef().setHwInfoAndInitHelpers(&hwInfo);
uint64_t readCounters = 0;
uint64_t writeCounters = 0;
uint64_t maxBandwidth = 0;
uint64_t timeout = 1;
std::unique_ptr<PublicLinuxMemoryImp> pLinuxMemoryImp = std::make_unique<PublicLinuxMemoryImp>(pOsSysman, true, 0);
EXPECT_EQ(pLinuxMemoryImp->getBandwidthEx(&readCounters, &writeCounters, &maxBandwidth, timeout), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
}
// Deletes every PMT object registered in mapOfSubDeviceIdToPmtObject, leaving null entries,
// and checks that getBandwidthEx() then reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingGetMemoryGetBandwidthExWhenPmtObjectIsNullThenFailureRetuned) {
// Free each PMT object and null its map slot so no dangling pointer is left behind.
for (auto &subDeviceIdToPmtEntry : pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject) {
if (subDeviceIdToPmtEntry.second != nullptr) {
delete subDeviceIdToPmtEntry.second;
subDeviceIdToPmtEntry.second = nullptr;
}
}
// NOTE(review): the reinit is expected to leave the PMT entries null (per the test name) - confirm.
setLocalSupportedAndReinit(true);
uint64_t readCounters = 0;
uint64_t writeCounters = 0;
uint64_t maxBandwidth = 0;
uint64_t timeout = 1;
std::unique_ptr<PublicLinuxMemoryImp> pLinuxMemoryImp = std::make_unique<PublicLinuxMemoryImp>(pOsSysman, true, 0);
EXPECT_EQ(pLinuxMemoryImp->getBandwidthEx(&readCounters, &writeCounters, &maxBandwidth, timeout), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
}
HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidUsRevIdForRevisionBWhenCallingzesSysmanMemoryGetBandwidthThenSuccessIsReturnedAndBandwidthIsValid, IsPVC) {
setLocalSupportedAndReinit(true);
auto handles = getMemoryHandles(memoryHandleComponentCount);