fix: Fix number of channels and bus width calculation

Take the number of memory channels from telemetry instead
of using a hard-coded value, and calculate the memory bus width
using the formula

busWidth = numChannels * 32

Related-To: NEO-15629

Signed-off-by: Aviral Nigam <aviral.nigam@intel.com>
This commit is contained in:
Aviral Nigam
2025-08-06 09:05:41 +00:00
committed by Compute-Runtime-Automation
parent e88d1287c0
commit c1c1f1f0af
8 changed files with 232 additions and 22 deletions

View File

@@ -57,6 +57,7 @@ class SysmanProductHelper {
virtual ze_result_t getMemoryProperties(zes_mem_properties_t *pProperties, LinuxSysmanImp *pLinuxSysmanImp, NEO::Drm *pDrm, SysmanKmdInterface *pSysmanKmdInterface, uint32_t subDeviceId, bool isSubdevice) = 0;
virtual ze_result_t getMemoryBandwidth(zes_mem_bandwidth_t *pBandwidth, LinuxSysmanImp *pLinuxSysmanImp, uint32_t subdeviceId) = 0;
virtual void getMemoryHealthIndicator(FirmwareUtil *pFwInterface, zes_mem_health_t *health) = 0;
// Queries the number of memory channels. On success the count is written to *pNumChannels;
// the return value reports success or the reason the count could not be obtained.
virtual ze_result_t getNumberOfMemoryChannels(LinuxSysmanImp *pLinuxSysmanImp, uint32_t *pNumChannels) = 0;
// Performance
virtual void getMediaPerformanceFactorMultiplier(const double performanceFactor, double *pMultiplier) = 0;

View File

@@ -31,6 +31,7 @@ class SysmanProductHelperHw : public SysmanProductHelper {
ze_result_t getMemoryProperties(zes_mem_properties_t *pProperties, LinuxSysmanImp *pLinuxSysmanImp, NEO::Drm *pDrm, SysmanKmdInterface *pSysmanKmdInterface, uint32_t subDeviceId, bool isSubdevice) override;
ze_result_t getMemoryBandwidth(zes_mem_bandwidth_t *pBandwidth, LinuxSysmanImp *pLinuxSysmanImp, uint32_t subdeviceId) override;
void getMemoryHealthIndicator(FirmwareUtil *pFwInterface, zes_mem_health_t *health) override;
// Per-product query for the number of memory channels; the count is returned through pNumChannels.
ze_result_t getNumberOfMemoryChannels(LinuxSysmanImp *pLinuxSysmanImp, uint32_t *pNumChannels) override;
// Performance
void getMediaPerformanceFactorMultiplier(const double performanceFactor, double *pMultiplier) override;

View File

@@ -35,10 +35,16 @@ void SysmanProductHelperHw<gfxProduct>::getFrequencyStepSize(double *pStepSize)
*pStepSize = (50.0 / 3); // Step of 16.6666667 Mhz
}
// Generic implementation of the memory channel query: it always reports that the
// channel count is not available. Neither argument is read and *pNumChannels is
// left untouched.
template <PRODUCT_FAMILY gfxProduct>
ze_result_t SysmanProductHelperHw<gfxProduct>::getNumberOfMemoryChannels(LinuxSysmanImp *pLinuxSysmanImp, uint32_t *pNumChannels) {
    const ze_result_t status = ZE_RESULT_ERROR_NOT_AVAILABLE;
    return status;
}
template <PRODUCT_FAMILY gfxProduct>
ze_result_t SysmanProductHelperHw<gfxProduct>::getMemoryProperties(zes_mem_properties_t *pProperties, LinuxSysmanImp *pLinuxSysmanImp, NEO::Drm *pDrm, SysmanKmdInterface *pSysmanKmdInterface, uint32_t subDeviceId, bool isSubdevice) {
auto pSysFsAccess = pSysmanKmdInterface->getSysFsAccess();
bool isIntegratedDevice = pLinuxSysmanImp->getHardwareInfo().capabilityTable.isIntegratedDevice;
bool isNumChannelsFromTelemetry = false;
if (isIntegratedDevice) {
pProperties->location = ZES_MEM_LOC_SYSTEM;
@@ -80,16 +86,25 @@ ze_result_t SysmanProductHelperHw<gfxProduct>::getMemoryProperties(zes_mem_prope
}
if (pProperties->type == ZES_MEM_TYPE_HBM) {
pProperties->numChannels = memSystemInfo->getNumHbmStacksPerTile() * memSystemInfo->getNumChannlesPerHbmStack();
pProperties->numChannels = memSystemInfo->getNumHbmStacksPerTile() * memSystemInfo->getNumChannelsPerHbmStack();
} else if (pProperties->type == ZES_MEM_TYPE_GDDR6) {
uint32_t numChannels = 0;
ze_result_t result = this->getNumberOfMemoryChannels(pLinuxSysmanImp, &numChannels);
isNumChannelsFromTelemetry = true;
if (result == ZE_RESULT_SUCCESS) {
pProperties->numChannels = numChannels;
pProperties->busWidth = pProperties->numChannels * 32;
}
} else {
pProperties->numChannels = memSystemInfo->getMaxMemoryChannels();
}
}
}
pProperties->busWidth = memoryBusWidth;
pProperties->physicalSize = 0;
if (!isNumChannelsFromTelemetry) {
pProperties->busWidth = memoryBusWidth;
}
if (isIntegratedDevice) {
pProperties->busWidth = -1;
pProperties->numChannels = -1;

View File

@@ -58,6 +58,7 @@ static std::map<std::string, std::map<std::string, uint64_t>> guidToKeyOffsetMap
{"reg_PCIESS_tx_pktcount_lsb", 304},
{"reg_PCIESS_tx_pktcount_msb", 300},
{"MSU_BITMASK", 3688},
{"NUM_OF_MEM_CHANNEL", 3660},
{"GDDR_TELEM_CAPTURE_TIMESTAMP_UPPER", 372},
{"GDDR_TELEM_CAPTURE_TIMESTAMP_LOWER", 368},
{"GDDR0_CH0_GT_32B_RD_REQ_UPPER", 376},
@@ -314,6 +315,7 @@ static std::map<std::string, std::map<std::string, uint64_t>> guidToKeyOffsetMap
{"reg_PCIESS_tx_pktcount_lsb", 304},
{"reg_PCIESS_tx_pktcount_msb", 308},
{"MSU_BITMASK", 3688},
{"NUM_OF_MEM_CHANNEL", 3660},
{"GDDR_TELEM_CAPTURE_TIMESTAMP_UPPER", 372},
{"GDDR_TELEM_CAPTURE_TIMESTAMP_LOWER", 368},
{"GDDR0_CH0_GT_32B_RD_REQ_UPPER", 380},
@@ -586,6 +588,7 @@ static std::map<std::string, std::map<std::string, uint64_t>> guidToKeyOffsetMap
{"reg_PCIESS_tx_pktcount_lsb", 304},
{"reg_PCIESS_tx_pktcount_msb", 300},
{"MSU_BITMASK", 3688},
{"NUM_OF_MEM_CHANNEL", 3660},
{"GDDR_TELEM_CAPTURE_TIMESTAMP_UPPER", 372},
{"GDDR_TELEM_CAPTURE_TIMESTAMP_LOWER", 368},
{"GDDR0_CH0_GT_32B_RD_REQ_UPPER", 376},
@@ -922,6 +925,7 @@ static std::map<std::string, std::map<std::string, uint64_t>> guidToKeyOffsetMap
{"reg_PCIESS_tx_pktcount_lsb", 304},
{"reg_PCIESS_tx_pktcount_msb", 308},
{"MSU_BITMASK", 3688},
{"NUM_OF_MEM_CHANNEL", 3660},
{"GDDR_TELEM_CAPTURE_TIMESTAMP_UPPER", 372},
{"GDDR_TELEM_CAPTURE_TIMESTAMP_LOWER", 368},
{"GDDR0_CH0_GT_32B_RD_REQ_UPPER", 380},
@@ -1605,6 +1609,56 @@ ze_result_t SysmanProductHelperHw<gfxProduct>::getMemoryBandwidth(zes_mem_bandwi
return ZE_RESULT_SUCCESS;
}
// Reads the number of memory channels from telemetry for this product.
//
// Steps:
//   1. Collect the telemetry node directories found under the device's PCI root path.
//   2. For every node, read its GUID and, if the GUID is present in guidToKeyOffsetMap,
//      merge that GUID's key->offset entries into keyOffsetMap and remember which node
//      directory each key came from. Nodes whose GUID cannot be read or is unknown are
//      skipped. If two nodes provide the same key, the node visited later wins.
//   3. Read the "NUM_OF_MEM_CHANNEL" value from the node directory that provides it.
//
// Returns:
//   ZE_RESULT_SUCCESS                   - *pNumChannels holds the value read from telemetry.
//   ZE_RESULT_ERROR_UNSUPPORTED_FEATURE - no telemetry nodes were found.
//   ZE_RESULT_ERROR_NOT_AVAILABLE       - no known GUID was found, the key is not provided by
//                                         any node, or reading the value failed.
// *pNumChannels is written only on success.
template <>
ze_result_t SysmanProductHelperHw<gfxProduct>::getNumberOfMemoryChannels(LinuxSysmanImp *pLinuxSysmanImp, uint32_t *pNumChannels) {
    std::string &rootPath = pLinuxSysmanImp->getPciRootPath();
    std::map<uint32_t, std::string> telemNodes;
    NEO::PmtUtil::getTelemNodesInPciPath(std::string_view(rootPath), telemNodes);
    if (telemNodes.empty()) {
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    std::map<std::string, uint64_t> keyOffsetMap;
    std::unordered_map<std::string, std::string> keyTelemInfoMap;

    // Iterate through all the TelemNodes to find both OOBMSM and PUNIT guids along with their keyOffsetMap
    for (const auto &telemNode : telemNodes) {
        const std::string &telemNodeDir = telemNode.second;

        std::array<char, NEO::PmtUtil::guidStringSize> guidString = {};
        if (!NEO::PmtUtil::readGuid(telemNodeDir, guidString)) {
            continue;
        }

        const auto guidEntry = guidToKeyOffsetMap.find(guidString.data());
        if (guidEntry == guidToKeyOffsetMap.end()) {
            continue;
        }

        for (const auto &keyOffset : guidEntry->second) {
            keyOffsetMap[keyOffset.first] = keyOffset.second;
            keyTelemInfoMap[keyOffset.first] = telemNodeDir;
        }
    }

    if (keyOffsetMap.empty()) {
        NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): key Offset map is empty\n", __FUNCTION__);
        return ZE_RESULT_ERROR_NOT_AVAILABLE;
    }

    // Get Number of Memory Channels
    std::string key = "NUM_OF_MEM_CHANNEL";

    // Look the key up explicitly instead of using operator[], which would silently insert an
    // empty telemetry directory for a key that none of the discovered nodes provides.
    auto telemDirEntry = keyTelemInfoMap.find(key);
    if (telemDirEntry == keyTelemInfoMap.end()) {
        NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): key %s is not available\n", __FUNCTION__, key.c_str());
        return ZE_RESULT_ERROR_NOT_AVAILABLE;
    }

    uint32_t numChannels = 0;
    if (!PlatformMonitoringTech::readValue(keyOffsetMap, telemDirEntry->second, key, 0, numChannels)) {
        return ZE_RESULT_ERROR_NOT_AVAILABLE;
    }

    *pNumChannels = numChannels;
    return ZE_RESULT_SUCCESS;
}
template <>
bool SysmanProductHelperHw<gfxProduct>::isZesInitSupported() {
return true;

View File

@@ -377,23 +377,6 @@ HWTEST2_F(SysmanDeviceMemoryFixtureI915, GivenValidMemoryHandleWhenCallingZesMem
}
}
// Configures the mock DRM to report GDDR6 memory and checks the properties returned by
// zesMemoryGetProperties for every memory handle: type, location, sub-device fields,
// physical size, channel count and bus width.
HWTEST2_F(SysmanDeviceMemoryFixtureI915, GivenValidMemoryHandleWhenCallingZesMemoryGetPropertiesWithGddr6LocalMemoryThenVerifySysmanMemoryGetPropertiesCallSucceeds, IsPVC) {
    pDrm->setMemoryType(NEO::DeviceBlobConstants::MemoryType::gddr6);
    auto handles = getMemoryHandles(memoryHandleComponentCount);

    for (auto handle : handles) {
        // Value-initialize so no indeterminate field (e.g. pNext) is handed to the API.
        zes_mem_properties_t properties = {};

        ze_result_t result = zesMemoryGetProperties(handle, &properties);

        EXPECT_EQ(result, ZE_RESULT_SUCCESS);
        EXPECT_EQ(properties.type, ZES_MEM_TYPE_GDDR6);
        EXPECT_EQ(properties.location, ZES_MEM_LOC_DEVICE);
        EXPECT_FALSE(properties.onSubdevice);
        EXPECT_EQ(properties.subdeviceId, 0u);
        EXPECT_EQ(properties.physicalSize, 0u);
        EXPECT_EQ(properties.numChannels, numMemoryChannels);
        EXPECT_EQ(properties.busWidth, memoryBusWidth);
    }
}
TEST_F(SysmanDeviceMemoryFixtureI915, GivenValidMemoryHandleWhenCallingZesMemoryGetPropertiesWithInvalidMemoryTypeThenVerifyGetPropertiesCallReturnsMemoryTypeAsDdrAndNumberOfChannelsAsUnknown) {
pDrm->setMemoryType(INT_MAX);
auto handles = getMemoryHandles(memoryHandleComponentCount);

View File

@@ -1186,6 +1186,162 @@ HWTEST2_F(SysmanProductHelperMemoryTest, GivenSysmanProductHelperInstanceWhenCal
EXPECT_GT(memBandwidth.timestamp, 0u);
}
// No syscall mocks are installed in this test body; the channel query is expected to
// report ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
HWTEST2_F(SysmanProductHelperMemoryTest, GivenSysmanProductHelperInstanceWhenCallingGetNumberOfMemoryChannelsAndTelemNodesAreNotAvailableThenErrorIsReturned, IsBMG) {
    auto pProductHelper = L0::Sysman::SysmanProductHelper::create(defaultHwInfo->platform.eProductFamily);

    uint32_t channelCount = 0;
    const ze_result_t status = pProductHelper->getNumberOfMemoryChannels(pLinuxSysmanImp, &channelCount);

    EXPECT_EQ(status, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
}
// The pread mock fails for file descriptors 5 and 9 (the GUID reads) and reports `count`
// bytes for every other descriptor without filling the buffer. The channel query is
// expected to return ZE_RESULT_ERROR_NOT_AVAILABLE.
HWTEST2_F(SysmanProductHelperMemoryTest, GivenSysmanProductHelperInstanceWhenCallingGetNumberOfMemoryChannelsAndReadGuidFailsThenErrorIsReturned, IsBMG) {
    VariableBackup<decltype(NEO::SysCalls::sysCallsReadlink)> mockReadLink(&NEO::SysCalls::sysCallsReadlink, &mockBmgReadLinkSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsOpen)> mockOpen(&NEO::SysCalls::sysCallsOpen, &mockOpenSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsPread)> mockPread(&NEO::SysCalls::sysCallsPread, [](int fd, void *buf, size_t count, off_t offset) -> ssize_t {
        const bool isGuidRead = (fd == 5) || (fd == 9);
        // Fail when reading GUID fails
        return isGuidRead ? static_cast<ssize_t>(-1) : static_cast<ssize_t>(count);
    });

    auto pProductHelper = L0::Sysman::SysmanProductHelper::create(defaultHwInfo->platform.eProductFamily);

    uint32_t channelCount = 0;
    const ze_result_t status = pProductHelper->getNumberOfMemoryChannels(pLinuxSysmanImp, &channelCount);

    EXPECT_EQ(status, ZE_RESULT_ERROR_NOT_AVAILABLE);
}
// The pread mock serves a zero offset on descriptors 4/8 and a GUID string on descriptors
// 5/9 that is not expected to be in the BMG GUID table. The channel query is expected to
// return ZE_RESULT_ERROR_NOT_AVAILABLE.
HWTEST2_F(SysmanProductHelperMemoryTest, GivenSysmanProductHelperInstanceWhenCallingGetNumberOfMemoryChannelsAndGuidNotFoundInMapThenErrorIsReturned, IsBMG) {
    VariableBackup<decltype(NEO::SysCalls::sysCallsReadlink)> mockReadLink(&NEO::SysCalls::sysCallsReadlink, &mockBmgReadLinkSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsOpen)> mockOpen(&NEO::SysCalls::sysCallsOpen, &mockOpenSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsPread)> mockPread(&NEO::SysCalls::sysCallsPread, [](int fd, void *buf, size_t count, off_t offset) -> ssize_t {
        uint64_t offsetValue = 0;
        std::string unknownGuid = "0x12345678"; // This GUID doesn't exist in BMG's guidToKeyOffsetMap
        switch (fd) {
        case 4:
        case 8:
            memcpy(buf, &offsetValue, count);
            break;
        case 5:
        case 9:
            memcpy(buf, unknownGuid.data(), count);
            break;
        default:
            break;
        }
        return count;
    });

    auto pProductHelper = L0::Sysman::SysmanProductHelper::create(defaultHwInfo->platform.eProductFamily);

    uint32_t channelCount = 0;
    const ze_result_t status = pProductHelper->getNumberOfMemoryChannels(pLinuxSysmanImp, &channelCount);

    EXPECT_EQ(status, ZE_RESULT_ERROR_NOT_AVAILABLE);
}
// The pread mock serves a zero offset on descriptors 4/8 and the GUID "0x1e2f8200" on
// descriptors 5/9. The channel query is expected to return ZE_RESULT_ERROR_NOT_AVAILABLE.
// NOTE(review): the test name says the key-offset map is empty, while the GUID comment below
// describes a valid GUID that merely lacks NUM_OF_MEM_CHANNEL - confirm which path is exercised.
HWTEST2_F(SysmanProductHelperMemoryTest, GivenSysmanProductHelperInstanceWhenCallingGetNumberOfMemoryChannelsAndKeyOffsetMapIsEmptyThenErrorIsReturned, IsBMG) {
    VariableBackup<decltype(NEO::SysCalls::sysCallsReadlink)> mockReadLink(&NEO::SysCalls::sysCallsReadlink, &mockBmgReadLinkSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsOpen)> mockOpen(&NEO::SysCalls::sysCallsOpen, &mockOpenSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsPread)> mockPread(&NEO::SysCalls::sysCallsPread, [](int fd, void *buf, size_t count, off_t offset) -> ssize_t {
        uint64_t offsetValue = 0;
        std::string guidWithoutChannelKey = "0x1e2f8200"; // Valid GUID but no NUM_OF_MEM_CHANNEL key
        switch (fd) {
        case 4:
        case 8:
            memcpy(buf, &offsetValue, count);
            break;
        case 5:
        case 9:
            memcpy(buf, guidWithoutChannelKey.data(), count);
            break;
        default:
            break;
        }
        return count;
    });

    auto pProductHelper = L0::Sysman::SysmanProductHelper::create(defaultHwInfo->platform.eProductFamily);

    uint32_t channelCount = 0;
    const ze_result_t status = pProductHelper->getNumberOfMemoryChannels(pLinuxSysmanImp, &channelCount);

    EXPECT_EQ(status, ZE_RESULT_ERROR_NOT_AVAILABLE);
}
// The pread mock serves a zero offset on descriptors 4/8, the GUID "0x5e2f8210" on
// descriptors 5/9, and fails on descriptors 6/10 (the telemetry value read). The channel
// query is expected to return ZE_RESULT_ERROR_NOT_AVAILABLE.
HWTEST2_F(SysmanProductHelperMemoryTest, GivenSysmanProductHelperInstanceWhenCallingGetNumberOfMemoryChannelsAndReadValueFailsThenErrorIsReturned, IsBMG) {
    VariableBackup<decltype(NEO::SysCalls::sysCallsReadlink)> mockReadLink(&NEO::SysCalls::sysCallsReadlink, &mockBmgReadLinkSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsOpen)> mockOpen(&NEO::SysCalls::sysCallsOpen, &mockOpenSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsPread)> mockPread(&NEO::SysCalls::sysCallsPread, [](int fd, void *buf, size_t count, off_t offset) -> ssize_t {
        uint64_t offsetValue = 0;
        std::string oobmsmGuid = "0x5e2f8210";
        switch (fd) {
        case 4:
        case 8:
            memcpy(buf, &offsetValue, count);
            break;
        case 5:
        case 9:
            memcpy(buf, oobmsmGuid.data(), count);
            break;
        case 6:
        case 10:
            // Fail when trying to read NUM_OF_MEM_CHANNEL value
            return -1;
        default:
            break;
        }
        return count;
    });

    auto pProductHelper = L0::Sysman::SysmanProductHelper::create(defaultHwInfo->platform.eProductFamily);

    uint32_t channelCount = 0;
    const ze_result_t status = pProductHelper->getNumberOfMemoryChannels(pLinuxSysmanImp, &channelCount);

    EXPECT_EQ(status, ZE_RESULT_ERROR_NOT_AVAILABLE);
}
// The pread mock serves a zero offset on descriptors 4/8, the GUID "0x5e2f8210" on
// descriptors 5/9, and numMemoryChannels as the value read on descriptors 6/10. With the
// mock DRM reporting GDDR6, getMemoryProperties is expected to return that channel count
// and a bus width of 32 bits per channel.
HWTEST2_F(SysmanProductHelperMemoryTest, GivenSysmanProductHelperInstanceWhenCallingGetMemoryPropertiesWithGddr6AndGetNumberOfMemoryChannelsSucceedsThenValidPropertiesAreReturned, IsBMG) {
    VariableBackup<decltype(NEO::SysCalls::sysCallsReadlink)> mockReadLink(&NEO::SysCalls::sysCallsReadlink, &mockBmgReadLinkSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsOpen)> mockOpen(&NEO::SysCalls::sysCallsOpen, &mockOpenSuccess);
    VariableBackup<decltype(NEO::SysCalls::sysCallsPread)> mockPread(&NEO::SysCalls::sysCallsPread, [](int fd, void *buf, size_t count, off_t offset) -> ssize_t {
        uint64_t offsetValue = 0;
        std::string oobmsmGuid = "0x5e2f8210";
        uint32_t telemetryChannels = numMemoryChannels;
        switch (fd) {
        case 4:
        case 8:
            memcpy(buf, &offsetValue, count);
            break;
        case 5:
        case 9:
            memcpy(buf, oobmsmGuid.data(), count);
            break;
        case 6:
        case 10:
            memcpy(buf, &telemetryChannels, count);
            break;
        default:
            break;
        }
        return count;
    });

    auto pProductHelper = L0::Sysman::SysmanProductHelper::create(defaultHwInfo->platform.eProductFamily);
    zes_mem_properties_t memProperties = {};
    const bool onSubdevice = false;
    const uint32_t subdeviceIndex = 0;

    auto pMockDrm = std::make_unique<MockMemoryNeoDrm>(const_cast<NEO::RootDeviceEnvironment &>(pSysmanDeviceImp->getRootDeviceEnvironment()));
    pMockDrm->setMemoryType(NEO::DeviceBlobConstants::MemoryType::gddr6);

    const ze_result_t status = pProductHelper->getMemoryProperties(&memProperties, pLinuxSysmanImp, pMockDrm.get(), pLinuxSysmanImp->getSysmanKmdInterface(), subdeviceIndex, onSubdevice);

    EXPECT_EQ(status, ZE_RESULT_SUCCESS);
    EXPECT_EQ(memProperties.type, ZES_MEM_TYPE_GDDR6);
    EXPECT_EQ(memProperties.numChannels, numMemoryChannels);
    EXPECT_EQ(memProperties.busWidth, numMemoryChannels * 32);
}
// No syscall mocks are installed in this test body. With the mock DRM reporting GDDR6,
// getMemoryProperties is still expected to succeed, with numChannels and busWidth both
// reported as -1.
HWTEST2_F(SysmanProductHelperMemoryTest, GivenSysmanProductHelperInstanceWhenCallingGetMemoryPropertiesWithGddr6AndGetNumberOfMemoryChannelsFailsThenDefaultValuesAreReturned, IsBMG) {
    auto pProductHelper = L0::Sysman::SysmanProductHelper::create(defaultHwInfo->platform.eProductFamily);
    zes_mem_properties_t memProperties = {};
    const bool onSubdevice = false;
    const uint32_t subdeviceIndex = 0;

    auto pMockDrm = std::make_unique<MockMemoryNeoDrm>(const_cast<NEO::RootDeviceEnvironment &>(pSysmanDeviceImp->getRootDeviceEnvironment()));
    pMockDrm->setMemoryType(NEO::DeviceBlobConstants::MemoryType::gddr6);

    const ze_result_t status = pProductHelper->getMemoryProperties(&memProperties, pLinuxSysmanImp, pMockDrm.get(), pLinuxSysmanImp->getSysmanKmdInterface(), subdeviceIndex, onSubdevice);

    // getNumberOfMemoryChannels fails: getMemoryProperties now succeeds and leaves numChannels and busWidth to default (-1)
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);
    EXPECT_EQ(memProperties.type, ZES_MEM_TYPE_GDDR6);
    EXPECT_EQ(memProperties.numChannels, -1);
    EXPECT_EQ(memProperties.busWidth, -1);
}
// For non-BMG platforms, the default implementation should return ZE_RESULT_ERROR_NOT_AVAILABLE.
// NOTE(review): "Note" in the test name looks like a typo for "Not"; the name is kept so
// existing test filters still match.
HWTEST2_F(SysmanProductHelperMemoryTest, GivenSysmanProductHelperInstanceWhenCallingGetNumberOfMemoryChannelsAndDeviceIsNoteBMGThenErrorIsReturned, IsNotBMG) {
    auto pProductHelper = L0::Sysman::SysmanProductHelper::create(defaultHwInfo->platform.eProductFamily);

    uint32_t channelCount = 0;
    const ze_result_t status = pProductHelper->getNumberOfMemoryChannels(pLinuxSysmanImp, &channelCount);

    EXPECT_EQ(status, ZE_RESULT_ERROR_NOT_AVAILABLE);
}
} // namespace ult
} // namespace Sysman
} // namespace L0

View File

@@ -61,7 +61,7 @@ struct SystemInfo {
uint32_t getL3BankSizeInKb() const { return l3BankSizeInKb; }
uint32_t getSlmSizePerDss() const { return slmSizePerDss; }
uint32_t getNumHbmStacksPerTile() const { return numHbmStacksPerTile; }
uint32_t getNumChannlesPerHbmStack() const { return numChannelsPerHbmStack; }
uint32_t getNumChannelsPerHbmStack() const { return numChannelsPerHbmStack; }
uint32_t getNumRegions() const { return numRegions; }
uint32_t getNumL3BanksPerGroup() const { return numL3BanksPerGroup; }
uint32_t getNumL3BankGroups() const { return numL3BankGroups; }

View File

@@ -184,7 +184,7 @@ TEST(DrmSystemInfoTest, givenSystemInfoCreatedFromDeviceBlobWhenQueryingSpecific
EXPECT_EQ(0x24u, systemInfo.getSlmSizePerDss());
EXPECT_EQ(0x25u, systemInfo.getCsrSizeInMb());
EXPECT_EQ(0x04u, systemInfo.getNumHbmStacksPerTile());
EXPECT_EQ(0x08u, systemInfo.getNumChannlesPerHbmStack());
EXPECT_EQ(0x08u, systemInfo.getNumChannelsPerHbmStack());
EXPECT_EQ(0x02u, systemInfo.getNumRegions());
EXPECT_EQ(0x02u, systemInfo.getNumL3BankGroups());
EXPECT_EQ(0x03u, systemInfo.getNumL3BanksPerGroup());