[Fix, Sysman] Map uevent to device based on device path

Related-To: LOCI-4307

Signed-off-by: Bellekallu Rajkiran <bellekallu.rajkiran@intel.com>
This commit is contained in:
Bellekallu Rajkiran
2023-04-12 18:30:55 +00:00
committed by Compute-Runtime-Automation
parent 9036a034bd
commit 007f5d70bf
5 changed files with 165 additions and 480 deletions

View File

@@ -246,96 +246,74 @@ bool LinuxEventsUtil::checkIfFabricPortStatusChanged(void *dev, zes_event_type_f
return false;
}
void LinuxEventsUtil::getDevIndexToDevInfoMap(std::vector<zes_event_type_flags_t> &registeredEvents, uint32_t count, zes_device_handle_t *phDevices, std::map<uint32_t, DeviceInfo> &mapOfDevIndexToDevInfo) {
void LinuxEventsUtil::getDevIndexToDevPathMap(std::vector<zes_event_type_flags_t> &registeredEvents, uint32_t count, zes_device_handle_t *phDevices, std::map<uint32_t, std::string> &mapOfDevIndexToDevPath) {
for (uint32_t devIndex = 0; devIndex < count; devIndex++) {
auto device = static_cast<SysmanDeviceImp *>(L0::SysmanDevice::fromHandle(phDevices[devIndex]));
registeredEvents[devIndex] = deviceEventsMap[device];
if (!registeredEvents[devIndex]) {
continue;
} else {
auto pDrm = &static_cast<L0::LinuxSysmanImp *>(device->deviceGetOsInterface())->getDrm();
struct stat drmStat = {};
DeviceInfo devInfo = {};
int ret = -1;
ze_result_t result = ZE_RESULT_ERROR_UNKNOWN;
if ((ret = NEO::SysCalls::fstat(pDrm->getFileDescriptor(), &drmStat)) == 0) {
devInfo.deviceNum = drmStat.st_rdev;
std::string bdf;
auto pSysfsAccess = &static_cast<L0::LinuxSysmanImp *>(device->deviceGetOsInterface())->getSysfsAccess();
if (pSysfsAccess->getRealPath("device", bdf) == ZE_RESULT_SUCCESS) {
// /sys needs to be removed from real path inorder to equate with
// DEVPATH property of uevent.
// Example of real path: /sys/devices/pci0000:97/0000:97:02.0/0000:98:00.0/0000:99:01.0/0000:9a:00.0
// Example of DEVPATH: /devices/pci0000:97/0000:97:02.0/0000:98:00.0/0000:99:01.0/0000:9a:00.0/i915.iaf.0
const auto loc = bdf.find("/devices");
if (loc == std::string::npos) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
"%s", "Invalid device path\n");
continue;
}
bdf = bdf.substr(loc);
mapOfDevIndexToDevPath.insert({devIndex, bdf});
} else {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
"%s", "Retrieving drm stat failed for fd %d\n", pDrm->getFileDescriptor());
}
if (registeredEvents[devIndex] & ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH) {
std::string bdfDir;
auto pSysfsAccess = &static_cast<L0::LinuxSysmanImp *>(device->deviceGetOsInterface())->getSysfsAccess();
result = pSysfsAccess->getRealPath("device", bdfDir);
if (ZE_RESULT_SUCCESS == result) {
// /sys needs to be removed from real path inorder to equate with
// DEVPATH property of uevent.
// Example of real path: /sys/devices/pci0000:97/0000:97:02.0/0000:98:00.0/0000:99:01.0/0000:9a:00.0
// Example of DEVPATH: /devices/pci0000:97/0000:97:02.0/0000:98:00.0/0000:99:01.0/0000:9a:00.0/i915.iaf.0
const auto loc = bdfDir.find_first_of('/');
auto bdf = bdfDir.substr(loc + 4);
devInfo.devicePath = bdf;
} else {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
"%s", "Failed to get real path of device\n");
}
}
if ((ret != -1) || (result == ZE_RESULT_SUCCESS)) {
mapOfDevIndexToDevInfo.insert({devIndex, devInfo});
"%s", "Failed to get real path of device\n");
}
}
}
}
bool LinuxEventsUtil::checkDeviceEvents(std::vector<zes_event_type_flags_t> &registeredEvents, std::map<uint32_t, DeviceInfo> mapOfDevIndexToDevInfo, zes_event_type_flags_t *pEvents, void *dev) {
dev_t devnum = pUdevLib->getEventGenerationSourceDevice(dev);
for (auto it = mapOfDevIndexToDevInfo.begin(); it != mapOfDevIndexToDevInfo.end(); it++) {
if ((it->second.deviceNum == devnum) || ((it->second.deviceNum - 127) == devnum)) {
if (registeredEvents[it->first] & ZES_EVENT_TYPE_FLAG_DEVICE_DETACH) {
if (checkDeviceDetachEvent(pEvents[it->first])) {
return true;
}
}
if (registeredEvents[it->first] & ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH) {
if (checkDeviceAttachEvent(pEvents[it->first])) {
return true;
}
}
if (registeredEvents[it->first] & ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED) {
if (isResetRequired(dev, pEvents[it->first])) {
return true;
}
}
if (registeredEvents[it->first] & ZES_EVENT_TYPE_FLAG_MEM_HEALTH) {
if (checkIfMemHealthChanged(dev, pEvents[it->first])) {
return true;
}
}
break;
}
}
bool LinuxEventsUtil::checkDeviceEvents(std::vector<zes_event_type_flags_t> &registeredEvents, std::map<uint32_t, std::string> mapOfDevIndexToDevPath, zes_event_type_flags_t *pEvents, void *dev) {
const char *devicePath = pUdevLib->getEventPropertyValue(dev, "DEVPATH");
bool retVal = false;
if (devicePath != nullptr) {
std::string devPath(devicePath);
// /i915.iaf.* needs to be removed from DEVPATH inorder to equate with
// real path of device.
// Example of real path: /sys/devices/pci0000:97/0000:97:02.0/0000:98:00.0/0000:99:01.0/0000:9a:00.0
// Example of DEVPATH: /devices/pci0000:97/0000:97:02.0/0000:98:00.0/0000:99:01.0/0000:9a:00.0/i915.iaf.0
const auto loc = devPath.find_last_of('/');
devPath = devPath.substr(0, loc);
for (auto it = mapOfDevIndexToDevInfo.begin(); it != mapOfDevIndexToDevInfo.end(); it++) {
if (it->second.devicePath == devPath) {
if (checkIfFabricPortStatusChanged(dev, pEvents[it->first])) {
return true;
for (auto it = mapOfDevIndexToDevPath.begin(); it != mapOfDevIndexToDevPath.end(); it++) {
if (devPath.find(it->second.c_str()) != std::string::npos) {
if (registeredEvents[it->first] & ZES_EVENT_TYPE_FLAG_DEVICE_DETACH) {
if (checkDeviceDetachEvent(pEvents[it->first])) {
retVal = true;
}
}
if (registeredEvents[it->first] & ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH) {
if (checkDeviceAttachEvent(pEvents[it->first])) {
retVal = true;
}
}
if (registeredEvents[it->first] & ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED) {
if (isResetRequired(dev, pEvents[it->first])) {
retVal = true;
}
}
if (registeredEvents[it->first] & ZES_EVENT_TYPE_FLAG_MEM_HEALTH) {
if (checkIfMemHealthChanged(dev, pEvents[it->first])) {
retVal = true;
}
}
if (registeredEvents[it->first] & ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH) {
if (checkIfFabricPortStatusChanged(dev, pEvents[it->first])) {
retVal = true;
}
}
break;
}
}
}
return false;
return retVal;
}
bool LinuxEventsUtil::listenSystemEvents(zes_event_type_flags_t *pEvents, uint32_t count, std::vector<zes_event_type_flags_t> &registeredEvents, zes_device_handle_t *phDevices, uint64_t timeout) {
@@ -346,7 +324,7 @@ bool LinuxEventsUtil::listenSystemEvents(zes_event_type_flags_t *pEvents, uint32
bool retval = false;
struct pollfd pfd[2];
std::vector<std::string> subsystemList;
std::map<uint32_t, DeviceInfo> mapOfDevIndexToDevInfo = {};
std::map<uint32_t, std::string> mapOfDevIndexToDevPath = {};
if (pUdevLib == nullptr) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
@@ -372,7 +350,7 @@ bool LinuxEventsUtil::listenSystemEvents(zes_event_type_flags_t *pEvents, uint32
auto start = L0::steadyClock::now();
std::chrono::duration<double, std::milli> timeElapsed;
getDevIndexToDevInfoMap(registeredEvents, count, phDevices, mapOfDevIndexToDevInfo);
getDevIndexToDevPathMap(registeredEvents, count, phDevices, mapOfDevIndexToDevPath);
eventsMutex.unlock();
while (NEO::SysCalls::poll(pfd, 2, static_cast<int>(timeout)) > 0) {
bool eventReceived = false;
@@ -382,8 +360,8 @@ bool LinuxEventsUtil::listenSystemEvents(zes_event_type_flags_t *pEvents, uint32
eventsMutex.lock();
uint8_t dummy;
NEO::SysCalls::read(pipeFd[0], &dummy, 1);
mapOfDevIndexToDevInfo.clear();
getDevIndexToDevInfoMap(registeredEvents, count, phDevices, mapOfDevIndexToDevInfo);
mapOfDevIndexToDevPath.clear();
getDevIndexToDevPathMap(registeredEvents, count, phDevices, mapOfDevIndexToDevPath);
eventsMutex.unlock();
} else {
eventReceived = true;
@@ -391,7 +369,7 @@ bool LinuxEventsUtil::listenSystemEvents(zes_event_type_flags_t *pEvents, uint32
}
}
if (mapOfDevIndexToDevInfo.empty()) {
if (mapOfDevIndexToDevPath.empty()) {
break;
}
@@ -424,7 +402,7 @@ bool LinuxEventsUtil::listenSystemEvents(zes_event_type_flags_t *pEvents, uint32
break;
}
retval = checkDeviceEvents(registeredEvents, mapOfDevIndexToDevInfo, pEvents, dev);
retval = checkDeviceEvents(registeredEvents, mapOfDevIndexToDevPath, pEvents, dev);
pUdevLib->dropDeviceReference(dev);
if (retval) {
break;

View File

@@ -12,11 +12,6 @@
namespace L0 {
struct DeviceInfo {
std::string devicePath = {};
dev_t deviceNum = 0;
};
class LinuxEventsImp : public OsEvents, NEO::NonCopyableOrMovableClass {
public:
bool eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) override;
@@ -58,8 +53,8 @@ class LinuxEventsUtil {
static const std::string unbind;
static const std::string bind;
static bool checkRasEventOccured(Ras *rasHandle);
void getDevIndexToDevInfoMap(std::vector<zes_event_type_flags_t> &registeredEvents, uint32_t count, zes_device_handle_t *phDevices, std::map<uint32_t, DeviceInfo> &mapOfDevIndexToDevInfo);
bool checkDeviceEvents(std::vector<zes_event_type_flags_t> &registeredEvents, std::map<uint32_t, DeviceInfo> mapOfDevIndexToDevInfo, zes_event_type_flags_t *pEvents, void *dev);
void getDevIndexToDevPathMap(std::vector<zes_event_type_flags_t> &registeredEvents, uint32_t count, zes_device_handle_t *phDevices, std::map<uint32_t, std::string> &mapOfDevIndexToDevPath);
bool checkDeviceEvents(std::vector<zes_event_type_flags_t> &registeredEvents, std::map<uint32_t, std::string> mapOfDevIndexToDevPath, zes_event_type_flags_t *pEvents, void *dev);
std::once_flag initEventsOnce;
std::mutex eventsMutex;
void init();