feature: supports boot time survivability mode recovery

Related-To: NEO-14331

Signed-off-by: Kulkarni, Ashwin Kumar <ashwin.kumar.kulkarni@intel.com>
This commit is contained in:
Kulkarni, Ashwin Kumar
2025-07-01 10:09:58 +00:00
committed by Compute-Runtime-Automation
parent 5a057bbd46
commit 75d592d940
29 changed files with 1090 additions and 14 deletions

View File

@@ -17,6 +17,7 @@
#include "shared/source/os_interface/linux/pmt_util.h"
#include "shared/source/os_interface/linux/system_info.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/utilities/directory.h"
#include "level_zero/core/source/driver/driver.h"
#include "level_zero/sysman/source/api/pci/linux/sysman_os_pci_imp.h"
@@ -182,13 +183,17 @@ LinuxSysmanImp::LinuxSysmanImp(SysmanDeviceImp *pParentSysmanDeviceImp) {
}
// Creates the firmware-utility interface for this device and stores it in pFwUtilInterface.
//
// In survivability mode the PCI address cached in pciBdfInfo is passed to FirmwareUtil::create.
// Otherwise the PCI address is read from the driver model reached through the parent
// device's root device environment.
void LinuxSysmanImp::createFwUtilInterface() {
    if (isDeviceInSurvivabilityMode()) {
        // Do not touch osInterface / the driver model on this path; only the cached BDF is used.
        pFwUtilInterface = FirmwareUtil::create(pciBdfInfo.pciDomain, pciBdfInfo.pciBus, pciBdfInfo.pciDevice, pciBdfInfo.pciFunction);
    } else {
        const auto pciBusInfo = pParentSysmanDeviceImp->getRootDeviceEnvironment().osInterface->getDriverModel()->getPciBusInfo();
        const uint16_t domain = static_cast<uint16_t>(pciBusInfo.pciDomain);
        const uint8_t bus = static_cast<uint8_t>(pciBusInfo.pciBus);
        const uint8_t device = static_cast<uint8_t>(pciBusInfo.pciDevice);
        const uint8_t function = static_cast<uint8_t>(pciBusInfo.pciFunction);
        // Create the interface exactly once so no earlier result is overwritten.
        pFwUtilInterface = FirmwareUtil::create(domain, bus, device, function);
    }
}
FirmwareUtil *LinuxSysmanImp::getFwUtilInterface() {
@@ -590,6 +595,29 @@ bool LinuxSysmanImp::getUuidFromSubDeviceInfo(uint32_t subDeviceID, std::array<u
return this->uuidVec[subDeviceID].isValid;
}
// Converts a BDF string into a NEO::PhysicalDevicePciBusInfo using NEO::parseBdfString.
//
// bdfString: text handed unchanged to NEO::parseBdfString.
// Returns a PhysicalDevicePciBusInfo built from the parsed domain/bus/device/function when
// the parser reports exactly four tokens; otherwise a default-constructed
// PhysicalDevicePciBusInfo.
static NEO::PhysicalDevicePciBusInfo getPciBufInfo(const char *bdfString) {
    constexpr int expectedTokenCount = 4;

    // All fields start as all-ones so they hold a defined value before parsing.
    uint16_t pciDomain = static_cast<uint16_t>(-1);
    uint8_t pciBus = static_cast<uint8_t>(-1);
    uint8_t pciDevice = static_cast<uint8_t>(-1);
    uint8_t pciFunction = static_cast<uint8_t>(-1);

    const auto parsedTokens = NEO::parseBdfString(bdfString, pciDomain, pciBus, pciDevice, pciFunction);
    if (parsedTokens == expectedTokenCount) {
        return NEO::PhysicalDevicePciBusInfo{pciDomain, pciBus, pciDevice, pciFunction};
    }
    return NEO::PhysicalDevicePciBusInfo{};
}
// Initializes survivability-mode state by caching the device's PCI address in pciBdfInfo.
//
// hwDeviceId: device identifier; it is treated as a NEO::HwDeviceIdDrm and its PCI path is
//             parsed by getPciBufInfo. Ownership is taken and released when this call returns.
// Returns ZE_RESULT_SUCCESS when a PCI address was parsed, ZE_RESULT_ERROR_UNINITIALIZED when
// hwDeviceId is empty or the parsed domain equals pciBdfInfo.invalidValue.
ze_result_t LinuxSysmanImp::initSurvivabilityMode(std::unique_ptr<NEO::HwDeviceId> hwDeviceId) {
    // Guard against an empty pointer before it is dereferenced below.
    if (hwDeviceId == nullptr) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }

    const auto hwDeviceIdDrm = static_cast<NEO::HwDeviceIdDrm *>(hwDeviceId.get());
    pciBdfInfo = getPciBufInfo(hwDeviceIdDrm->getPciPath());
    if (pciBdfInfo.pciDomain == pciBdfInfo.invalidValue) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }
    return ZE_RESULT_SUCCESS;
}
// Returns the isDeviceInSurvivabilityMode flag held by the parent SysmanDeviceImp.
bool LinuxSysmanImp::isDeviceInSurvivabilityMode() {
return pParentSysmanDeviceImp->isDeviceInSurvivabilityMode;
}
OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) {
LinuxSysmanImp *pLinuxSysmanImp = new LinuxSysmanImp(pParentSysmanDeviceImp);
return static_cast<OsSysman *>(pLinuxSysmanImp);

View File

@@ -9,6 +9,7 @@
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/os_interface/driver_info.h"
#include "shared/source/os_interface/linux/sys_calls.h"
#include "level_zero/sysman/source/device/os_sysman.h"
@@ -81,6 +82,8 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableAndNonMovableClass {
bool getTelemData(uint32_t subDeviceId, std::string &telemDir, std::string &guid, uint64_t &telemOffset);
bool getUuidFromSubDeviceInfo(uint32_t subDeviceID, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid);
bool generateUuidFromPciAndSubDeviceInfo(uint32_t subDeviceID, const NEO::PhysicalDevicePciBusInfo &pciBusInfo, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid);
ze_result_t initSurvivabilityMode(std::unique_ptr<NEO::HwDeviceId> hwDeviceId) override;
bool isDeviceInSurvivabilityMode() override;
protected:
std::unique_ptr<SysmanProductHelper> pSysmanProductHelper;
@@ -102,6 +105,7 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableAndNonMovableClass {
std::array<uint8_t, NEO::ProductHelper::uuidSize> id{};
};
std::vector<Uuid> uuidVec;
NEO::PhysicalDevicePciBusInfo pciBdfInfo = {};
private:
LinuxSysmanImp() = delete;

View File

@@ -191,6 +191,14 @@ bool WddmSysmanImp::generateUuidFromPciBusInfo(const NEO::PhysicalDevicePciBusIn
return false;
}
// WDDM implementation of initSurvivabilityMode: the hwDeviceId argument is not used and the
// call always returns ZE_RESULT_ERROR_UNINITIALIZED.
ze_result_t WddmSysmanImp::initSurvivabilityMode(std::unique_ptr<NEO::HwDeviceId> hwDeviceId) {
return ZE_RESULT_ERROR_UNINITIALIZED;
}
// WDDM implementation of isDeviceInSurvivabilityMode: always returns false.
bool WddmSysmanImp::isDeviceInSurvivabilityMode() {
return false;
}
OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) {
WddmSysmanImp *pWddmSysmanImp = new WddmSysmanImp(pParentSysmanDeviceImp);
return static_cast<OsSysman *>(pWddmSysmanImp);

View File

@@ -46,6 +46,8 @@ class WddmSysmanImp : public OsSysman, NEO::NonCopyableAndNonMovableClass {
PlatformMonitoringTech *getSysmanPmt();
bool getUuid(std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid);
bool generateUuidFromPciBusInfo(const NEO::PhysicalDevicePciBusInfo &pciBusInfo, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid);
ze_result_t initSurvivabilityMode(std::unique_ptr<NEO::HwDeviceId> hwDeviceId) override;
bool isDeviceInSurvivabilityMode() override;
protected:
FirmwareUtil *pFwUtilInterface = nullptr;