mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
sysman: clean up code duplication for reset
warm and cold reset are common functionality, the code is being moved to the common sysman implementation from diagnostics specific files. Related-To: LOCI-1908 Signed-off-by: Vilvaraj, T J Vivek <t.j.vivek.vilvaraj@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0c0603966b
commit
47f7b4f509
@@ -33,168 +33,6 @@ void OsDiagnostics::getSupportedDiagTestsFromFW(void *pOsSysman, std::vector<std
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the sysman-level handles held for this device, in this order:
// engine handles, RAS handles, the PMT object and the local DRM handle.
void LinuxDiagnosticsImp::releaseSysmanDeviceResources() {
    auto *sysmanDevice = pLinuxSysmanImp->getSysmanDeviceImp();
    sysmanDevice->pEngineHandleContext->releaseEngines();
    sysmanDevice->pRasHandleContext->releaseRasHandles();

    pLinuxSysmanImp->releasePmtObject();
    pLinuxSysmanImp->releaseLocalDrmHandle();
}
|
||||
|
||||
void LinuxDiagnosticsImp::releaseDeviceResources() {
|
||||
releaseSysmanDeviceResources();
|
||||
auto device = static_cast<DeviceImp *>(pLinuxSysmanImp->getDeviceHandle());
|
||||
device->releaseResources();
|
||||
executionEnvironment->memoryManager->releaseDeviceSpecificMemResources(rootDeviceIndex);
|
||||
executionEnvironment->releaseRootDeviceEnvironmentResources(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get());
|
||||
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].reset();
|
||||
}
|
||||
|
||||
// Rebuilds the sysman-level state after the device has been re-created:
// refreshes the sub-device handle list, re-creates the PMT handles and
// re-initializes the RAS and engine handle contexts (in that order).
void LinuxDiagnosticsImp::reInitSysmanDeviceResources() {
    auto *sysmanDevice = pLinuxSysmanImp->getSysmanDeviceImp();

    sysmanDevice->updateSubDeviceHandlesLocally();
    pLinuxSysmanImp->createPmtHandles();
    sysmanDevice->pRasHandleContext->init(sysmanDevice->deviceHandles);
    sysmanDevice->pEngineHandleContext->init();
}
|
||||
|
||||
// Re-creates the NEO device for devicePciBdf / rootDeviceIndex inside the saved
// execution environment, re-initializes the existing L0 device object on top of
// it and then rebuilds the sysman resources.
//
// Returns ZE_RESULT_ERROR_DEVICE_LOST when the NEO device cannot be created,
// otherwise the status reported by Device::deviceReinit (previously this status
// was discarded and ZE_RESULT_SUCCESS was returned unconditionally).
ze_result_t LinuxDiagnosticsImp::initDevice() {
    auto device = static_cast<DeviceImp *>(pLinuxSysmanImp->getDeviceHandle());

    auto neoDevice = NEO::DeviceFactory::createDevice(*executionEnvironment, devicePciBdf, rootDeviceIndex);
    if (neoDevice == nullptr) {
        return ZE_RESULT_ERROR_DEVICE_LOST;
    }

    auto driverHandle = static_cast<L0::DriverHandleImp *>(device->getDriverHandle());
    driverHandle->updateRootDeviceBitFields(neoDevice);
    driverHandle->enableRootDeviceDebugger(neoDevice);

    ze_result_t result = ZE_RESULT_SUCCESS;
    Device::deviceReinit(device->getDriverHandle(), device, neoDevice, &result);

    // Sysman resources are rebuilt regardless of the reinit status, as before.
    reInitSysmanDeviceResources();
    return result;
}
|
||||
|
||||
// Fills deviceFds with those file descriptors of process `pid` whose file name
// is recognized by pSysfsAccess->isMyDeviceFile(). deviceFds is always cleared
// first; it stays empty when the descriptor list cannot be read.
static void getPidFdsForOpenDevice(ProcfsAccess *pProcfsAccess, SysfsAccess *pSysfsAccess, const ::pid_t pid, std::vector<int> &deviceFds) {
    deviceFds.clear();

    std::vector<int> openFds;
    if (pProcfsAccess->getFileDescriptors(pid, openFds) != ZE_RESULT_SUCCESS) {
        // Process exited. Not an error. Just ignore.
        return;
    }

    for (const int openFd : openFds) {
        std::string fileName;
        // A failed lookup means the process closed this file meanwhile; skip it.
        const bool resolved = (pProcfsAccess->getFileName(pid, openFd, fileName) == ZE_RESULT_SUCCESS);
        if (resolved && pSysfsAccess->isMyDeviceFile(fileName)) {
            deviceFds.push_back(openFd);
        }
    }
}
|
||||
// A 'warm reset' is a conventional reset that is triggered across a PCI express link.
|
||||
// A warm reset is triggered either when a link is forced into electrical idle or
|
||||
// by sending TS1 and TS2 ordered sets with the hot reset bit set.
|
||||
// Software can initiate a warm reset by setting and then clearing the secondary bus reset bit
|
||||
// in the bridge control register in the PCI configuration space of the bridge port upstream of the device.
|
||||
ze_result_t LinuxDiagnosticsImp::osWarmReset() {
|
||||
std::string rootPortPath;
|
||||
std::string realRootPath;
|
||||
ze_result_t result = pSysfsAccess->getRealPath(deviceDir, realRootPath);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
auto device = static_cast<DeviceImp *>(pDevice);
|
||||
executionEnvironment = device->getNEODevice()->getExecutionEnvironment();
|
||||
|
||||
ExecutionEnvironmentRefCountRestore restorer(executionEnvironment);
|
||||
releaseDeviceResources();
|
||||
// write 1 to remove
|
||||
result = pFsAccess->write(realRootPath + '/' + "remove", "1");
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
size_t loc;
|
||||
|
||||
loc = realRootPath.find_last_of('/');
|
||||
realRootPath = realRootPath.substr(0, loc);
|
||||
|
||||
int fd, ret = 0;
|
||||
unsigned int offset = PCI_BRIDGE_CONTROL; // Bridge control offset in Header of PCI config space
|
||||
unsigned int value = 0x00;
|
||||
unsigned int resetValue = 0x00;
|
||||
std::string configFilePath = realRootPath + '/' + "config";
|
||||
fd = this->openFunction(configFilePath.c_str(), O_RDWR);
|
||||
if (fd < 0) {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
this->preadFunction(fd, &value, 0x01, offset);
|
||||
resetValue = value | PCI_BRIDGE_CTL_BUS_RESET;
|
||||
this->pwriteFunction(fd, &resetValue, 0x01, offset);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100)); // Sleep for 100 milliseconds just to make sure the change is propagated.
|
||||
this->pwriteFunction(fd, &value, 0x01, offset);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Sleep for 500 milliseconds
|
||||
ret = this->closeFunction(fd);
|
||||
if (ret < 0) {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
result = pFsAccess->write(realRootPath + '/' + "rescan", "1");
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
|
||||
return initDevice();
|
||||
}
|
||||
|
||||
// Returns the last component of rootPortPath, i.e. everything after the final
// '/'. For a root port directory path this is the port's PCI address. A path
// without any '/' is returned unchanged; a path ending in '/' yields "".
std::string getRootPortaddress(std::string &rootPortPath) {
    const size_t lastSlash = rootPortPath.find_last_of('/');
    // npos + 1 wraps to 0, so a slash-less path falls through to substr(0).
    return rootPortPath.substr(lastSlash + 1);
}
|
||||
|
||||
// Cold reset: power-cycles the PCI slot whose address equals the address of the
// device's root port. Device resources are released first; after a successful
// power off/on the device is re-created through initDevice().
// Returns the first failing filesystem status, or ZE_RESULT_ERROR_DEVICE_LOST
// when no slot under /sys/bus/pci/slots/ matches.
ze_result_t LinuxDiagnosticsImp::osColdReset() {
    const std::string slotPath("/sys/bus/pci/slots/"); // one sub-directory per slot

    // Absolute real path (not symlink) of the device,
    // e.g. /sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0
    std::string devicePath;
    ze_result_t status = pSysfsAccess->getRealPath(deviceDir, devicePath);
    if (status != ZE_RESULT_SUCCESS) {
        return status;
    }

    executionEnvironment = static_cast<DeviceImp *>(pDevice)->getNEODevice()->getExecutionEnvironment();

    // Holds an extra internal reference on the execution environment until return.
    ExecutionEnvironmentRefCountRestore restorer(executionEnvironment);
    releaseDeviceResources();

    // e.g. portPath = /sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0, portAddress = 0000:8a:00.0
    std::string portPath = pLinuxSysmanImp->getPciRootPortDirectoryPath(devicePath);
    const std::string portAddress = getRootPortaddress(portPath);

    std::vector<std::string> slots;
    status = pFsAccess->listDirectory(slotPath, slots); // get list of slot directories from /sys/bus/pci/slots/
    if (status != ZE_RESULT_SUCCESS) {
        return status;
    }

    for (auto &slot : slots) {
        std::string slotAddress;
        status = pFsAccess->read((slotPath + slot + "/address"), slotAddress); // /sys/bus/pci/slots/<slot num>/address
        if (status != ZE_RESULT_SUCCESS) {
            return status;
        }
        if (slotAddress.compare(portAddress) != 0) {
            continue; // not the slot of this device
        }

        status = pFsAccess->write((slotPath + slot + "/power"), "0"); // turn off power
        if (status != ZE_RESULT_SUCCESS) {
            return status;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(100)); // Sleep for 100 milliseconds just to make sure, 1 ms is defined as part of spec
        status = pFsAccess->write((slotPath + slot + "/power"), "1"); // turn on power
        if (status != ZE_RESULT_SUCCESS) {
            return status;
        }
        return initDevice();
    }

    return ZE_RESULT_ERROR_DEVICE_LOST; // in case the reset fails, inform upper layers.
}
|
||||
|
||||
ze_result_t LinuxDiagnosticsImp::osRunDiagTestsinFW(zes_diag_result_t *pResult) {
|
||||
const int intVal = 1;
|
||||
// before running diagnostics need to close all active workloads
|
||||
@@ -209,7 +47,7 @@ ze_result_t LinuxDiagnosticsImp::osRunDiagTestsinFW(zes_diag_result_t *pResult)
|
||||
}
|
||||
for (auto &&pid : processes) {
|
||||
std::vector<int> fds;
|
||||
getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds);
|
||||
pLinuxSysmanImp->getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds);
|
||||
if (pid == myPid) {
|
||||
// L0 is expected to have this file open.
|
||||
// Keep list of fds. Close before unbind.
|
||||
@@ -228,10 +66,11 @@ ze_result_t LinuxDiagnosticsImp::osRunDiagTestsinFW(zes_diag_result_t *pResult)
|
||||
return result;
|
||||
}
|
||||
pFwInterface->fwRunDiagTests(osDiagType, pResult);
|
||||
pLinuxSysmanImp->diagnosticsReset = true;
|
||||
if (*pResult == ZES_DIAG_RESULT_REBOOT_FOR_REPAIR) {
|
||||
return osColdReset();
|
||||
return pLinuxSysmanImp->osColdReset();
|
||||
}
|
||||
return osWarmReset(); // we need to at least do a Warm reset to bring the machine out of wedged state
|
||||
return pLinuxSysmanImp->osWarmReset(); // we need to at least do a Warm reset to bring the machine out of wedged state
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -54,17 +54,4 @@ class LinuxDiagnosticsImp : public OsDiagnostics, NEO::NonCopyableOrMovableClass
|
||||
static const std::string deviceDir;
|
||||
};
|
||||
|
||||
class ExecutionEnvironmentRefCountRestore {
|
||||
public:
|
||||
ExecutionEnvironmentRefCountRestore() = delete;
|
||||
ExecutionEnvironmentRefCountRestore(NEO::ExecutionEnvironment *executionEnvironmentRecevied) {
|
||||
executionEnvironment = executionEnvironmentRecevied;
|
||||
executionEnvironment->incRefInternal();
|
||||
}
|
||||
~ExecutionEnvironmentRefCountRestore() {
|
||||
executionEnvironment->decRefInternal();
|
||||
}
|
||||
NEO::ExecutionEnvironment *executionEnvironment = nullptr;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -101,83 +101,14 @@ void LinuxGlobalOperationsImp::getDriverVersion(char (&driverVersion)[ZES_STRING
|
||||
return;
|
||||
}
|
||||
|
||||
// Collects into deviceFds the file descriptors of process `pid` that refer to
// this device, as judged by pSysfsAccess->isMyDeviceFile(). The output vector
// is cleared up front and remains empty if the process' descriptors cannot be
// listed.
static void getPidFdsForOpenDevice(ProcfsAccess *pProcfsAccess, SysfsAccess *pSysfsAccess, const ::pid_t pid, std::vector<int> &deviceFds) {
    deviceFds.clear();

    std::vector<int> candidateFds;
    const ze_result_t listStatus = pProcfsAccess->getFileDescriptors(pid, candidateFds);
    if (listStatus != ZE_RESULT_SUCCESS) {
        // Process exited. Not an error. Just ignore.
        return;
    }

    for (const int candidate : candidateFds) {
        std::string target;
        if (ZE_RESULT_SUCCESS == pProcfsAccess->getFileName(pid, candidate, target)) {
            if (pSysfsAccess->isMyDeviceFile(target)) {
                deviceFds.push_back(candidate);
            }
        }
        // Otherwise the process closed this file. Not an error. Just ignore.
    }
}
|
||||
|
||||
// Releases the sysman-level handles held for this device, in this order:
// engine, RAS, diagnostics and firmware handles, then the PMT object, the
// firmware-util interface and the local DRM handle.
void LinuxGlobalOperationsImp::releaseSysmanDeviceResources() {
    auto *sysmanDevice = pLinuxSysmanImp->getSysmanDeviceImp();
    sysmanDevice->pEngineHandleContext->releaseEngines();
    sysmanDevice->pRasHandleContext->releaseRasHandles();
    sysmanDevice->pDiagnosticsHandleContext->releaseDiagnosticsHandles();
    sysmanDevice->pFirmwareHandleContext->releaseFwHandles();

    pLinuxSysmanImp->releasePmtObject();
    pLinuxSysmanImp->releaseFwUtilInterface();
    pLinuxSysmanImp->releaseLocalDrmHandle();
}
|
||||
|
||||
void LinuxGlobalOperationsImp::releaseDeviceResources() {
|
||||
releaseSysmanDeviceResources();
|
||||
auto device = static_cast<DeviceImp *>(getDevice());
|
||||
device->releaseResources();
|
||||
executionEnvironment->memoryManager->releaseDeviceSpecificMemResources(rootDeviceIndex);
|
||||
executionEnvironment->releaseRootDeviceEnvironmentResources(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get());
|
||||
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].reset();
|
||||
}
|
||||
|
||||
// Rebuilds the sysman-level state after the device has been re-created:
// refreshes the sub-device handles, re-creates the PMT handles and then
// re-initializes the RAS, engine, diagnostics and firmware handle contexts.
void LinuxGlobalOperationsImp::reInitSysmanDeviceResources() {
    auto *sysmanDevice = pLinuxSysmanImp->getSysmanDeviceImp();

    sysmanDevice->updateSubDeviceHandlesLocally();
    pLinuxSysmanImp->createPmtHandles();

    sysmanDevice->pRasHandleContext->init(sysmanDevice->deviceHandles);
    sysmanDevice->pEngineHandleContext->init();
    sysmanDevice->pDiagnosticsHandleContext->init(sysmanDevice->deviceHandles);
    sysmanDevice->pFirmwareHandleContext->init();
}
|
||||
|
||||
// Re-creates the NEO device for devicePciBdf / rootDeviceIndex inside the saved
// execution environment, re-initializes the existing L0 device object on top of
// it and then rebuilds the sysman resources.
//
// Returns ZE_RESULT_ERROR_DEVICE_LOST when the NEO device cannot be created,
// otherwise the status reported by Device::deviceReinit (previously this status
// was discarded and ZE_RESULT_SUCCESS was returned unconditionally).
ze_result_t LinuxGlobalOperationsImp::initDevice() {
    auto device = static_cast<DeviceImp *>(getDevice());

    auto neoDevice = NEO::DeviceFactory::createDevice(*executionEnvironment, devicePciBdf, rootDeviceIndex);
    if (neoDevice == nullptr) {
        return ZE_RESULT_ERROR_DEVICE_LOST;
    }

    auto driverHandle = static_cast<L0::DriverHandleImp *>(device->getDriverHandle());
    driverHandle->updateRootDeviceBitFields(neoDevice);
    driverHandle->enableRootDeviceDebugger(neoDevice);

    ze_result_t result = ZE_RESULT_SUCCESS;
    Device::deviceReinit(device->getDriverHandle(), device, neoDevice, &result);

    // Sysman resources are rebuilt regardless of the reinit status, as before.
    reInitSysmanDeviceResources();
    return result;
}
|
||||
|
||||
ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
||||
if (!pSysfsAccess->isRootUser()) {
|
||||
return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
|
||||
}
|
||||
std::string resetPath;
|
||||
std::string resetName;
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
|
||||
pSysfsAccess->getRealPath(functionLevelReset, resetPath);
|
||||
// Must run as root. Verify permission to perform reset.
|
||||
result = pFsAccess->canWrite(resetPath);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
pSysfsAccess->getRealPath(deviceDir, resetName);
|
||||
resetName = pFsAccess->getBaseName(resetName);
|
||||
|
||||
::pid_t myPid = pProcfsAccess->myProcessId();
|
||||
std::vector<int> myPidFds;
|
||||
std::vector<::pid_t> processes;
|
||||
@@ -188,7 +119,7 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
||||
}
|
||||
for (auto &&pid : processes) {
|
||||
std::vector<int> fds;
|
||||
getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds);
|
||||
pLinuxSysmanImp->getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds);
|
||||
if (pid == myPid) {
|
||||
// L0 is expected to have this file open.
|
||||
// Keep list of fds. Close before unbind.
|
||||
@@ -204,8 +135,28 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
||||
}
|
||||
}
|
||||
|
||||
pSysfsAccess->getRealPath(deviceDir, resetName);
|
||||
resetName = pFsAccess->getBaseName(resetName);
|
||||
|
||||
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
|
||||
pDevice->getProperties(&deviceProperties);
|
||||
if (!(deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED)) {
|
||||
result = pSysfsAccess->unbindDevice(resetName);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
return pLinuxSysmanImp->osWarmReset();
|
||||
}
|
||||
|
||||
pSysfsAccess->getRealPath(functionLevelReset, resetPath);
|
||||
// Must run as root. Verify permission to perform reset.
|
||||
result = pFsAccess->canWrite(resetPath);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
|
||||
ExecutionEnvironmentRefCountRestore restorer(executionEnvironment);
|
||||
releaseDeviceResources();
|
||||
pLinuxSysmanImp->releaseDeviceResources();
|
||||
for (auto &&fd : myPidFds) {
|
||||
// Close open filedescriptors to the device
|
||||
// before unbinding device.
|
||||
@@ -232,7 +183,7 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
||||
deviceUsingPids.clear();
|
||||
for (auto &&pid : processes) {
|
||||
std::vector<int> fds;
|
||||
getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds);
|
||||
pLinuxSysmanImp->getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds);
|
||||
if (!fds.empty()) {
|
||||
|
||||
// Kill all processes that have the device open.
|
||||
@@ -270,7 +221,7 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
||||
return result;
|
||||
}
|
||||
|
||||
return initDevice();
|
||||
return pLinuxSysmanImp->initDevice();
|
||||
}
|
||||
|
||||
// Processes in the form of clients are present in sysfs like this:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -57,17 +57,4 @@ class LinuxGlobalOperationsImp : public OsGlobalOperations, NEO::NonCopyableOrMo
|
||||
uint32_t rootDeviceIndex = 0u;
|
||||
};
|
||||
|
||||
class ExecutionEnvironmentRefCountRestore {
|
||||
public:
|
||||
ExecutionEnvironmentRefCountRestore() = delete;
|
||||
ExecutionEnvironmentRefCountRestore(NEO::ExecutionEnvironment *executionEnvironmentRecevied) {
|
||||
executionEnvironment = executionEnvironmentRecevied;
|
||||
executionEnvironment->incRefInternal();
|
||||
}
|
||||
~ExecutionEnvironmentRefCountRestore() {
|
||||
executionEnvironment->decRefInternal();
|
||||
}
|
||||
NEO::ExecutionEnvironment *executionEnvironment = nullptr;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -7,12 +7,19 @@
|
||||
|
||||
#include "level_zero/tools/source/sysman/linux/os_sysman_imp.h"
|
||||
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
|
||||
#include "level_zero/core/source/device/device_imp.h"
|
||||
#include "level_zero/tools/source/sysman/linux/fs_access.h"
|
||||
|
||||
#include "sysman/linux/firmware_util/firmware_util.h"
|
||||
|
||||
#include <linux/pci_regs.h>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
const std::string LinuxSysmanImp::deviceDir("device");
|
||||
|
||||
ze_result_t LinuxSysmanImp::init() {
|
||||
pFsAccess = FsAccess::create();
|
||||
DEBUG_BREAK_IF(nullptr == pFsAccess);
|
||||
@@ -145,6 +152,39 @@ std::string LinuxSysmanImp::getPciRootPortDirectoryPath(std::string realPciPath)
|
||||
return realPciPath;
|
||||
}
|
||||
|
||||
// Truncates `path` just before its `level`-th '/' (counted from the start of
// the string, 1-based) and returns the resulting prefix.
//
//   modifyPathOnLevel("/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0", 5)
//       -> "/sys/devices/pci0000:89/0000:89:02.0"
//
// The path is returned unchanged when it contains fewer than `level`
// separators, or when `level` is 0 (previously level 0 underflowed the 8-bit
// counter and could truncate at a meaningless offset on very deep paths).
static std::string modifyPathOnLevel(std::string path, uint8_t level) {
    if (level == 0) {
        return path;
    }

    size_t slashPos = std::string::npos;
    size_t searchFrom = 0;
    for (uint8_t found = 0; found < level; ++found) {
        slashPos = path.find('/', searchFrom);
        if (slashPos == std::string::npos) {
            return path; // not enough levels: leave the path as it is
        }
        searchFrom = slashPos + 1;
    }

    path.resize(slashPos); // keep everything before the level-th '/'
    return path;
}
|
||||
std::string LinuxSysmanImp::getPciRootPortDirectoryPathForReset(std::string realPciPath) {
|
||||
// the rootport is always the first pci folder after the pcie slot.
|
||||
// /sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0
|
||||
// '/sys/devices/pci0000:89/0000:89:02.0/' will always be the same distance.
|
||||
return modifyPathOnLevel(realPciPath, 5);
|
||||
}
|
||||
|
||||
std::string LinuxSysmanImp::getPciCardBusDirectoryPath(std::string realPciPath) {
|
||||
// the cardbus is always the second pci folder after the pcie slot.
|
||||
// /sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0
|
||||
// '/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/' will always be the same distance.
|
||||
return modifyPathOnLevel(realPciPath, 6);
|
||||
}
|
||||
|
||||
PlatformMonitoringTech *LinuxSysmanImp::getPlatformMonitoringTechAccess(uint32_t subDeviceId) {
|
||||
auto subDeviceIdToPmtEntry = mapOfSubDeviceIdToPmtObject.find(subDeviceId);
|
||||
if (subDeviceIdToPmtEntry == mapOfSubDeviceIdToPmtObject.end()) {
|
||||
@@ -194,6 +234,187 @@ LinuxSysmanImp::~LinuxSysmanImp() {
|
||||
releasePmtObject();
|
||||
}
|
||||
|
||||
// Fills deviceFds with those file descriptors of process `pid` whose file name
// is recognized by pSysfsAccess->isMyDeviceFile(). deviceFds is always cleared
// first; it stays empty when the descriptor list cannot be read.
void LinuxSysmanImp::getPidFdsForOpenDevice(ProcfsAccess *pProcfsAccess, SysfsAccess *pSysfsAccess, const ::pid_t pid, std::vector<int> &deviceFds) {
    deviceFds.clear();

    std::vector<int> openFds;
    if (pProcfsAccess->getFileDescriptors(pid, openFds) != ZE_RESULT_SUCCESS) {
        // Process exited. Not an error. Just ignore.
        return;
    }

    for (const int openFd : openFds) {
        std::string fileName;
        // A failed lookup means the process closed this file meanwhile; skip it.
        const bool resolved = (pProcfsAccess->getFileName(pid, openFd, fileName) == ZE_RESULT_SUCCESS);
        if (resolved && pSysfsAccess->isMyDeviceFile(fileName)) {
            deviceFds.push_back(openFd);
        }
    }
}
|
||||
|
||||
void LinuxSysmanImp::releaseSysmanDeviceResources() {
|
||||
getSysmanDeviceImp()->pEngineHandleContext->releaseEngines();
|
||||
getSysmanDeviceImp()->pRasHandleContext->releaseRasHandles();
|
||||
if (!diagnosticsReset) {
|
||||
getSysmanDeviceImp()->pDiagnosticsHandleContext->releaseDiagnosticsHandles();
|
||||
}
|
||||
getSysmanDeviceImp()->pFirmwareHandleContext->releaseFwHandles();
|
||||
releasePmtObject();
|
||||
if (!diagnosticsReset) {
|
||||
releaseFwUtilInterface();
|
||||
}
|
||||
releaseLocalDrmHandle();
|
||||
}
|
||||
|
||||
void LinuxSysmanImp::releaseDeviceResources() {
|
||||
releaseSysmanDeviceResources();
|
||||
auto device = static_cast<DeviceImp *>(getDeviceHandle());
|
||||
executionEnvironment = device->getNEODevice()->getExecutionEnvironment();
|
||||
device->releaseResources();
|
||||
executionEnvironment->memoryManager->releaseDeviceSpecificMemResources(rootDeviceIndex);
|
||||
executionEnvironment->releaseRootDeviceEnvironmentResources(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get());
|
||||
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].reset();
|
||||
}
|
||||
|
||||
void LinuxSysmanImp::reInitSysmanDeviceResources() {
|
||||
getSysmanDeviceImp()->updateSubDeviceHandlesLocally();
|
||||
createPmtHandles();
|
||||
createFwUtilInterface();
|
||||
getSysmanDeviceImp()->pRasHandleContext->init(getSysmanDeviceImp()->deviceHandles);
|
||||
getSysmanDeviceImp()->pEngineHandleContext->init();
|
||||
if (!diagnosticsReset) {
|
||||
getSysmanDeviceImp()->pDiagnosticsHandleContext->init(getSysmanDeviceImp()->deviceHandles);
|
||||
}
|
||||
getSysmanDeviceImp()->pFirmwareHandleContext->init();
|
||||
}
|
||||
|
||||
// Re-creates the NEO device for devicePciBdf / rootDeviceIndex inside the saved
// execution environment, re-initializes the existing L0 device object on top of
// it and then rebuilds the sysman resources.
//
// Returns ZE_RESULT_ERROR_DEVICE_LOST when the NEO device cannot be created,
// otherwise the status reported by Device::deviceReinit (previously this status
// was discarded and ZE_RESULT_SUCCESS was returned unconditionally).
ze_result_t LinuxSysmanImp::initDevice() {
    auto device = static_cast<DeviceImp *>(getDeviceHandle());

    auto neoDevice = NEO::DeviceFactory::createDevice(*executionEnvironment, devicePciBdf, rootDeviceIndex);
    if (neoDevice == nullptr) {
        return ZE_RESULT_ERROR_DEVICE_LOST;
    }

    auto driverHandle = static_cast<L0::DriverHandleImp *>(device->getDriverHandle());
    driverHandle->updateRootDeviceBitFields(neoDevice);
    driverHandle->enableRootDeviceDebugger(neoDevice);

    ze_result_t result = ZE_RESULT_SUCCESS;
    Device::deviceReinit(device->getDriverHandle(), device, neoDevice, &result);

    // Sysman resources are rebuilt regardless of the reinit status, as before.
    reInitSysmanDeviceResources();
    return result;
}
|
||||
|
||||
// A 'warm reset' is a conventional reset that is triggered across a PCI express link.
|
||||
// A warm reset is triggered either when a link is forced into electrical idle or
|
||||
// by sending TS1 and TS2 ordered sets with the hot reset bit set.
|
||||
// Software can initiate a warm reset by setting and then clearing the secondary bus reset bit
|
||||
// in the bridge control register in the PCI configuration space of the bridge port upstream of the device.
|
||||
ze_result_t LinuxSysmanImp::osWarmReset() {
|
||||
std::string rootPortPath;
|
||||
std::string realRootPath;
|
||||
ze_result_t result = pSysfsAccess->getRealPath(deviceDir, realRootPath);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
auto device = static_cast<DeviceImp *>(pDevice);
|
||||
executionEnvironment = device->getNEODevice()->getExecutionEnvironment();
|
||||
devicePciBdf = device->getNEODevice()->getRootDeviceEnvironment().osInterface->getDriverModel()->as<NEO::Drm>()->getPciPath();
|
||||
rootDeviceIndex = device->getNEODevice()->getRootDeviceIndex();
|
||||
|
||||
ExecutionEnvironmentRefCountRestore restorer(executionEnvironment);
|
||||
releaseDeviceResources();
|
||||
|
||||
rootPortPath = getPciRootPortDirectoryPathForReset(realRootPath);
|
||||
|
||||
int fd, ret = 0;
|
||||
unsigned int offset = PCI_BRIDGE_CONTROL; // Bridge control offset in Header of PCI config space
|
||||
unsigned int value = 0x00;
|
||||
unsigned int resetValue = 0x00;
|
||||
std::string configFilePath = rootPortPath + '/' + "config";
|
||||
fd = this->openFunction(configFilePath.c_str(), O_RDWR);
|
||||
if (fd < 0) {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
this->preadFunction(fd, &value, 0x01, offset);
|
||||
resetValue = value | PCI_BRIDGE_CTL_BUS_RESET;
|
||||
this->pwriteFunction(fd, &resetValue, 0x01, offset);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100)); // Sleep for 100 milliseconds just to make sure the change is propagated.
|
||||
this->pwriteFunction(fd, &value, 0x01, offset);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Sleep for 500 milliseconds
|
||||
ret = this->closeFunction(fd);
|
||||
if (ret < 0) {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
std::string cardBusPath;
|
||||
cardBusPath = getPciCardBusDirectoryPath(realRootPath);
|
||||
// write 1 to remove
|
||||
result = pFsAccess->write(cardBusPath + '/' + "remove", "1");
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
|
||||
result = pFsAccess->write(realRootPath + '/' + "rescan", "1");
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
|
||||
return initDevice();
|
||||
}
|
||||
|
||||
// Returns the last component of rootPortPath, i.e. everything after the final
// '/'. For a PCI directory path this is the PCI address of that node. A path
// without any '/' is returned unchanged; a path ending in '/' yields "".
std::string LinuxSysmanImp::getAddressFromPath(std::string &rootPortPath) {
    const size_t lastSlash = rootPortPath.find_last_of('/');
    // npos + 1 wraps to 0, so a slash-less path falls through to substr(0).
    return rootPortPath.substr(lastSlash + 1);
}
|
||||
|
||||
// Cold reset: power-cycles the PCI slot whose address equals the address of the
// device's card bus directory. Device resources are released first; after a
// successful power off/on the device is re-created through initDevice().
// Returns the first failing filesystem status, or ZE_RESULT_ERROR_DEVICE_LOST
// when no slot under /sys/bus/pci/slots/ matches.
ze_result_t LinuxSysmanImp::osColdReset() {
    const std::string slotPath("/sys/bus/pci/slots/"); // one sub-directory per slot

    // Absolute real path (not symlink) of the device,
    // e.g. /sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0
    std::string devicePath;
    ze_result_t status = pSysfsAccess->getRealPath(deviceDir, devicePath);
    if (status != ZE_RESULT_SUCCESS) {
        return status;
    }

    // Cache what initDevice() needs before the device is torn down.
    auto neoDevice = static_cast<DeviceImp *>(pDevice)->getNEODevice();
    executionEnvironment = neoDevice->getExecutionEnvironment();
    devicePciBdf = neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->as<NEO::Drm>()->getPciPath();
    rootDeviceIndex = neoDevice->getRootDeviceIndex();

    // Holds an extra internal reference on the execution environment until return.
    ExecutionEnvironmentRefCountRestore restorer(executionEnvironment);
    releaseDeviceResources();

    // e.g. cardBusPath = /sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0, cardBusAddress = 0000:8a:00.0
    std::string cardBusPath = getPciCardBusDirectoryPath(devicePath);
    const std::string cardBusAddress = getAddressFromPath(cardBusPath);

    std::vector<std::string> slots;
    status = pFsAccess->listDirectory(slotPath, slots); // get list of slot directories from /sys/bus/pci/slots/
    if (status != ZE_RESULT_SUCCESS) {
        return status;
    }

    for (auto &slot : slots) {
        std::string slotAddress;
        status = pFsAccess->read((slotPath + slot + "/address"), slotAddress); // /sys/bus/pci/slots/<slot num>/address
        if (status != ZE_RESULT_SUCCESS) {
            return status;
        }
        if (slotAddress.compare(cardBusAddress) != 0) {
            continue; // not the slot of this device
        }

        status = pFsAccess->write((slotPath + slot + "/power"), "0"); // turn off power
        if (status != ZE_RESULT_SUCCESS) {
            return status;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(100)); // Sleep for 100 milliseconds just to make sure, 1 ms is defined as part of spec
        status = pFsAccess->write((slotPath + slot + "/power"), "1"); // turn on power
        if (status != ZE_RESULT_SUCCESS) {
            return status;
        }
        return initDevice();
    }

    return ZE_RESULT_ERROR_DEVICE_LOST; // in case the reset fails, inform upper layers.
}
|
||||
|
||||
OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) {
|
||||
LinuxSysmanImp *pLinuxSysmanImp = new LinuxSysmanImp(pParentSysmanDeviceImp);
|
||||
return static_cast<OsSysman *>(pLinuxSysmanImp);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -39,12 +39,30 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
|
||||
Device *getDeviceHandle();
|
||||
SysmanDeviceImp *getSysmanDeviceImp();
|
||||
std::string getPciRootPortDirectoryPath(std::string realPciPath);
|
||||
std::string getPciRootPortDirectoryPathForReset(std::string realPciPath);
|
||||
std::string getPciCardBusDirectoryPath(std::string realPciPath);
|
||||
void releasePmtObject();
|
||||
ze_result_t createPmtHandles();
|
||||
void createFwUtilInterface();
|
||||
void releaseFwUtilInterface();
|
||||
void releaseLocalDrmHandle();
|
||||
PRODUCT_FAMILY getProductFamily();
|
||||
void releaseSysmanDeviceResources();
|
||||
void releaseDeviceResources();
|
||||
ze_result_t initDevice();
|
||||
void reInitSysmanDeviceResources();
|
||||
void getPidFdsForOpenDevice(ProcfsAccess *, SysfsAccess *, const ::pid_t, std::vector<int> &);
|
||||
ze_result_t osWarmReset();
|
||||
ze_result_t osColdReset();
|
||||
std::string getAddressFromPath(std::string &rootPortPath);
|
||||
decltype(&NEO::SysCalls::open) openFunction = NEO::SysCalls::open;
|
||||
decltype(&NEO::SysCalls::close) closeFunction = NEO::SysCalls::close;
|
||||
decltype(&NEO::SysCalls::pread) preadFunction = NEO::SysCalls::pread;
|
||||
decltype(&NEO::SysCalls::pwrite) pwriteFunction = NEO::SysCalls::pwrite;
|
||||
std::string devicePciBdf = "";
|
||||
uint32_t rootDeviceIndex = 0u;
|
||||
NEO::ExecutionEnvironment *executionEnvironment = nullptr;
|
||||
bool diagnosticsReset = false;
|
||||
|
||||
protected:
|
||||
FsAccess *pFsAccess = nullptr;
|
||||
@@ -60,6 +78,18 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
|
||||
private:
|
||||
LinuxSysmanImp() = delete;
|
||||
SysmanDeviceImp *pParentSysmanDeviceImp = nullptr;
|
||||
static const std::string deviceDir;
|
||||
};
|
||||
class ExecutionEnvironmentRefCountRestore {
|
||||
public:
|
||||
ExecutionEnvironmentRefCountRestore() = delete;
|
||||
ExecutionEnvironmentRefCountRestore(NEO::ExecutionEnvironment *executionEnvironmentRecevied) {
|
||||
executionEnvironment = executionEnvironmentRecevied;
|
||||
executionEnvironment->incRefInternal();
|
||||
}
|
||||
~ExecutionEnvironmentRefCountRestore() {
|
||||
executionEnvironment->decRefInternal();
|
||||
}
|
||||
NEO::ExecutionEnvironment *executionEnvironment = nullptr;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user