mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Fix zesDeviceReset for Spec version 1.
This patch does the following: - Fixes a bug in FsAccess::listDirectory that could return ZE_RESULT_UNKNOWN_ERROR when no error has occurred. - Fixes a bug in zesDeviceReset that would reset the device if force was set to false, even if the device was in use. - Fixes a bug in zesDeviceReset that would reset the device if force was set to false without closing the file descriptor. - Adds a releaseResources method to the Device object. This method does the same thing as the DeviceImp destructor, except that it frees neither the DeviceImp object nor the SysmanDeviceImp object. - Adds the releaseResources method to the Mock<Device> object. - Moves the reset of the debugger out of the DriverHandleImp destructor and into DeviceImp releaseResources. - Adds a releaseEngines method to the EngineHandleContext. This method frees all the Engine handles. - On reset, calls Device->releaseResources and EngineHandleContext->releaseEngines before resetting the device. - Adds a -r (--reset) option to zello_sysman so that resets can be tested easily. With these patches, both L0 Sysman CTS tests for zesDeviceReset pass. Change-Id: I31fad1b27bc5cc6befe31cd6f9319748e2683424
This commit is contained in:
@@ -19,10 +19,7 @@ EngineHandleContext::EngineHandleContext(OsSysman *pOsSysman) {
|
||||
}
|
||||
|
||||
// Destroys the context. All Engine handles are freed through
// releaseEngines(), which deletes every entry of handleList and clears the
// list, so the cleanup logic is not repeated here.
EngineHandleContext::~EngineHandleContext() {
    releaseEngines();
}
|
||||
|
||||
void EngineHandleContext::createHandle(zes_engine_group_t engineType, uint32_t engineInstance) {
|
||||
@@ -41,6 +38,13 @@ void EngineHandleContext::init() {
|
||||
}
|
||||
}
|
||||
|
||||
// Frees every Engine object referenced by handleList and then empties the
// list, leaving the context without any engine handles.
void EngineHandleContext::releaseEngines() {
    for (auto it = handleList.begin(); it != handleList.end(); ++it) {
        delete *it;
    }
    handleList.clear();
}
|
||||
|
||||
ze_result_t EngineHandleContext::engineGet(uint32_t *pCount, zes_engine_handle_t *phEngine) {
|
||||
uint32_t handleListSize = static_cast<uint32_t>(handleList.size());
|
||||
uint32_t numToCopy = std::min(*pCount, handleListSize);
|
||||
|
||||
@@ -35,6 +35,7 @@ struct EngineHandleContext {
|
||||
~EngineHandleContext();
|
||||
|
||||
void init();
|
||||
void releaseEngines();
|
||||
|
||||
ze_result_t engineGet(uint32_t *pCount, zes_engine_handle_t *phEngine);
|
||||
|
||||
|
||||
@@ -7,13 +7,15 @@
|
||||
|
||||
#include "level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.h"
|
||||
|
||||
#include "level_zero/core/source/device/device.h"
|
||||
#include "level_zero/core/source/device/device_imp.h"
|
||||
#include "level_zero/tools/source/sysman/global_operations/global_operations_imp.h"
|
||||
#include "level_zero/tools/source/sysman/linux/fs_access.h"
|
||||
#include "level_zero/tools/source/sysman/sysman_const.h"
|
||||
#include <level_zero/zet_api.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <csignal>
|
||||
#include <time.h>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
@@ -149,21 +151,21 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
||||
std::vector<int> myPidFds;
|
||||
std::vector<::pid_t> processes;
|
||||
|
||||
if (!force) {
|
||||
// If not force, don't reset if any process
|
||||
// has this device open.
|
||||
result = pProcfsAccess->listProcesses(processes);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
for (auto &&pid : processes) {
|
||||
std::vector<int> fds;
|
||||
getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds);
|
||||
if (pid == myPid) {
|
||||
// L0 is expected to have this file open.
|
||||
// Keep list of fds. Close before unbind.
|
||||
myPidFds = fds;
|
||||
} else if (!fds.empty()) {
|
||||
result = pProcfsAccess->listProcesses(processes);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
for (auto &&pid : processes) {
|
||||
std::vector<int> fds;
|
||||
getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds);
|
||||
if (pid == myPid) {
|
||||
// L0 is expected to have this file open.
|
||||
// Keep list of fds. Close before unbind.
|
||||
myPidFds = fds;
|
||||
} else if (!fds.empty()) {
|
||||
if (force) {
|
||||
::kill(pid, SIGKILL);
|
||||
} else {
|
||||
// Device is in use by another process.
|
||||
// Don't reset while in use.
|
||||
return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;
|
||||
@@ -171,6 +173,8 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
||||
}
|
||||
}
|
||||
|
||||
pLinuxSysmanImp->getSysmanDeviceImp()->pEngineHandleContext->releaseEngines();
|
||||
static_cast<DeviceImp *>(getDevice())->releaseResources();
|
||||
for (auto &&fd : myPidFds) {
|
||||
// Close open filedescriptors to the device
|
||||
// before unbinding device.
|
||||
@@ -187,19 +191,38 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// If force is set (or someone opened the device
|
||||
// after we checked) there could be processes
|
||||
// that have the device open. Kill them here.
|
||||
// If someone opened the device
|
||||
// after we check, kill them here.
|
||||
result = pProcfsAccess->listProcesses(processes);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
return result;
|
||||
}
|
||||
std::vector<::pid_t> deviceUsingPids;
|
||||
deviceUsingPids.clear();
|
||||
for (auto &&pid : processes) {
|
||||
std::vector<int> fds;
|
||||
getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds);
|
||||
if (!fds.empty()) {
|
||||
|
||||
// Kill all processes that have the device open.
|
||||
::kill(pid, SIGKILL);
|
||||
deviceUsingPids.push_back(pid);
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for all the processes to exit
|
||||
// If they don't all exit within 10
|
||||
// seconds, just fail reset.
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
for (auto &&pid : deviceUsingPids) {
|
||||
while (pProcfsAccess->isAlive(pid)) {
|
||||
auto end = std::chrono::steady_clock::now();
|
||||
if (std::chrono::duration_cast<std::chrono::seconds>(end - start).count() >= LinuxGlobalOperationsImp::resetTimeout) {
|
||||
return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;
|
||||
}
|
||||
|
||||
struct ::timespec timeout = {.tv_sec = 0, .tv_nsec = 1000};
|
||||
::nanosleep(&timeout, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -35,6 +35,8 @@ class LinuxGlobalOperationsImp : public OsGlobalOperations, NEO::NonCopyableOrMo
|
||||
Device *pDevice = nullptr;
|
||||
|
||||
private:
|
||||
static const int resetTimeout = 10;
|
||||
|
||||
static const std::string deviceDir;
|
||||
static const std::string vendorFile;
|
||||
static const std::string deviceFile;
|
||||
|
||||
@@ -186,6 +186,13 @@ ze_result_t FsAccess::canWrite(const std::string file) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Reports whether `file` is present by probing it with access(F_OK).
// Returns ZE_RESULT_SUCCESS when access() yields 0 and
// ZE_RESULT_ERROR_NOT_AVAILABLE for any other outcome.
ze_result_t FsAccess::exists(const std::string file) {
    const int status = access(file.c_str(), F_OK);
    return (0 == status) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_NOT_AVAILABLE;
}
|
||||
|
||||
ze_result_t FsAccess::getFileMode(const std::string file, ::mode_t &mode) {
|
||||
struct stat sb;
|
||||
if (0 != stat(file.c_str(), &sb)) {
|
||||
@@ -225,19 +232,25 @@ ze_result_t FsAccess::listDirectory(const std::string path, std::vector<std::str
|
||||
return getResult(errno);
|
||||
}
|
||||
struct ::dirent *ent;
|
||||
int err = 0;
|
||||
// readdir doesn't clear errno, so make sure it is clear
|
||||
errno = 0;
|
||||
while (NULL != (ent = ::readdir(procDir))) {
|
||||
// Ignore . and ..
|
||||
std::string name = std::string(ent->d_name);
|
||||
if (!name.compare(".") || !name.compare("..")) {
|
||||
errno = 0;
|
||||
continue;
|
||||
}
|
||||
list.push_back(std::string(ent->d_name));
|
||||
errno = 0;
|
||||
}
|
||||
err = errno;
|
||||
::closedir(procDir);
|
||||
// Check if in above while loop, readdir encountered any error.
|
||||
if ((errno != 0) && (errno != ENOENT)) {
|
||||
if ((err != 0) && (err != ENOENT)) {
|
||||
list.clear();
|
||||
return getResult(errno);
|
||||
return getResult(err);
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -341,6 +354,10 @@ ze_result_t ProcfsAccess::getFileName(const ::pid_t pid, const int fd, std::stri
|
||||
return FsAccess::readSymLink(fullFdPath(pid, fd), val);
|
||||
}
|
||||
|
||||
// Checks for the process by testing whether the path produced by
// fullPath(pid) exists; the ze_result_t from FsAccess::exists is forwarded
// unchanged (ZE_RESULT_SUCCESS when the path exists,
// ZE_RESULT_ERROR_NOT_AVAILABLE otherwise).
// NOTE(review): the result is a status code, not a boolean. Presumably
// ZE_RESULT_SUCCESS is 0, so callers should compare against it explicitly
// rather than test the value for truthiness - confirm against the Level
// Zero headers.
ze_result_t ProcfsAccess::isAlive(const ::pid_t pid) {
    const ze_result_t status = FsAccess::exists(fullPath(pid));
    return status;
}
|
||||
|
||||
// Returns the id of the calling process as reported by getpid().
::pid_t ProcfsAccess::myProcessId() {
    const ::pid_t self = ::getpid();
    return self;
}
|
||||
|
||||
@@ -30,6 +30,7 @@ class FsAccess {
|
||||
|
||||
virtual ze_result_t canRead(const std::string file);
|
||||
virtual ze_result_t canWrite(const std::string file);
|
||||
ze_result_t exists(const std::string file);
|
||||
virtual ze_result_t getFileMode(const std::string file, ::mode_t &mode);
|
||||
|
||||
virtual ze_result_t read(const std::string file, uint64_t &val);
|
||||
@@ -61,6 +62,7 @@ class ProcfsAccess : private FsAccess {
|
||||
::pid_t myProcessId();
|
||||
ze_result_t getFileDescriptors(const ::pid_t pid, std::vector<int> &list);
|
||||
ze_result_t getFileName(const ::pid_t pid, const int fd, std::string &val);
|
||||
ze_result_t isAlive(const ::pid_t pid);
|
||||
|
||||
private:
|
||||
ProcfsAccess() = default;
|
||||
|
||||
@@ -75,6 +75,10 @@ Device *LinuxSysmanImp::getDeviceHandle() {
|
||||
return pDevice;
|
||||
}
|
||||
|
||||
// Accessor for the SysmanDeviceImp pointer held in pParentSysmanDeviceImp.
SysmanDeviceImp *LinuxSysmanImp::getSysmanDeviceImp() {
    return this->pParentSysmanDeviceImp;
}
|
||||
|
||||
PlatformMonitoringTech &LinuxSysmanImp::getPlatformMonitoringTechAccess() {
|
||||
UNRECOVERABLE_IF(nullptr == pPmt);
|
||||
return *pPmt;
|
||||
|
||||
@@ -35,6 +35,7 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
|
||||
NEO::Drm &getDrm();
|
||||
PlatformMonitoringTech &getPlatformMonitoringTechAccess();
|
||||
Device *getDeviceHandle();
|
||||
SysmanDeviceImp *getSysmanDeviceImp();
|
||||
|
||||
protected:
|
||||
XmlParser *pXmlParser = nullptr;
|
||||
|
||||
Reference in New Issue
Block a user