mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
feature: return error when file handles are exhausted in sysman engine
Related-To: NEO-10513 Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a22cefdaed
commit
ba5c6f32b3
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -25,7 +25,9 @@ EngineHandleContext::~EngineHandleContext() {
|
||||
|
||||
void EngineHandleContext::createHandle(zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubdevice) {
|
||||
std::unique_ptr<Engine> pEngine = std::make_unique<EngineImp>(pOsSysman, engineType, engineInstance, subDeviceId, onSubdevice);
|
||||
if (pEngine->initSuccess == true) {
|
||||
// Only store error for all engines in device incase of dependencies unavailable.
|
||||
deviceEngineInitStatus = pEngine->initStatus != ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE ? deviceEngineInitStatus : pEngine->initStatus;
|
||||
if (pEngine->initStatus == ZE_RESULT_SUCCESS) {
|
||||
handleList.push_back(std::move(pEngine));
|
||||
}
|
||||
}
|
||||
@@ -54,6 +56,11 @@ ze_result_t EngineHandleContext::engineGet(uint32_t *pCount, zes_engine_handle_t
|
||||
this->init(pOsSysman->getDeviceHandles());
|
||||
this->engineInitDone = true;
|
||||
});
|
||||
|
||||
if (deviceEngineInitStatus != ZE_RESULT_SUCCESS) {
|
||||
return deviceEngineInitStatus;
|
||||
}
|
||||
|
||||
uint32_t handleListSize = static_cast<uint32_t>(handleList.size());
|
||||
uint32_t numToCopy = std::min(*pCount, handleListSize);
|
||||
if (0 == *pCount || *pCount > handleListSize) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -27,7 +27,7 @@ class Engine : _zes_engine_handle_t {
|
||||
return static_cast<Engine *>(handle);
|
||||
}
|
||||
inline zes_engine_handle_t toHandle() { return this; }
|
||||
bool initSuccess = false;
|
||||
ze_result_t initStatus = ZE_RESULT_SUCCESS;
|
||||
};
|
||||
|
||||
struct EngineHandleContext {
|
||||
@@ -49,6 +49,7 @@ struct EngineHandleContext {
|
||||
void createHandle(zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubdevice);
|
||||
std::once_flag initEngineOnce;
|
||||
bool engineInitDone = false;
|
||||
ze_result_t deviceEngineInitStatus = ZE_RESULT_SUCCESS;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -23,9 +23,9 @@ ze_result_t EngineImp::engineGetProperties(zes_engine_properties_t *pProperties)
|
||||
}
|
||||
|
||||
void EngineImp::init() {
|
||||
if (pOsEngine->isEngineModuleSupported()) {
|
||||
initStatus = pOsEngine->isEngineModuleSupported();
|
||||
if (initStatus == ZE_RESULT_SUCCESS) {
|
||||
pOsEngine->getProperties(engineProperties);
|
||||
this->initSuccess = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -51,9 +51,8 @@ ze_result_t OsEngine::getNumEngineTypeAndInstances(std::set<std::pair<zes_engine
|
||||
}
|
||||
|
||||
ze_result_t LinuxEngineImp::getActivity(zes_engine_stats_t *pStats) {
|
||||
if (fdList.size() == 0) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): No Valid Fds returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
if (initStatus != ZE_RESULT_SUCCESS) {
|
||||
return initStatus;
|
||||
}
|
||||
uint64_t data[2] = {};
|
||||
auto ret = pPmuInterface->pmuRead(static_cast<int>(fdList[0].first), data, sizeof(data));
|
||||
@@ -74,6 +73,16 @@ ze_result_t LinuxEngineImp::getProperties(zes_engine_properties_t &properties) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
void LinuxEngineImp::checkErrorNumberAndUpdateStatus() {
|
||||
if (errno == -EMFILE || errno == -ENFILE) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Engine Handles could not be created because system has run out of file handles. Suggested action is to increase the file handle limit. \n");
|
||||
initStatus = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
|
||||
} else {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():No valid Filedescriptors: Engine Module is not supported \n", __FUNCTION__);
|
||||
initStatus = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
}
|
||||
|
||||
void LinuxEngineImp::init() {
|
||||
auto i915EngineClass = engineToI915Map.find(engineGroup);
|
||||
vfConfigs.clear();
|
||||
@@ -81,6 +90,8 @@ void LinuxEngineImp::init() {
|
||||
auto fd = pPmuInterface->pmuInterfaceOpen(I915_PMU_ENGINE_BUSY(i915EngineClass->second, engineInstance), -1, PERF_FORMAT_TOTAL_TIME_ENABLED);
|
||||
if (fd >= 0) {
|
||||
fdList.push_back(std::make_pair(fd, -1));
|
||||
} else {
|
||||
checkErrorNumberAndUpdateStatus();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,21 +99,21 @@ ze_result_t LinuxEngineImp::getActivityExt(uint32_t *pCount, zes_engine_stats_t
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
LinuxEngineImp::~LinuxEngineImp() {
|
||||
void LinuxEngineImp::cleanup() {
|
||||
for (auto &fdPair : fdList) {
|
||||
if (fdPair.first != -1) {
|
||||
if (fdPair.first >= 0) {
|
||||
close(static_cast<int>(fdPair.first));
|
||||
}
|
||||
}
|
||||
fdList.clear();
|
||||
}
|
||||
|
||||
bool LinuxEngineImp::isEngineModuleSupported() {
|
||||
if (fdList.size() == 0) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():No valid Filedescriptors: Engine Module is not supported \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
// Destructor: hands all teardown work to cleanup().
LinuxEngineImp::~LinuxEngineImp() {
    this->cleanup();
}
|
||||
|
||||
// Returns the status stored in initStatus; ZE_RESULT_SUCCESS is its default value
// and any failure code is whatever was recorded into it beforehand.
ze_result_t LinuxEngineImp::isEngineModuleSupported() {
    return this->initStatus;
}
|
||||
|
||||
LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice) : engineGroup(type), engineInstance(engineInstance), subDeviceId(subDeviceId), onSubDevice(onSubDevice) {
|
||||
@@ -111,6 +122,9 @@ LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uin
|
||||
pDevice = pLinuxSysmanImp->getDeviceHandle();
|
||||
pPmuInterface = pLinuxSysmanImp->getPmuInterface();
|
||||
init();
|
||||
if (initStatus != ZE_RESULT_SUCCESS) {
|
||||
cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<OsEngine> OsEngine::create(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice) {
|
||||
|
||||
@@ -23,10 +23,11 @@ class LinuxEngineImp : public OsEngine, NEO::NonCopyableOrMovableClass {
|
||||
ze_result_t getActivity(zes_engine_stats_t *pStats) override;
|
||||
ze_result_t getActivityExt(uint32_t *pCount, zes_engine_stats_t *pStats) override;
|
||||
ze_result_t getProperties(zes_engine_properties_t &properties) override;
|
||||
bool isEngineModuleSupported() override;
|
||||
ze_result_t isEngineModuleSupported() override;
|
||||
static zes_engine_group_t getGroupFromEngineType(zes_engine_group_t type);
|
||||
LinuxEngineImp() = default;
|
||||
LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice);
|
||||
void cleanup();
|
||||
~LinuxEngineImp() override;
|
||||
|
||||
protected:
|
||||
@@ -39,11 +40,13 @@ class LinuxEngineImp : public OsEngine, NEO::NonCopyableOrMovableClass {
|
||||
ze_bool_t onSubDevice = false;
|
||||
uint32_t numberOfVfs = 0;
|
||||
SysfsAccess *pSysfsAccess = nullptr;
|
||||
void checkErrorNumberAndUpdateStatus();
|
||||
|
||||
private:
|
||||
void init();
|
||||
std::vector<std::pair<int64_t, int64_t>> fdList{};
|
||||
std::vector<std::pair<uint64_t, uint64_t>> vfConfigs{};
|
||||
ze_result_t initStatus = ZE_RESULT_SUCCESS;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -87,10 +87,10 @@ static ze_result_t readBusynessFromGroupFd(PmuInterface *pPmuInterface, std::pai
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
static void openPmuHandlesForVfs(uint32_t numberOfVfs,
|
||||
PmuInterface *pPmuInterface,
|
||||
std::vector<std::pair<uint64_t, uint64_t>> &vfConfigs,
|
||||
std::vector<std::pair<int64_t, int64_t>> &fdList) {
|
||||
static ze_result_t openPmuHandlesForVfs(uint32_t numberOfVfs,
|
||||
PmuInterface *pPmuInterface,
|
||||
std::vector<std::pair<uint64_t, uint64_t>> &vfConfigs,
|
||||
std::vector<std::pair<int64_t, int64_t>> &fdList) {
|
||||
// +1 to include PF
|
||||
for (uint64_t i = 0; i < numberOfVfs + 1; i++) {
|
||||
int64_t fd[2] = {-1, -1};
|
||||
@@ -100,33 +100,53 @@ static void openPmuHandlesForVfs(uint32_t numberOfVfs,
|
||||
if (fd[0] >= 0) {
|
||||
fd[1] = pPmuInterface->pmuInterfaceOpen(vfConfigs[i].second, static_cast<int>(fd[0]),
|
||||
PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
|
||||
if (fd[1] == -1) {
|
||||
if (fd[1] < 0) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Could not open Total Active Ticks PMU Handle \n", __FUNCTION__);
|
||||
close(static_cast<int>(fd[0]));
|
||||
fd[0] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (fd[1] < 0) {
|
||||
if (errno == -EMFILE || errno == -ENFILE) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Engine Handles could not be created because system has run out of file handles. Suggested action is to increase the file handle limit. \n");
|
||||
return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
|
||||
}
|
||||
}
|
||||
|
||||
fdList.push_back(std::make_pair(fd[0], fd[1]));
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Reads the engine activity counters from the global busyness PMU group.
 *
 * @param pStats  output structure handed to readBusynessFromGroupFd().
 * @return initStatus when initialisation did not succeed, otherwise the result of
 *         readBusynessFromGroupFd() on the first entry of fdList.
 */
ze_result_t LinuxEngineImp::getActivity(zes_engine_stats_t *pStats) {

    if (initStatus != ZE_RESULT_SUCCESS) {
        // Handles are not expected to be created in case of init failure
        DEBUG_BREAK_IF(true);
        // The constructor runs cleanup() after a failed init, which empties fdList.
        // If execution continues past the debug break, report the stored failure
        // instead of indexing fdList[0] on an empty vector.
        return initStatus;
    }

    // read from global busyness fd
    return readBusynessFromGroupFd(pPmuInterface, fdList[0], pStats);
}
|
||||
|
||||
LinuxEngineImp::~LinuxEngineImp() {
|
||||
|
||||
void LinuxEngineImp::cleanup() {
|
||||
for (auto &fdPair : fdList) {
|
||||
if (fdPair.first != -1) {
|
||||
if (fdPair.first >= 0) {
|
||||
close(static_cast<int>(fdPair.first));
|
||||
}
|
||||
if (fdPair.second != -1) {
|
||||
if (fdPair.second >= 0) {
|
||||
close(static_cast<int>(fdPair.second));
|
||||
}
|
||||
}
|
||||
fdList.clear();
|
||||
vfConfigs.clear();
|
||||
numberOfVfs = 0;
|
||||
}
|
||||
|
||||
// Destructor: hands all teardown work to cleanup().
LinuxEngineImp::~LinuxEngineImp() {
    this->cleanup();
}
|
||||
|
||||
ze_result_t LinuxEngineImp::getActivityExt(uint32_t *pCount, zes_engine_stats_t *pStats) {
|
||||
@@ -148,8 +168,22 @@ ze_result_t LinuxEngineImp::getActivityExt(uint32_t *pCount, zes_engine_stats_t
|
||||
}
|
||||
|
||||
// Open only if not opened previously
|
||||
// fdList[0] has global busyness.
|
||||
// So check if PF and VF busyness were not updated
|
||||
if (fdList.size() == 1) {
|
||||
openPmuHandlesForVfs(numberOfVfs, pPmuInterface, vfConfigs, fdList);
|
||||
auto status = openPmuHandlesForVfs(numberOfVfs, pPmuInterface, vfConfigs, fdList);
|
||||
if (status != ZE_RESULT_SUCCESS) {
|
||||
// Clean up all vf fds added
|
||||
for (size_t i = 1; i < fdList.size(); i++) {
|
||||
auto &fdPair = fdList[i];
|
||||
if (fdPair.first >= 0) {
|
||||
close(static_cast<int32_t>(fdPair.first));
|
||||
close(static_cast<int32_t>(fdPair.second));
|
||||
}
|
||||
fdList.resize(1);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
*pCount = std::min(*pCount, numberOfVfs + 1);
|
||||
@@ -176,6 +210,15 @@ ze_result_t LinuxEngineImp::getProperties(zes_engine_properties_t &properties) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
void LinuxEngineImp::checkErrorNumberAndUpdateStatus() {
|
||||
if (errno == -EMFILE || errno == -ENFILE) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Engine Handles could not be created because system has run out of file handles. Suggested action is to increase the file handle limit. \n");
|
||||
initStatus = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
|
||||
} else {
|
||||
initStatus = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
}
|
||||
|
||||
void LinuxEngineImp::init() {
|
||||
uint64_t config = UINT64_MAX;
|
||||
switch (engineGroup) {
|
||||
@@ -201,14 +244,16 @@ void LinuxEngineImp::init() {
|
||||
|
||||
// Fds for global busyness
|
||||
fd[0] = pPmuInterface->pmuInterfaceOpen(config, -1, PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
|
||||
if (fd[0] == -1) {
|
||||
if (fd[0] < 0) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Could not open Busy Ticks Handle \n", __FUNCTION__);
|
||||
checkErrorNumberAndUpdateStatus();
|
||||
return;
|
||||
}
|
||||
fd[1] = pPmuInterface->pmuInterfaceOpen(__PRELIM_I915_PMU_TOTAL_ACTIVE_TICKS(subDeviceId), static_cast<int>(fd[0]), PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
|
||||
|
||||
if (fd[1] == -1) {
|
||||
if (fd[1] < 0) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Could not open Total Active Ticks Handle \n", __FUNCTION__);
|
||||
checkErrorNumberAndUpdateStatus();
|
||||
close(static_cast<int>(fd[0]));
|
||||
return;
|
||||
}
|
||||
@@ -230,18 +275,8 @@ void LinuxEngineImp::init() {
|
||||
}
|
||||
}
|
||||
|
||||
bool LinuxEngineImp::isEngineModuleSupported() {
|
||||
|
||||
if (fdList.size() == 0) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): as fileDescriptors could not be opened \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fdList[0].second < 0) {
|
||||
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): as fileDescriptor value = %d Engine Module is not supported \n", __FUNCTION__, fdList[0].second);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
// Returns the status stored in initStatus; ZE_RESULT_SUCCESS is its default value
// and any failure code is whatever was recorded into it beforehand.
ze_result_t LinuxEngineImp::isEngineModuleSupported() {
    return this->initStatus;
}
|
||||
|
||||
LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice) : engineGroup(type), engineInstance(engineInstance), subDeviceId(subDeviceId), onSubDevice(onSubDevice) {
|
||||
@@ -251,6 +286,10 @@ LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uin
|
||||
pPmuInterface = pLinuxSysmanImp->getPmuInterface();
|
||||
pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess();
|
||||
init();
|
||||
|
||||
if (initStatus != ZE_RESULT_SUCCESS) {
|
||||
cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<OsEngine> OsEngine::create(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -22,7 +22,7 @@ class OsEngine {
|
||||
virtual ze_result_t getActivity(zes_engine_stats_t *pStats) = 0;
|
||||
virtual ze_result_t getActivityExt(uint32_t *pCount, zes_engine_stats_t *pStats) = 0;
|
||||
virtual ze_result_t getProperties(zes_engine_properties_t &properties) = 0;
|
||||
virtual bool isEngineModuleSupported() = 0;
|
||||
virtual ze_result_t isEngineModuleSupported() = 0;
|
||||
static std::unique_ptr<OsEngine> create(OsSysman *pOsSysman, zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubdevice);
|
||||
static ze_result_t getNumEngineTypeAndInstances(std::set<std::pair<zes_engine_group_t, EngineInstanceSubDeviceId>> &engineGroupInstance, OsSysman *pOsSysman);
|
||||
virtual ~OsEngine() = default;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -76,8 +76,8 @@ ze_result_t WddmEngineImp::getProperties(zes_engine_properties_t &properties) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
bool WddmEngineImp::isEngineModuleSupported() {
|
||||
return true;
|
||||
ze_result_t WddmEngineImp::isEngineModuleSupported() {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
WddmEngineImp::WddmEngineImp(OsSysman *pOsSysman, zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -18,7 +18,7 @@ class WddmEngineImp : public OsEngine, NEO::NonCopyableOrMovableClass {
|
||||
ze_result_t getActivity(zes_engine_stats_t *pStats) override;
|
||||
ze_result_t getActivityExt(uint32_t *pCount, zes_engine_stats_t *pStats) override;
|
||||
ze_result_t getProperties(zes_engine_properties_t &properties) override;
|
||||
bool isEngineModuleSupported() override;
|
||||
ze_result_t isEngineModuleSupported() override;
|
||||
|
||||
WddmEngineImp() = default;
|
||||
WddmEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId);
|
||||
|
||||
Reference in New Issue
Block a user