feature: return error when file handles are exhausted in sysman engine

Related-To: NEO-10513

Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
Joshua Santosh Ranjan
2024-03-07 07:27:55 +00:00
committed by Compute-Runtime-Automation
parent a22cefdaed
commit ba5c6f32b3
12 changed files with 190 additions and 72 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,9 @@ EngineHandleContext::~EngineHandleContext() {
void EngineHandleContext::createHandle(zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubdevice) {
std::unique_ptr<Engine> pEngine = std::make_unique<EngineImp>(pOsSysman, engineType, engineInstance, subDeviceId, onSubdevice);
if (pEngine->initSuccess == true) {
// Store a device-wide engine error only when dependencies are unavailable.
deviceEngineInitStatus = pEngine->initStatus != ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE ? deviceEngineInitStatus : pEngine->initStatus;
if (pEngine->initStatus == ZE_RESULT_SUCCESS) {
handleList.push_back(std::move(pEngine));
}
}
@@ -54,6 +56,11 @@ ze_result_t EngineHandleContext::engineGet(uint32_t *pCount, zes_engine_handle_t
this->init(pOsSysman->getDeviceHandles());
this->engineInitDone = true;
});
if (deviceEngineInitStatus != ZE_RESULT_SUCCESS) {
return deviceEngineInitStatus;
}
uint32_t handleListSize = static_cast<uint32_t>(handleList.size());
uint32_t numToCopy = std::min(*pCount, handleListSize);
if (0 == *pCount || *pCount > handleListSize) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,7 @@ class Engine : _zes_engine_handle_t {
return static_cast<Engine *>(handle);
}
inline zes_engine_handle_t toHandle() { return this; }
bool initSuccess = false;
ze_result_t initStatus = ZE_RESULT_SUCCESS;
};
struct EngineHandleContext {
@@ -49,6 +49,7 @@ struct EngineHandleContext {
void createHandle(zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubdevice);
std::once_flag initEngineOnce;
bool engineInitDone = false;
ze_result_t deviceEngineInitStatus = ZE_RESULT_SUCCESS;
};
} // namespace L0

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -23,9 +23,9 @@ ze_result_t EngineImp::engineGetProperties(zes_engine_properties_t *pProperties)
}
void EngineImp::init() {
if (pOsEngine->isEngineModuleSupported()) {
initStatus = pOsEngine->isEngineModuleSupported();
if (initStatus == ZE_RESULT_SUCCESS) {
pOsEngine->getProperties(engineProperties);
this->initSuccess = true;
}
}

View File

@@ -51,9 +51,8 @@ ze_result_t OsEngine::getNumEngineTypeAndInstances(std::set<std::pair<zes_engine
}
ze_result_t LinuxEngineImp::getActivity(zes_engine_stats_t *pStats) {
if (fdList.size() == 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): No Valid Fds returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
if (initStatus != ZE_RESULT_SUCCESS) {
return initStatus;
}
uint64_t data[2] = {};
auto ret = pPmuInterface->pmuRead(static_cast<int>(fdList[0].first), data, sizeof(data));
@@ -74,6 +73,16 @@ ze_result_t LinuxEngineImp::getProperties(zes_engine_properties_t &properties) {
return ZE_RESULT_SUCCESS;
}
// Maps the errno observed after a failed PMU open onto initStatus.
//  - EMFILE / ENFILE (file-descriptor limit reached) -> ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE
//  - any other value                                 -> ZE_RESULT_ERROR_UNSUPPORTED_FEATURE
// Must be called right after the failing open, before another call can overwrite errno.
void LinuxEngineImp::checkErrorNumberAndUpdateStatus() {
    // errno error codes are positive, so compare against EMFILE/ENFILE directly;
    // comparing against the negated constants would never match.
    // Capture the value first so that logging below cannot disturb it.
    const int openError = errno;
    if (openError == EMFILE || openError == ENFILE) {
        NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Engine Handles could not be created because system has run out of file handles. Suggested action is to increase the file handle limit. \n");
        initStatus = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
    } else {
        NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():No valid Filedescriptors: Engine Module is not supported \n", __FUNCTION__);
        initStatus = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }
}
void LinuxEngineImp::init() {
auto i915EngineClass = engineToI915Map.find(engineGroup);
vfConfigs.clear();
@@ -81,6 +90,8 @@ void LinuxEngineImp::init() {
auto fd = pPmuInterface->pmuInterfaceOpen(I915_PMU_ENGINE_BUSY(i915EngineClass->second, engineInstance), -1, PERF_FORMAT_TOTAL_TIME_ENABLED);
if (fd >= 0) {
fdList.push_back(std::make_pair(fd, -1));
} else {
checkErrorNumberAndUpdateStatus();
}
}
@@ -88,21 +99,21 @@ ze_result_t LinuxEngineImp::getActivityExt(uint32_t *pCount, zes_engine_stats_t
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
LinuxEngineImp::~LinuxEngineImp() {
void LinuxEngineImp::cleanup() {
for (auto &fdPair : fdList) {
if (fdPair.first != -1) {
if (fdPair.first >= 0) {
close(static_cast<int>(fdPair.first));
}
}
fdList.clear();
}
bool LinuxEngineImp::isEngineModuleSupported() {
if (fdList.size() == 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():No valid Filedescriptors: Engine Module is not supported \n", __FUNCTION__);
return false;
}
return true;
// Destructor: all teardown work is delegated to cleanup().
LinuxEngineImp::~LinuxEngineImp() {
    this->cleanup();
}
// Reports the status recorded in initStatus; ZE_RESULT_SUCCESS means the
// engine module is supported, any other value is returned to the caller as-is.
ze_result_t LinuxEngineImp::isEngineModuleSupported() {
    const ze_result_t recordedStatus = this->initStatus;
    return recordedStatus;
}
LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice) : engineGroup(type), engineInstance(engineInstance), subDeviceId(subDeviceId), onSubDevice(onSubDevice) {
@@ -111,6 +122,9 @@ LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uin
pDevice = pLinuxSysmanImp->getDeviceHandle();
pPmuInterface = pLinuxSysmanImp->getPmuInterface();
init();
if (initStatus != ZE_RESULT_SUCCESS) {
cleanup();
}
}
std::unique_ptr<OsEngine> OsEngine::create(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice) {

View File

@@ -23,10 +23,11 @@ class LinuxEngineImp : public OsEngine, NEO::NonCopyableOrMovableClass {
ze_result_t getActivity(zes_engine_stats_t *pStats) override;
ze_result_t getActivityExt(uint32_t *pCount, zes_engine_stats_t *pStats) override;
ze_result_t getProperties(zes_engine_properties_t &properties) override;
bool isEngineModuleSupported() override;
ze_result_t isEngineModuleSupported() override;
static zes_engine_group_t getGroupFromEngineType(zes_engine_group_t type);
LinuxEngineImp() = default;
LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice);
void cleanup();
~LinuxEngineImp() override;
protected:
@@ -39,11 +40,13 @@ class LinuxEngineImp : public OsEngine, NEO::NonCopyableOrMovableClass {
ze_bool_t onSubDevice = false;
uint32_t numberOfVfs = 0;
SysfsAccess *pSysfsAccess = nullptr;
void checkErrorNumberAndUpdateStatus();
private:
void init();
std::vector<std::pair<int64_t, int64_t>> fdList{};
std::vector<std::pair<uint64_t, uint64_t>> vfConfigs{};
ze_result_t initStatus = ZE_RESULT_SUCCESS;
};
} // namespace L0

View File

@@ -87,10 +87,10 @@ static ze_result_t readBusynessFromGroupFd(PmuInterface *pPmuInterface, std::pai
return ZE_RESULT_SUCCESS;
}
static void openPmuHandlesForVfs(uint32_t numberOfVfs,
PmuInterface *pPmuInterface,
std::vector<std::pair<uint64_t, uint64_t>> &vfConfigs,
std::vector<std::pair<int64_t, int64_t>> &fdList) {
static ze_result_t openPmuHandlesForVfs(uint32_t numberOfVfs,
PmuInterface *pPmuInterface,
std::vector<std::pair<uint64_t, uint64_t>> &vfConfigs,
std::vector<std::pair<int64_t, int64_t>> &fdList) {
// +1 to include PF
for (uint64_t i = 0; i < numberOfVfs + 1; i++) {
int64_t fd[2] = {-1, -1};
@@ -100,33 +100,53 @@ static void openPmuHandlesForVfs(uint32_t numberOfVfs,
if (fd[0] >= 0) {
fd[1] = pPmuInterface->pmuInterfaceOpen(vfConfigs[i].second, static_cast<int>(fd[0]),
PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
if (fd[1] == -1) {
if (fd[1] < 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Could not open Total Active Ticks PMU Handle \n", __FUNCTION__);
close(static_cast<int>(fd[0]));
fd[0] = -1;
}
}
if (fd[1] < 0) {
if (errno == -EMFILE || errno == -ENFILE) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Engine Handles could not be created because system has run out of file handles. Suggested action is to increase the file handle limit. \n");
return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
}
}
fdList.push_back(std::make_pair(fd[0], fd[1]));
}
return ZE_RESULT_SUCCESS;
}
// Reads the global busyness counters (held in fdList[0]) into pStats.
// Returns the result of readBusynessFromGroupFd(), or initStatus when
// initialization did not succeed.
ze_result_t LinuxEngineImp::getActivity(zes_engine_stats_t *pStats) {
    if (initStatus != ZE_RESULT_SUCCESS) {
        // Handles are not expected to be created in case of init failure
        DEBUG_BREAK_IF(true);
        // The constructor runs cleanup() on init failure, which empties fdList.
        // If DEBUG_BREAK_IF does not halt execution (presumably a no-op in
        // release builds -- confirm), indexing fdList[0] below would be out of
        // bounds, so report the stored failure instead.
        return initStatus;
    }
    // read from global busyness fd
    return readBusynessFromGroupFd(pPmuInterface, fdList[0], pStats);
}
LinuxEngineImp::~LinuxEngineImp() {
void LinuxEngineImp::cleanup() {
for (auto &fdPair : fdList) {
if (fdPair.first != -1) {
if (fdPair.first >= 0) {
close(static_cast<int>(fdPair.first));
}
if (fdPair.second != -1) {
if (fdPair.second >= 0) {
close(static_cast<int>(fdPair.second));
}
}
fdList.clear();
vfConfigs.clear();
numberOfVfs = 0;
}
// Destructor: all teardown work is delegated to cleanup().
LinuxEngineImp::~LinuxEngineImp() {
    this->cleanup();
}
ze_result_t LinuxEngineImp::getActivityExt(uint32_t *pCount, zes_engine_stats_t *pStats) {
@@ -148,8 +168,22 @@ ze_result_t LinuxEngineImp::getActivityExt(uint32_t *pCount, zes_engine_stats_t
}
// Open only if not opened previously
// fdList[0] has global busyness.
// So check if PF and VF busyness were not updated
if (fdList.size() == 1) {
openPmuHandlesForVfs(numberOfVfs, pPmuInterface, vfConfigs, fdList);
auto status = openPmuHandlesForVfs(numberOfVfs, pPmuInterface, vfConfigs, fdList);
if (status != ZE_RESULT_SUCCESS) {
// Clean up all vf fds added
for (size_t i = 1; i < fdList.size(); i++) {
auto &fdPair = fdList[i];
if (fdPair.first >= 0) {
close(static_cast<int32_t>(fdPair.first));
close(static_cast<int32_t>(fdPair.second));
}
fdList.resize(1);
}
return status;
}
}
*pCount = std::min(*pCount, numberOfVfs + 1);
@@ -176,6 +210,15 @@ ze_result_t LinuxEngineImp::getProperties(zes_engine_properties_t &properties) {
return ZE_RESULT_SUCCESS;
}
// Maps the errno observed after a failed PMU open onto initStatus.
//  - EMFILE / ENFILE (file-descriptor limit reached) -> ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE
//  - any other value                                 -> ZE_RESULT_ERROR_UNSUPPORTED_FEATURE
void LinuxEngineImp::checkErrorNumberAndUpdateStatus() {
    // errno error codes are positive, so compare against EMFILE/ENFILE directly;
    // comparing against the negated constants would never match.
    // Capture the value first so that logging below cannot disturb it.
    const int openError = errno;
    if (openError == EMFILE || openError == ENFILE) {
        NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Engine Handles could not be created because system has run out of file handles. Suggested action is to increase the file handle limit. \n");
        initStatus = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
    } else {
        initStatus = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }
}
void LinuxEngineImp::init() {
uint64_t config = UINT64_MAX;
switch (engineGroup) {
@@ -201,14 +244,16 @@ void LinuxEngineImp::init() {
// Fds for global busyness
fd[0] = pPmuInterface->pmuInterfaceOpen(config, -1, PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
if (fd[0] == -1) {
if (fd[0] < 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Could not open Busy Ticks Handle \n", __FUNCTION__);
checkErrorNumberAndUpdateStatus();
return;
}
fd[1] = pPmuInterface->pmuInterfaceOpen(__PRELIM_I915_PMU_TOTAL_ACTIVE_TICKS(subDeviceId), static_cast<int>(fd[0]), PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
if (fd[1] == -1) {
if (fd[1] < 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Could not open Total Active Ticks Handle \n", __FUNCTION__);
checkErrorNumberAndUpdateStatus();
close(static_cast<int>(fd[0]));
return;
}
@@ -230,18 +275,8 @@ void LinuxEngineImp::init() {
}
}
bool LinuxEngineImp::isEngineModuleSupported() {
if (fdList.size() == 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): as fileDescriptors could not be opened \n", __FUNCTION__);
return false;
}
if (fdList[0].second < 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): as fileDescriptor value = %d Engine Module is not supported \n", __FUNCTION__, fdList[0].second);
return false;
}
return true;
// Reports the status recorded in initStatus; ZE_RESULT_SUCCESS means the
// engine module is supported, any other value is returned to the caller as-is.
ze_result_t LinuxEngineImp::isEngineModuleSupported() {
    const ze_result_t recordedStatus = this->initStatus;
    return recordedStatus;
}
LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice) : engineGroup(type), engineInstance(engineInstance), subDeviceId(subDeviceId), onSubDevice(onSubDevice) {
@@ -251,6 +286,10 @@ LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uin
pPmuInterface = pLinuxSysmanImp->getPmuInterface();
pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess();
init();
if (initStatus != ZE_RESULT_SUCCESS) {
cleanup();
}
}
std::unique_ptr<OsEngine> OsEngine::create(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubDevice) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,7 @@ class OsEngine {
virtual ze_result_t getActivity(zes_engine_stats_t *pStats) = 0;
virtual ze_result_t getActivityExt(uint32_t *pCount, zes_engine_stats_t *pStats) = 0;
virtual ze_result_t getProperties(zes_engine_properties_t &properties) = 0;
virtual bool isEngineModuleSupported() = 0;
virtual ze_result_t isEngineModuleSupported() = 0;
static std::unique_ptr<OsEngine> create(OsSysman *pOsSysman, zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId, ze_bool_t onSubdevice);
static ze_result_t getNumEngineTypeAndInstances(std::set<std::pair<zes_engine_group_t, EngineInstanceSubDeviceId>> &engineGroupInstance, OsSysman *pOsSysman);
virtual ~OsEngine() = default;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -76,8 +76,8 @@ ze_result_t WddmEngineImp::getProperties(zes_engine_properties_t &properties) {
return ZE_RESULT_SUCCESS;
}
bool WddmEngineImp::isEngineModuleSupported() {
return true;
// Unconditionally reports ZE_RESULT_SUCCESS for the WDDM engine implementation.
ze_result_t WddmEngineImp::isEngineModuleSupported() {
    const ze_result_t supportStatus = ZE_RESULT_SUCCESS;
    return supportStatus;
}
WddmEngineImp::WddmEngineImp(OsSysman *pOsSysman, zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -18,7 +18,7 @@ class WddmEngineImp : public OsEngine, NEO::NonCopyableOrMovableClass {
ze_result_t getActivity(zes_engine_stats_t *pStats) override;
ze_result_t getActivityExt(uint32_t *pCount, zes_engine_stats_t *pStats) override;
ze_result_t getProperties(zes_engine_properties_t &properties) override;
bool isEngineModuleSupported() override;
ze_result_t isEngineModuleSupported() override;
WddmEngineImp() = default;
WddmEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId);