Debug: Add debug logs for global module
Related-To: LOCI-3876 Signed-off-by: Devarinti, Puneeth Kumar Reddy <puneeth.kumar.reddy.devarinti@intel.com>
This commit is contained in:
parent
b909b03b02
commit
239ce79f43
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.h"
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
|
@ -51,12 +52,14 @@ bool LinuxGlobalOperationsImp::getTelemOffsetAndTelemDir(uint64_t &telemOffset,
|
|||
auto pDrm = &pLinuxSysmanImp->getDrm();
|
||||
std::optional<std::string> rootPath = NEO::getPciRootPath(pDrm->getFileDescriptor());
|
||||
if (!rootPath.has_value()) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Root path has no value \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::map<uint32_t, std::string> telemPciPath;
|
||||
NEO::PmtUtil::getTelemNodesInPciPath(rootPath.value(), telemPciPath);
|
||||
if (telemPciPath.size() < pDevice->getNEODevice()->getNumSubDevices() + 1) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): telemPciPath size is less than expected \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -65,11 +68,13 @@ bool LinuxGlobalOperationsImp::getTelemOffsetAndTelemDir(uint64_t &telemOffset,
|
|||
|
||||
std::array<char, NEO::PmtUtil::guidStringSize> guidString = {};
|
||||
if (!NEO::PmtUtil::readGuid(telemDir, guidString)) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to read GUID from %s \n", __FUNCTION__, telemDir.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t offset = ULONG_MAX;
|
||||
if (!NEO::PmtUtil::readOffset(telemDir, offset)) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to read offset from %s\n", __FUNCTION__, telemDir.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -81,6 +86,7 @@ bool LinuxGlobalOperationsImp::getTelemOffsetAndTelemDir(uint64_t &telemOffset,
|
|||
return true;
|
||||
}
|
||||
}
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to find keyOffset in keyOffsetMap \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -88,6 +94,7 @@ bool LinuxGlobalOperationsImp::getSerialNumber(char (&serialNumber)[ZES_STRING_P
|
|||
uint64_t offset = 0;
|
||||
std::string telemDir = {};
|
||||
if (!LinuxGlobalOperationsImp::getTelemOffsetAndTelemDir(offset, "PPIN", telemDir)) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to get telemetry offset and directory for PPIN \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -99,7 +106,7 @@ bool LinuxGlobalOperationsImp::getSerialNumber(char (&serialNumber)[ZES_STRING_P
|
|||
memcpy_s(serialNumber, ZES_STRING_PROPERTY_SIZE, telemDataString.str().c_str(), telemDataString.str().size());
|
||||
return true;
|
||||
}
|
||||
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to read serial number \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -112,6 +119,7 @@ bool LinuxGlobalOperationsImp::getBoardNumber(char (&boardNumber)[ZES_STRING_PRO
|
|||
std::string telemDir = {};
|
||||
constexpr uint32_t boardNumberSize = 32;
|
||||
if (!LinuxGlobalOperationsImp::getTelemOffsetAndTelemDir(offset, "BoardNumber", telemDir)) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to get telemetry offset and directory for BoardNumber \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
std::array<uint8_t, boardNumberSize> value;
|
||||
|
@ -120,7 +128,7 @@ bool LinuxGlobalOperationsImp::getBoardNumber(char (&boardNumber)[ZES_STRING_PRO
|
|||
memcpy_s(boardNumber, ZES_STRING_PROPERTY_SIZE, value.data(), bytesRead);
|
||||
return true;
|
||||
}
|
||||
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to read board number \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -162,10 +170,12 @@ void LinuxGlobalOperationsImp::getDriverVersion(char (&driverVersion)[ZES_STRING
|
|||
ze_result_t result = pFsAccess->read(agamaVersionFile, strVal);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
if (ZE_RESULT_ERROR_NOT_AVAILABLE != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Driver version not available \n", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
result = pFsAccess->read(srcVersionFile, strVal);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): FsAccess->read() failed to read driver version and returning error:0x%x\n", __FUNCTION__, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -175,6 +185,7 @@ void LinuxGlobalOperationsImp::getDriverVersion(char (&driverVersion)[ZES_STRING
|
|||
|
||||
ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
||||
if (!pSysfsAccess->isRootUser()) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Not running as root user and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS);
|
||||
return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS;
|
||||
}
|
||||
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
|
||||
|
@ -193,6 +204,7 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
|||
|
||||
result = pProcfsAccess->listProcesses(processes);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Unable to list processes and returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
for (auto &&pid : processes) {
|
||||
|
@ -208,6 +220,7 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
|||
} else {
|
||||
// Device is in use by another process.
|
||||
// Don't reset while in use.
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Device in use by another process, not resetting and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE);
|
||||
return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;
|
||||
}
|
||||
}
|
||||
|
@ -219,12 +232,14 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
|||
if (!(deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED)) {
|
||||
result = pSysfsAccess->unbindDevice(resetName);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to unbind device and returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
result = pLinuxSysmanImp->osWarmReset();
|
||||
if (ZE_RESULT_SUCCESS == result) {
|
||||
return pLinuxSysmanImp->initDevice();
|
||||
}
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Error during warm reset \n", __FUNCTION__);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -243,6 +258,7 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
|||
// Unbind the device from the kernel driver.
|
||||
result = pSysfsAccess->unbindDevice(resetName);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to unbind device and returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -250,6 +266,7 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
|||
// after we check, kill them here.
|
||||
result = pProcfsAccess->listProcesses(processes);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to list processes and returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
std::vector<::pid_t> deviceUsingPids;
|
||||
|
@ -272,7 +289,7 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
|||
for (auto &&pid : deviceUsingPids) {
|
||||
while (pProcfsAccess->isAlive(pid)) {
|
||||
if (std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() > resetTimeout) {
|
||||
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Timeout reached, device still in use and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE);
|
||||
return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;
|
||||
}
|
||||
struct ::timespec timeout = {.tv_sec = 0, .tv_nsec = 1000};
|
||||
|
@ -284,12 +301,14 @@ ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) {
|
|||
// Reset the device.
|
||||
result = pFsAccess->write(resetPath, "1");
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to reset the device and returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Rebind the device to the kernel driver.
|
||||
result = pSysfsAccess->bindDevice(resetName);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to bind the device to the kernel driver and returning error:0x%x \n", __FUNCTION__, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -326,6 +345,7 @@ ze_result_t LinuxGlobalOperationsImp::scanProcessesState(std::vector<zes_process
|
|||
|
||||
ze_result_t result = pSysfsAccess->scanDirEntries(clientsDir, clientIds);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to scan directory entries and returning error:0x%x \n", __FUNCTION__, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
|
@ -406,6 +426,7 @@ ze_result_t LinuxGlobalOperationsImp::scanProcessesState(std::vector<zes_process
|
|||
result = pSysfsAccess->read(realClientTotalMemoryPath, memSize);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
if (ZE_RESULT_ERROR_NOT_AVAILABLE != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to read memory size from:%s and returning error:0x%x \n", __FUNCTION__, realClientTotalMemoryPath.c_str(), result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -415,6 +436,7 @@ ze_result_t LinuxGlobalOperationsImp::scanProcessesState(std::vector<zes_process
|
|||
result = pSysfsAccess->read(realClientTotalSharedMemoryPath, sharedMemSize);
|
||||
if (ZE_RESULT_SUCCESS != result) {
|
||||
if (ZE_RESULT_ERROR_NOT_AVAILABLE != result) {
|
||||
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to read shared memory size from:%s and returning error:0x%x \n", __FUNCTION__, realClientTotalSharedMemoryPath.c_str(), result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue