feature: supports core device handle with zesInit

Related-To: NEO-13805

Signed-off-by: Kulkarni, Ashwin Kumar <ashwin.kumar.kulkarni@intel.com>
This commit is contained in:
Kulkarni, Ashwin Kumar
2025-03-11 11:30:31 +00:00
committed by Compute-Runtime-Automation
parent 83637404bf
commit f52c81c0e4
17 changed files with 536 additions and 5 deletions

View File

@@ -24,6 +24,7 @@ struct OsSysman {
static OsSysman *create(SysmanDeviceImp *pSysmanImp);
virtual uint32_t getSubDeviceCount() = 0;
virtual const NEO::HardwareInfo &getHardwareInfo() const = 0;
// Clears deviceUuids and fills it with one UUID string for every device/sub-device
// index for which the OS-specific implementation can produce a valid UUID.
virtual void getDeviceUuids(std::vector<std::string> &deviceUuids) = 0;
};
} // namespace Sysman

View File

@@ -34,8 +34,7 @@ SysmanDevice *SysmanDevice::fromHandle(zes_device_handle_t handle) {
return nullptr;
}
if (std::find(globalSysmanDriver->sysmanDevices.begin(), globalSysmanDriver->sysmanDevices.end(), sysmanDevice) == globalSysmanDriver->sysmanDevices.end()) {
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "SysmanDevice::fromHandle: Device not found in sysmanDevices list%s\n", "");
return nullptr;
return globalSysmanDriver->getSysmanDeviceFromCoreDeviceHandle(handle);
}
return sysmanDevice;
}

View File

@@ -138,6 +138,7 @@ struct SysmanDevice : _ze_device_handle_t {
static ze_result_t deviceEnumEnabledVF(zes_device_handle_t hDevice, uint32_t *pCount, zes_vf_handle_t *phVFhandle);
virtual OsSysman *deviceGetOsInterface() = 0;
// Reports the UUID strings that identify this sysman device through deviceUuids.
// SysmanDeviceImp implements this by forwarding to its OsSysman backend.
virtual void getDeviceUuids(std::vector<std::string> &deviceUuids) = 0;
};
} // namespace Sysman

View File

@@ -211,6 +211,10 @@ ze_result_t SysmanDeviceImp::fabricPortGetMultiPortThroughput(uint32_t numPorts,
return pFabricPortHandleContext->fabricPortGetMultiPortThroughput(numPorts, phPort, pThroughput);
}
// Forwards the UUID query to the OS-specific sysman backend, which fills deviceUuids.
void SysmanDeviceImp::getDeviceUuids(std::vector<std::string> &deviceUuids) {
    pOsSysman->getDeviceUuids(deviceUuids);
}
// Returns the OS-specific sysman interface pointer held by this device.
OsSysman *SysmanDeviceImp::deviceGetOsInterface() {
    return this->pOsSysman;
}

View File

@@ -93,8 +93,8 @@ struct SysmanDeviceImp : SysmanDevice, NEO::NonCopyableAndNonMovableClass {
bool deviceEventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) override;
ze_result_t fabricPortGetMultiPortThroughput(uint32_t numPorts, zes_fabric_port_handle_t *phPort, zes_fabric_port_throughput_t **pThroughput) override;
ze_result_t deviceEnumEnabledVF(uint32_t *pCount, zes_vf_handle_t *phVFhandle) override;
OsSysman *deviceGetOsInterface() override;
void getDeviceUuids(std::vector<std::string> &deviceUuids) override;
private:
NEO::ExecutionEnvironment *executionEnvironment = nullptr;

View File

@@ -28,11 +28,55 @@ struct SysmanDriverHandleImp *globalSysmanDriver;
SysmanDriverHandleImp::SysmanDriverHandleImp() = default;
// Registers sysmanDevice in uuidDeviceMap under every UUID string it reports.
// An entry already stored for the same UUID string is overwritten.
void SysmanDriverHandleImp::updateUuidMap(SysmanDevice *sysmanDevice) {
    std::vector<std::string> reportedUuids;
    sysmanDevice->getDeviceUuids(reportedUuids);
    for (const auto &key : reportedUuids) {
        uuidDeviceMap[key] = sysmanDevice;
    }
}
// Cache lookup: returns the sysman device previously associated with handle in
// coreToSysmanDeviceMap, or nullptr when the handle has no entry.
// This function does not lock coreToSysmanDeviceMapLock itself.
SysmanDevice *SysmanDriverHandleImp::findSysmanDeviceFromCoreToSysmanDeviceMap(ze_device_handle_t handle) {
    const auto entry = coreToSysmanDeviceMap.find(handle);
    return (entry == coreToSysmanDeviceMap.end()) ? nullptr : entry->second;
}
// Maps a core (ze) device handle to the sysman device registered for the same UUID.
//
// Resolution order:
//   1. coreToSysmanDeviceMap, a cache of handles resolved by earlier calls;
//   2. the UUID reported by the core device, looked up in uuidDeviceMap.
// A successful UUID lookup is stored in the cache before returning.
//
// Returns nullptr for a null handle, or when no sysman device is registered under
// the core device's UUID key. The whole call runs under coreToSysmanDeviceMapLock.
SysmanDevice *SysmanDriverHandleImp::getSysmanDeviceFromCoreDeviceHandle(ze_device_handle_t hDevice) {
    const std::lock_guard<std::mutex> lock(this->coreToSysmanDeviceMapLock);
    if (hDevice == nullptr) {
        return nullptr;
    }
    SysmanDevice *sysmanDevice = findSysmanDeviceFromCoreToSysmanDeviceMap(hDevice);
    if (sysmanDevice != nullptr) {
        return sysmanDevice;
    }
    ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    Device::fromHandle(hDevice)->getProperties(&deviceProperties);

    // uuid.id is a fixed-size byte array with no guaranteed NUL terminator, so the
    // read is bounded to the array instead of using the C-string constructor (which
    // would run past the array when no byte is zero). The key is then cut at the
    // first zero byte so it has the same form as the keys stored in uuidDeviceMap.
    // NOTE(review): keys therefore ignore every byte after the first zero byte, so
    // two devices whose UUIDs share that prefix get the same key. Using all bytes
    // requires changing the code that fills uuidDeviceMap at the same time.
    std::string uuid(reinterpret_cast<char const *>(deviceProperties.uuid.id), sizeof(deviceProperties.uuid.id));
    const auto terminatorPos = uuid.find('\0');
    if (terminatorPos != std::string::npos) {
        uuid.resize(terminatorPos);
    }

    auto it = uuidDeviceMap.find(uuid);
    if (it == uuidDeviceMap.end()) {
        PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "SysmanDriverHandleImp::getSysmanDeviceFromCoreDeviceHandle() - sysman device handle equivalent to core device handle not found!! %s\n", "");
        return nullptr;
    }
    sysmanDevice = it->second;
    // Remember the association so later calls for this handle skip the UUID query.
    coreToSysmanDeviceMap[hDevice] = sysmanDevice;
    return sysmanDevice;
}
ze_result_t SysmanDriverHandleImp::initialize(NEO::ExecutionEnvironment &executionEnvironment) {
for (uint32_t rootDeviceIndex = 0u; rootDeviceIndex < executionEnvironment.rootDeviceEnvironments.size(); rootDeviceIndex++) {
auto pSysmanDevice = SysmanDevice::create(executionEnvironment, rootDeviceIndex);
if (pSysmanDevice != nullptr) {
this->sysmanDevices.push_back(pSysmanDevice);
updateUuidMap(pSysmanDevice);
}
}

View File

@@ -8,6 +8,7 @@
#pragma once
#include "level_zero/sysman/source/driver/sysman_driver_handle.h"
#include <mutex>
#include <string>
#include <unordered_map>
@@ -29,6 +30,16 @@ struct SysmanDriverHandleImp : SysmanDriverHandle {
uint32_t numDevices = 0;
ze_result_t getExtensionFunctionAddress(const char *pFuncName, void **pfunc) override;
struct OsSysmanDriver *pOsSysmanDriver = nullptr;
// Resolves a core device handle to the matching sysman device (handle cache first,
// then UUID lookup); returns nullptr when no match is found.
SysmanDevice *getSysmanDeviceFromCoreDeviceHandle(ze_device_handle_t hDevice);
private:
// Adds an entry to uuidDeviceMap for every UUID string reported by sysmanDevice.
void updateUuidMap(SysmanDevice *sysmanDevice);
// Cache lookup only; returns nullptr when handle is not in coreToSysmanDeviceMap.
SysmanDevice *findSysmanDeviceFromCoreToSysmanDeviceMap(ze_device_handle_t handle);
// Held by getSysmanDeviceFromCoreDeviceHandle for the duration of each call.
std::mutex coreToSysmanDeviceMapLock;
// Core device handle -> sysman device; filled on successful UUID lookups.
std::unordered_map<ze_device_handle_t, SysmanDevice *> coreToSysmanDeviceMap;
protected:
// UUID string -> sysman device; populated by updateUuidMap.
std::unordered_map<std::string, SysmanDevice *> uuidDeviceMap;
};
extern struct SysmanDriverHandleImp *globalSysmanDriver;

View File

@@ -496,6 +496,100 @@ bool LinuxSysmanImp::getTelemData(uint32_t subDeviceId, std::string &telemDir, s
return true;
}
// Collects the UUID strings for this device.
//
// deviceUuids is cleared first. getUuidFromSubDeviceInfo is then queried for
// getSubDeviceCount() + 1 indices (the sub-devices plus one root-device slot), and
// one string is appended for every index that reports a valid UUID.
// Each string holds the UUID bytes up to, but not including, the first zero byte.
void LinuxSysmanImp::getDeviceUuids(std::vector<std::string> &deviceUuids) {
    constexpr uint32_t rootDeviceCount = 1;
    const uint32_t totalUuidCountForDevice = this->getSubDeviceCount() + rootDeviceCount;
    deviceUuids.clear();
    for (uint32_t index = 0; index < totalUuidCountForDevice; index++) {
        std::array<uint8_t, NEO::ProductHelper::uuidSize> deviceUuid{};
        if (!this->getUuidFromSubDeviceInfo(index, deviceUuid)) {
            continue;
        }
        uint8_t uuid[ZE_MAX_DEVICE_UUID_SIZE] = {};
        std::copy_n(std::begin(deviceUuid), ZE_MAX_DEVICE_UUID_SIZE, std::begin(uuid));
        // The buffer is completely overwritten by the copy and carries no NUL
        // terminator, so the read is bounded to the buffer instead of using the
        // C-string constructor (which would run past the end when no byte is zero).
        // The string is then cut at the first zero byte, which keeps the key format
        // unchanged for UUIDs that do contain one.
        // NOTE(review): bytes after the first zero byte are dropped, so UUIDs that
        // differ only after it yield the same string. Keeping all bytes requires
        // changing the code that looks these strings up at the same time.
        std::string uuidString(reinterpret_cast<char const *>(uuid), sizeof(uuid));
        const auto terminatorPos = uuidString.find('\0');
        if (terminatorPos != std::string::npos) {
            uuidString.resize(terminatorPos);
        }
        deviceUuids.push_back(uuidString);
    }
}
// Builds a device UUID from the hardware IDs, the PCI location and the sub-device index.
//
// Returns false and leaves uuid untouched when pciBusInfo has no valid PCI domain;
// otherwise overwrites uuid with the packed identifier and returns true.
bool LinuxSysmanImp::generateUuidFromPciAndSubDeviceInfo(uint32_t subDeviceID, const NEO::PhysicalDevicePciBusInfo &pciBusInfo, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid) {
    if (pciBusInfo.pciDomain == NEO::PhysicalDevicePciBusInfo::invalidValue) {
        return false;
    }
    uuid.fill(0);

    // A device UUID uniquely identifies a device within a system. Combining the
    // device information with its PCI location keeps UUIDs unique even when several
    // identical Intel GPUs are installed.
    //
    // The UUID has to match across GPU APIs, including ones outside the
    // compute_runtime project (i.e. other than L0 or OCL). This layout has been
    // agreed upon by various Intel driver teams.
    //
    // Consult other driver teams before changing this.
    struct DeviceUUID {
        uint16_t vendorID;
        uint16_t deviceID;
        uint16_t revisionID;
        uint16_t pciDomain;
        uint8_t pciBus;
        uint8_t pciDev;
        uint8_t pciFunc;
        uint8_t reserved[4];
        uint8_t subDeviceID;
    };
    static_assert(sizeof(DeviceUUID) == NEO::ProductHelper::uuidSize);

    const auto &hardwareInfo = getParentSysmanDeviceImp()->getHardwareInfo();

    DeviceUUID packedUuid = {};
    packedUuid.vendorID = 0x8086; // Intel
    packedUuid.deviceID = hardwareInfo.platform.usDeviceID;
    packedUuid.revisionID = hardwareInfo.platform.usRevId;
    packedUuid.pciDomain = static_cast<uint16_t>(pciBusInfo.pciDomain);
    packedUuid.pciBus = static_cast<uint8_t>(pciBusInfo.pciBus);
    packedUuid.pciDev = static_cast<uint8_t>(pciBusInfo.pciDevice);
    packedUuid.pciFunc = static_cast<uint8_t>(pciBusInfo.pciFunction);
    packedUuid.subDeviceID = static_cast<uint8_t>(subDeviceID);

    memcpy_s(uuid.data(), NEO::ProductHelper::uuidSize, &packedUuid, sizeof(DeviceUUID));
    return true;
}
// Returns the UUID for index subDeviceID through uuid, caching results in uuidVec.
// Returns true when a valid UUID is available for that index; uuid is written only
// in that case. When the root device environment has no OS interface, nothing is
// generated and the currently cached validity is returned.
// NOTE(review): subDeviceID indexes uuidVec without a bounds check; the visible
// caller passes values below getSubDeviceCount() + 1, which is the size uuidVec
// gets on first use.
bool LinuxSysmanImp::getUuidFromSubDeviceInfo(uint32_t subDeviceID, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid) {
auto subDeviceCount = getSubDeviceCount();
// Size the cache on first use: one slot per sub-device plus one for the root device.
if (uuidVec.size() == 0) {
constexpr uint32_t rootDeviceCount = 1;
uuidVec.resize(subDeviceCount + rootDeviceCount);
}
if (getParentSysmanDeviceImp()->getRootDeviceEnvironment().osInterface != nullptr) {
auto driverModel = getParentSysmanDeviceImp()->getRootDeviceEnvironment().osInterface->getDriverModel();
auto &gfxCoreHelper = getParentSysmanDeviceImp()->getRootDeviceEnvironment().getHelper<NEO::GfxCoreHelper>();
auto &productHelper = getParentSysmanDeviceImp()->getRootDeviceEnvironment().getHelper<NEO::ProductHelper>();
// Preferred source: the product helper's UUID, tried when the EnableChipsetUniqueUUID
// debug flag is non-zero and the core helper reports support.
// NOTE(review): this path runs on every call, even when a UUID is already cached,
// and overwrites the cached validity with the result of getUuid.
if (NEO::debugManager.flags.EnableChipsetUniqueUUID.get() != 0) {
if (gfxCoreHelper.isChipsetUniqueUUIDSupported()) {
// NOTE(review): hwDeviceId is not referenced again in this function; presumably
// it keeps the device handle usable while getUuid runs — confirm before removing.
auto hwDeviceId = getSysmanHwDeviceIdInstance();
this->uuidVec[subDeviceID].isValid = productHelper.getUuid(driverModel, subDeviceCount, subDeviceID, this->uuidVec[subDeviceID].id);
}
}
// Fallback: derive the UUID from the PCI bus info and the sub-device index.
if (!this->uuidVec[subDeviceID].isValid) {
NEO::PhysicalDevicePciBusInfo pciBusInfo = driverModel->getPciBusInfo();
this->uuidVec[subDeviceID].isValid = generateUuidFromPciAndSubDeviceInfo(subDeviceID, pciBusInfo, this->uuidVec[subDeviceID].id);
}
if (this->uuidVec[subDeviceID].isValid) {
uuid = this->uuidVec[subDeviceID].id;
}
}
return this->uuidVec[subDeviceID].isValid;
}
OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) {
LinuxSysmanImp *pLinuxSysmanImp = new LinuxSysmanImp(pParentSysmanDeviceImp);
return static_cast<OsSysman *>(pLinuxSysmanImp);

View File

@@ -50,6 +50,7 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableAndNonMovableClass {
SysmanDeviceImp *getSysmanDeviceImp();
SysmanProductHelper *getSysmanProductHelper();
uint32_t getSubDeviceCount() override;
void getDeviceUuids(std::vector<std::string> &deviceUuids) override;
const NEO::HardwareInfo &getHardwareInfo() const override { return pParentSysmanDeviceImp->getHardwareInfo(); }
std::string getPciCardBusDirectoryPath(std::string realPciPath);
uint32_t getMemoryType();
@@ -78,6 +79,8 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableAndNonMovableClass {
SysmanKmdInterface *getSysmanKmdInterface() { return pSysmanKmdInterface.get(); }
static ze_result_t getResult(int err);
bool getTelemData(uint32_t subDeviceId, std::string &telemDir, std::string &guid, uint64_t &telemOffset);
// Writes the UUID for index subDeviceID into uuid and returns true when a valid
// UUID is available; results are cached in uuidVec.
bool getUuidFromSubDeviceInfo(uint32_t subDeviceID, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid);
// Packs hardware IDs, the PCI location and subDeviceID into uuid. Returns false
// without touching uuid when pciBusInfo has no valid PCI domain.
bool generateUuidFromPciAndSubDeviceInfo(uint32_t subDeviceID, const NEO::PhysicalDevicePciBusInfo &pciBusInfo, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid);
protected:
std::unique_ptr<SysmanProductHelper> pSysmanProductHelper;
@@ -94,6 +97,11 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableAndNonMovableClass {
std::map<uint32_t, std::unique_ptr<PlatformMonitoringTech::TelemData>> mapOfSubDeviceIdToTelemData;
std::map<uint32_t, std::string> telemNodesInPciPath;
std::unique_ptr<PlatformMonitoringTech::TelemData> pTelemData = nullptr;
// Cached UUID for one device index; id is meaningful only while isValid is true.
struct Uuid {
bool isValid = false;
std::array<uint8_t, NEO::ProductHelper::uuidSize> id;
};
// UUID cache indexed by the subDeviceID passed to getUuidFromSubDeviceInfo;
// sized on first use to getSubDeviceCount() + 1 entries.
std::vector<Uuid> uuidVec;
private:
LinuxSysmanImp() = delete;

View File

@@ -114,6 +114,83 @@ KmdSysManager &WddmSysmanImp::getKmdSysManager() {
return *pKmdSysManager;
}
// Collects the UUID string for this device.
//
// deviceUuids is cleared first; a single string is appended when getUuid reports a
// valid UUID, otherwise the vector stays empty.
// The string holds the UUID bytes up to, but not including, the first zero byte.
void WddmSysmanImp::getDeviceUuids(std::vector<std::string> &deviceUuids) {
    deviceUuids.clear();
    std::array<uint8_t, NEO::ProductHelper::uuidSize> deviceUuid{};
    if (!this->getUuid(deviceUuid)) {
        return;
    }
    uint8_t uuid[ZE_MAX_DEVICE_UUID_SIZE] = {};
    std::copy_n(std::begin(deviceUuid), ZE_MAX_DEVICE_UUID_SIZE, std::begin(uuid));
    // The buffer is completely overwritten by the copy and carries no NUL
    // terminator, so the read is bounded to the buffer instead of using the
    // C-string constructor (which would run past the end when no byte is zero).
    // The string is then cut at the first zero byte, which keeps the key format
    // unchanged for UUIDs that do contain one.
    // NOTE(review): bytes after the first zero byte are dropped, so UUIDs that
    // differ only after it yield the same string. Keeping all bytes requires
    // changing the code that looks these strings up at the same time.
    std::string uuidString(reinterpret_cast<char const *>(uuid), sizeof(uuid));
    const auto terminatorPos = uuidString.find('\0');
    if (terminatorPos != std::string::npos) {
        uuidString.resize(terminatorPos);
    }
    deviceUuids.push_back(uuidString);
}
// Returns this device's UUID through uuid, generating it from the PCI bus info on
// first use and caching it in the uuid member.
//
// The return value tells whether a valid UUID is cached; the uuid argument is
// written only in that case. Nothing is generated while the root device
// environment has no OS interface.
bool WddmSysmanImp::getUuid(std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid) {
    auto &cachedUuid = this->uuid;
    if (getSysmanDeviceImp()->getRootDeviceEnvironment().osInterface == nullptr) {
        return cachedUuid.isValid;
    }

    auto driverModel = getSysmanDeviceImp()->getRootDeviceEnvironment().osInterface->getDriverModel();
    if (!cachedUuid.isValid) {
        NEO::PhysicalDevicePciBusInfo busInfo = driverModel->getPciBusInfo();
        cachedUuid.isValid = generateUuidFromPciBusInfo(busInfo, cachedUuid.id);
    }
    if (cachedUuid.isValid) {
        uuid = cachedUuid.id;
    }
    return cachedUuid.isValid;
}
// Builds a device UUID from the hardware IDs and the PCI location; the sub-device
// field is always 0.
//
// Returns false and leaves uuid untouched when pciBusInfo has no valid PCI domain;
// otherwise overwrites uuid with the packed identifier and returns true.
bool WddmSysmanImp::generateUuidFromPciBusInfo(const NEO::PhysicalDevicePciBusInfo &pciBusInfo, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid) {
    if (pciBusInfo.pciDomain == NEO::PhysicalDevicePciBusInfo::invalidValue) {
        return false;
    }
    uuid.fill(0);

    // A device UUID uniquely identifies a device within a system. Combining the
    // device information with its PCI location keeps UUIDs unique even when several
    // identical Intel GPUs are installed.
    //
    // The UUID has to match across GPU APIs, including ones outside the
    // compute_runtime project (i.e. other than L0 or OCL). This layout has been
    // agreed upon by various Intel driver teams.
    //
    // Consult other driver teams before changing this.
    struct DeviceUUID {
        uint16_t vendorID;
        uint16_t deviceID;
        uint16_t revisionID;
        uint16_t pciDomain;
        uint8_t pciBus;
        uint8_t pciDev;
        uint8_t pciFunc;
        uint8_t reserved[4];
        uint8_t subDeviceID;
    };
    static_assert(sizeof(DeviceUUID) == NEO::ProductHelper::uuidSize);

    const auto &hardwareInfo = getSysmanDeviceImp()->getHardwareInfo();

    DeviceUUID packedUuid = {};
    packedUuid.vendorID = 0x8086; // Intel
    packedUuid.deviceID = hardwareInfo.platform.usDeviceID;
    packedUuid.revisionID = hardwareInfo.platform.usRevId;
    packedUuid.pciDomain = static_cast<uint16_t>(pciBusInfo.pciDomain);
    packedUuid.pciBus = static_cast<uint8_t>(pciBusInfo.pciBus);
    packedUuid.pciDev = static_cast<uint8_t>(pciBusInfo.pciDevice);
    packedUuid.pciFunc = static_cast<uint8_t>(pciBusInfo.pciFunction);
    packedUuid.subDeviceID = 0;

    memcpy_s(uuid.data(), NEO::ProductHelper::uuidSize, &packedUuid, sizeof(DeviceUUID));
    return true;
}
OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) {
WddmSysmanImp *pWddmSysmanImp = new WddmSysmanImp(pParentSysmanDeviceImp);
return static_cast<OsSysman *>(pWddmSysmanImp);

View File

@@ -8,6 +8,8 @@
#pragma once
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/os_interface/driver_info.h"
#include "shared/source/os_interface/product_helper.h"
#include "level_zero/sysman/source/device/os_sysman.h"
#include "level_zero/sysman/source/device/sysman_device.h"
@@ -36,11 +38,14 @@ class WddmSysmanImp : public OsSysman, NEO::NonCopyableAndNonMovableClass {
void releaseFwUtilInterface();
uint32_t getSubDeviceCount() override;
void getDeviceUuids(std::vector<std::string> &deviceUuids) override;
SysmanDeviceImp *getSysmanDeviceImp();
const NEO::HardwareInfo &getHardwareInfo() const override { return pParentSysmanDeviceImp->getHardwareInfo(); }
PRODUCT_FAMILY getProductFamily() const { return pParentSysmanDeviceImp->getProductFamily(); }
SysmanProductHelper *getSysmanProductHelper();
PlatformMonitoringTech *getSysmanPmt();
// Writes the device UUID into uuid and returns true when a valid UUID is available;
// the UUID is generated from PCI bus info on first use and cached afterwards.
bool getUuid(std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid);
// Packs hardware IDs and the PCI location into uuid. Returns false without
// touching uuid when pciBusInfo has no valid PCI domain.
bool generateUuidFromPciBusInfo(const NEO::PhysicalDevicePciBusInfo &pciBusInfo, std::array<uint8_t, NEO::ProductHelper::uuidSize> &uuid);
protected:
FirmwareUtil *pFwUtilInterface = nullptr;
@@ -48,6 +53,10 @@ class WddmSysmanImp : public OsSysman, NEO::NonCopyableAndNonMovableClass {
SysmanDevice *pDevice = nullptr;
std::unique_ptr<PlatformMonitoringTech> pPmt;
std::unique_ptr<SysmanProductHelper> pSysmanProductHelper;
// Cached device UUID, filled by getUuid on the first successful generation.
// id is meaningful only while isValid is true.
struct {
bool isValid = false;
std::array<uint8_t, NEO::ProductHelper::uuidSize> id;
} uuid;
private:
SysmanDeviceImp *pParentSysmanDeviceImp = nullptr;