[Offload] Add device UID (#164391)

Introduced in OpenMP 6.0, the device UID shall be a unique identifier of
a device on a given system. (Not necessarily a UUID.) Since it is not
guaranteed that the (U)UIDs defined by the device vendor libraries, such
as HSA, do not overlap with those of other vendors, the device UIDs in
offload are always combined with the offload plugin name. In case the
vendor library does not specify any device UID for a given device, we
fall back to the offload-internal device ID.
The device UID can be retrieved using the `llvm-offload-device-info`
tool.
This commit is contained in:
Robert Imschweiler
2025-11-04 20:15:47 +01:00
committed by GitHub
parent 92a1eb3712
commit dc94f2cbad
12 changed files with 91 additions and 6 deletions

View File

@@ -29,6 +29,7 @@ def ol_device_info_t : Enum {
TaggedEtor<"PLATFORM", "ol_platform_handle_t", "the platform associated with the device">,
TaggedEtor<"NAME", "char[]", "Device name">,
TaggedEtor<"PRODUCT_NAME", "char[]", "Device user-facing marketing name">,
TaggedEtor<"UID", "char[]", "Device UID">,
TaggedEtor<"VENDOR", "char[]", "Device vendor">,
TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version">,
TaggedEtor<"MAX_WORK_GROUP_SIZE", "uint32_t", "Maximum total work group size in work items">,

View File

@@ -147,8 +147,8 @@ llvm::Error ol_platform_impl_t::init() {
if (llvm::Error Err = Plugin->initDevice(Id))
return Err;
auto Device = &Plugin->getDevice(Id);
auto Info = Device->obtainInfoImpl();
GenericDeviceTy *Device = &Plugin->getDevice(Id);
llvm::Expected<InfoTreeNode> Info = Device->obtainInfo();
if (llvm::Error Err = Info.takeError())
return Err;
Devices.emplace_back(std::make_unique<ol_device_impl_t>(Id, Device, *this,
@@ -467,6 +467,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
switch (PropName) {
case OL_DEVICE_INFO_NAME:
case OL_DEVICE_INFO_PRODUCT_NAME:
case OL_DEVICE_INFO_UID:
case OL_DEVICE_INFO_VENDOR:
case OL_DEVICE_INFO_DRIVER_VERSION: {
// String values
@@ -544,6 +545,8 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
return Info.writeString("Virtual Host Device");
case OL_DEVICE_INFO_PRODUCT_NAME:
return Info.writeString("Virtual Host Device");
case OL_DEVICE_INFO_UID:
return Info.writeString(GenericPluginTy::getHostDeviceUid());
case OL_DEVICE_INFO_VENDOR:
return Info.writeString("Liboffload");
case OL_DEVICE_INFO_DRIVER_VERSION:

View File

@@ -72,6 +72,7 @@ typedef enum hsa_amd_agent_info_s {
HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A,
HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B,
HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES = 0xA010,
HSA_AMD_AGENT_INFO_UUID = 0xA011,
HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY = 0xA016,
} hsa_amd_agent_info_t;

View File

@@ -2083,6 +2083,20 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Err;
ComputeUnitKind = GPUName;
// From the ROCm HSA documentation:
// Query the UUID of the agent. The value is an Ascii string with a maximum
// of 21 chars including NUL. The string value consists of two parts: header
// and body. The header identifies the device type (GPU, CPU, DSP) while the
// body encodes the UUID as a 16 digit hex string.
//
// Agents that do not support UUID will return the string "GPU-XX" or
// "CPU-XX" or "DSP-XX" depending on their device type.
char UUID[24] = {0};
if (auto Err = getDeviceAttr(HSA_AMD_AGENT_INFO_UUID, UUID))
return Err;
if (!StringRef(UUID).ends_with("-XX"))
setDeviceUidFromVendorUid(UUID);
// Get the wavefront size.
uint32_t WavefrontSize = 0;
if (auto Err = getDeviceAttr(HSA_AGENT_INFO_WAVEFRONT_SIZE, WavefrontSize))

View File

@@ -791,6 +791,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// this id is not unique between different plugins; they may overlap.
int32_t getDeviceId() const { return DeviceId; }
/// Get the unique identifier of the device as a NUL-terminated C string.
/// The pointer is obtained from DeviceUid.c_str(), so it refers to storage
/// owned by this device object.
const char *getDeviceUid() const { return DeviceUid.c_str(); }
/// Set the context of the device if needed, before calling device-specific
/// functions. Plugins may implement this function as a no-op if not needed.
virtual Error setContext() = 0;
@@ -989,9 +992,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error syncEvent(void *EventPtr);
virtual Error syncEventImpl(void *EventPtr) = 0;
/// Obtain information about the device.
Expected<InfoTreeNode> obtainInfo();
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
/// Print information about the device.
Error printInfo();
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
/// Return true if the device has work that is either queued or currently
/// running
@@ -1204,6 +1210,14 @@ protected:
/// global device id and is not the device id visible to the OpenMP user.
const int32_t DeviceId;
/// The unique identifier of the device.
/// By default, the unique identifier of the device is set to the device id,
/// combined with the plugin name, since the offload device id may overlap
/// between different plugins.
std::string DeviceUid;
/// Construct the device UID from the vendor (U)UID.
void setDeviceUidFromVendorUid(StringRef VendorUid);
/// The default grid values used for this device.
llvm::omp::GV GridValues;
@@ -1290,6 +1304,9 @@ struct GenericPluginTy {
return UserDeviceIds.at(DeviceId);
}
/// Get the UID for the host device. This is the fixed string "HOST".
static constexpr const char *getHostDeviceUid() { return "HOST"; }
/// Get the ELF code to recognize the binary image of this plugin.
virtual uint16_t getMagicElfBits() const = 0;

View File

@@ -715,6 +715,10 @@ GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
DeviceId(DeviceId), GridValues(OMPGridValues),
PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(),
PinnedAllocs(*this), RPCServer(nullptr) {
// Conservatively fall back to a UID derived from the plugin's device id, in
// case no real vendor (U)UID becomes available later.
setDeviceUidFromVendorUid(std::to_string(static_cast<uint64_t>(DeviceId)));
#ifdef OMPT_SUPPORT
OmptInitialized.store(false);
// Bind the callbacks to this device's member functions
@@ -1524,15 +1528,22 @@ Error GenericDeviceTy::enqueueHostCall(void (*Callback)(void *), void *UserData,
return Err;
}
Expected<InfoTreeNode> GenericDeviceTy::obtainInfo() {
  // Collect the entries provided by the plugin-specific implementation first.
  Expected<InfoTreeNode> Tree = obtainInfoImpl();
  // On failure, hand the error to the caller untouched.
  if (!Tree)
    return Tree;

  // Add the device UID entry on top of what the implementation reported.
  Tree->add("UID", getDeviceUid(), "", DeviceInfo::UID);
  return Tree;
}
Error GenericDeviceTy::printInfo() {
auto Info = obtainInfoImpl();
auto InfoOrErr = obtainInfo();
// Get the vendor-specific info entries describing the device properties.
if (auto Err = Info.takeError())
if (auto Err = InfoOrErr.takeError())
return Err;
// Print all info entries.
Info->print();
InfoOrErr->print();
return Plugin::success();
}
@@ -1603,6 +1614,10 @@ Expected<bool> GenericDeviceTy::isAccessiblePtr(const void *Ptr, size_t Size) {
return isAccessiblePtrImpl(Ptr, Size);
}
/// Build the device UID as "<plugin name>-<vendor uid>" and store it in
/// DeviceUid.
void GenericDeviceTy::setDeviceUidFromVendorUid(StringRef VendorUid) {
  // Assemble the result in a temporary so DeviceUid is only replaced once the
  // full string has been built.
  std::string Uid(Plugin.getName());
  Uid += "-";
  Uid += std::string(VendorUid);
  DeviceUid = std::move(Uid);
}
Error GenericPluginTy::init() {
if (Initialized)
return Plugin::success();

View File

@@ -35,6 +35,7 @@ DLWRAP(cuFuncSetAttribute, 3)
// Device info
DLWRAP(cuDeviceGetName, 3)
DLWRAP(cuDeviceGetUuid, 2)
DLWRAP(cuDeviceTotalMem, 2)
DLWRAP(cuDriverGetVersion, 1)

View File

@@ -33,6 +33,9 @@ typedef struct CUfunc_st *CUfunction;
typedef void (*CUhostFn)(void *userData);
typedef struct CUstream_st *CUstream;
typedef struct CUevent_st *CUevent;
// Device UUID as filled in by cuDeviceGetUuid: a fixed-size array of 16 bytes.
// NOTE(review): the bytes are presumably raw binary data rather than a
// NUL-terminated string — confirm against the CUDA driver API documentation.
typedef struct CUuuid_st {
char bytes[16];
} CUuuid;
#define CU_DEVICE_INVALID ((CUdevice)(-2))
@@ -301,6 +304,7 @@ CUresult cuFuncSetAttribute(CUfunction, CUfunction_attribute, int);
// Device info
CUresult cuDeviceGetName(char *, int, CUdevice);
CUresult cuDeviceGetUuid(CUuuid *, CUdevice);
CUresult cuDeviceTotalMem(size_t *, CUdevice);
CUresult cuDriverGetVersion(int *);

View File

@@ -25,6 +25,7 @@
#include "PluginInterface.h"
#include "Utils/ELF.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
@@ -293,6 +294,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (auto Err = Plugin::check(Res, "error in cuDeviceGet: %s"))
return Err;
// Query the vendor UUID of the device and derive the device UID from it.
CUuuid UUID = {0};
Res = cuDeviceGetUuid(&UUID, Device);
if (auto Err = Plugin::check(Res, "error in cuDeviceGetUuid: %s"))
  return Err;
// The UUID is a fixed 16-byte array, not a NUL-terminated string. Pass an
// explicit length: converting the array to a StringRef implicitly would use
// strlen, which truncates the hex encoding at the first zero byte and reads
// past the end of the array when the UUID contains no zero byte at all.
setDeviceUidFromVendorUid(
    toHex(StringRef(UUID.bytes, sizeof(UUID.bytes)), /*LowerCase=*/true));
// Query the current flags of the primary context and set its flags if
// it is inactive.
unsigned int FormerPrimaryCtxFlags = 0;

View File

@@ -176,6 +176,7 @@ ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_NAME, "Name"));
OFFLOAD_ERR(printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_PRODUCT_NAME,
"Product Name"));
OFFLOAD_ERR(printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_UID, "UID"));
OFFLOAD_ERR(
printDeviceValue<ol_device_type_t>(S, D, OL_DEVICE_INFO_TYPE, "Type"));
OFFLOAD_ERR(printDeviceValue<const char *>(

View File

@@ -98,6 +98,16 @@ TEST_P(olGetDeviceInfoTest, SuccessProductName) {
ASSERT_EQ(std::strlen(Name.data()), Size - 1);
}
// The UID of a device must be a non-empty query result whose reported size
// matches the returned string plus its NUL terminator.
TEST_P(olGetDeviceInfoTest, SuccessUID) {
  // First ask how large the buffer has to be.
  size_t Size = 0;
  ASSERT_SUCCESS(olGetDeviceInfoSize(Device, OL_DEVICE_INFO_UID, &Size));
  ASSERT_GT(Size, 0ul);

  // Then fetch the string into a zero-initialized buffer of exactly that size.
  std::vector<char> UID(Size);
  ASSERT_SUCCESS(olGetDeviceInfo(Device, OL_DEVICE_INFO_UID, Size, UID.data()));
  ASSERT_EQ(std::strlen(UID.data()), Size - 1);
}
TEST_P(olGetDeviceInfoTest, HostProductName) {
size_t Size = 0;
ASSERT_SUCCESS(olGetDeviceInfoSize(Host, OL_DEVICE_INFO_PRODUCT_NAME, &Size));
@@ -109,6 +119,16 @@ TEST_P(olGetDeviceInfoTest, HostProductName) {
ASSERT_EQ(std::strlen(Name.data()), Size - 1);
}
// The UID of the host device must be a non-empty query result whose reported
// size matches the returned string plus its NUL terminator.
TEST_P(olGetDeviceInfoTest, HostUID) {
  // First ask how large the buffer has to be.
  size_t Size = 0;
  ASSERT_SUCCESS(olGetDeviceInfoSize(Host, OL_DEVICE_INFO_UID, &Size));
  ASSERT_GT(Size, 0ul);

  // Then fetch the string into a zero-initialized buffer of exactly that size.
  std::vector<char> UID(Size);
  ASSERT_SUCCESS(olGetDeviceInfo(Host, OL_DEVICE_INFO_UID, Size, UID.data()));
  ASSERT_EQ(std::strlen(UID.data()), Size - 1);
}
TEST_P(olGetDeviceInfoTest, SuccessVendor) {
size_t Size = 0;
ASSERT_SUCCESS(olGetDeviceInfoSize(Device, OL_DEVICE_INFO_VENDOR, &Size));

View File

@@ -32,6 +32,7 @@ OL_DEVICE_INFO_SIZE_TEST_EQ(Platform, ol_platform_handle_t,
OL_DEVICE_INFO_PLATFORM);
OL_DEVICE_INFO_SIZE_TEST_NONZERO(Name, OL_DEVICE_INFO_NAME);
OL_DEVICE_INFO_SIZE_TEST_NONZERO(ProductName, OL_DEVICE_INFO_PRODUCT_NAME);
OL_DEVICE_INFO_SIZE_TEST_NONZERO(UID, OL_DEVICE_INFO_UID);
OL_DEVICE_INFO_SIZE_TEST_NONZERO(Vendor, OL_DEVICE_INFO_VENDOR);
OL_DEVICE_INFO_SIZE_TEST_NONZERO(DriverVersion, OL_DEVICE_INFO_DRIVER_VERSION);
OL_DEVICE_INFO_SIZE_TEST_EQ(MaxWorkGroupSize, uint32_t,