[Offload] Replace device info queue with a tree (#144050)

Previously, device info was returned as a queue with each element having
a "Level" field indicating its nesting level. This replaces this queue
with a more traditional tree-like structure.

This should not result in a change to the output of
`llvm-offload-device-info`.
This commit is contained in:
Ross Brunton
2025-06-13 15:22:47 +01:00
committed by GitHub
parent 0a0960dac6
commit e6a3579653
6 changed files with 122 additions and 103 deletions

View File

@@ -229,26 +229,19 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
// Find the info if it exists under any of the given names
auto GetInfo = [&](std::vector<std::string> Names) {
InfoQueueTy DevInfo;
if (Device == HostDevice())
return std::string("Host");
if (!Device->Device)
return std::string("");
if (auto Err = Device->Device->obtainInfoImpl(DevInfo))
auto Info = Device->Device->obtainInfoImpl();
if (auto Err = Info.takeError())
return std::string("");
for (auto Name : Names) {
auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) {
return Info.Key == Name;
};
auto Item = std::find_if(DevInfo.getQueue().begin(),
DevInfo.getQueue().end(), InfoKeyMatches);
if (Item != std::end(DevInfo.getQueue())) {
return Item->Value;
}
if (auto Entry = Info->get(Name))
return (*Entry)->Value;
}
return std::string("");

View File

@@ -2551,7 +2551,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
/// Collect information about the device.
Error obtainInfoImpl(InfoQueueTy &Info) override {
Expected<InfoTreeNode> obtainInfoImpl() override {
char TmpChar[1000];
const char *TmpCharPtr = "Unknown";
uint16_t Major, Minor;
@@ -2562,6 +2562,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
uint16_t WorkgrpMaxDim[3];
hsa_dim3_t GridMaxDim;
hsa_status_t Status, Status2;
InfoTreeNode Info;
Status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &Major);
Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
@@ -2617,11 +2618,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// runtime.
Status = getDeviceAttrRaw(HSA_AGENT_INFO_CACHE_SIZE, CacheSize);
if (Status == HSA_STATUS_SUCCESS) {
Info.add("Cache");
auto &Cache = *Info.add("Cache");
for (int I = 0; I < 4; I++)
if (CacheSize[I])
Info.add<InfoLevel2>("L" + std::to_string(I), CacheSize[I]);
Cache.add("L" + std::to_string(I), CacheSize[I]);
}
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_CACHELINE_SIZE, TmpUInt);
@@ -2654,10 +2655,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
Info.add("Workgroup Max Size per Dimension");
Info.add<InfoLevel2>("x", WorkgrpMaxDim[0]);
Info.add<InfoLevel2>("y", WorkgrpMaxDim[1]);
Info.add<InfoLevel2>("z", WorkgrpMaxDim[2]);
auto &MaxSize = *Info.add("Workgroup Max Size per Dimension");
MaxSize.add("x", WorkgrpMaxDim[0]);
MaxSize.add("y", WorkgrpMaxDim[1]);
MaxSize.add("z", WorkgrpMaxDim[2]);
}
Status = getDeviceAttrRaw(
@@ -2673,17 +2674,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim);
if (Status == HSA_STATUS_SUCCESS) {
Info.add("Grid Max Size per Dimension");
Info.add<InfoLevel2>("x", GridMaxDim.x);
Info.add<InfoLevel2>("y", GridMaxDim.y);
Info.add<InfoLevel2>("z", GridMaxDim.z);
auto &MaxDim = *Info.add("Grid Max Size per Dimension");
MaxDim.add("x", GridMaxDim.x);
MaxDim.add("y", GridMaxDim.y);
MaxDim.add("z", GridMaxDim.z);
}
Status = getDeviceAttrRaw(HSA_AGENT_INFO_FBARRIER_MAX_SIZE, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Max fbarriers/Workgrp", TmpUInt);
Info.add("Memory Pools");
auto &RootPool = *Info.add("Memory Pools");
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
std::string TmpStr, TmpStr2;
@@ -2698,7 +2699,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
else
TmpStr = "Unknown";
Info.add<InfoLevel2>(std::string("Pool ") + TmpStr);
auto &PoolNode = *RootPool.add(std::string("Pool ") + TmpStr);
if (Pool->isGlobal()) {
if (Pool->isFineGrained())
@@ -2708,39 +2709,39 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Pool->supportsKernelArgs())
TmpStr2 += "Kernarg ";
Info.add<InfoLevel3>("Flags", TmpStr2);
PoolNode.add("Flags", TmpStr2);
}
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Size", TmpSt, "bytes");
PoolNode.add("Size", TmpSt, "bytes");
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
TmpBool);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Allocatable", TmpBool);
PoolNode.add("Allocatable", TmpBool);
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
TmpSt);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Runtime Alloc Granule", TmpSt, "bytes");
PoolNode.add("Runtime Alloc Granule", TmpSt, "bytes");
Status = Pool->getAttrRaw(
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Runtime Alloc Alignment", TmpSt, "bytes");
PoolNode.add("Runtime Alloc Alignment", TmpSt, "bytes");
Status =
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, TmpBool);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel3>("Accessible by all", TmpBool);
PoolNode.add("Accessible by all", TmpBool);
}
Info.add("ISAs");
auto &ISAs = *Info.add("ISAs");
auto Err = hsa_utils::iterateAgentISAs(getAgent(), [&](hsa_isa_t ISA) {
Status = hsa_isa_get_info_alt(ISA, HSA_ISA_INFO_NAME, TmpChar);
if (Status == HSA_STATUS_SUCCESS)
Info.add<InfoLevel2>("Name", TmpChar);
ISAs.add("Name", TmpChar);
return Status;
});
@@ -2749,7 +2750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Err)
consumeError(std::move(Err));
return Plugin::success();
return Info;
}
/// Returns true if auto zero-copy is the best configuration for the current

View File

@@ -112,77 +112,100 @@ private:
__tgt_async_info *AsyncInfoPtr;
};
/// The information level represents the level of a key-value property in the
/// info tree print (i.e. indentation). The first level should be the default.
enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 };
/// Tree node for device information
///
/// This information is either printed or used by liboffload to extract certain
/// device queries. Each property has an optional key, an optional value
/// and optional children. The children can be used to store additional
/// information (such as x, y and z components of ranges).
struct InfoTreeNode {
static constexpr uint64_t IndentSize = 4;
/// Class for storing device information to be printed later. An object of this
/// type acts as a queue of key-value properties. Each property has a key, a
/// value, and an optional unit for the value. For printing purposes, the
/// information can be classified into several levels. These levels are useful
/// for defining sections and subsections. Thus, each key-value property also
/// has an additional field indicating which level it belongs to. Notice that
/// we use the level to determine the indentation of the key-value property at
/// printing time. See the enum InfoLevelKind for the list of accepted levels.
class InfoQueueTy {
public:
struct InfoQueueEntryTy {
std::string Key;
std::string Value;
std::string Units;
uint64_t Level;
};
std::string Key;
std::string Value;
std::string Units;
// The inline element count must be given explicitly here because
// `InfoTreeNode`'s size is not yet known. This is a vector (rather than a
// Key->Value map) since:
// * The keys need to be owned and are thus `std::string`s
// * The order of keys is important
// * The same key can appear multiple times
std::unique_ptr<llvm::SmallVector<InfoTreeNode, 8>> Children;
private:
std::deque<InfoQueueEntryTy> Queue;
InfoTreeNode() : InfoTreeNode("", "", "") {}
InfoTreeNode(std::string Key, std::string Value, std::string Units)
: Key(Key), Value(Value), Units(Units) {}
public:
/// Add a new info entry to the queue. The entry requires at least a key
/// string in \p Key. The value in \p Value is optional and can be any type
/// that is representable as a string. The units in \p Units is optional and
/// must be a string. The info level is a template parameter that defaults to
/// the first level (top level).
template <InfoLevelKind L = InfoLevel1, typename T = std::string>
void add(const std::string &Key, T Value = T(),
const std::string &Units = std::string()) {
/// Add a new info entry as a child of this node. The entry requires at least
/// a key string in \p Key. The value in \p Value is optional and can be any
/// type that is representable as a string. The units in \p Units is optional
/// and must be a string.
///
/// Boolean values are stored as "Yes"/"No" and other arithmetic values are
/// converted with std::to_string.
///
/// \returns a pointer to the newly added child. The pointer refers to an
/// element of this node's child vector, so a later add() on the same node may
/// invalidate it; adding children to the returned node itself does not.
template <typename T = std::string>
InfoTreeNode *add(std::string Key, T Value = T(),
const std::string &Units = std::string()) {
assert(!Key.empty() && "Invalid info key");
// Convert the value to a string depending on its type.
// The child list is allocated lazily, on the first add().
if (!Children)
Children = std::make_unique<llvm::SmallVector<InfoTreeNode, 8>>();
std::string ValueStr;
if constexpr (std::is_same_v<T, bool>)
Queue.push_back({Key, Value ? "Yes" : "No", Units, L});
ValueStr = Value ? "Yes" : "No";
else if constexpr (std::is_arithmetic_v<T>)
Queue.push_back({Key, std::to_string(Value), Units, L});
ValueStr = std::to_string(Value);
else
Queue.push_back({Key, Value, Units, L});
ValueStr = Value;
return &Children->emplace_back(Key, ValueStr, Units);
}
const std::deque<InfoQueueEntryTy> &getQueue() const { return Queue; }
/// Look up the first direct child of this node whose key equals \p Key.
/// Returns std::nullopt when this node has no children or none of them match.
std::optional<InfoTreeNode *> get(StringRef Key) {
  if (Children) {
    // Children are scanned in insertion order, so the earliest match wins.
    for (InfoTreeNode &Child : *Children) {
      if (Child.Key == Key)
        return &Child;
    }
  }
  return std::nullopt;
}
/// Print all info entries in the tree
void print() const {
// We print four spaces for each level.
constexpr uint64_t IndentSize = 4;
// Fake an additional indent so that values are offset from the keys
// Printing starts at this node with level 0; the key column width is
// measured over its children starting at level 1.
doPrint(0, maxKeySize(1));
}
// Find the maximum key length (level + key) to compute the individual
// indentation of each entry.
uint64_t MaxKeySize = 0;
for (const auto &Entry : Queue) {
uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize;
if (KeySize > MaxKeySize)
MaxKeySize = KeySize;
}
// Print all info entries.
for (const auto &Entry : Queue) {
private:
// Prints this node (when it has a key) followed by all of its children.
// \p Level is this node's depth and sets the key indentation; \p MaxKeySize
// is the key column width used to line up the values.
void doPrint(int Level, uint64_t MaxKeySize) const {
// A node with an empty key prints no line of its own.
if (Key.size()) {
// Compute the indentations for the current entry.
uint64_t KeyIndentSize = Entry.Level * IndentSize;
uint64_t KeyIndentSize = Level * IndentSize;
// Pad after the key so that each value starts one indent past MaxKeySize.
uint64_t ValIndentSize =
MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize;
MaxKeySize - (Key.size() + KeyIndentSize) + IndentSize;
llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key
<< std::string(ValIndentSize, ' ') << Entry.Value
<< (Entry.Units.empty() ? "" : " ") << Entry.Units << "\n";
llvm::outs() << std::string(KeyIndentSize, ' ') << Key
<< std::string(ValIndentSize, ' ') << Value
<< (Units.empty() ? "" : " ") << Units << "\n";
}
// Print children
// Each child is printed one level deeper, using the same key column width.
if (Children)
for (const auto &Entry : *Children)
Entry.doPrint(Level + 1, MaxKeySize);
}
// Recursively calculates the maximum width of each key, including indentation
uint64_t maxKeySize(int Level) const {
uint64_t MaxKeySize = 0;
if (Children)
for (const auto &Entry : *Children) {
uint64_t KeySize = Entry.Key.size() + Level * IndentSize;
MaxKeySize = std::max(MaxKeySize, KeySize);
MaxKeySize = std::max(MaxKeySize, Entry.maxKeySize(Level + 1));
}
return MaxKeySize;
}
};
@@ -871,7 +894,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Print information about the device.
Error printInfo();
virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0;
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
/// Getters of the grid values.
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }

View File

@@ -1578,14 +1578,14 @@ Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
}
/// Obtains the device information through obtainInfoImpl() and prints it.
/// Returns the error from obtaining the information if there is one, and
/// success otherwise.
Error GenericDeviceTy::printInfo() {
InfoQueueTy InfoQueue;
auto Info = obtainInfoImpl();
// Get the vendor-specific info entries describing the device properties.
if (auto Err = obtainInfoImpl(InfoQueue))
if (auto Err = Info.takeError())
return Err;
// Print all info entries.
InfoQueue.print();
Info->print();
return Plugin::success();
}

View File

@@ -922,11 +922,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
}
/// Collect information about the device.
Error obtainInfoImpl(InfoQueueTy &Info) override {
Expected<InfoTreeNode> obtainInfoImpl() override {
char TmpChar[1000];
const char *TmpCharPtr;
size_t TmpSt;
int TmpInt;
InfoTreeNode Info;
CUresult Res = cuDriverGetVersion(&TmpInt);
if (Res == CUDA_SUCCESS)
@@ -971,27 +972,27 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_SUCCESS)
Info.add("Maximum Threads per Block", TmpInt);
Info.add("Maximum Block Dimensions", "");
auto &MaxBlock = *Info.add("Maximum Block Dimensions", "");
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt);
if (Res == CUDA_SUCCESS)
Info.add<InfoLevel2>("x", TmpInt);
MaxBlock.add("x", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, TmpInt);
if (Res == CUDA_SUCCESS)
Info.add<InfoLevel2>("y", TmpInt);
MaxBlock.add("y", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, TmpInt);
if (Res == CUDA_SUCCESS)
Info.add<InfoLevel2>("z", TmpInt);
MaxBlock.add("z", TmpInt);
Info.add("Maximum Grid Dimensions", "");
auto &MaxGrid = *Info.add("Maximum Grid Dimensions", "");
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, TmpInt);
if (Res == CUDA_SUCCESS)
Info.add<InfoLevel2>("x", TmpInt);
MaxGrid.add("x", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, TmpInt);
if (Res == CUDA_SUCCESS)
Info.add<InfoLevel2>("y", TmpInt);
MaxGrid.add("y", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, TmpInt);
if (Res == CUDA_SUCCESS)
Info.add<InfoLevel2>("z", TmpInt);
MaxGrid.add("z", TmpInt);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_PITCH, TmpInt);
if (Res == CUDA_SUCCESS)
@@ -1087,7 +1088,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
Info.add("Compute Capabilities", ComputeCapability.str());
return Plugin::success();
return Info;
}
virtual bool shouldSetupDeviceMemoryPool() const override {

View File

@@ -326,9 +326,10 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
Error syncEventImpl(void *EventPtr) override { return Plugin::success(); }
/// Collect information about the device. The only entry added here is
/// "Device Type".
Error obtainInfoImpl(InfoQueueTy &Info) override {
Expected<InfoTreeNode> obtainInfoImpl() override {
InfoTreeNode Info;
Info.add("Device Type", "Generic-elf-64bit");
return Plugin::success();
return Info;
}
/// This plugin should not setup the device environment or memory pool.