mirror of
https://github.com/intel/llvm.git
synced 2026-01-14 03:50:17 +08:00
[Offload] Replace device info queue with a tree (#144050)
Previously, device info was returned as a queue in which each element carried a "Level" field indicating its nesting level. This commit replaces that queue with a more traditional tree-like structure. The change should not alter the output of `llvm-offload-device-info`.
This commit is contained in:
@@ -229,26 +229,19 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
|
||||
|
||||
// Find the info if it exists under any of the given names
|
||||
auto GetInfo = [&](std::vector<std::string> Names) {
|
||||
InfoQueueTy DevInfo;
|
||||
if (Device == HostDevice())
|
||||
return std::string("Host");
|
||||
|
||||
if (!Device->Device)
|
||||
return std::string("");
|
||||
|
||||
if (auto Err = Device->Device->obtainInfoImpl(DevInfo))
|
||||
auto Info = Device->Device->obtainInfoImpl();
|
||||
if (auto Err = Info.takeError())
|
||||
return std::string("");
|
||||
|
||||
for (auto Name : Names) {
|
||||
auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) {
|
||||
return Info.Key == Name;
|
||||
};
|
||||
auto Item = std::find_if(DevInfo.getQueue().begin(),
|
||||
DevInfo.getQueue().end(), InfoKeyMatches);
|
||||
|
||||
if (Item != std::end(DevInfo.getQueue())) {
|
||||
return Item->Value;
|
||||
}
|
||||
if (auto Entry = Info->get(Name))
|
||||
return (*Entry)->Value;
|
||||
}
|
||||
|
||||
return std::string("");
|
||||
|
||||
@@ -2551,7 +2551,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
|
||||
}
|
||||
|
||||
/// Print information about the device.
|
||||
Error obtainInfoImpl(InfoQueueTy &Info) override {
|
||||
Expected<InfoTreeNode> obtainInfoImpl() override {
|
||||
char TmpChar[1000];
|
||||
const char *TmpCharPtr = "Unknown";
|
||||
uint16_t Major, Minor;
|
||||
@@ -2562,6 +2562,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
|
||||
uint16_t WorkgrpMaxDim[3];
|
||||
hsa_dim3_t GridMaxDim;
|
||||
hsa_status_t Status, Status2;
|
||||
InfoTreeNode Info;
|
||||
|
||||
Status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &Major);
|
||||
Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
|
||||
@@ -2617,11 +2618,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
|
||||
// runtime.
|
||||
Status = getDeviceAttrRaw(HSA_AGENT_INFO_CACHE_SIZE, CacheSize);
|
||||
if (Status == HSA_STATUS_SUCCESS) {
|
||||
Info.add("Cache");
|
||||
auto &Cache = *Info.add("Cache");
|
||||
|
||||
for (int I = 0; I < 4; I++)
|
||||
if (CacheSize[I])
|
||||
Info.add<InfoLevel2>("L" + std::to_string(I), CacheSize[I]);
|
||||
Cache.add("L" + std::to_string(I), CacheSize[I]);
|
||||
}
|
||||
|
||||
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_CACHELINE_SIZE, TmpUInt);
|
||||
@@ -2654,10 +2655,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
|
||||
|
||||
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
|
||||
if (Status == HSA_STATUS_SUCCESS) {
|
||||
Info.add("Workgroup Max Size per Dimension");
|
||||
Info.add<InfoLevel2>("x", WorkgrpMaxDim[0]);
|
||||
Info.add<InfoLevel2>("y", WorkgrpMaxDim[1]);
|
||||
Info.add<InfoLevel2>("z", WorkgrpMaxDim[2]);
|
||||
auto &MaxSize = *Info.add("Workgroup Max Size per Dimension");
|
||||
MaxSize.add("x", WorkgrpMaxDim[0]);
|
||||
MaxSize.add("y", WorkgrpMaxDim[1]);
|
||||
MaxSize.add("z", WorkgrpMaxDim[2]);
|
||||
}
|
||||
|
||||
Status = getDeviceAttrRaw(
|
||||
@@ -2673,17 +2674,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
|
||||
|
||||
Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim);
|
||||
if (Status == HSA_STATUS_SUCCESS) {
|
||||
Info.add("Grid Max Size per Dimension");
|
||||
Info.add<InfoLevel2>("x", GridMaxDim.x);
|
||||
Info.add<InfoLevel2>("y", GridMaxDim.y);
|
||||
Info.add<InfoLevel2>("z", GridMaxDim.z);
|
||||
auto &MaxDim = *Info.add("Grid Max Size per Dimension");
|
||||
MaxDim.add("x", GridMaxDim.x);
|
||||
MaxDim.add("y", GridMaxDim.y);
|
||||
MaxDim.add("z", GridMaxDim.z);
|
||||
}
|
||||
|
||||
Status = getDeviceAttrRaw(HSA_AGENT_INFO_FBARRIER_MAX_SIZE, TmpUInt);
|
||||
if (Status == HSA_STATUS_SUCCESS)
|
||||
Info.add("Max fbarriers/Workgrp", TmpUInt);
|
||||
|
||||
Info.add("Memory Pools");
|
||||
auto &RootPool = *Info.add("Memory Pools");
|
||||
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
|
||||
std::string TmpStr, TmpStr2;
|
||||
|
||||
@@ -2698,7 +2699,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
|
||||
else
|
||||
TmpStr = "Unknown";
|
||||
|
||||
Info.add<InfoLevel2>(std::string("Pool ") + TmpStr);
|
||||
auto &PoolNode = *RootPool.add(std::string("Pool ") + TmpStr);
|
||||
|
||||
if (Pool->isGlobal()) {
|
||||
if (Pool->isFineGrained())
|
||||
@@ -2708,39 +2709,39 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
|
||||
if (Pool->supportsKernelArgs())
|
||||
TmpStr2 += "Kernarg ";
|
||||
|
||||
Info.add<InfoLevel3>("Flags", TmpStr2);
|
||||
PoolNode.add("Flags", TmpStr2);
|
||||
}
|
||||
|
||||
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt);
|
||||
if (Status == HSA_STATUS_SUCCESS)
|
||||
Info.add<InfoLevel3>("Size", TmpSt, "bytes");
|
||||
PoolNode.add("Size", TmpSt, "bytes");
|
||||
|
||||
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
|
||||
TmpBool);
|
||||
if (Status == HSA_STATUS_SUCCESS)
|
||||
Info.add<InfoLevel3>("Allocatable", TmpBool);
|
||||
PoolNode.add("Allocatable", TmpBool);
|
||||
|
||||
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
|
||||
TmpSt);
|
||||
if (Status == HSA_STATUS_SUCCESS)
|
||||
Info.add<InfoLevel3>("Runtime Alloc Granule", TmpSt, "bytes");
|
||||
PoolNode.add("Runtime Alloc Granule", TmpSt, "bytes");
|
||||
|
||||
Status = Pool->getAttrRaw(
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, TmpSt);
|
||||
if (Status == HSA_STATUS_SUCCESS)
|
||||
Info.add<InfoLevel3>("Runtime Alloc Alignment", TmpSt, "bytes");
|
||||
PoolNode.add("Runtime Alloc Alignment", TmpSt, "bytes");
|
||||
|
||||
Status =
|
||||
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, TmpBool);
|
||||
if (Status == HSA_STATUS_SUCCESS)
|
||||
Info.add<InfoLevel3>("Accessible by all", TmpBool);
|
||||
PoolNode.add("Accessible by all", TmpBool);
|
||||
}
|
||||
|
||||
Info.add("ISAs");
|
||||
auto &ISAs = *Info.add("ISAs");
|
||||
auto Err = hsa_utils::iterateAgentISAs(getAgent(), [&](hsa_isa_t ISA) {
|
||||
Status = hsa_isa_get_info_alt(ISA, HSA_ISA_INFO_NAME, TmpChar);
|
||||
if (Status == HSA_STATUS_SUCCESS)
|
||||
Info.add<InfoLevel2>("Name", TmpChar);
|
||||
ISAs.add("Name", TmpChar);
|
||||
|
||||
return Status;
|
||||
});
|
||||
@@ -2749,7 +2750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
|
||||
if (Err)
|
||||
consumeError(std::move(Err));
|
||||
|
||||
return Plugin::success();
|
||||
return Info;
|
||||
}
|
||||
|
||||
/// Returns true if auto zero-copy the best configuration for the current
|
||||
|
||||
@@ -112,77 +112,100 @@ private:
|
||||
__tgt_async_info *AsyncInfoPtr;
|
||||
};
|
||||
|
||||
/// The information level represents the level of a key-value property in the
|
||||
/// info tree print (i.e. indentation). The first level should be the default.
|
||||
enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 };
|
||||
/// Tree node for device information
|
||||
///
|
||||
/// This information is either printed or used by liboffload to extract certain
|
||||
/// device queries. Each property has an optional key, an optional value
|
||||
/// and optional children. The children can be used to store additional
|
||||
/// information (such as x, y and z components of ranges).
|
||||
struct InfoTreeNode {
|
||||
static constexpr uint64_t IndentSize = 4;
|
||||
|
||||
/// Class for storing device information and later be printed. An object of this
|
||||
/// type acts as a queue of key-value properties. Each property has a key, a
|
||||
/// a value, and an optional unit for the value. For printing purposes, the
|
||||
/// information can be classified into several levels. These levels are useful
|
||||
/// for defining sections and subsections. Thus, each key-value property also
|
||||
/// has an additional field indicating to which level belongs to. Notice that
|
||||
/// we use the level to determine the indentation of the key-value property at
|
||||
/// printing time. See the enum InfoLevelKind for the list of accepted levels.
|
||||
class InfoQueueTy {
|
||||
public:
|
||||
struct InfoQueueEntryTy {
|
||||
std::string Key;
|
||||
std::string Value;
|
||||
std::string Units;
|
||||
uint64_t Level;
|
||||
};
|
||||
std::string Key;
|
||||
std::string Value;
|
||||
std::string Units;
|
||||
// Need to specify a default value number of elements here as `InfoTreeNode`'s
|
||||
// size is unknown. This is a vector (rather than a Key->Value map) since:
|
||||
// * The keys need to be owned and thus `std::string`s
|
||||
// * The order of keys is important
|
||||
// * The same key can appear multiple times
|
||||
std::unique_ptr<llvm::SmallVector<InfoTreeNode, 8>> Children;
|
||||
|
||||
private:
|
||||
std::deque<InfoQueueEntryTy> Queue;
|
||||
InfoTreeNode() : InfoTreeNode("", "", "") {}
|
||||
InfoTreeNode(std::string Key, std::string Value, std::string Units)
|
||||
: Key(Key), Value(Value), Units(Units) {}
|
||||
|
||||
public:
|
||||
/// Add a new info entry to the queue. The entry requires at least a key
|
||||
/// string in \p Key. The value in \p Value is optional and can be any type
|
||||
/// that is representable as a string. The units in \p Units is optional and
|
||||
/// must be a string. The info level is a template parameter that defaults to
|
||||
/// the first level (top level).
|
||||
template <InfoLevelKind L = InfoLevel1, typename T = std::string>
|
||||
void add(const std::string &Key, T Value = T(),
|
||||
const std::string &Units = std::string()) {
|
||||
/// Add a new info entry as a child of this node. The entry requires at least
|
||||
/// a key string in \p Key. The value in \p Value is optional and can be any
|
||||
/// type that is representable as a string. The units in \p Units is optional
|
||||
/// and must be a string.
|
||||
template <typename T = std::string>
|
||||
InfoTreeNode *add(std::string Key, T Value = T(),
|
||||
const std::string &Units = std::string()) {
|
||||
assert(!Key.empty() && "Invalid info key");
|
||||
|
||||
// Convert the value to a string depending on its type.
|
||||
if (!Children)
|
||||
Children = std::make_unique<llvm::SmallVector<InfoTreeNode, 8>>();
|
||||
|
||||
std::string ValueStr;
|
||||
if constexpr (std::is_same_v<T, bool>)
|
||||
Queue.push_back({Key, Value ? "Yes" : "No", Units, L});
|
||||
ValueStr = Value ? "Yes" : "No";
|
||||
else if constexpr (std::is_arithmetic_v<T>)
|
||||
Queue.push_back({Key, std::to_string(Value), Units, L});
|
||||
ValueStr = std::to_string(Value);
|
||||
else
|
||||
Queue.push_back({Key, Value, Units, L});
|
||||
ValueStr = Value;
|
||||
|
||||
return &Children->emplace_back(Key, ValueStr, Units);
|
||||
}
|
||||
|
||||
const std::deque<InfoQueueEntryTy> &getQueue() const { return Queue; }
|
||||
std::optional<InfoTreeNode *> get(StringRef Key) {
|
||||
if (!Children)
|
||||
return std::nullopt;
|
||||
|
||||
/// Print all info entries added to the queue.
|
||||
auto It = std::find_if(Children->begin(), Children->end(),
|
||||
[&](auto &V) { return V.Key == Key; });
|
||||
if (It == Children->end())
|
||||
return std::nullopt;
|
||||
return It;
|
||||
}
|
||||
|
||||
/// Print all info entries in the tree
|
||||
void print() const {
|
||||
// We print four spances for each level.
|
||||
constexpr uint64_t IndentSize = 4;
|
||||
// Fake an additional indent so that values are offset from the keys
|
||||
doPrint(0, maxKeySize(1));
|
||||
}
|
||||
|
||||
// Find the maximum key length (level + key) to compute the individual
|
||||
// indentation of each entry.
|
||||
uint64_t MaxKeySize = 0;
|
||||
for (const auto &Entry : Queue) {
|
||||
uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize;
|
||||
if (KeySize > MaxKeySize)
|
||||
MaxKeySize = KeySize;
|
||||
}
|
||||
|
||||
// Print all info entries.
|
||||
for (const auto &Entry : Queue) {
|
||||
private:
|
||||
void doPrint(int Level, uint64_t MaxKeySize) const {
|
||||
if (Key.size()) {
|
||||
// Compute the indentations for the current entry.
|
||||
uint64_t KeyIndentSize = Entry.Level * IndentSize;
|
||||
uint64_t KeyIndentSize = Level * IndentSize;
|
||||
uint64_t ValIndentSize =
|
||||
MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize;
|
||||
MaxKeySize - (Key.size() + KeyIndentSize) + IndentSize;
|
||||
|
||||
llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key
|
||||
<< std::string(ValIndentSize, ' ') << Entry.Value
|
||||
<< (Entry.Units.empty() ? "" : " ") << Entry.Units << "\n";
|
||||
llvm::outs() << std::string(KeyIndentSize, ' ') << Key
|
||||
<< std::string(ValIndentSize, ' ') << Value
|
||||
<< (Units.empty() ? "" : " ") << Units << "\n";
|
||||
}
|
||||
|
||||
// Print children
|
||||
if (Children)
|
||||
for (const auto &Entry : *Children)
|
||||
Entry.doPrint(Level + 1, MaxKeySize);
|
||||
}
|
||||
|
||||
// Recursively calculates the maximum width of each key, including indentation
|
||||
uint64_t maxKeySize(int Level) const {
|
||||
uint64_t MaxKeySize = 0;
|
||||
|
||||
if (Children)
|
||||
for (const auto &Entry : *Children) {
|
||||
uint64_t KeySize = Entry.Key.size() + Level * IndentSize;
|
||||
MaxKeySize = std::max(MaxKeySize, KeySize);
|
||||
MaxKeySize = std::max(MaxKeySize, Entry.maxKeySize(Level + 1));
|
||||
}
|
||||
|
||||
return MaxKeySize;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -871,7 +894,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
|
||||
|
||||
/// Print information about the device.
|
||||
Error printInfo();
|
||||
virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0;
|
||||
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
|
||||
|
||||
/// Getters of the grid values.
|
||||
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
|
||||
|
||||
@@ -1578,14 +1578,14 @@ Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
|
||||
}
|
||||
|
||||
Error GenericDeviceTy::printInfo() {
|
||||
InfoQueueTy InfoQueue;
|
||||
auto Info = obtainInfoImpl();
|
||||
|
||||
// Get the vendor-specific info entries describing the device properties.
|
||||
if (auto Err = obtainInfoImpl(InfoQueue))
|
||||
if (auto Err = Info.takeError())
|
||||
return Err;
|
||||
|
||||
// Print all info entries.
|
||||
InfoQueue.print();
|
||||
Info->print();
|
||||
|
||||
return Plugin::success();
|
||||
}
|
||||
|
||||
@@ -922,11 +922,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
||||
}
|
||||
|
||||
/// Print information about the device.
|
||||
Error obtainInfoImpl(InfoQueueTy &Info) override {
|
||||
Expected<InfoTreeNode> obtainInfoImpl() override {
|
||||
char TmpChar[1000];
|
||||
const char *TmpCharPtr;
|
||||
size_t TmpSt;
|
||||
int TmpInt;
|
||||
InfoTreeNode Info;
|
||||
|
||||
CUresult Res = cuDriverGetVersion(&TmpInt);
|
||||
if (Res == CUDA_SUCCESS)
|
||||
@@ -971,27 +972,27 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
||||
if (Res == CUDA_SUCCESS)
|
||||
Info.add("Maximum Threads per Block", TmpInt);
|
||||
|
||||
Info.add("Maximum Block Dimensions", "");
|
||||
auto &MaxBlock = *Info.add("Maximum Block Dimensions", "");
|
||||
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt);
|
||||
if (Res == CUDA_SUCCESS)
|
||||
Info.add<InfoLevel2>("x", TmpInt);
|
||||
MaxBlock.add("x", TmpInt);
|
||||
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, TmpInt);
|
||||
if (Res == CUDA_SUCCESS)
|
||||
Info.add<InfoLevel2>("y", TmpInt);
|
||||
MaxBlock.add("y", TmpInt);
|
||||
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, TmpInt);
|
||||
if (Res == CUDA_SUCCESS)
|
||||
Info.add<InfoLevel2>("z", TmpInt);
|
||||
MaxBlock.add("z", TmpInt);
|
||||
|
||||
Info.add("Maximum Grid Dimensions", "");
|
||||
auto &MaxGrid = *Info.add("Maximum Grid Dimensions", "");
|
||||
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, TmpInt);
|
||||
if (Res == CUDA_SUCCESS)
|
||||
Info.add<InfoLevel2>("x", TmpInt);
|
||||
MaxGrid.add("x", TmpInt);
|
||||
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, TmpInt);
|
||||
if (Res == CUDA_SUCCESS)
|
||||
Info.add<InfoLevel2>("y", TmpInt);
|
||||
MaxGrid.add("y", TmpInt);
|
||||
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, TmpInt);
|
||||
if (Res == CUDA_SUCCESS)
|
||||
Info.add<InfoLevel2>("z", TmpInt);
|
||||
MaxGrid.add("z", TmpInt);
|
||||
|
||||
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_PITCH, TmpInt);
|
||||
if (Res == CUDA_SUCCESS)
|
||||
@@ -1087,7 +1088,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
||||
|
||||
Info.add("Compute Capabilities", ComputeCapability.str());
|
||||
|
||||
return Plugin::success();
|
||||
return Info;
|
||||
}
|
||||
|
||||
virtual bool shouldSetupDeviceMemoryPool() const override {
|
||||
|
||||
@@ -326,9 +326,10 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
|
||||
Error syncEventImpl(void *EventPtr) override { return Plugin::success(); }
|
||||
|
||||
/// Print information about the device.
|
||||
Error obtainInfoImpl(InfoQueueTy &Info) override {
|
||||
Expected<InfoTreeNode> obtainInfoImpl() override {
|
||||
InfoTreeNode Info;
|
||||
Info.add("Device Type", "Generic-elf-64bit");
|
||||
return Plugin::success();
|
||||
return Info;
|
||||
}
|
||||
|
||||
/// This plugin should not setup the device environment or memory pool.
|
||||
|
||||
Reference in New Issue
Block a user