[ELF] Add CPU name detection for CUDA architectures (#75964)

Summary: We recently added support for detecting the CUDA processor from the ELF flags. This patch exposes the detected processor as a string (e.g. `sm_80`) so that other code can query it. This will be used by the offloading runtime.
2026-01-31 07:27:33 +08:00 · 2023-12-19 20:01:15 -06:00
parent 56414220df
commit deab58d127
2 changed files with 70 additions and 0 deletions
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -64,6 +64,7 @@ class ELFObjectFileBase : public ObjectFile {
  SubtargetFeatures getLoongArchFeatures() const;

  StringRef getAMDGPUCPUName() const;
+  StringRef getNVPTXCPUName() const; ///< "sm_NN" name for EM_CUDA objects.

 protected:
  ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -358,6 +358,8 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
  switch (getEMachine()) {
  case ELF::EM_AMDGPU:
    return getAMDGPUCPUName();
+  case ELF::EM_CUDA:
+    return getNVPTXCPUName();
  case ELF::EM_PPC:
  case ELF::EM_PPC64:
    return StringRef("future");
@@ -517,6 +519,73 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
  }
 }

+StringRef ELFObjectFileBase::getNVPTXCPUName() const {
+  assert(getEMachine() == ELF::EM_CUDA); // Precondition: machine is EM_CUDA.
+  unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM; // Isolate SM field.
+  // Translate the SM field of the flags into the matching "sm_NN" CPU name.
+  switch (SM) {
+  // Fermi architecture.
+  case ELF::EF_CUDA_SM20:
+    return "sm_20";
+  case ELF::EF_CUDA_SM21:
+    return "sm_21";
+
+  // Kepler architecture.
+  case ELF::EF_CUDA_SM30:
+    return "sm_30";
+  case ELF::EF_CUDA_SM32:
+    return "sm_32";
+  case ELF::EF_CUDA_SM35:
+    return "sm_35";
+  case ELF::EF_CUDA_SM37:
+    return "sm_37";
+
+  // Maxwell architecture.
+  case ELF::EF_CUDA_SM50:
+    return "sm_50";
+  case ELF::EF_CUDA_SM52:
+    return "sm_52";
+  case ELF::EF_CUDA_SM53:
+    return "sm_53";
+
+  // Pascal architecture.
+  case ELF::EF_CUDA_SM60:
+    return "sm_60";
+  case ELF::EF_CUDA_SM61:
+    return "sm_61";
+  case ELF::EF_CUDA_SM62:
+    return "sm_62";
+
+  // Volta architecture.
+  case ELF::EF_CUDA_SM70:
+    return "sm_70";
+  case ELF::EF_CUDA_SM72:
+    return "sm_72";
+
+  // Turing architecture.
+  case ELF::EF_CUDA_SM75:
+    return "sm_75";
+
+  // Ampere architecture.
+  case ELF::EF_CUDA_SM80:
+    return "sm_80";
+  case ELF::EF_CUDA_SM86:
+    return "sm_86";
+  case ELF::EF_CUDA_SM87:
+    return "sm_87";
+
+  // Ada architecture.
+  case ELF::EF_CUDA_SM89:
+    return "sm_89";
+
+  // Hopper architecture; "sm_90a" when EF_CUDA_ACCELERATORS is set.
+  case ELF::EF_CUDA_SM90:
+    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
+  default: // Every SM value without a case above is treated as unreachable.
+    llvm_unreachable("Unknown EF_CUDA_SM value");
+  }
+}
+
 // FIXME Encode from a tablegen description or target parser.
 void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
  if (TheTriple.getSubArch() != Triple::NoSubArch)