[ELF] Add profile guided section layout

This adds profile guided layout using the Call-Chain Clustering (C³) heuristic from https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf . RFC: [llvm-dev] [RFC] Profile guided section layout http://lists.llvm.org/pipermail/llvm-dev/2017-June/114178.html Pass `--call-graph-ordering-file <file>` to read a call graph profile where each line has the format: <from symbol> <to symbol> <call count> Differential Revision: https://reviews.llvm.org/D36351 llvm-svn: 330234
2026-01-22 07:01:03 +08:00 · 2018-04-17 23:30:05 +00:00
parent 66f1c9a858
commit b842725c1d
13 changed files with 693 additions and 19 deletions
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -19,6 +19,7 @@ add_lld_library(lldELF
  Arch/SPARCV9.cpp
  Arch/X86.cpp
  Arch/X86_64.cpp
+  CallGraphSort.cpp
  Driver.cpp
  DriverUtils.cpp
  EhFrame.cpp
--- a/lld/ELF/CallGraphSort.cpp
+++ b/lld/ELF/CallGraphSort.cpp
@@ -0,0 +1,251 @@
+//===- CallGraphSort.cpp --------------------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
+/// for Large-Scale Data-Center Applications
+/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
+///
+/// The goal of this algorithm is to improve runtime performance of the final
+/// executable by arranging code sections such that page table and i-cache
+/// misses are minimized.
+///
+/// Definitions:
+/// * Cluster
+///   * An ordered list of input sections which are layed out as a unit. At the
+///     beginning of the algorithm each input section has its own cluster and
+///     the weight of the cluster is the sum of the weight of all incomming
+///     edges.
+/// * Call-Chain Clustering (C³) Heuristic
+///   * Defines when and how clusters are combined. Pick the highest weighted
+///     input section then add it to its most likely predecessor if it wouldn't
+///     penalize it too much.
+/// * Density
+///   * The weight of the cluster divided by the size of the cluster. This is a
+///     proxy for the ammount of execution time spent per byte of the cluster.
+///
+/// It does so given a call graph profile by the following:
+/// * Build a weighted call graph from the call graph profile
+/// * Sort input sections by weight
+/// * For each input section starting with the highest weight
+///   * Find its most likely predecessor cluster
+///   * Check if the combined cluster would be too large, or would have too low
+///     a density.
+///   * If not, then combine the clusters.
+/// * Sort non-empty clusters by density
+///
+//===----------------------------------------------------------------------===//
+
+#include "CallGraphSort.h"
+#include "OutputSections.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::elf;
+
+namespace {
+struct Edge {
+  int From;
+  uint64_t Weight;
+};
+
+struct Cluster {
+  Cluster(int Sec, size_t S) {
+    Sections.push_back(Sec);
+    Size = S;
+  }
+
+  double getDensity() const {
+    if (Size == 0)
+      return 0;
+    return double(Weight) / double(Size);
+  }
+
+  std::vector<int> Sections;
+  size_t Size = 0;
+  uint64_t Weight = 0;
+  uint64_t InitialWeight = 0;
+  std::vector<Edge> Preds;
+};
+
+class CallGraphSort {
+public:
+  CallGraphSort();
+
+  DenseMap<const InputSectionBase *, int> run();
+
+private:
+  std::vector<Cluster> Clusters;
+  std::vector<const InputSectionBase *> Sections;
+
+  void groupClusters();
+};
+
+// Maximum ammount the combined cluster density can be worse than the original
+// cluster to consider merging.
+constexpr int MAX_DENSITY_DEGRADATION = 8;
+
+// Maximum cluster size in bytes.
+constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
+} // end anonymous namespace
+
+// Take the edge list in Config->CallGraphProfile, resolve symbol names to
+// Symbols, and generate a graph between InputSections with the provided
+// weights.
+CallGraphSort::CallGraphSort() {
+  llvm::MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
+                  uint64_t> &Profile = Config->CallGraphProfile;
+  DenseMap<const InputSectionBase *, int> SecToCluster;
+
+  auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
+    auto Res = SecToCluster.insert(std::make_pair(IS, Clusters.size()));
+    if (Res.second) {
+      Sections.push_back(IS);
+      Clusters.emplace_back(Clusters.size(), IS->getSize());
+    }
+    return Res.first->second;
+  };
+
+  // Create the graph.
+  for (const auto &C : Profile) {
+    const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
+    const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
+    uint64_t Weight = C.second;
+
+    // Ignore edges between input sections belonging to different output
+    // sections.  This is done because otherwise we would end up with clusters
+    // containing input sections that can't actually be placed adjacently in the
+    // output.  This messes with the cluster size and density calculations.  We
+    // would also end up moving input sections in other output sections without
+    // moving them closer to what calls them.
+    if (FromSB->getOutputSection() != ToSB->getOutputSection())
+      continue;
+
+    int From = GetOrCreateNode(FromSB);
+    int To = GetOrCreateNode(ToSB);
+
+    Clusters[To].Weight += Weight;
+
+    if (From == To)
+      continue;
+
+    // Add an edge
+    Clusters[To].Preds.push_back({From, Weight});
+  }
+  for (Cluster &C : Clusters)
+    C.InitialWeight = C.Weight;
+}
+
+// It's bad to merge clusters which would degrade the density too much.
+static bool isNewDensityBad(Cluster &A, Cluster &B) {
+  double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
+  if (NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION)
+    return true;
+  return false;
+}
+
+static void mergeClusters(Cluster &Into, Cluster &From) {
+  Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
+                       From.Sections.end());
+  Into.Size += From.Size;
+  Into.Weight += From.Weight;
+  From.Sections.clear();
+  From.Size = 0;
+  From.Weight = 0;
+}
+
+// Group InputSections into clusters using the Call-Chain Clustering heuristic
+// then sort the clusters by density.
+void CallGraphSort::groupClusters() {
+  std::vector<int> SortedSecs(Clusters.size());
+  std::vector<Cluster *> SecToCluster(Clusters.size());
+
+  for (int SI = 0, SE = Clusters.size(); SI != SE; ++SI) {
+    SortedSecs[SI] = SI;
+    SecToCluster[SI] = &Clusters[SI];
+  }
+
+  std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
+    return Clusters[B].getDensity() < Clusters[A].getDensity();
+  });
+
+  for (int SI : SortedSecs) {
+    // Clusters[SI] is the same as SecToClusters[SI] here because it has not
+    // been merged into another cluster yet.
+    Cluster &C = Clusters[SI];
+
+    int BestPred = -1;
+    uint64_t BestWeight = 0;
+
+    for (Edge &E : C.Preds) {
+      if (BestPred == -1 || E.Weight > BestWeight) {
+        BestPred = E.From;
+        BestWeight = E.Weight;
+      }
+    }
+
+    // don't consider merging if the edge is unlikely.
+    if (BestWeight * 10 <= C.InitialWeight)
+      continue;
+
+    Cluster *PredC = SecToCluster[BestPred];
+    if (PredC == &C)
+      continue;
+
+    if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
+      continue;
+
+    if (isNewDensityBad(*PredC, C))
+      continue;
+
+    // NOTE: Consider using a disjoint-set to track section -> cluster mapping
+    // if this is ever slow.
+    for (int SI : C.Sections)
+      SecToCluster[SI] = PredC;
+
+    mergeClusters(*PredC, C);
+  }
+
+  // Remove empty or dead nodes. Invalidates all cluster indices.
+  Clusters.erase(std::remove_if(Clusters.begin(), Clusters.end(),
+                                [](const Cluster &C) {
+                                  return C.Size == 0 || C.Sections.empty();
+                                }),
+                 Clusters.end());
+
+  // Sort by density.
+  std::sort(Clusters.begin(), Clusters.end(),
+            [](const Cluster &A, const Cluster &B) {
+              return A.getDensity() > B.getDensity();
+            });
+}
+
+DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
+  groupClusters();
+
+  // Generate order.
+  llvm::DenseMap<const InputSectionBase *, int> OrderMap;
+  ssize_t CurOrder = 1;
+
+  for (const Cluster &C : Clusters)
+    for (int SecIndex : C.Sections)
+      OrderMap[Sections[SecIndex]] = CurOrder++;
+
+  return OrderMap;
+}
+
+// Sort sections by the profile data provided by -callgraph-profile-file
+//
+// This first builds a call graph based on the profile data then merges sections
+// according to the C³ huristic. All clusters are then sorted by a density
+// metric to further improve locality.
+DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
+  return CallGraphSort().run();
+}
--- a/lld/ELF/CallGraphSort.h
+++ b/lld/ELF/CallGraphSort.h
@@ -0,0 +1,23 @@
+//===- CallGraphSort.h ------------------------------------------*- C++ -*-===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_CALL_GRAPH_SORT_H
+#define LLD_ELF_CALL_GRAPH_SORT_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace lld {
+namespace elf {
+class InputSectionBase;
+
+llvm::DenseMap<const InputSectionBase *, int> computeCallGraphProfileOrder();
+} // namespace elf
+} // namespace lld
+
+#endif
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -24,6 +24,7 @@ namespace lld {
 namespace elf {

 class InputFile;
+class InputSectionBase;

 enum ELFKind {
  ELFNoneKind,
@@ -104,6 +105,9 @@ struct Configuration {
  std::vector<SymbolVersion> VersionScriptGlobals;
  std::vector<SymbolVersion> VersionScriptLocals;
  std::vector<uint8_t> BuildIdVector;
+  llvm::MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
+                  uint64_t>
+      CallGraphProfile;
  bool AllowMultipleDefinition;
  bool AndroidPackDynRelocs = false;
  bool ARMHasBlx = false;
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -572,6 +572,45 @@ getBuildId(opt::InputArgList &Args) {
  return {BuildIdKind::None, {}};
 }

+static void readCallGraph(MemoryBufferRef MB) {
+  // Build a map from symbol name to section
+  DenseMap<StringRef, const Symbol *> SymbolNameToSymbol;
+  for (InputFile *File : ObjectFiles)
+    for (Symbol *Sym : File->getSymbols())
+      SymbolNameToSymbol[Sym->getName()] = Sym;
+
+  for (StringRef L : args::getLines(MB)) {
+    SmallVector<StringRef, 3> Fields;
+    L.split(Fields, ' ');
+    if (Fields.size() != 3)
+      fatal("parse error");
+    uint64_t Count;
+    if (!to_integer(Fields[2], Count))
+      fatal("parse error");
+    const Symbol *FromSym = SymbolNameToSymbol.lookup(Fields[0]);
+    const Symbol *ToSym = SymbolNameToSymbol.lookup(Fields[1]);
+    if (Config->WarnSymbolOrdering) {
+      if (!FromSym)
+        warn("call graph file: no such symbol: " + Fields[0]);
+      if (!ToSym)
+        warn("call graph file: no such symbol: " + Fields[1]);
+    }
+    if (!FromSym || !ToSym || Count == 0)
+      continue;
+    warnUnorderableSymbol(FromSym);
+    warnUnorderableSymbol(ToSym);
+    const Defined *FromSymD = dyn_cast<Defined>(FromSym);
+    const Defined *ToSymD = dyn_cast<Defined>(ToSym);
+    if (!FromSymD || !ToSymD)
+      continue;
+    const auto *FromSB = dyn_cast_or_null<InputSectionBase>(FromSymD->Section);
+    const auto *ToSB = dyn_cast_or_null<InputSectionBase>(ToSymD->Section);
+    if (!FromSB || !ToSB)
+      continue;
+    Config->CallGraphProfile[std::make_pair(FromSB, ToSB)] += Count;
+  }
+}
+
 static bool getCompressDebugSections(opt::InputArgList &Args) {
  StringRef S = Args.getLastArgValue(OPT_compress_debug_sections, "none");
  if (S == "none")
@@ -1224,6 +1263,11 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
  if (Config->ICF)
    doIcf<ELFT>();

+  // Read the callgraph now that we know what was gced or icfed
+  if (auto *Arg = Args.getLastArg(OPT_call_graph_ordering_file))
+    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
+      readCallGraph(*Buffer);
+
  // Write the result to the file.
  writeResult<ELFT>();
 }
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -66,6 +66,9 @@ defm as_needed: B<"as-needed",
    "Only set DT_NEEDED for shared libraries if used",
    "Always set DT_NEEDED for shared libraries">;

+defm call_graph_ordering_file: Eq<"call-graph-ordering-file">,
+  HelpText<"Layout sections to optimize the given callgraph">;
+
 // -chroot doesn't have a help text because it is an internal option.
 defm chroot: Eq<"chroot">;

--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -248,6 +248,27 @@ void elf::printTraceSymbol(Symbol *Sym) {
  message(toString(Sym->File) + S + Sym->getName());
 }

+void elf::warnUnorderableSymbol(const Symbol *Sym) {
+  if (!Config->WarnSymbolOrdering)
+    return;
+  const InputFile *File = Sym->File;
+  auto *D = dyn_cast<Defined>(Sym);
+  if (Sym->isUndefined())
+    warn(toString(File) +
+         ": unable to order undefined symbol: " + Sym->getName());
+  else if (Sym->isShared())
+    warn(toString(File) + ": unable to order shared symbol: " + Sym->getName());
+  else if (D && !D->Section)
+    warn(toString(File) +
+         ": unable to order absolute symbol: " + Sym->getName());
+  else if (D && isa<OutputSection>(D->Section))
+    warn(toString(File) +
+         ": unable to order synthetic symbol: " + Sym->getName());
+  else if (D && !D->Section->Repl->Live)
+    warn(toString(File) +
+         ": unable to order discarded symbol: " + Sym->getName());
+}
+
 // Returns a symbol for an error message.
 std::string lld::toString(const Symbol &B) {
  if (Config->Demangle)
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -356,6 +356,8 @@ void replaceSymbol(Symbol *S, ArgT &&... Arg) {
  if (S->Traced)
    printTraceSymbol(S);
 }
+
+void warnUnorderableSymbol(const Symbol *Sym);
 } // namespace elf

 std::string toString(const elf::Symbol &B);
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -9,6 +9,7 @@

 #include "Writer.h"
 #include "AArch64ErrataFix.h"
+#include "CallGraphSort.h"
 #include "Config.h"
 #include "Filesystem.h"
 #include "LinkerScript.h"
@@ -1029,6 +1030,10 @@ findOrphanPos(std::vector<BaseCommand *>::iterator B,
 // Builds section order for handling --symbol-ordering-file.
 static DenseMap<const InputSectionBase *, int> buildSectionOrder() {
  DenseMap<const InputSectionBase *, int> SectionOrder;
+  // Use the rarely used option -call-graph-ordering-file to sort sections.
+  if (!Config->CallGraphProfile.empty())
+    return computeCallGraphProfileOrder();
+
  if (Config->SymbolOrderingFile.empty())
    return SectionOrder;

@@ -1053,25 +1058,7 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder() {
    SymbolOrderEntry &Ent = It->second;
    Ent.Present = true;

-    if (Config->WarnSymbolOrdering) {
-      auto *D = dyn_cast<Defined>(&Sym);
-      InputFile *File = Sym.File;
-      if (Sym.isUndefined())
-        warn(toString(File) +
-             ": unable to order undefined symbol: " + Sym.getName());
-      else if (Sym.isShared())
-        warn(toString(File) +
-             ": unable to order shared symbol: " + Sym.getName());
-      else if (D && !D->Section)
-        warn(toString(File) +
-             ": unable to order absolute symbol: " + Sym.getName());
-      else if (D && isa<OutputSection>(D->Section))
-        warn(toString(File) +
-             ": unable to order synthetic symbol: " + Sym.getName());
-      else if (D && !D->Section->Repl->Live)
-        warn(toString(File) +
-             ": unable to order discarded symbol: " + Sym.getName());
-    }
+    warnUnorderableSymbol(&Sym);

    if (auto *D = dyn_cast<Defined>(&Sym)) {
      if (auto *Sec = dyn_cast_or_null<InputSectionBase>(D->Section)) {
--- a/lld/test/ELF/cgprofile-bad-clusters.s
+++ b/lld/test/ELF/cgprofile-bad-clusters.s
@@ -0,0 +1,71 @@
+# This test checks that CallGraphSort ignores edges that would form "bad"
+# clusters.
+
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+# RUN: echo "A C 1" > %t.call_graph
+# RUN: echo "E B 4" >> %t.call_graph
+# RUN: echo "C D 2" >> %t.call_graph
+# RUN: echo "B D 1" >> %t.call_graph
+# RUN: echo "F G 6" >> %t.call_graph
+# RUN: echo "G H 5" >> %t.call_graph
+# RUN: echo "H I 4" >> %t.call_graph
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2
+# RUN: llvm-readobj -symbols %t2 | FileCheck %s
+
+    .section    .text.A,"ax",@progbits
+    .globl A
+A:
+    retq
+
+    .section    .text.D,"ax",@progbits
+D:
+    .fill 1000, 1, 0
+
+    .section    .text.E,"ax",@progbits
+E:
+    retq
+
+    .section    .text.C,"ax",@progbits
+C:
+    retq
+
+    .section    .text.B,"ax",@progbits
+B:
+    .fill 1000, 1, 0
+
+    .section    .text.F,"ax",@progbits
+F:
+    .fill (1024 * 1024) - 1, 1, 0
+
+    .section    .text.G,"ax",@progbits
+G:
+    retq
+
+    .section    .text.H,"ax",@progbits
+H:
+    retq
+
+    .section    .text.I,"ax",@progbits
+I:
+    .fill 13, 1, 0
+
+# CHECK:          Name: B
+# CHECK-NEXT:     Value: 0x201011
+# CHECK:          Name: C
+# CHECK-NEXT:     Value: 0x20100F
+# CHECK:          Name: D
+# CHECK-NEXT:     Value: 0x2013F9
+# CHECK:          Name: E
+# CHECK-NEXT:     Value: 0x201010
+# CHECK:          Name: F
+# CHECK-NEXT:     Value: 0x2017E1
+# CHECK:          Name: G
+# CHECK-NEXT:     Value: 0x3017E0
+# CHECK:          Name: H
+# CHECK-NEXT:     Value: 0x201000
+# CHECK:          Name: I
+# CHECK-NEXT:     Value: 0x201001
+# CHECK:          Name: A
+# CHECK-NEXT:     Value: 0x20100E
--- a/lld/test/ELF/cgprofile-icf.s
+++ b/lld/test/ELF/cgprofile-icf.s
@@ -0,0 +1,53 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+
+# RUN: echo "A B 100" > %t.call_graph
+# RUN: echo "A C 40" >> %t.call_graph
+# RUN: echo "C D 61" >> %t.call_graph
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t.out -icf=all
+# RUN: llvm-readobj -symbols %t.out | FileCheck %s
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2.out
+# RUN: llvm-readobj -symbols %t2.out | FileCheck %s --check-prefix=NOICF
+
+    .section    .text.D,"ax",@progbits
+    .globl  D
+D:
+    mov $60, %rax
+    retq
+
+    .section    .text.C,"ax",@progbits
+    .globl  C
+C:
+    mov $60, %rax
+    retq
+
+    .section    .text.B,"ax",@progbits
+    .globl  B
+B:
+    mov $2, %rax
+    retq
+
+    .section    .text.A,"ax",@progbits
+    .globl  A
+A:
+    mov $42, %rax
+    retq
+
+# CHECK:          Name: A
+# CHECK-NEXT:     Value: 0x201000
+# CHECK:          Name: B
+# CHECK-NEXT:     Value: 0x201010
+# CHECK:          Name: C
+# CHECK-NEXT:     Value: 0x201008
+# CHECK:          Name: D
+# CHECK-NEXT:     Value: 0x201008
+
+# NOICF:          Name: A
+# NOICF-NEXT:     Value: 0x201000
+# NOICF:          Name: B
+# NOICF-NEXT:     Value: 0x201008
+# NOICF:          Name: C
+# NOICF-NEXT:     Value: 0x201010
+# NOICF:          Name: D
+# NOICF-NEXT:     Value: 0x201018
--- a/lld/test/ELF/cgprofile-txt.s
+++ b/lld/test/ELF/cgprofile-txt.s
@@ -0,0 +1,185 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+# RUN: ld.lld -e A %t -o %t2
+# RUN: llvm-readobj -symbols %t2 | FileCheck %s --check-prefix=NOSORT
+
+# RUN: echo "A B 10" > %t.call_graph
+# RUN: echo "A B 10" >> %t.call_graph
+# RUN: echo "Aa B 80" >> %t.call_graph
+# RUN: echo "A C 40" >> %t.call_graph
+# RUN: echo "B C 30" >> %t.call_graph
+# RUN: echo "C D 90" >> %t.call_graph
+# RUN: echo "PP TS 100" >> %t.call_graph
+# RUN: echo "_init2 _init 24567837" >> %t.call_graph
+# RUN: echo "TS QC 9001" >> %t.call_graph
+# RUN: echo "TooManyPreds0 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds1 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds2 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds3 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds4 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds5 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds6 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds7 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds8 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds9 TooManyPreds 10" >> %t.call_graph
+# RUN: echo "TooManyPreds10 TooManyPreds 11" >> %t.call_graph
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2
+# RUN: llvm-readobj -symbols %t2 | FileCheck %s
+
+    .section    .text.D,"ax",@progbits
+D:
+    retq
+
+    .section    .text.C,"ax",@progbits
+    .globl  C
+C:
+    retq
+
+    .section    .text.B,"ax",@progbits
+    .globl  B
+B:
+    retq
+
+    .section    .text.A,"ax",@progbits
+    .globl  A
+A:
+Aa:
+    retq
+
+    .section    .ponies,"ax",@progbits,unique,1
+    .globl TS
+TS:
+    retq
+
+    .section    .ponies,"ax",@progbits,unique,2
+    .globl PP
+PP:
+    retq
+
+    .section    .other,"ax",@progbits,unique,1
+    .globl QC
+QC:
+    retq
+
+    .section    .other,"ax",@progbits,unique,2
+    .globl GB
+GB:
+    retq
+
+    .section    .init,"ax",@progbits,unique,1
+    .globl _init
+_init:
+    retq
+
+    .section    .init,"ax",@progbits,unique,2
+    .globl _init2
+_init2:
+    retq
+
+    .section    .text.TooManyPreds,"ax",@progbits
+TooManyPreds:
+    retq
+	retq
+	retq
+	retq
+	retq
+	retq
+	retq
+	retq
+	retq
+	retq
+
+    .section    .text.TooManyPreds0,"ax",@progbits
+TooManyPreds0:
+    retq
+
+    .section    .text.TooManyPreds1,"ax",@progbits
+TooManyPreds1:
+    retq
+
+    .section    .text.TooManyPreds2,"ax",@progbits
+TooManyPreds2:
+    retq
+
+    .section    .text.TooManyPreds3,"ax",@progbits
+TooManyPreds3:
+    retq
+
+    .section    .text.TooManyPreds4,"ax",@progbits
+TooManyPreds4:
+    retq
+
+    .section    .text.TooManyPreds5,"ax",@progbits
+TooManyPreds5:
+    retq
+
+    .section    .text.TooManyPreds6,"ax",@progbits
+TooManyPreds6:
+    retq
+
+    .section    .text.TooManyPreds7,"ax",@progbits
+TooManyPreds7:
+    retq
+
+    .section    .text.TooManyPreds8,"ax",@progbits
+TooManyPreds8:
+    retq
+
+    .section    .text.TooManyPreds9,"ax",@progbits
+TooManyPreds9:
+    retq
+
+    .section    .text.TooManyPreds10,"ax",@progbits
+TooManyPreds10:
+    retq
+
+# CHECK:          Name: D
+# CHECK-NEXT:     Value: 0x201003
+# CHECK:          Name: TooManyPreds
+# CHECK-NEXT:     Value: 0x201004
+# CHECK:          Name: TooManyPreds10
+# CHECK-NEXT:     Value: 0x201018
+# CHECK:          Name: A
+# CHECK-NEXT:     Value: 0x201000
+# CHECK:          Name: B
+# CHECK-NEXT:     Value: 0x201001
+# CHECK:          Name: C
+# CHECK-NEXT:     Value: 0x201002
+# CHECK:          Name: GB
+# CHECK-NEXT:     Value: 0x20101F
+# CHECK:          Name: PP
+# CHECK-NEXT:     Value: 0x20101C
+# CHECK:          Name: QC
+# CHECK-NEXT:     Value: 0x20101E
+# CHECK:          Name: TS
+# CHECK-NEXT:     Value: 0x20101D
+# CHECK:          Name: _init
+# CHECK-NEXT:     Value: 0x201020
+# CHECK:          Name: _init2
+# CHECK-NEXT:     Value: 0x201021
+
+# NOSORT:          Name: D
+# NOSORT-NEXT:     Value: 0x201000
+# NOSORT:          Name: TooManyPreds
+# NOSORT-NEXT:     Value: 0x201004
+# NOSORT:          Name: TooManyPreds10
+# NOSORT-NEXT:     Value: 0x201018
+# NOSORT:          Name: A
+# NOSORT-NEXT:     Value: 0x201003
+# NOSORT:          Name: B
+# NOSORT-NEXT:     Value: 0x201002
+# NOSORT:          Name: C
+# NOSORT-NEXT:     Value: 0x201001
+# NOSORT:          Name: GB
+# NOSORT-NEXT:     Value: 0x20101C
+# NOSORT:          Name: PP
+# NOSORT-NEXT:     Value: 0x20101A
+# NOSORT:          Name: QC
+# NOSORT-NEXT:     Value: 0x20101B
+# NOSORT:          Name: TS
+# NOSORT-NEXT:     Value: 0x201019
+# NOSORT:          Name: _init
+# NOSORT-NEXT:     Value: 0x20101D
+# NOSORT:          Name: _init2
+# NOSORT-NEXT:     Value: 0x20101E
--- a/lld/test/ELF/cgprofile-warn.s
+++ b/lld/test/ELF/cgprofile-warn.s
@@ -0,0 +1,29 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+
+# RUN: echo "A B 100" > %t.call_graph
+# RUN: echo "A C 40" >> %t.call_graph
+# RUN: echo "B C 30" >> %t.call_graph
+# RUN: echo "adena A 30" >> %t.call_graph
+# RUN: echo "poppy A 30" >> %t.call_graph
+# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t.out \
+# RUN:   -noinhibit-exec -icf=all 2>&1 | FileCheck %s
+
+    .section    .text.C,"ax",@progbits
+    .globl  C
+C:
+    mov poppy, %rax
+    retq
+
+B = 0x1234
+
+    .section    .text.A,"ax",@progbits
+    .globl  A
+A:
+    mov poppy, %rax
+    retq
+
+# CHECK: unable to order absolute symbol: B
+# CHECK: call graph file: no such symbol: adena
+# CHECK: unable to order undefined symbol: poppy