[CodeLayout] cache-directed sort: limit max chain size (#69039)

When linking a slightly larger executable, such as one with the section sizes
shown below, `ld.lld --call-graph-profile-sort=cdsort` can be very slow (see #68638).
```
   4.6%  20.7Mi    .text.hot
   3.5%  15.9Mi    .text
   3.4%  15.2Mi    .text.unknown
```

Add cl option `cdsort-max-chain-size`, which is similar to
`ext-tsp-max-chain-size`, and set it to 128, to improve performance.

In `ld.lld @response.txt --threads=4 --call-graph-profile-sort=cdsort
--time-trace`
builds, the "Total Sort sections" time is measured as follows:

* -mllvm -cdsort-max-chain-size=64: 1.321813
* -mllvm -cdsort-max-chain-size=128: 2.030425
* -mllvm -cdsort-max-chain-size=256: 2.927684
* -mllvm -cdsort-max-chain-size=512: 5.493106
* unlimited: 9 minutes

The rest of the link takes 6.8s.
This commit is contained in:
Fangrui Song
2023-10-22 16:50:03 -07:00
committed by GitHub
parent 8511ade505
commit a24418375a
3 changed files with 19 additions and 0 deletions

View File

@@ -65,6 +65,8 @@ struct CDSortConfig {
unsigned CacheEntries = 16;
/// The size of a line in the cache.
unsigned CacheSize = 2048;
/// The maximum size of a chain to create.
unsigned MaxChainSize = 128;
/// The power exponent for the distance-based locality.
double DistancePower = 0.25;
/// The scale factor for the frequency-based locality.

View File

@@ -123,6 +123,10 @@ static cl::opt<unsigned> CacheEntries("cds-cache-entries", cl::ReallyHidden,
// Hidden option "cds-cache-size". computeCacheDirectedLayout copies it into
// Config.CacheSize only when it was explicitly given on the command line.
static cl::opt<unsigned> CacheSize("cds-cache-size", cl::ReallyHidden,
cl::desc("The size of a line in the cache"));
// Hidden option "cdsort-max-chain-size". computeCacheDirectedLayout copies it
// into Config.MaxChainSize only when it was explicitly given on the command
// line. The merge loop skips an edge when the combined numBlocks() of its
// source and destination chains exceeds this limit, so the "size" is a block
// count.
static cl::opt<unsigned>
CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden,
cl::desc("The maximum size of a chain to create"));
// Hidden option "cds-distance-power". computeCacheDirectedLayout copies it
// into Config.DistancePower only when it was explicitly given on the command
// line.
static cl::opt<double> DistancePower(
"cds-distance-power", cl::ReallyHidden,
cl::desc("The power exponent for the distance-based locality"));
@@ -1156,6 +1160,9 @@ private:
// Ignore loop edges.
if (Edge->isSelfEdge())
continue;
if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
Config.MaxChainSize)
continue;
// Compute the gain of merging the two chains.
MergeGainT Gain = getBestMergeGain(Edge);
@@ -1452,6 +1459,8 @@ std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
Config.CacheEntries = CacheEntries;
if (CacheSize.getNumOccurrences() > 0)
Config.CacheSize = CacheSize;
if (CDMaxChainSize.getNumOccurrences() > 0)
Config.MaxChainSize = CDMaxChainSize;
if (DistancePower.getNumOccurrences() > 0)
Config.DistancePower = DistancePower;
if (FrequencyScale.getNumOccurrences() > 0)

View File

@@ -40,6 +40,14 @@ TEST(CodeLayout, HotChain) {
const std::vector<uint64_t> CallOffsets(std::size(Edges), 5);
auto Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets);
EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 2, 1}));
// -cdsort-max-chain-size disables forming a larger chain and therefore may
// change the result.
CDSortConfig Config;
Config.MaxChainSize = 3;
Order =
computeCacheDirectedLayout(Config, Sizes, Counts, Edges, CallOffsets);
EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 1, 2}));
}
}