[ELF] Optimize RelocationSection<ELFT>::writeTo

When linking a 1.2G output (almost no debug info, 2846621 dynamic relocations) with `--threads=8`, I measured the following times (in seconds):

```
9.131462 Total ExecuteLinker
1.449913 Total Write output file
1.445784 Total Write sections
0.657152 Write sections {"detail":".rela.dyn"}
```

This change decreases the .rela.dyn time from 0.66s to 0.25s, leading to a 4% speedup in the total link time.

* The parallelSort is slow because its comparator performs an expensive r_sym/r_offset computation on every comparison. Cache the values.
* The iteration that writes the relocations is slow. Move the r_sym/r_addend computation ahead of time and parallelize it.

With this change, the new encodeDynamicReloc is cheap (0.05s), so there is no need to parallelize it.

Reviewed By: ikudrin

Differential Revision: https://reviews.llvm.org/D115993
This commit is contained in:
Fangrui Song
2021-12-21 09:43:44 -08:00
parent 008849d7a5
commit 6683099a0d
3 changed files with 43 additions and 26 deletions

View File

@@ -560,7 +560,7 @@ void OutputSection::checkDynRelAddends(const uint8_t *bufStart) {
if (!sec)
return;
for (const DynamicReloc &rel : sec->relocs) {
int64_t addend = rel.computeAddend();
int64_t addend = rel.addend;
const OutputSection *relOsec = rel.inputSec->getOutputSection();
assert(relOsec != nullptr && "missing output section for relocation");
const uint8_t *relocTarget =

View File

@@ -1653,13 +1653,19 @@ RelrBaseSection::RelrBaseSection()
config->wordsize, ".relr.dyn") {}
// Fill in one output relocation record *p from rel: r_offset, the symbol
// index and relocation type (via setSymbolAndType), and r_addend.
// NOTE(review): this span appears to contain both the pre-change and the
// post-change lines of the patch (two parameter lists, and assignments that
// read either the cached rel.r_offset/rel.r_sym/rel.addend members or the
// getOffset()/getSymIndex()/computeAddend() calls) with the diff markers
// lost. Confirm against the commit before treating it as compilable code.
template <class ELFT>
static void encodeDynamicReloc(SymbolTableBaseSection *symTab,
typename ELFT::Rela *p,
static void encodeDynamicReloc(typename ELFT::Rela *p,
const DynamicReloc &rel) {
p->r_offset = rel.r_offset;
p->setSymbolAndType(rel.r_sym, rel.type, config->isMips64EL);
if (config->isRela)
p->r_addend = rel.computeAddend();
p->r_offset = rel.getOffset();
p->setSymbolAndType(rel.getSymIndex(symTab), rel.type, config->isMips64EL);
p->r_addend = rel.addend;
}
// Compute the raw relocation values once and store them in this object:
// r_offset from getOffset(), r_sym from getSymIndex(symtab), and addend from
// computeAddend(). After this call `addend` holds the computed value rather
// than the value passed to the constructor.
void DynamicReloc::computeRaw(SymbolTableBaseSection *symtab) {
r_offset = getOffset();
r_sym = getSymIndex(symtab);
addend = computeAddend();
// kind is overwritten only after the three values above have been computed.
// NOTE(review): presumably this makes an accidental second computeAddend()
// call detectable; the mechanism is not visible here -- confirm.
kind = AddendOnly; // Catch errors
}
template <class ELFT>
@@ -1674,20 +1680,21 @@ RelocationSection<ELFT>::RelocationSection(StringRef name, bool sort)
// Write every entry of `relocs` to buf, one record after another. The record
// stride is sizeof(Elf_Rela) when config->isRela is set and sizeof(Elf_Rel)
// otherwise. When `sort` is set, the relocations are sorted first.
// NOTE(review): this span appears to contain both the pre-change and the
// post-change lines of the patch (two sort variants and two
// encodeDynamicReloc calls) with the diff markers lost. Confirm against the
// commit before treating it as compilable code.
template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) {
SymbolTableBaseSection *symTab = getPartition().dynSymTab;
// Run computeRaw() on every relocation in parallel so that r_offset, r_sym
// and addend are stored in each DynamicReloc before sorting and encoding.
parallelForEach(relocs,
[symTab](DynamicReloc &rel) { rel.computeRaw(symTab); });
// Sort by (!IsRelative,SymIndex,r_offset). DT_REL[A]COUNT requires us to
// place R_*_RELATIVE first. SymIndex is to improve locality, while r_offset
// is to make results easier to read.
if (sort)
parallelSort(
relocs, [&](const DynamicReloc &a, const DynamicReloc &b) {
return std::make_tuple(a.type != target->relativeRel,
a.getSymIndex(symTab), a.getOffset()) <
std::make_tuple(b.type != target->relativeRel,
b.getSymIndex(symTab), b.getOffset());
});
if (sort) {
// Copy target->relativeRel into a local; this comparator reads only the
// local and the stored r_sym/r_offset members.
const RelType relativeRel = target->relativeRel;
parallelSort(relocs, [&](const DynamicReloc &a, const DynamicReloc &b) {
return std::make_tuple(a.type != relativeRel, a.r_sym, a.r_offset) <
std::make_tuple(b.type != relativeRel, b.r_sym, b.r_offset);
});
}
// Encode the relocations serially, advancing buf by one record each time.
for (const DynamicReloc &rel : relocs) {
encodeDynamicReloc<ELFT>(symTab, reinterpret_cast<Elf_Rela *>(buf), rel);
encodeDynamicReloc<ELFT>(reinterpret_cast<Elf_Rela *>(buf), rel);
buf += config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
}
}
@@ -1765,7 +1772,11 @@ bool AndroidPackedRelocationSection<ELFT>::updateAllocSize() {
for (const DynamicReloc &rel : relocs) {
Elf_Rela r;
encodeDynamicReloc<ELFT>(getPartition().dynSymTab, &r, rel);
r.r_offset = rel.getOffset();
r.setSymbolAndType(rel.getSymIndex(getPartition().dynSymTab), rel.type,
false);
if (config->isRela)
r.r_addend = rel.computeAddend();
if (r.getType(config->isMips64EL) == target->relativeRel)
relatives.push_back(r);

View File

@@ -449,21 +449,21 @@ public:
DynamicReloc(RelType type, const InputSectionBase *inputSec,
uint64_t offsetInSec, Kind kind, Symbol &sym, int64_t addend,
RelExpr expr)
: type(type), sym(&sym), inputSec(inputSec), offsetInSec(offsetInSec),
kind(kind), expr(expr), addend(addend) {}
: sym(&sym), inputSec(inputSec), offsetInSec(offsetInSec), type(type),
addend(addend), kind(kind), expr(expr) {}
/// This constructor records a relative relocation with no symbol.
DynamicReloc(RelType type, const InputSectionBase *inputSec,
uint64_t offsetInSec, int64_t addend = 0)
: type(type), sym(nullptr), inputSec(inputSec), offsetInSec(offsetInSec),
kind(AddendOnly), expr(R_ADDEND), addend(addend) {}
: sym(nullptr), inputSec(inputSec), offsetInSec(offsetInSec), type(type),
addend(addend), kind(AddendOnly), expr(R_ADDEND) {}
/// This constructor records dynamic relocation settings used by the MIPS
/// multi-GOT implementation.
DynamicReloc(RelType type, const InputSectionBase *inputSec,
uint64_t offsetInSec, const OutputSection *outputSec,
int64_t addend)
: type(type), sym(nullptr), inputSec(inputSec), offsetInSec(offsetInSec),
kind(MipsMultiGotPage), expr(R_ADDEND), addend(addend),
outputSec(outputSec) {}
: sym(nullptr), outputSec(outputSec), inputSec(inputSec),
offsetInSec(offsetInSec), type(type), addend(addend),
kind(MipsMultiGotPage), expr(R_ADDEND) {}
uint64_t getOffset() const;
uint32_t getSymIndex(SymbolTableBaseSection *symTab) const;
@@ -476,18 +476,24 @@ public:
/// address/the address of the corresponding GOT entry/etc.
int64_t computeAddend() const;
// NOTE(review): `type`, `addend` and `outputSec` each appear twice in this
// span. It looks like the old and new member layouts of the patch shown
// together with the diff markers lost -- confirm against the commit.
RelType type;
/// Computes r_offset, r_sym and the output addend and stores them in this
/// object, then sets kind to AddendOnly.
void computeRaw(SymbolTableBaseSection *symtab);
Symbol *sym;
const OutputSection *outputSec = nullptr;
const InputSectionBase *inputSec;
uint64_t offsetInSec;
// Result of getOffset(), stored by computeRaw().
uint64_t r_offset;
RelType type;
// Result of getSymIndex(), stored by computeRaw().
uint32_t r_sym;
// Initially input addend, then the output addend after
// RelocationSection<ELFT>::writeTo.
int64_t addend;
private:
Kind kind;
// The kind of expression used to calculate the addend (required e.g. for
// relative GOT relocations).
RelExpr expr;
int64_t addend;
const OutputSection *outputSec = nullptr;
};
template <class ELFT> class DynamicSection final : public SyntheticSection {