Support for splitting jump tables.

Summary:
Add levels for the "-jump-tables=<n>" option:
  1 - all jump tables are output in the same section (default).
  2 - basic splitting: if a table is used, it is output to the hot
      section, otherwise to the cold one.
  3 - aggressively split compound jump tables and collect profile for
      all entries.

Option "-print-jump-tables" outputs all jump tables for debugging
and/or analyzing purposes. Use with "-jump-tables=3" to get profile
values for every entry in a jump table.
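
For example, a hypothetical invocation combining both options (binary
and profile file names are placeholders):

  llvm-bolt a.out -data=perf.fdata -o a.bolt \
    -jump-tables=3 -print-jump-tables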

(cherry picked from FBD3912119)
Maksim Panchenko committed 2016-09-16 15:54:32 -07:00
parent ecc4b9e713
commit 4464861a02
8 changed files with 353 additions and 94 deletions

View File

@@ -30,7 +30,7 @@ bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
return LHS.Offset < RHS.Offset;
}
MCInst *BinaryBasicBlock::findFirstNonPseudoInstruction() {
MCInst *BinaryBasicBlock::getFirstNonPseudo() {
auto &BC = Function->getBinaryContext();
for (auto &Inst : Instructions) {
if (!BC.MII->get(Inst.getOpcode()).isPseudo())
@@ -39,7 +39,7 @@ MCInst *BinaryBasicBlock::findFirstNonPseudoInstruction() {
return nullptr;
}
MCInst *BinaryBasicBlock::findLastNonPseudoInstruction() {
MCInst *BinaryBasicBlock::getLastNonPseudo() {
auto &BC = Function->getBinaryContext();
for (auto Itr = Instructions.rbegin(); Itr != Instructions.rend(); ++Itr) {
if (!BC.MII->get(Itr->getOpcode()).isPseudo())

View File

@@ -382,11 +382,11 @@ public:
/// Return a pointer to the first non-pseudo instruction in this basic
/// block. Returns nullptr if none exists.
MCInst *findFirstNonPseudoInstruction();
MCInst *getFirstNonPseudo();
/// Return a pointer to the last non-pseudo instruction in this basic
/// block. Returns nullptr if none exists.
MCInst *findLastNonPseudoInstruction();
MCInst *getLastNonPseudo();
/// Set minimum alignment for the basic block.
void setAlignment(uint64_t Align) {

View File

@@ -314,9 +314,8 @@ void BinaryContext::printInstruction(raw_ostream &OS,
}
}
if (MIA->isIndirectBranch(Instruction)) {
auto JTIndex = MIA->getJumpTableIndex(Instruction);
if (JTIndex != -1LL) {
OS << " # JUMPTABLE " << JTIndex;
if (auto JTAddress = MIA->getJumpTable(Instruction)) {
OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
}
}

View File

@@ -36,17 +36,34 @@
#define DEBUG_TYPE "bolt"
using namespace llvm;
using namespace bolt;
namespace opts {
extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> PrintDynoStats;
static cl::opt<bool>
static cl::opt<BinaryFunction::JumpTableSupportLevel>
JumpTables("jump-tables",
cl::desc("enable jump table support (experimental)"),
cl::desc("jump tables support"),
cl::init(BinaryFunction::JTS_NONE),
cl::values(clEnumValN(BinaryFunction::JTS_NONE, "0",
"do not optimize functions with jump tables"),
clEnumValN(BinaryFunction::JTS_BASIC, "1",
"optimize functions with jump tables"),
clEnumValN(BinaryFunction::JTS_SPLIT, "2",
"split jump tables into hot and cold"),
clEnumValN(BinaryFunction::JTS_AGGRESSIVE, "3",
"aggressively split jump tables (unsafe)"),
clEnumValEnd),
cl::ZeroOrMore);
static cl::opt<bool>
PrintJumpTables("print-jump-tables",
cl::desc("print jump tables"),
cl::ZeroOrMore,
cl::Hidden);
static cl::opt<bool>
AgressiveSplitting("split-all-cold",
cl::desc("outline as many cold basic blocks as possible"),
@@ -235,6 +252,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << "\n Id Fun Addr : 0x" << Twine::utohexstr(IdenticalFunctionAddress);
if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
OS << '\n';
DynoStats dynoStats = getDynoStats();
OS << dynoStats;
}
@@ -357,14 +375,9 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << '\n';
}
for(unsigned Index = 0; Index < JumpTables.size(); ++Index) {
const auto &JumpTable = JumpTables[Index];
OS << "Jump Table #" << (Index + 1) << '\n';
for (unsigned EIndex = 0; EIndex < JumpTable.Entries.size(); ++EIndex) {
const auto *Entry = JumpTable.Entries[EIndex];
OS << " entry " << EIndex << ": " << Entry->getName() << '\n';
}
OS << '\n';
// Print all jump tables.
for (auto &JTI : JumpTables) {
JTI.second.print(OS);
}
OS << "DWARF CFI Instructions:\n";
@@ -373,7 +386,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
for (auto &Elmt : OffsetToCFI) {
OS << format(" %08x:\t", Elmt.first);
assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
BinaryContext::printCFI(OS, FrameInstructions[Elmt.second].getOperation());
BinaryContext::printCFI(OS,
FrameInstructions[Elmt.second].getOperation());
OS << "\n";
}
} else {
@@ -523,10 +537,30 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
SegRegNum != bolt::NoRegister || ScaleValue != PtrSize)
return IndirectBranchType::UNKNOWN;
auto ArrayStart = DispValue;
auto ArrayStart = static_cast<uint64_t>(DispValue);
if (BaseRegNum == RIPRegister)
ArrayStart += getAddress() + Offset + Size;
// Check if there's already a jump table registered at this address.
if (auto *JT = getJumpTableContainingAddress(ArrayStart)) {
auto JTOffset = ArrayStart - JT->Address;
// Get or create a label.
auto LI = JT->Labels.find(JTOffset);
if (LI == JT->Labels.end()) {
auto *JTStartLabel = BC.Ctx->createTempSymbol("JUMP_TABLE", true);
auto Result = JT->Labels.emplace(JTOffset, JTStartLabel);
assert(Result.second && "error adding jump table label");
LI = Result.first;
}
BC.MIA->replaceMemOperandDisp(*MemLocInstr, LI->second, BC.Ctx.get());
BC.MIA->setJumpTable(Instruction, ArrayStart);
JTSites.emplace_back(Offset, ArrayStart);
return IndirectBranchType::POSSIBLE_JUMP_TABLE;
}
auto SectionOrError = BC.getSectionForAddress(ArrayStart);
if (!SectionOrError) {
// No section - possibly an absolute address. Since we don't allow
@@ -552,6 +586,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
uint64_t Value = 0;
auto Result = IndirectBranchType::UNKNOWN;
std::vector<MCSymbol *> JTLabelCandidates;
std::vector<uint64_t> JTOffsetCandidates;
while (ValueOffset <= Section.getSize() - PtrSize) {
DEBUG(dbgs() << "BOLT-DEBUG: indirect jmp at 0x"
<< Twine::utohexstr(getAddress() + Offset)
@@ -565,7 +600,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
// Is it possible to have a jump table with function start as an entry?
auto *JTEntry = getOrCreateLocalLabel(Value);
JTLabelCandidates.push_back(JTEntry);
TakenBranches.emplace_back(Offset, Value - getAddress());
JTOffsetCandidates.push_back(Value - getAddress());
Result = IndirectBranchType::POSSIBLE_JUMP_TABLE;
continue;
}
@@ -577,19 +612,26 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
break;
}
JTLabelCandidates.push_back(getFunctionEndLabel());
JTOffsetCandidates.push_back(Value - getAddress());
}
if (Result == IndirectBranchType::POSSIBLE_JUMP_TABLE) {
assert(JTLabelCandidates.size() > 2 &&
"expected more than 2 jump table entries");
auto *JTStartLabel = BC.Ctx->createTempSymbol("JUMP_TABLE", true);
JumpTables.emplace_back(JumpTable{JTStartLabel,
std::move(JTLabelCandidates)});
BC.MIA->replaceMemOperandDisp(*MemLocInstr, JTStartLabel, BC.Ctx.get());
BC.MIA->setJumpTableIndex(Instruction, JumpTables.size());
DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "
<< JTStartLabel->getName()
<< " in function " << *this << " with "
<< JTLabelCandidates.size() << " entries.\n");
JumpTables.emplace(ArrayStart, JumpTable{ArrayStart,
PtrSize,
std::move(JTLabelCandidates),
std::move(JTOffsetCandidates),
{{0, JTStartLabel}}});
BC.MIA->replaceMemOperandDisp(*MemLocInstr, JTStartLabel, BC.Ctx.get());
BC.MIA->setJumpTable(Instruction, ArrayStart);
JTSites.emplace_back(Offset, ArrayStart);
return Result;
}
BC.InterproceduralReferences.insert(Value);
@@ -727,7 +769,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
MIA->convertJmpToTailCall(Instruction);
break;
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
if (!opts::JumpTables)
if (opts::JumpTables == JTS_NONE)
IsSimple = false;
break;
case IndirectBranchType::UNKNOWN:
@@ -771,6 +813,40 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
Offset += Size;
}
// Update TakenBranches from JumpTables.
//
// We want to do it after initial processing since we don't know jump
// table boundaries until we process them all.
for (auto &JTSite : JTSites) {
auto JTSiteOffset = JTSite.first;
auto JTAddress = JTSite.second;
auto *JT = getJumpTableContainingAddress(JTAddress);
assert(JT && "cannot find jump table for address");
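// The jump may reference a label in the middle of a compound table;
// start scanning entries at the offset the site actually addresses.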
uint32_t EI = (JTAddress - JT->Address) / JT->EntrySize;
while (EI < JT->Entries.size()) {
auto TargetOffset = JT->OffsetEntries[EI];
if (TargetOffset < getSize())
TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
++EI;
// A label at the next entry means the end of this jump table.
if (JT->Labels.count(EI * JT->EntrySize))
break;
}
}
// Free memory used by jump table offsets.
for (auto &JTI : JumpTables) {
auto &JT = JTI.second;
clearList(JT.OffsetEntries);
}
// Remove duplicate branches. We can get a bunch of them from jump tables.
// Without doing jump table value profiling we have no use for the extra
// (duplicate) branches.
std::sort(TakenBranches.begin(), TakenBranches.end());
auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
TakenBranches.erase(NewEnd, TakenBranches.end());
// TODO: clear memory if not simple function?
// Update state.
@@ -793,7 +869,7 @@ bool BinaryFunction::postProcessIndirectBranches() {
}
// Validate the tail call assumptions.
if (BC.MIA->isTailCall(Instr) || (BC.MIA->getJumpTableIndex(Instr) > 0)) {
if (BC.MIA->isTailCall(Instr) || BC.MIA->getJumpTable(Instr)) {
if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
// We have validated memory contents addressed by the jump
// instruction already.
@@ -1063,6 +1139,34 @@ bool BinaryFunction::buildCFG() {
} else {
const BranchInfo &BInfo = BranchInfoOrErr.get();
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
// Populate profile counts for the jump table.
auto *LastInstr = FromBB->getLastNonPseudo();
if (!LastInstr)
continue;
auto JTAddress = BC.MIA->getJumpTable(*LastInstr);
if (!JTAddress)
continue;
auto *JT = getJumpTableContainingAddress(JTAddress);
if (!JT)
continue;
JT->Count += BInfo.Branches;
if (opts::JumpTables < JTS_AGGRESSIVE)
continue;
if (JT->Counts.empty())
JT->Counts.resize(JT->Entries.size());
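// The branch site may enter the table at an inner label; attribute
// counts starting from the entry it actually references.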
auto EI = JT->Entries.begin();
auto Delta = (JTAddress - JT->Address) / JT->EntrySize;
EI += Delta;
while (EI != JT->Entries.end()) {
if (ToBB->getLabel() == *EI) {
JT->Counts[Delta] += BInfo.Branches;
}
++Delta;
++EI;
// A label marks the start of another jump table.
if (JT->Labels.count(Delta * JT->EntrySize))
break;
}
}
}
}
@@ -1311,7 +1415,7 @@ void BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
(float) (LocalProfileBranches.size() - OrphanBranches.size()) /
(float) LocalProfileBranches.size();
if (opts::Verbosity >= 2 && !OrphanBranches.empty()) {
if (opts::Verbosity >= 1 && !OrphanBranches.empty()) {
errs() << "BOLT-WARNING: profile branches match only "
<< format("%.1f%%", ProfileMatchRatio * 100.0f) << " ("
<< (LocalProfileBranches.size() - OrphanBranches.size()) << '/'
@@ -1322,6 +1426,7 @@ void BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
errs() << "\t0x" << Twine::utohexstr(OBranch.first) << " -> 0x"
<< Twine::utohexstr(OBranch.second) << " (0x"
<< Twine::utohexstr(OBranch.first + getAddress()) << " -> 0x"
<< Twine::utohexstr(OBranch.second + getAddress()) << ")\n";
);
}
}
@@ -1868,8 +1973,8 @@ void BinaryFunction::dumpGraph(raw_ostream& OS) const {
CondBranch,
UncondBranch);
const auto *LastInstr = BB->findLastNonPseudoInstruction();
const bool IsJumpTable = LastInstr && BC.MIA->getJumpTableIndex(*LastInstr) > 0;
const auto *LastInstr = BB->getLastNonPseudo();
const bool IsJumpTable = LastInstr && BC.MIA->getJumpTable(*LastInstr);
auto BI = BB->branch_info_begin();
for (auto *Succ : BB->successors()) {
@@ -2551,18 +2656,92 @@ BinaryFunction::~BinaryFunction() {
void BinaryFunction::emitJumpTables(MCStreamer *Streamer) {
if (JumpTables.empty())
return;
Streamer->SwitchSection(BC.MOFI->getReadOnlySection());
for (auto &JumpTable : JumpTables) {
DEBUG(dbgs() << "BOLT-DEBUG: emitting jump table "
<< JumpTable.StartLabel->getName() << '\n');
Streamer->EmitLabel(JumpTable.StartLabel);
// TODO (#9806207): based on jump table type (PIC vs non-PIC etc.)
// we would need to emit different references.
for (auto *Entry : JumpTable.Entries) {
Streamer->EmitSymbolValue(Entry, BC.AsmInfo->getPointerSize());
}
if (opts::PrintJumpTables) {
outs() << "BOLT-INFO: jump tables for function " << *this << ":\n";
}
for (auto &JTI : JumpTables) {
auto &JT = JTI.second;
if (opts::PrintJumpTables)
JT.print(outs());
JT.emit(Streamer,
BC.MOFI->getReadOnlySection(),
BC.MOFI->getReadOnlyColdSection());
}
}
// TODO (#9806207): based on jump table type (PIC vs non-PIC etc.) we will
// need to emit different references.
uint64_t BinaryFunction::JumpTable::emit(MCStreamer *Streamer,
MCSection *HotSection,
MCSection *ColdSection) {
// Pre-process entries for aggressive splitting.
// Each label represents a separate switch table and gets its own count
// that determines its destination (hot or cold section).
std::map<MCSymbol *, uint64_t> LabelCounts;
if (opts::JumpTables > JTS_SPLIT && !Counts.empty()) {
MCSymbol *CurrentLabel = Labels[0];
uint64_t CurrentLabelCount = 0;
for (unsigned Index = 0; Index < Entries.size(); ++Index) {
auto LI = Labels.find(Index * EntrySize);
if (LI != Labels.end()) {
LabelCounts[CurrentLabel] = CurrentLabelCount;
CurrentLabel = LI->second;
CurrentLabelCount = 0;
}
CurrentLabelCount += Counts[Index];
}
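// Record the count accumulated for the last label.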
LabelCounts[CurrentLabel] = CurrentLabelCount;
} else {
Streamer->SwitchSection(Count > 0 ? HotSection : ColdSection);
Streamer->EmitValueToAlignment(EntrySize);
}
uint64_t Offset = 0;
for (auto *Entry : Entries) {
auto LI = Labels.find(Offset);
if (LI != Labels.end()) {
DEBUG(dbgs() << "BOLT-DEBUG: emitting jump table "
<< LI->second->getName() << " (originally was at address 0x"
<< Twine::utohexstr(Address + Offset)
<< (Offset ? "as part of larger jump table\n" : "\n"));
if (!LabelCounts.empty()) {
DEBUG(dbgs() << "BOLT-DEBUG: jump table count: "
<< LabelCounts[LI->second] << '\n');
if (LabelCounts[LI->second] > 0) {
Streamer->SwitchSection(HotSection);
} else {
Streamer->SwitchSection(ColdSection);
}
Streamer->EmitValueToAlignment(EntrySize);
}
Streamer->EmitLabel(LI->second);
}
Streamer->EmitSymbolValue(Entry, EntrySize);
Offset += EntrySize;
}
return Offset;
}
void BinaryFunction::JumpTable::print(raw_ostream &OS) const {
uint64_t Offset = 0;
for (const auto *Entry : Entries) {
auto LI = Labels.find(Offset);
if (LI != Labels.end()) {
OS << "Jump Table " << LI->second->getName() << " at @0x"
<< Twine::utohexstr(Address+Offset);
if (Offset) {
OS << " (possibly part of larger jump table):\n";
} else {
OS << " with total count of " << Count << ":\n";
}
}
OS << format(" 0x%04" PRIx64 " : ", Offset) << Entry->getName();
if (!Counts.empty())
OS << " : " << Counts[Offset / EntrySize];
OS << '\n';
Offset += EntrySize;
}
OS << "\n\n";
}
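// For illustration, with "-jump-tables=3" the printer above produces
// output of roughly this shape (names, addresses, and counts invented):
//
//   Jump Table .LJUMP_TABLE0 at @0x4011b0 with total count of 1500:
//     0x0000 : .Ltmp42 : 1000
//     0x0008 : .Ltmp43 : 500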
void BinaryFunction::calculateLoopInfo() {
@@ -2738,8 +2917,8 @@ DynoStats BinaryFunction::getDynoStats() const {
Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
// Jump tables.
const auto *LastInstr = BB->findLastNonPseudoInstruction();
if (BC.MIA->getJumpTableIndex(*LastInstr) > 0) {
const auto *LastInstr = BB->getLastNonPseudo();
if (BC.MIA->getJumpTable(*LastInstr)) {
Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
DEBUG(
static uint64_t MostFrequentJT;

View File

@@ -185,6 +185,13 @@ public:
LT_OPTIMIZE_SHUFFLE,
};
enum JumpTableSupportLevel : char {
JTS_NONE = 0, /// Disable jump table support
JTS_BASIC = 1, /// Enable basic jump table support
JTS_SPLIT = 2, /// Enable hot/cold splitting of jump tables
JTS_AGGRESSIVE = 3, /// Aggressive splitting of jump tables
};
static constexpr uint64_t COUNT_NO_PROFILE =
std::numeric_limits<uint64_t>::max();
// Function size, in number of BBs, above which we fallback to a heuristic
@@ -429,11 +436,76 @@ private:
CFIInstrMapType CIEFrameInstructions;
/// Representation of a jump table.
///
/// The jump table may include other jump tables that are referenced by
/// a different label at a different offset in this jump table.
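///
/// For example (hypothetical): two switch tables emitted back-to-back
/// in the original binary may be detected as a single JumpTable object
/// with a second label at a non-zero offset.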
struct JumpTable {
MCSymbol *StartLabel;
/// Original address.
uint64_t Address;
/// Size of the entry used for storage.
std::size_t EntrySize;
/// All the entries as labels.
std::vector<MCSymbol *> Entries;
/// All the entries as offsets into a function. Invalid after CFG is built.
std::vector<uint64_t> OffsetEntries;
/// Map <Offset> -> <Label> used for embedded jump tables. Label at 0 offset
/// is the main label for the jump table.
std::map<unsigned, MCSymbol *> Labels;
/// Return the size of the jump table in bytes.
uint64_t getSize() const {
return Entries.size() * EntrySize;
}
/// Constructor.
JumpTable(uint64_t Address,
std::size_t EntrySize,
decltype(Entries) &&Entries,
decltype(OffsetEntries) &&OffsetEntries,
decltype(Labels) &&Labels)
: Address(Address), EntrySize(EntrySize), Entries(Entries),
OffsetEntries(OffsetEntries), Labels(Labels)
{}
/// Dynamic number of times each entry in the table was referenced.
/// Identical entries will have a shared count (identical for every
/// entry in the set).
std::vector<uint64_t> Counts;
/// Total number of times this jump table was used.
uint64_t Count{0};
/// Emit jump table data. The caller supplies the sections for the data.
/// Return the total number of bytes emitted.
uint64_t emit(MCStreamer *Streamer, MCSection *HotSection,
MCSection *ColdSection);
/// Print for debugging purposes.
void print(raw_ostream &OS) const;
};
std::vector<JumpTable> JumpTables;
/// All compound jump tables for this function.
/// <OriginalAddress> -> <JumpTable>
std::map<uint64_t, JumpTable> JumpTables;
/// Return jump table that covers a given \p Address in memory.
JumpTable *getJumpTableContainingAddress(uint64_t Address) {
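// upper_bound() returns the first table starting strictly above Address,
// so the only candidate that may contain Address is the preceding entry.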
auto JTI = JumpTables.upper_bound(Address);
if (JTI == JumpTables.begin())
return nullptr;
--JTI;
if (JTI->first + JTI->second.getSize() > Address) {
return &JTI->second;
}
return nullptr;
}
/// All jump table sites in the function.
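/// Each site is a <instruction offset, jump table address> pair.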
std::vector<std::pair<uint64_t, uint64_t>> JTSites;
// Blocks are kept sorted in the layout order. If we need to change the
// layout (if BasicBlocksLayout stores a different order than BasicBlocks),

View File

@@ -150,7 +150,7 @@ void OptimizeBodylessFunctions::analyze(
if (BF.size() != 1 || BF.front().getNumNonPseudos() != 1)
return;
const auto *FirstInstr = BF.front().findFirstNonPseudoInstruction();
const auto *FirstInstr = BF.front().getFirstNonPseudo();
if (!FirstInstr)
return;
if (!BC.MIA->isTailCall(*FirstInstr))
@@ -917,7 +917,7 @@ bool SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
if (BB->getNumNonPseudos() != 1)
continue;
auto *Instr = BB->findFirstNonPseudoInstruction();
auto *Instr = BB->getFirstNonPseudo();
if (!MIA->isTailCall(*Instr))
continue;
auto *CalleeSymbol = MIA->getTargetSymbol(*Instr);
@@ -1040,7 +1040,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
} else {
// Succ will be null in the tail call case. In this case we
// need to explicitly add a tail call instruction.
auto *Branch = Pred->findLastNonPseudoInstruction();
auto *Branch = Pred->getLastNonPseudo();
if (Branch && BC.MIA->isUnconditionalBranch(*Branch)) {
Pred->removeSuccessor(&BB);
Pred->eraseInstruction(Branch);
@@ -1060,7 +1060,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
if (BB.getNumNonPseudos() != 1 || BB.isLandingPad())
continue;
auto *Inst = BB.findFirstNonPseudoInstruction();
auto *Inst = BB.getFirstNonPseudo();
const bool IsTailCall = BC.MIA->isTailCall(*Inst);
if (!BC.MIA->isUnconditionalBranch(*Inst) && !IsTailCall)
@@ -1090,7 +1090,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
void Peepholes::addTailcallTraps(BinaryContext &BC,
BinaryFunction &Function) {
for (auto &BB : Function) {
auto *Inst = BB.findLastNonPseudoInstruction();
auto *Inst = BB.getLastNonPseudo();
if (Inst && BC.MIA->isTailCall(*Inst) && BC.MIA->isIndirectBranch(*Inst)) {
MCInst Trap;
if (BC.MIA->createTrap(Trap)) {

View File

@@ -1547,33 +1547,27 @@ void RewriteInstance::emitFunctions() {
}
// Map special sections to their addresses in the output image.
//
// TODO: perhaps we should process all the allocated sections here?
std::vector<std::string> Sections = { ".eh_frame", ".gcc_except_table",
".rodata" };
".rodata", ".rodata.cold" };
for (auto &SectionName : Sections) {
auto SMII = EFMM->SectionMapInfo.find(SectionName);
if (SMII != EFMM->SectionMapInfo.end()) {
SectionInfo &SI = SMII->second;
NextAvailableAddress = RoundUpToAlignment(NextAvailableAddress,
SI.Alignment);
DEBUG(dbgs() << "BOLT: mapping 0x"
<< Twine::utohexstr(SI.AllocAddress)
<< " to 0x" << Twine::utohexstr(NextAvailableAddress)
<< '\n');
if (SMII == EFMM->SectionMapInfo.end())
continue;
SectionInfo &SI = SMII->second;
NextAvailableAddress = RoundUpToAlignment(NextAvailableAddress,
SI.Alignment);
DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x"
<< Twine::utohexstr(SI.AllocAddress)
<< ") to 0x" << Twine::utohexstr(NextAvailableAddress)
<< '\n');
OLT.mapSectionAddress(ObjectsHandle,
SI.SectionID,
NextAvailableAddress);
SI.FileAddress = NextAvailableAddress;
SI.FileOffset = getFileOffsetFor(NextAvailableAddress);
OLT.mapSectionAddress(ObjectsHandle,
SI.SectionID,
NextAvailableAddress);
SI.FileAddress = NextAvailableAddress;
SI.FileOffset = getFileOffsetFor(NextAvailableAddress);
NextAvailableAddress += SI.Size;
} else {
if (opts::Verbosity >= 2) {
errs() << "BOLT-WARNING: cannot remap " << SectionName << '\n';
}
}
NextAvailableAddress += SI.Size;
}
if (opts::UpdateDebugSections) {
@@ -1702,6 +1696,13 @@ void RewriteInstance::patchELFPHDRTable() {
"could not add program header for the new segment");
}
namespace {
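/// Write \p BytesToWrite zero bytes at the current position of \p OS.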
void writePadding(raw_pwrite_stream &OS, unsigned BytesToWrite) {
for (unsigned I = 0; I < BytesToWrite; ++I)
OS.write((unsigned char)0);
}
}
void RewriteInstance::rewriteNoteSections() {
auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
if (!ELF64LEFile) {
@@ -1725,13 +1726,10 @@ void RewriteInstance::rewriteNoteSections() {
// Insert padding as needed.
if (Section.sh_addralign > 1) {
auto Padding = OffsetToAlignment(NextAvailableOffset,
Section.sh_addralign);
const unsigned char ZeroByte{0};
for (unsigned I = 0; I < Padding; ++I)
OS.write(ZeroByte);
NextAvailableOffset += Padding;
auto PaddingSize = OffsetToAlignment(NextAvailableOffset,
Section.sh_addralign);
writePadding(OS, PaddingSize);
NextAvailableOffset += PaddingSize;
assert(Section.sh_size % Section.sh_addralign == 0 &&
"section size does not match section alignment");
@@ -1821,19 +1819,19 @@ void RewriteInstance::rewriteNoteSections() {
// * There could be modifications done to non-allocatable sections, e.g.
// size could be increased.
// * New non-allocatable sections are added to the end of the file.
void RewriteInstance::patchELFSectionHeaderTable() {
auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
if (!ELF64LEFile) {
errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
exit(1);
}
auto Obj = ELF64LEFile->getELFFile();
using Elf_Shdr = std::remove_pointer<decltype(Obj)>::type::Elf_Shdr;
template <typename ELFT>
void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
using Elf_Shdr = typename ELFObjectFile<ELFT>::Elf_Shdr;
auto Obj = File->getELFFile();
auto &OS = Out->os();
auto SHTOffset = OS.tell();
auto PaddingSize = OffsetToAlignment(SHTOffset, sizeof(Elf_Shdr));
writePadding(OS, PaddingSize);
SHTOffset += PaddingSize;
// Copy over entries for original allocatable sections with minor
// modifications (e.g. name).
for (auto &Section : Obj->sections()) {
@@ -2067,12 +2065,10 @@ void RewriteInstance::rewriteFile() {
if (opts::Verbosity >= 1) {
outs() << "BOLT: writing a new .eh_frame_hdr\n";
}
if (FrameHdrAlign > 1) {
auto PaddingSize = OffsetToAlignment(NextAvailableAddress, FrameHdrAlign);
for (unsigned I = 0; I < PaddingSize; ++I)
Out->os().write((unsigned char)0);
NextAvailableAddress += PaddingSize;
}
auto PaddingSize = OffsetToAlignment(NextAvailableAddress, FrameHdrAlign);
writePadding(Out->os(), PaddingSize);
NextAvailableAddress += PaddingSize;
SectionInfo EHFrameHdrSecInfo;
EHFrameHdrSecInfo.FileAddress = NextAvailableAddress;

View File

@@ -189,7 +189,20 @@ private:
/// Patch ELF book-keeping info.
void patchELF();
void patchELFPHDRTable();
void patchELFSectionHeaderTable();
template <typename ELFT>
void patchELFSectionHeaderTable(ELFObjectFile<ELFT> *Obj);
void patchELFSectionHeaderTable() {
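// Dispatch on the ELF flavor of the input file; the trailing cast<>
// asserts that 64-bit big-endian is the only remaining possibility.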
if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(InputFile))
return patchELFSectionHeaderTable(ELF32LE);
if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(InputFile))
return patchELFSectionHeaderTable(ELF64LE);
if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(InputFile))
return patchELFSectionHeaderTable(ELF32BE);
auto *ELF64BE = cast<ELF64BEObjectFile>(InputFile);
return patchELFSectionHeaderTable(ELF64BE);
}
/// Computes output .debug_line line table offsets for each compile unit,
/// and updates stmt_list for a corresponding compile unit.