mirror of
https://github.com/intel/llvm.git
synced 2026-01-15 20:54:40 +08:00
Support for splitting jump tables.
Summary:
Add levels for the "-jump-tables=<n>" option:
1 - all jump tables are output in the same section (default).
2 - basic splitting: if the table is used, it is output to the hot section;
otherwise, it is output to the cold one.
3 - aggressively split compound jump tables and collect profile for
all entries.
The "-print-jump-tables" option outputs all jump tables for debugging
and/or analysis purposes. Use it with "-jump-tables=3" to get profile
values for every entry in a jump table.
(cherry picked from FBD3912119)
This commit is contained in:
@@ -30,7 +30,7 @@ bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
|
||||
return LHS.Offset < RHS.Offset;
|
||||
}
|
||||
|
||||
MCInst *BinaryBasicBlock::findFirstNonPseudoInstruction() {
|
||||
MCInst *BinaryBasicBlock::getFirstNonPseudo() {
|
||||
auto &BC = Function->getBinaryContext();
|
||||
for (auto &Inst : Instructions) {
|
||||
if (!BC.MII->get(Inst.getOpcode()).isPseudo())
|
||||
@@ -39,7 +39,7 @@ MCInst *BinaryBasicBlock::findFirstNonPseudoInstruction() {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MCInst *BinaryBasicBlock::findLastNonPseudoInstruction() {
|
||||
MCInst *BinaryBasicBlock::getLastNonPseudo() {
|
||||
auto &BC = Function->getBinaryContext();
|
||||
for (auto Itr = Instructions.rbegin(); Itr != Instructions.rend(); ++Itr) {
|
||||
if (!BC.MII->get(Itr->getOpcode()).isPseudo())
|
||||
|
||||
@@ -382,11 +382,11 @@ public:
|
||||
|
||||
/// Return a pointer to the first non-pseudo instruction in this basic
|
||||
/// block. Returns nullptr if none exists.
|
||||
MCInst *findFirstNonPseudoInstruction();
|
||||
MCInst *getFirstNonPseudo();
|
||||
|
||||
/// Return a pointer to the last non-pseudo instruction in this basic
|
||||
/// block. Returns nullptr if none exists.
|
||||
MCInst *findLastNonPseudoInstruction();
|
||||
MCInst *getLastNonPseudo();
|
||||
|
||||
/// Set minimum alignment for the basic block.
|
||||
void setAlignment(uint64_t Align) {
|
||||
|
||||
@@ -314,9 +314,8 @@ void BinaryContext::printInstruction(raw_ostream &OS,
|
||||
}
|
||||
}
|
||||
if (MIA->isIndirectBranch(Instruction)) {
|
||||
auto JTIndex = MIA->getJumpTableIndex(Instruction);
|
||||
if (JTIndex != -1LL) {
|
||||
OS << " # JUMPTABLE " << JTIndex;
|
||||
if (auto JTAddress = MIA->getJumpTable(Instruction)) {
|
||||
OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -36,17 +36,34 @@
|
||||
#define DEBUG_TYPE "bolt"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace bolt;
|
||||
|
||||
namespace opts {
|
||||
|
||||
extern cl::opt<unsigned> Verbosity;
|
||||
extern cl::opt<bool> PrintDynoStats;
|
||||
|
||||
static cl::opt<bool>
|
||||
static cl::opt<BinaryFunction::JumpTableSupportLevel>
|
||||
JumpTables("jump-tables",
|
||||
cl::desc("enable jump table support (experimental)"),
|
||||
cl::desc("jump tables support"),
|
||||
cl::init(BinaryFunction::JTS_NONE),
|
||||
cl::values(clEnumValN(BinaryFunction::JTS_NONE, "0",
|
||||
"do not optimize functions with jump tables"),
|
||||
clEnumValN(BinaryFunction::JTS_BASIC, "1",
|
||||
"optimize functions with jump tables"),
|
||||
clEnumValN(BinaryFunction::JTS_SPLIT, "2",
|
||||
"split jump tables into hot and cold"),
|
||||
clEnumValN(BinaryFunction::JTS_AGGRESSIVE, "3",
|
||||
"aggressively split jump tables (unsafe)"),
|
||||
clEnumValEnd),
|
||||
cl::ZeroOrMore);
|
||||
|
||||
static cl::opt<bool>
|
||||
PrintJumpTables("print-jump-tables",
|
||||
cl::desc("print jump tables"),
|
||||
cl::ZeroOrMore,
|
||||
cl::Hidden);
|
||||
|
||||
static cl::opt<bool>
|
||||
AgressiveSplitting("split-all-cold",
|
||||
cl::desc("outline as many cold basic blocks as possible"),
|
||||
@@ -235,6 +252,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
|
||||
OS << "\n Id Fun Addr : 0x" << Twine::utohexstr(IdenticalFunctionAddress);
|
||||
|
||||
if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
|
||||
OS << '\n';
|
||||
DynoStats dynoStats = getDynoStats();
|
||||
OS << dynoStats;
|
||||
}
|
||||
@@ -357,14 +375,9 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
|
||||
OS << '\n';
|
||||
}
|
||||
|
||||
for(unsigned Index = 0; Index < JumpTables.size(); ++Index) {
|
||||
const auto &JumpTable = JumpTables[Index];
|
||||
OS << "Jump Table #" << (Index + 1) << '\n';
|
||||
for (unsigned EIndex = 0; EIndex < JumpTable.Entries.size(); ++EIndex) {
|
||||
const auto *Entry = JumpTable.Entries[EIndex];
|
||||
OS << " entry " << EIndex << ": " << Entry->getName() << '\n';
|
||||
}
|
||||
OS << '\n';
|
||||
// Print all jump tables.
|
||||
for (auto &JTI : JumpTables) {
|
||||
JTI.second.print(OS);
|
||||
}
|
||||
|
||||
OS << "DWARF CFI Instructions:\n";
|
||||
@@ -373,7 +386,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
|
||||
for (auto &Elmt : OffsetToCFI) {
|
||||
OS << format(" %08x:\t", Elmt.first);
|
||||
assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
|
||||
BinaryContext::printCFI(OS, FrameInstructions[Elmt.second].getOperation());
|
||||
BinaryContext::printCFI(OS,
|
||||
FrameInstructions[Elmt.second].getOperation());
|
||||
OS << "\n";
|
||||
}
|
||||
} else {
|
||||
@@ -523,10 +537,30 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
||||
SegRegNum != bolt::NoRegister || ScaleValue != PtrSize)
|
||||
return IndirectBranchType::UNKNOWN;
|
||||
|
||||
auto ArrayStart = DispValue;
|
||||
auto ArrayStart = static_cast<uint64_t>(DispValue);
|
||||
if (BaseRegNum == RIPRegister)
|
||||
ArrayStart += getAddress() + Offset + Size;
|
||||
|
||||
// Check if there's already a jump table registered at this address.
|
||||
if (auto *JT = getJumpTableContainingAddress(ArrayStart)) {
|
||||
auto JTOffset = ArrayStart - JT->Address;
|
||||
// Get or create a label.
|
||||
auto LI = JT->Labels.find(JTOffset);
|
||||
if (LI == JT->Labels.end()) {
|
||||
auto *JTStartLabel = BC.Ctx->createTempSymbol("JUMP_TABLE", true);
|
||||
auto Result = JT->Labels.emplace(JTOffset, JTStartLabel);
|
||||
assert(Result.second && "error adding jump table label");
|
||||
LI = Result.first;
|
||||
}
|
||||
|
||||
BC.MIA->replaceMemOperandDisp(*MemLocInstr, LI->second, BC.Ctx.get());
|
||||
BC.MIA->setJumpTable(Instruction, ArrayStart);
|
||||
|
||||
JTSites.emplace_back(Offset, ArrayStart);
|
||||
|
||||
return IndirectBranchType::POSSIBLE_JUMP_TABLE;
|
||||
}
|
||||
|
||||
auto SectionOrError = BC.getSectionForAddress(ArrayStart);
|
||||
if (!SectionOrError) {
|
||||
// No section - possibly an absolute address. Since we don't allow
|
||||
@@ -552,6 +586,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
||||
uint64_t Value = 0;
|
||||
auto Result = IndirectBranchType::UNKNOWN;
|
||||
std::vector<MCSymbol *> JTLabelCandidates;
|
||||
std::vector<uint64_t> JTOffsetCandidates;
|
||||
while (ValueOffset <= Section.getSize() - PtrSize) {
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: indirect jmp at 0x"
|
||||
<< Twine::utohexstr(getAddress() + Offset)
|
||||
@@ -565,7 +600,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
||||
// Is it possible to have a jump table with function start as an entry?
|
||||
auto *JTEntry = getOrCreateLocalLabel(Value);
|
||||
JTLabelCandidates.push_back(JTEntry);
|
||||
TakenBranches.emplace_back(Offset, Value - getAddress());
|
||||
JTOffsetCandidates.push_back(Value - getAddress());
|
||||
Result = IndirectBranchType::POSSIBLE_JUMP_TABLE;
|
||||
continue;
|
||||
}
|
||||
@@ -577,19 +612,26 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
||||
break;
|
||||
}
|
||||
JTLabelCandidates.push_back(getFunctionEndLabel());
|
||||
JTOffsetCandidates.push_back(Value - getAddress());
|
||||
}
|
||||
if (Result == IndirectBranchType::POSSIBLE_JUMP_TABLE) {
|
||||
assert(JTLabelCandidates.size() > 2 &&
|
||||
"expected more than 2 jump table entries");
|
||||
auto *JTStartLabel = BC.Ctx->createTempSymbol("JUMP_TABLE", true);
|
||||
JumpTables.emplace_back(JumpTable{JTStartLabel,
|
||||
std::move(JTLabelCandidates)});
|
||||
BC.MIA->replaceMemOperandDisp(*MemLocInstr, JTStartLabel, BC.Ctx.get());
|
||||
BC.MIA->setJumpTableIndex(Instruction, JumpTables.size());
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "
|
||||
<< JTStartLabel->getName()
|
||||
<< " in function " << *this << " with "
|
||||
<< JTLabelCandidates.size() << " entries.\n");
|
||||
JumpTables.emplace(ArrayStart, JumpTable{ArrayStart,
|
||||
PtrSize,
|
||||
std::move(JTLabelCandidates),
|
||||
std::move(JTOffsetCandidates),
|
||||
{{0, JTStartLabel}}});
|
||||
BC.MIA->replaceMemOperandDisp(*MemLocInstr, JTStartLabel, BC.Ctx.get());
|
||||
BC.MIA->setJumpTable(Instruction, ArrayStart);
|
||||
|
||||
JTSites.emplace_back(Offset, ArrayStart);
|
||||
|
||||
return Result;
|
||||
}
|
||||
BC.InterproceduralReferences.insert(Value);
|
||||
@@ -727,7 +769,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
||||
MIA->convertJmpToTailCall(Instruction);
|
||||
break;
|
||||
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
|
||||
if (!opts::JumpTables)
|
||||
if (opts::JumpTables == JTS_NONE)
|
||||
IsSimple = false;
|
||||
break;
|
||||
case IndirectBranchType::UNKNOWN:
|
||||
@@ -771,6 +813,40 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
||||
Offset += Size;
|
||||
}
|
||||
|
||||
// Update TakenBranches from JumpTables.
|
||||
//
|
||||
// We want to do it after initial processing since we don't know jump tables
|
||||
// boundaries until we process them all.
|
||||
for (auto &JTSite : JTSites) {
|
||||
auto JTSiteOffset = JTSite.first;
|
||||
auto JTAddress = JTSite.second;
|
||||
auto *JT = getJumpTableContainingAddress(JTAddress);
|
||||
assert(JT && "cannot find jump table for address");
|
||||
uint32_t EI = (JTAddress - JT->Address) / JT->EntrySize;
|
||||
while (EI < JT->Entries.size()) {
|
||||
auto TargetOffset = JT->OffsetEntries[EI];
|
||||
if (TargetOffset < getSize())
|
||||
TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
|
||||
++EI;
|
||||
// A label at the next entry means the end of this jump table.
|
||||
if (JT->Labels.count(EI * JT->EntrySize))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Free memory used by jump table offsets.
|
||||
for (auto &JTI : JumpTables) {
|
||||
auto &JT = JTI.second;
|
||||
clearList(JT.OffsetEntries);
|
||||
}
|
||||
|
||||
// Remove duplicate branches. We can get a bunch of them from jump tables.
|
||||
// Without doing jump table value profiling we don't have use for extra
|
||||
// (duplicate) branches.
|
||||
std::sort(TakenBranches.begin(), TakenBranches.end());
|
||||
auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
|
||||
TakenBranches.erase(NewEnd, TakenBranches.end());
|
||||
|
||||
// TODO: clear memory if not simple function?
|
||||
|
||||
// Update state.
|
||||
@@ -793,7 +869,7 @@ bool BinaryFunction::postProcessIndirectBranches() {
|
||||
}
|
||||
|
||||
// Validate the tail call assumptions.
|
||||
if (BC.MIA->isTailCall(Instr) || (BC.MIA->getJumpTableIndex(Instr) > 0)) {
|
||||
if (BC.MIA->isTailCall(Instr) || BC.MIA->getJumpTable(Instr)) {
|
||||
if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
|
||||
// We have validated memory contents addressed by the jump
|
||||
// instruction already.
|
||||
@@ -1063,6 +1139,34 @@ bool BinaryFunction::buildCFG() {
|
||||
} else {
|
||||
const BranchInfo &BInfo = BranchInfoOrErr.get();
|
||||
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
|
||||
// Populate profile counts for the jump table.
|
||||
auto *LastInstr = FromBB->getLastNonPseudo();
|
||||
if (!LastInstr)
|
||||
continue;
|
||||
auto JTAddress = BC.MIA->getJumpTable(*LastInstr);
|
||||
if (!JTAddress)
|
||||
continue;
|
||||
auto *JT = getJumpTableContainingAddress(JTAddress);
|
||||
if (!JT)
|
||||
continue;
|
||||
JT->Count += BInfo.Branches;
|
||||
if (opts::JumpTables < JTS_AGGRESSIVE)
|
||||
continue;
|
||||
if (JT->Counts.empty())
|
||||
JT->Counts.resize(JT->Entries.size());
|
||||
auto EI = JT->Entries.begin();
|
||||
auto Delta = (JTAddress - JT->Address) / JT->EntrySize;
|
||||
EI += Delta;
|
||||
while (EI != JT->Entries.end()) {
|
||||
if (ToBB->getLabel() == *EI) {
|
||||
JT->Counts[Delta] += BInfo.Branches;
|
||||
}
|
||||
++Delta;
|
||||
++EI;
|
||||
// A label marks the start of another jump table.
|
||||
if (JT->Labels.count(Delta * JT->EntrySize))
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1311,7 +1415,7 @@ void BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
|
||||
(float) (LocalProfileBranches.size() - OrphanBranches.size()) /
|
||||
(float) LocalProfileBranches.size();
|
||||
|
||||
if (opts::Verbosity >= 2 && !OrphanBranches.empty()) {
|
||||
if (opts::Verbosity >= 1 && !OrphanBranches.empty()) {
|
||||
errs() << "BOLT-WARNING: profile branches match only "
|
||||
<< format("%.1f%%", ProfileMatchRatio * 100.0f) << " ("
|
||||
<< (LocalProfileBranches.size() - OrphanBranches.size()) << '/'
|
||||
@@ -1322,6 +1426,7 @@ void BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
|
||||
errs() << "\t0x" << Twine::utohexstr(OBranch.first) << " -> 0x"
|
||||
<< Twine::utohexstr(OBranch.second) << " (0x"
|
||||
<< Twine::utohexstr(OBranch.first + getAddress()) << " -> 0x"
|
||||
<< Twine::utohexstr(OBranch.second + getAddress()) << ")\n";
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1868,8 +1973,8 @@ void BinaryFunction::dumpGraph(raw_ostream& OS) const {
|
||||
CondBranch,
|
||||
UncondBranch);
|
||||
|
||||
const auto *LastInstr = BB->findLastNonPseudoInstruction();
|
||||
const bool IsJumpTable = LastInstr && BC.MIA->getJumpTableIndex(*LastInstr) > 0;
|
||||
const auto *LastInstr = BB->getLastNonPseudo();
|
||||
const bool IsJumpTable = LastInstr && BC.MIA->getJumpTable(*LastInstr);
|
||||
|
||||
auto BI = BB->branch_info_begin();
|
||||
for (auto *Succ : BB->successors()) {
|
||||
@@ -2551,18 +2656,92 @@ BinaryFunction::~BinaryFunction() {
|
||||
void BinaryFunction::emitJumpTables(MCStreamer *Streamer) {
|
||||
if (JumpTables.empty())
|
||||
return;
|
||||
|
||||
Streamer->SwitchSection(BC.MOFI->getReadOnlySection());
|
||||
for (auto &JumpTable : JumpTables) {
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: emitting jump table "
|
||||
<< JumpTable.StartLabel->getName() << '\n');
|
||||
Streamer->EmitLabel(JumpTable.StartLabel);
|
||||
// TODO (#9806207): based on jump table type (PIC vs non-PIC etc.)
|
||||
// we would need to emit different references.
|
||||
for (auto *Entry : JumpTable.Entries) {
|
||||
Streamer->EmitSymbolValue(Entry, BC.AsmInfo->getPointerSize());
|
||||
}
|
||||
if (opts::PrintJumpTables) {
|
||||
outs() << "BOLT-INFO: jump tables for function " << *this << ":\n";
|
||||
}
|
||||
for (auto &JTI : JumpTables) {
|
||||
auto &JT = JTI.second;
|
||||
if (opts::PrintJumpTables)
|
||||
JT.print(outs());
|
||||
JT.emit(Streamer,
|
||||
BC.MOFI->getReadOnlySection(),
|
||||
BC.MOFI->getReadOnlyColdSection());
|
||||
}
|
||||
}
|
||||
|
||||
// TODO (#9806207): based on jump table type (PIC vs non-PIC etc.) we will
|
||||
// need to emit different references.
|
||||
uint64_t BinaryFunction::JumpTable::emit(MCStreamer *Streamer,
|
||||
MCSection *HotSection,
|
||||
MCSection *ColdSection) {
|
||||
// Pre-process entries for aggressive splitting.
|
||||
// Each label represents a separate switch table and gets its own count
|
||||
// determining its destination.
|
||||
std::map<MCSymbol *, uint64_t> LabelCounts;
|
||||
if (opts::JumpTables > JTS_SPLIT && !Counts.empty()) {
|
||||
MCSymbol *CurrentLabel = Labels[0];
|
||||
uint64_t CurrentLabelCount = 0;
|
||||
for (unsigned Index = 0; Index < Entries.size(); ++Index) {
|
||||
auto LI = Labels.find(Index * EntrySize);
|
||||
if (LI != Labels.end()) {
|
||||
LabelCounts[CurrentLabel] = CurrentLabelCount;
|
||||
CurrentLabel = LI->second;
|
||||
CurrentLabelCount = 0;
|
||||
}
|
||||
CurrentLabelCount += Counts[Index];
|
||||
}
|
||||
LabelCounts[CurrentLabel] = CurrentLabelCount;
|
||||
} else {
|
||||
Streamer->SwitchSection(Count > 0 ? HotSection : ColdSection);
|
||||
Streamer->EmitValueToAlignment(EntrySize);
|
||||
}
|
||||
uint64_t Offset = 0;
|
||||
for (auto *Entry : Entries) {
|
||||
auto LI = Labels.find(Offset);
|
||||
if (LI != Labels.end()) {
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: emitting jump table "
|
||||
<< LI->second->getName() << " (originally was at address 0x"
|
||||
<< Twine::utohexstr(Address + Offset)
|
||||
<< (Offset ? "as part of larger jump table\n" : "\n"));
|
||||
if (!LabelCounts.empty()) {
|
||||
DEBUG(dbgs() << "BOLT-DEBUG: jump table count: "
|
||||
<< LabelCounts[LI->second] << '\n');
|
||||
if (LabelCounts[LI->second] > 0) {
|
||||
Streamer->SwitchSection(HotSection);
|
||||
} else {
|
||||
Streamer->SwitchSection(ColdSection);
|
||||
}
|
||||
Streamer->EmitValueToAlignment(EntrySize);
|
||||
}
|
||||
Streamer->EmitLabel(LI->second);
|
||||
}
|
||||
Streamer->EmitSymbolValue(Entry, EntrySize);
|
||||
Offset += EntrySize;
|
||||
}
|
||||
|
||||
return Offset;
|
||||
}
|
||||
|
||||
/// Print the jump table to \p OS, one line per entry.
///
/// A header line is printed before every entry whose offset has a label.
/// The header at offset zero reports the total Count; headers at other
/// offsets are marked as possibly belonging to a larger table. Per-entry
/// counts are appended when Counts is populated.
void BinaryFunction::JumpTable::print(raw_ostream &OS) const {
  const bool HasEntryCounts = !Counts.empty();
  const std::size_t NumEntries = Entries.size();

  for (std::size_t Index = 0; Index != NumEntries; ++Index) {
    const uint64_t Offset = Index * EntrySize;

    const auto LabelIt = Labels.find(Offset);
    if (LabelIt != Labels.end()) {
      OS << "Jump Table " << LabelIt->second->getName() << " at @0x"
         << Twine::utohexstr(Address + Offset);
      if (Offset != 0)
        OS << " (possibly part of larger jump table):\n";
      else
        OS << " with total count of " << Count << ":\n";
    }

    OS << format(" 0x%04" PRIx64 " : ", Offset) << Entries[Index]->getName();
    if (HasEntryCounts)
      OS << " : " << Counts[Index];
    OS << '\n';
  }

  OS << "\n\n";
}
|
||||
|
||||
void BinaryFunction::calculateLoopInfo() {
|
||||
@@ -2738,8 +2917,8 @@ DynoStats BinaryFunction::getDynoStats() const {
|
||||
Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
|
||||
|
||||
// Jump tables.
|
||||
const auto *LastInstr = BB->findLastNonPseudoInstruction();
|
||||
if (BC.MIA->getJumpTableIndex(*LastInstr) > 0) {
|
||||
const auto *LastInstr = BB->getLastNonPseudo();
|
||||
if (BC.MIA->getJumpTable(*LastInstr)) {
|
||||
Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
|
||||
DEBUG(
|
||||
static uint64_t MostFrequentJT;
|
||||
|
||||
@@ -185,6 +185,13 @@ public:
|
||||
LT_OPTIMIZE_SHUFFLE,
|
||||
};
|
||||
|
||||
/// Level of jump table support; used as the value type of the "jump-tables"
/// command line option.
enum JumpTableSupportLevel : char {
|
||||
JTS_NONE = 0, ///< Disable jump tables support
|
||||
JTS_BASIC = 1, ///< Enable basic jump tables support
|
||||
JTS_SPLIT = 2, ///< Enable hot/cold splitting of jump tables
|
||||
JTS_AGGRESSIVE = 3, ///< Aggressive splitting of jump tables
|
||||
};
|
||||
|
||||
static constexpr uint64_t COUNT_NO_PROFILE =
|
||||
std::numeric_limits<uint64_t>::max();
|
||||
// Function size, in number of BBs, above which we fallback to a heuristic
|
||||
@@ -429,11 +436,76 @@ private:
|
||||
CFIInstrMapType CIEFrameInstructions;
|
||||
|
||||
/// Representation of a jump table.
|
||||
///
|
||||
/// The jump table may include other jump tables that are referenced by
|
||||
/// a different label at a different offset in this jump table.
|
||||
struct JumpTable {
|
||||
MCSymbol *StartLabel;
|
||||
/// Original address.
|
||||
uint64_t Address;
|
||||
|
||||
/// Size of the entry used for storage.
|
||||
std::size_t EntrySize;
|
||||
|
||||
/// All the entries as labels.
|
||||
std::vector<MCSymbol *> Entries;
|
||||
|
||||
/// All the entries as offsets into a function. Invalid after CFG is built.
|
||||
std::vector<uint64_t> OffsetEntries;
|
||||
|
||||
/// Map <Offset> -> <Label> used for embedded jump tables. Label at 0 offset
|
||||
/// is the main label for the jump table.
|
||||
std::map<unsigned, MCSymbol *> Labels;
|
||||
|
||||
/// Return the size of the jump table.
|
||||
uint64_t getSize() const {
|
||||
return Entries.size() * EntrySize;
|
||||
}
|
||||
|
||||
/// Constructor.
|
||||
JumpTable(uint64_t Address,
|
||||
std::size_t EntrySize,
|
||||
decltype(Entries) &&Entries,
|
||||
decltype(OffsetEntries) &&OffsetEntries,
|
||||
decltype(Labels) &&Labels)
|
||||
: Address(Address), EntrySize(EntrySize), Entries(Entries),
|
||||
OffsetEntries(OffsetEntries), Labels(Labels)
|
||||
{}
|
||||
|
||||
/// Dynamic number of times each entry in the table was referenced.
|
||||
/// Identical entries will have a shared count (identical for every
|
||||
/// entry in the set).
|
||||
std::vector<uint64_t> Counts;
|
||||
|
||||
/// Total number of times this jump table was used.
|
||||
uint64_t Count{0};
|
||||
|
||||
/// Emit jump table data. The caller supplies the sections for the data.
|
||||
/// Return the number of total bytes emitted.
|
||||
uint64_t emit(MCStreamer *Streamer, MCSection *HotSection,
|
||||
MCSection *ColdSection);
|
||||
|
||||
/// Print for debugging purposes.
|
||||
void print(raw_ostream &OS) const;
|
||||
};
|
||||
std::vector<JumpTable> JumpTables;
|
||||
|
||||
/// All compound jump tables for this function.
|
||||
/// <OriginalAddress> -> <JumpTable>
|
||||
std::map<uint64_t, JumpTable> JumpTables;
|
||||
|
||||
/// Return jump table that covers a given \p Address in memory.
|
||||
JumpTable *getJumpTableContainingAddress(uint64_t Address) {
|
||||
auto JTI = JumpTables.upper_bound(Address);
|
||||
if (JTI == JumpTables.begin())
|
||||
return nullptr;
|
||||
--JTI;
|
||||
if (JTI->first + JTI->second.getSize() > Address) {
|
||||
return &JTI->second;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// All jump table sites in the function.
|
||||
std::vector<std::pair<uint64_t, uint64_t>> JTSites;
|
||||
|
||||
// Blocks are kept sorted in the layout order. If we need to change the
|
||||
// layout (if BasicBlocksLayout stores a different order than BasicBlocks),
|
||||
|
||||
@@ -150,7 +150,7 @@ void OptimizeBodylessFunctions::analyze(
|
||||
if (BF.size() != 1 || BF.front().getNumNonPseudos() != 1)
|
||||
return;
|
||||
|
||||
const auto *FirstInstr = BF.front().findFirstNonPseudoInstruction();
|
||||
const auto *FirstInstr = BF.front().getFirstNonPseudo();
|
||||
if (!FirstInstr)
|
||||
return;
|
||||
if (!BC.MIA->isTailCall(*FirstInstr))
|
||||
@@ -917,7 +917,7 @@ bool SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
|
||||
if (BB->getNumNonPseudos() != 1)
|
||||
continue;
|
||||
|
||||
auto *Instr = BB->findFirstNonPseudoInstruction();
|
||||
auto *Instr = BB->getFirstNonPseudo();
|
||||
if (!MIA->isTailCall(*Instr))
|
||||
continue;
|
||||
auto *CalleeSymbol = MIA->getTargetSymbol(*Instr);
|
||||
@@ -1040,7 +1040,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
|
||||
} else {
|
||||
// Succ will be null in the tail call case. In this case we
|
||||
// need to explicitly add a tail call instruction.
|
||||
auto *Branch = Pred->findLastNonPseudoInstruction();
|
||||
auto *Branch = Pred->getLastNonPseudo();
|
||||
if (Branch && BC.MIA->isUnconditionalBranch(*Branch)) {
|
||||
Pred->removeSuccessor(&BB);
|
||||
Pred->eraseInstruction(Branch);
|
||||
@@ -1060,7 +1060,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
|
||||
if (BB.getNumNonPseudos() != 1 || BB.isLandingPad())
|
||||
continue;
|
||||
|
||||
auto *Inst = BB.findFirstNonPseudoInstruction();
|
||||
auto *Inst = BB.getFirstNonPseudo();
|
||||
const bool IsTailCall = BC.MIA->isTailCall(*Inst);
|
||||
|
||||
if (!BC.MIA->isUnconditionalBranch(*Inst) && !IsTailCall)
|
||||
@@ -1090,7 +1090,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
|
||||
void Peepholes::addTailcallTraps(BinaryContext &BC,
|
||||
BinaryFunction &Function) {
|
||||
for (auto &BB : Function) {
|
||||
auto *Inst = BB.findLastNonPseudoInstruction();
|
||||
auto *Inst = BB.getLastNonPseudo();
|
||||
if (Inst && BC.MIA->isTailCall(*Inst) && BC.MIA->isIndirectBranch(*Inst)) {
|
||||
MCInst Trap;
|
||||
if (BC.MIA->createTrap(Trap)) {
|
||||
|
||||
@@ -1547,33 +1547,27 @@ void RewriteInstance::emitFunctions() {
|
||||
}
|
||||
|
||||
// Map special sections to their addresses in the output image.
|
||||
//
|
||||
// TODO: perhaps we should process all the allocated sections here?
|
||||
std::vector<std::string> Sections = { ".eh_frame", ".gcc_except_table",
|
||||
".rodata" };
|
||||
".rodata", ".rodata.cold" };
|
||||
for (auto &SectionName : Sections) {
|
||||
auto SMII = EFMM->SectionMapInfo.find(SectionName);
|
||||
if (SMII != EFMM->SectionMapInfo.end()) {
|
||||
SectionInfo &SI = SMII->second;
|
||||
NextAvailableAddress = RoundUpToAlignment(NextAvailableAddress,
|
||||
SI.Alignment);
|
||||
DEBUG(dbgs() << "BOLT: mapping 0x"
|
||||
<< Twine::utohexstr(SI.AllocAddress)
|
||||
<< " to 0x" << Twine::utohexstr(NextAvailableAddress)
|
||||
<< '\n');
|
||||
if (SMII == EFMM->SectionMapInfo.end())
|
||||
continue;
|
||||
SectionInfo &SI = SMII->second;
|
||||
NextAvailableAddress = RoundUpToAlignment(NextAvailableAddress,
|
||||
SI.Alignment);
|
||||
DEBUG(dbgs() << "BOLT: mapping section " << SectionName << " (0x"
|
||||
<< Twine::utohexstr(SI.AllocAddress)
|
||||
<< ") to 0x" << Twine::utohexstr(NextAvailableAddress)
|
||||
<< '\n');
|
||||
|
||||
OLT.mapSectionAddress(ObjectsHandle,
|
||||
SI.SectionID,
|
||||
NextAvailableAddress);
|
||||
SI.FileAddress = NextAvailableAddress;
|
||||
SI.FileOffset = getFileOffsetFor(NextAvailableAddress);
|
||||
OLT.mapSectionAddress(ObjectsHandle,
|
||||
SI.SectionID,
|
||||
NextAvailableAddress);
|
||||
SI.FileAddress = NextAvailableAddress;
|
||||
SI.FileOffset = getFileOffsetFor(NextAvailableAddress);
|
||||
|
||||
NextAvailableAddress += SI.Size;
|
||||
} else {
|
||||
if (opts::Verbosity >= 2) {
|
||||
errs() << "BOLT-WARNING: cannot remap " << SectionName << '\n';
|
||||
}
|
||||
}
|
||||
NextAvailableAddress += SI.Size;
|
||||
}
|
||||
|
||||
if (opts::UpdateDebugSections) {
|
||||
@@ -1702,6 +1696,13 @@ void RewriteInstance::patchELFPHDRTable() {
|
||||
"could not add program header for the new segment");
|
||||
}
|
||||
|
||||
namespace {
|
||||
void writePadding(raw_pwrite_stream &OS, unsigned BytesToWrite) {
|
||||
for (unsigned I = 0; I < BytesToWrite; ++I)
|
||||
OS.write((unsigned char)0);
|
||||
}
|
||||
}
|
||||
|
||||
void RewriteInstance::rewriteNoteSections() {
|
||||
auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
|
||||
if (!ELF64LEFile) {
|
||||
@@ -1725,13 +1726,10 @@ void RewriteInstance::rewriteNoteSections() {
|
||||
|
||||
// Insert padding as needed.
|
||||
if (Section.sh_addralign > 1) {
|
||||
auto Padding = OffsetToAlignment(NextAvailableOffset,
|
||||
Section.sh_addralign);
|
||||
const unsigned char ZeroByte{0};
|
||||
for (unsigned I = 0; I < Padding; ++I)
|
||||
OS.write(ZeroByte);
|
||||
|
||||
NextAvailableOffset += Padding;
|
||||
auto PaddingSize = OffsetToAlignment(NextAvailableOffset,
|
||||
Section.sh_addralign);
|
||||
writePadding(OS, PaddingSize);
|
||||
NextAvailableOffset += PaddingSize;
|
||||
|
||||
assert(Section.sh_size % Section.sh_addralign == 0 &&
|
||||
"section size does not match section alignment");
|
||||
@@ -1821,19 +1819,19 @@ void RewriteInstance::rewriteNoteSections() {
|
||||
// * There could be modifications done to non-allocatable sections, e.g.
|
||||
// size could be increased.
|
||||
// * New non-allocatable sections are added to the end of the file.
|
||||
void RewriteInstance::patchELFSectionHeaderTable() {
|
||||
auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
|
||||
if (!ELF64LEFile) {
|
||||
errs() << "BOLT-ERROR: only 64-bit LE ELF binaries are supported\n";
|
||||
exit(1);
|
||||
}
|
||||
auto Obj = ELF64LEFile->getELFFile();
|
||||
using Elf_Shdr = std::remove_pointer<decltype(Obj)>::type::Elf_Shdr;
|
||||
template <typename ELFT>
|
||||
void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
|
||||
|
||||
using Elf_Shdr = typename ELFObjectFile<ELFT>::Elf_Shdr;
|
||||
|
||||
auto Obj = File->getELFFile();
|
||||
auto &OS = Out->os();
|
||||
|
||||
auto SHTOffset = OS.tell();
|
||||
|
||||
auto PaddingSize = OffsetToAlignment(SHTOffset, sizeof(Elf_Shdr));
|
||||
writePadding(OS, PaddingSize);
|
||||
SHTOffset += PaddingSize;
|
||||
|
||||
// Copy over entries for original allocatable sections with minor
|
||||
// modifications (e.g. name).
|
||||
for (auto &Section : Obj->sections()) {
|
||||
@@ -2067,12 +2065,10 @@ void RewriteInstance::rewriteFile() {
|
||||
if (opts::Verbosity >= 1) {
|
||||
outs() << "BOLT: writing a new .eh_frame_hdr\n";
|
||||
}
|
||||
if (FrameHdrAlign > 1) {
|
||||
auto PaddingSize = OffsetToAlignment(NextAvailableAddress, FrameHdrAlign);
|
||||
for (unsigned I = 0; I < PaddingSize; ++I)
|
||||
Out->os().write((unsigned char)0);
|
||||
NextAvailableAddress += PaddingSize;
|
||||
}
|
||||
|
||||
auto PaddingSize = OffsetToAlignment(NextAvailableAddress, FrameHdrAlign);
|
||||
writePadding(Out->os(), PaddingSize);
|
||||
NextAvailableAddress += PaddingSize;
|
||||
|
||||
SectionInfo EHFrameHdrSecInfo;
|
||||
EHFrameHdrSecInfo.FileAddress = NextAvailableAddress;
|
||||
|
||||
@@ -189,7 +189,20 @@ private:
|
||||
/// Patch ELF book-keeping info.
|
||||
void patchELF();
|
||||
void patchELFPHDRTable();
|
||||
void patchELFSectionHeaderTable();
|
||||
|
||||
template <typename ELFT>
|
||||
void patchELFSectionHeaderTable(ELFObjectFile<ELFT> *Obj);
|
||||
|
||||
void patchELFSectionHeaderTable() {
|
||||
if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(InputFile))
|
||||
return patchELFSectionHeaderTable(ELF32LE);
|
||||
if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(InputFile))
|
||||
return patchELFSectionHeaderTable(ELF64LE);
|
||||
if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(InputFile))
|
||||
return patchELFSectionHeaderTable(ELF32BE);
|
||||
auto *ELF64BE = cast<ELF64BEObjectFile>(InputFile);
|
||||
return patchELFSectionHeaderTable(ELF64BE);
|
||||
}
|
||||
|
||||
/// Computes output .debug_line line table offsets for each compile unit,
|
||||
/// and updates stmt_list for a corresponding compile unit.
|
||||
|
||||
Reference in New Issue
Block a user