Add experimental jump table support.

Summary:
Option "-jump-tables=1" enables experimental support for jump tables.

The option hasn't been tested with optimizations other than block
re-ordering.

Only non-PIC jump tables are supported at the moment.

(cherry picked from FBD3867849)
This commit is contained in:
Maksim Panchenko
2016-09-14 16:45:40 -07:00
parent 7483cd0fa6
commit 2f3a859772
6 changed files with 141 additions and 66 deletions

View File

@@ -313,6 +313,12 @@ void BinaryContext::printInstruction(raw_ostream &OS,
OS << "; GNU_args_size = " << GnuArgsSize;
}
}
if (MIA->isIndirectBranch(Instruction)) {
auto JTIndex = MIA->getJumpTableIndex(Instruction);
if (JTIndex != -1LL) {
OS << " # JUMPTABLE " << JTIndex;
}
}
const DWARFDebugLine::LineTable *LineTable =
Function && opts::PrintDebugInfo ? Function->getDWARFUnitLineTable().second

View File

@@ -21,6 +21,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -41,6 +42,11 @@ namespace opts {
extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> PrintDynoStats;
/// Command-line switch ("-jump-tables") gating the experimental jump table
/// support. While it is off, a function in which a possible jump table is
/// detected during disassembly is marked as not simple.
static cl::opt<bool>
JumpTables("jump-tables",
cl::desc("enable jump table support (experimental)"),
cl::ZeroOrMore);
static cl::opt<bool>
AgressiveSplitting("split-all-cold",
cl::desc("outline as many cold basic blocks as possible"),
@@ -333,6 +339,16 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
OS << '\n';
}
for(unsigned Index = 0; Index < JumpTables.size(); ++Index) {
const auto &JumpTable = JumpTables[Index];
OS << "Jump Table #" << (Index + 1) << '\n';
for (unsigned EIndex = 0; EIndex < JumpTable.Entries.size(); ++EIndex) {
const auto *Entry = JumpTable.Entries[EIndex];
OS << " entry " << EIndex << ": " << Entry->getName() << '\n';
}
OS << '\n';
}
OS << "DWARF CFI Instructions:\n";
if (OffsetToCFI.size()) {
// Pre-buildCFG information
@@ -389,8 +405,8 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
[&](MCInst &Instruction, uint64_t Address, uint64_t Size) {
uint64_t TargetAddress{0};
MCSymbol *TargetSymbol{nullptr};
if (!BC.MIA->evaluateRIPOperandTarget(Instruction, Address, Size,
TargetAddress)) {
if (!BC.MIA->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
Size)) {
DEBUG(dbgs() << "BOLT: rip-relative operand can't be evaluated:\n";
BC.InstPrinter->printInst(&Instruction, dbgs(), "", *BC.STI);
dbgs() << '\n';
@@ -418,7 +434,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
}
if (!TargetSymbol)
TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "DATAat");
BC.MIA->replaceRIPOperandDisp(
BC.MIA->replaceMemOperandDisp(
Instruction, MCOperand::createExpr(MCSymbolRefExpr::create(
TargetSymbol, MCSymbolRefExpr::VK_None, *BC.Ctx)));
return true;
@@ -427,7 +443,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
enum class IndirectBranchType : char {
UNKNOWN = 0, /// Unable to determine type.
POSSIBLE_TAIL_CALL, /// Possibly a tail call.
POSSIBLE_SWITCH_TABLE, /// Possibly a switch/jump table
POSSIBLE_JUMP_TABLE, /// Possibly a switch/jump table
POSSIBLE_GOTO /// Possibly a gcc's computed goto.
};
@@ -441,7 +457,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
//
// We are interested in the cases where Scale == sizeof(uintptr_t) and
// the contents of the memory are presumably a function array.
const auto *MemLocInstr = &Instruction;
auto *MemLocInstr = &Instruction;
if (Instruction.getNumOperands() == 1) {
// If the indirect jump is on register - try to detect if the
// register value is loaded from a memory location.
@@ -452,7 +468,7 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
// in postProcessIndirectBranches().
for (auto PrevII = Instructions.rbegin(); PrevII != Instructions.rend();
++PrevII) {
const auto &PrevInstr = PrevII->second;
auto &PrevInstr = PrevII->second;
const auto &PrevInstrDesc = BC.MII->get(PrevInstr.getOpcode());
if (!PrevInstrDesc.hasDefOfPhysReg(PrevInstr, JmpRegNum, *BC.MRI))
continue;
@@ -516,6 +532,8 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
DataExtractor DE(SectionContents, BC.AsmInfo->isLittleEndian(), PtrSize);
auto ValueOffset = static_cast<uint32_t>(ArrayStart - Section.getAddress());
uint64_t Value = 0;
auto Result = IndirectBranchType::UNKNOWN;
std::vector<MCSymbol *> JTLabelCandidates;
while (ValueOffset <= Section.getSize() - PtrSize) {
DEBUG(dbgs() << "BOLT-DEBUG: indirect jmp at 0x"
<< Twine::utohexstr(getAddress() + Offset)
@@ -526,7 +544,12 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
DEBUG(dbgs() << ", which contains value "
<< Twine::utohexstr(Value) << '\n');
if (containsAddress(Value) && Value != getAddress()) {
return IndirectBranchType::POSSIBLE_SWITCH_TABLE;
// Is it possible to have a jump table with function start as an entry?
auto *JTEntry = getOrCreateLocalLabel(Value);
JTLabelCandidates.push_back(JTEntry);
TakenBranches.emplace_back(Offset, Value - getAddress());
Result = IndirectBranchType::POSSIBLE_JUMP_TABLE;
continue;
}
// Potentially a switch table can contain __builtin_unreachable() entry
// pointing just right after the function. In this case we have to check
@@ -535,6 +558,21 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
if (Value != getAddress() + getSize()) {
break;
}
JTLabelCandidates.push_back(getFunctionEndLabel());
}
if (Result == IndirectBranchType::POSSIBLE_JUMP_TABLE) {
assert(JTLabelCandidates.size() > 2 &&
"expected more than 2 jump table entries");
auto *JTStartLabel = BC.Ctx->createTempSymbol("JUMP_TABLE", true);
JumpTables.emplace_back(JumpTable{JTStartLabel,
std::move(JTLabelCandidates)});
BC.MIA->replaceMemOperandDisp(*MemLocInstr, JTStartLabel, BC.Ctx.get());
BC.MIA->setJumpTableIndex(Instruction, JumpTables.size());
DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "
<< JTStartLabel->getName()
<< " in function " << *this << " with "
<< JTLabelCandidates.size() << " entries.\n");
return Result;
}
BC.InterproceduralReferences.insert(Value);
return IndirectBranchType::POSSIBLE_TAIL_CALL;
@@ -654,11 +692,11 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
*Ctx)));
if (!IsCall) {
// Add taken branch info.
TakenBranches.push_back({Offset, TargetAddress - getAddress()});
TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
}
if (IsCondBranch) {
// Add fallthrough branch info.
FTBranches.push_back({Offset, Offset + Size});
FTBranches.emplace_back(Offset, Offset + Size);
}
} else {
// Could not evaluate branch. Should be an indirect call or an
@@ -671,8 +709,9 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
case IndirectBranchType::POSSIBLE_TAIL_CALL:
MIA->convertJmpToTailCall(Instruction);
break;
case IndirectBranchType::POSSIBLE_SWITCH_TABLE:
IsSimple = false;
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
if (!opts::JumpTables)
IsSimple = false;
break;
case IndirectBranchType::UNKNOWN:
// Keep processing. We'll do more checks and fixes in
@@ -739,17 +778,10 @@ bool BinaryFunction::postProcessIndirectBranches() {
}
// Validate the tail call assumptions.
if (BC.MIA->isTailCall(Instr)) {
unsigned BaseRegNum;
int64_t ScaleValue;
unsigned IndexRegNum;
int64_t DispValue;
unsigned SegRegNum;
if (BC.MIA->evaluateX86MemoryOperand(Instr, BaseRegNum,
ScaleValue, IndexRegNum,
DispValue, SegRegNum)) {
// We have validated the memory contents addressed by the
// jump instruction already.
if (BC.MIA->isTailCall(Instr) || (BC.MIA->getJumpTableIndex(Instr) > 0)) {
if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
// We have validated memory contents addressed by the jump
// instruction already.
continue;
}
// This is jump on register. Just make sure the register is defined
@@ -767,13 +799,16 @@ bool BinaryFunction::postProcessIndirectBranches() {
}
if (IsJmpRegSetInBB)
continue;
DEBUG(dbgs() << "BOLT-INFO: rejected potential indirect tail call in "
<< "function " << *this << " because the jump-on register "
<< "was not defined in basic block "
<< BB->getName() << ":\n";
BC.printInstructions(dbgs(), BB->begin(), BB->end(),
BB->getOffset(), this);
);
if (opts::Verbosity >= 2) {
outs() << "BOLT-INFO: rejected potential "
<< (BC.MIA->isTailCall(Instr) ? "indirect tail call"
: "jump table")
<< " in function " << *this
<< " because the jump-on register was not defined in "
<< " basic block " << BB->getName() << ".\n";
DEBUG(dbgs() << BC.printInstructions(dbgs(), BB->begin(), BB->end(),
BB->getOffset(), this));
}
return false;
}
@@ -788,12 +823,13 @@ bool BinaryFunction::postProcessIndirectBranches() {
}
}
if (!IsEpilogue) {
DEBUG(dbgs() << "BOLT-INFO: rejected potential indirect tail call in "
<< "function " << *this << " in basic block "
<< BB->getName() << ":\n";
BC.printInstructions(dbgs(), BB->begin(), BB->end(),
BB->getOffset(), this);
);
if (opts::Verbosity >= 2) {
outs() << "BOLT-INFO: rejected potential indirect tail call in "
<< "function " << *this << " in basic block "
<< BB->getName() << ".\n";
DEBUG(BC.printInstructions(dbgs(), BB->begin(), BB->end(),
BB->getOffset(), this));
}
return false;
}
BC.MIA->convertJmpToTailCall(Instr);
@@ -1369,12 +1405,14 @@ void BinaryFunction::inferFallThroughCounts() {
<< Twine::utohexstr(getAddress() + CurBB->getOffset()) << '\n';
});
// If there is a FT, the last successor will be it.
auto &SuccCount = *CurBB->branch_info_rbegin();
auto &Succ = *CurBB->succ_rbegin();
if (SuccCount.Count == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
SuccCount.Count = Inferred;
Succ->setExecutionCount(Succ->getExecutionCount() + Inferred);
if (CurBB->succ_size() <= 2) {
// If there is an FT it will be the last successor.
auto &SuccCount = *CurBB->branch_info_rbegin();
auto &Succ = *CurBB->succ_rbegin();
if (SuccCount.Count == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
SuccCount.Count = Inferred;
Succ->ExecutionCount += Inferred;
}
}
} // end for (CurBB : BasicBlocks)
@@ -1441,9 +1479,11 @@ void BinaryFunction::removeConditionalTailCalls() {
// Create a basic block containing the unconditional tail call
// instruction and place it at the end of the function.
// We have to add 1 byte as there's potentially an existing branch past
// the end of the code as a result of __builtin_unreachable().
const BinaryBasicBlock *LastBB = BasicBlocks.back();
uint64_t NewBlockOffset =
LastBB->getOffset() + BC.computeCodeSize(LastBB->begin(), LastBB->end());
uint64_t NewBlockOffset = LastBB->getOffset() +
BC.computeCodeSize(LastBB->begin(), LastBB->end()) + 1;
TailCallBB = addBasicBlock(NewBlockOffset, TCLabel);
TailCallBB->addInstruction(TailCallInst);
@@ -1566,9 +1606,9 @@ bool BinaryFunction::fixCFIState() {
// because this happens rarely.
if (NestedLevel != 0) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: CFI rewriter detected nested CFI state while"
<< " replaying CFI instructions for BB " << InBB->getName()
<< " in function " << *this << '\n';
errs() << "BOLT-WARNING: CFI rewriter detected nested CFI state"
<< " while replaying CFI instructions for BB "
<< InBB->getName() << " in function " << *this << '\n';
}
return false;
}
@@ -2481,6 +2521,23 @@ BinaryFunction::~BinaryFunction() {
}
}
/// Emit the jump tables recorded for this function through \p Streamer.
///
/// When the function owns at least one jump table, the streamer is switched
/// to the read-only section provided by BC.MOFI; every table is then written
/// as its start label followed by one pointer-sized symbol value per entry.
/// When there are no jump tables the streamer is left untouched.
void BinaryFunction::emitJumpTables(MCStreamer *Streamer) {
  if (!JumpTables.empty()) {
    Streamer->SwitchSection(BC.MOFI->getReadOnlySection());

    for (auto &JT : JumpTables) {
      DEBUG(dbgs() << "BOLT-DEBUG: emitting jump table "
                   << JT.StartLabel->getName() << '\n');

      // The label must precede the entries so that references to the table
      // resolve to its first slot.
      Streamer->EmitLabel(JT.StartLabel);

      // TODO (#9806207): based on jump table type (PIC vs non-PIC etc.)
      // we would need to emit different references.
      for (auto *Target : JT.Entries) {
        Streamer->EmitSymbolValue(Target, BC.AsmInfo->getPointerSize());
      }
    }
  }
}
void BinaryFunction::calculateLoopInfo() {
// Discover loops.
BinaryDominatorTree DomTree(false);

View File

@@ -404,6 +404,13 @@ private:
/// function and that apply before the entry basic block).
CFIInstrMapType CIEFrameInstructions;
/// Representation of a jump table.
///
/// Instances are created by aggregate initialization as
/// {StartLabel, Entries}, so the order of the members is significant.
struct JumpTable {
/// Label placed at the beginning of the table when it is emitted; the
/// memory operand of the indirect jump is rewritten to reference it.
MCSymbol *StartLabel;
/// Labels of the jump targets, one per table slot, in table order. Each is
/// emitted as a pointer-sized symbol value.
std::vector<MCSymbol *> Entries;
};
/// Jump tables discovered while disassembling this function, in discovery
/// order. The index attached to an indirect branch is the size of this
/// vector right after its table was appended (i.e. position + 1).
std::vector<JumpTable> JumpTables;
// Blocks are kept sorted in the layout order. If we need to change the
// layout (if BasicBlocksLayout stores a different order than BasicBlocks),
// the terminating instructions need to be modified.
@@ -673,19 +680,19 @@ public:
return MaxSize;
}
/// Return MC symbol associtated with the function.
/// Return MC symbol associated with the function.
/// All references to the function should use this symbol.
MCSymbol *getSymbol() {
return OutputSymbol;
}
/// Return MC symbol associtated with the function (const version).
/// Return MC symbol associated with the function (const version).
/// All references to the function should use this symbol.
const MCSymbol *getSymbol() const {
return OutputSymbol;
}
/// Return MC symbol associtated with the end of the function.
/// Return MC symbol associated with the end of the function.
MCSymbol *getFunctionEndLabel() {
assert(BC.Ctx && "cannot be called with empty context");
if (!FunctionEndLabel) {
@@ -782,8 +789,8 @@ public:
/// Returns NULL if basic block already exists at the \p Offset.
BinaryBasicBlock *addBasicBlock(uint64_t Offset, MCSymbol *Label,
bool DeriveAlignment = false) {
assert(CurrentState == State::CFG ||
(!getBasicBlockAtOffset(Offset) && "basic block already exists"));
assert((CurrentState == State::CFG || !getBasicBlockAtOffset(Offset)) &&
"basic block already exists in pre-CFG state");
auto BBPtr = createBasicBlock(Offset, Label, DeriveAlignment);
BasicBlocks.emplace_back(BBPtr.release());
@@ -1148,6 +1155,9 @@ public:
/// Emit exception handling ranges for the function.
void emitLSDA(MCStreamer *Streamer);
/// Emit jump tables for the function.
void emitJumpTables(MCStreamer *Streamer);
/// Merge profile data of this function into those of the given
/// function. The functions should have been proven identical with
/// isIdenticalWith.

View File

@@ -1068,17 +1068,15 @@ bool SimplifyRODataLoads::simplifyRODataLoads(
uint64_t TargetAddress;
if (MIA->hasRIPOperand(Inst)) {
// Try to find the symbol that corresponds to the rip-relative operand.
MCOperand DisplOp;
if (!MIA->getRIPOperandDisp(Inst, DisplOp))
continue;
assert(DisplOp.isExpr() &&
"found rip-relative with non-symbolic displacement");
// Try to find the symbol that corresponds to the RIP-relative operand.
auto DispOpI = MIA->getMemOperandDisp(Inst);
assert(DispOpI != Inst.end() && "expected RIP-relative displacement");
assert(DispOpI->isExpr() &&
"found RIP-relative with non-symbolic displacement");
// Get displacement symbol.
const MCSymbolRefExpr *DisplExpr;
if (!(DisplExpr = dyn_cast<MCSymbolRefExpr>(DisplOp.getExpr())))
if (!(DisplExpr = dyn_cast<MCSymbolRefExpr>(DispOpI->getExpr())))
continue;
const MCSymbol &DisplSymbol = DisplExpr->getSymbol();
@@ -1092,7 +1090,7 @@ bool SimplifyRODataLoads::simplifyRODataLoads(
continue;
}
// Get the contents of the section containing the target addresss of the
// Get the contents of the section containing the target address of the
// memory operand. We are only interested in read-only sections.
ErrorOr<SectionRef> DataSectionOrErr =
BC.getSectionForAddress(TargetAddress);

View File

@@ -277,7 +277,7 @@ int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
break;
++BI;
}
assert(BI != PredBB->branch_info_end() && "invalied control flow graph");
assert(BI != PredBB->branch_info_end() && "invalid control flow graph");
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE &&
"attempted reordering blocks of function with no profile data");
assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
@@ -304,7 +304,7 @@ void MinBranchGreedyClusterAlgorithm::adjustQueue(
auto Comp = [&] (const EdgeTy &A, const EdgeTy &B) {
// With equal weights, prioritize branches with lower index
// source/destination. This helps to keep original block order for blocks
// when optimal order cannot be deducted from a profile.
// when optimal order cannot be deduced from a profile.
if (Weight[A] == Weight[B]) {
uint32_t ASrcBBIndex = BF.getIndex(A.Src);
uint32_t BSrcBBIndex = BF.getIndex(B.Src);

View File

@@ -283,7 +283,8 @@ uint8_t *ExecutableFileMemoryManager::allocateSection(intptr_t Size,
IsReadOnly);
}
DEBUG(dbgs() << "BOLT: allocating " << (IsCode ? "code" : "data")
DEBUG(dbgs() << "BOLT: allocating "
<< (IsCode ? "code" : (IsReadOnly ? "read-only data" : "data"))
<< " section : " << SectionName
<< " with size " << Size << ", alignment " << Alignment
<< " at 0x" << ret << "\n");
@@ -1025,7 +1026,7 @@ void RewriteInstance::disassembleFunctions() {
auto *ContainingFunction = getBinaryFunctionContainingAddress(Addr);
if (ContainingFunction && ContainingFunction->getAddress() != Addr) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: Function " << ContainingFunction
errs() << "BOLT-WARNING: Function " << *ContainingFunction
<< " has internal BBs that are target of a reference located in "
<< "another function. Skipping the function.\n";
}
@@ -1325,8 +1326,10 @@ void emitFunction(MCStreamer &Streamer, BinaryFunction &Function,
Streamer.EmitLabel(Function.getFunctionEndLabel());
// Emit LSDA before anything else?
if (!EmitColdPart)
if (!EmitColdPart) {
Function.emitLSDA(&Streamer);
Function.emitJumpTables(&Streamer);
}
// TODO: is there any use in emitting end of function?
// Perhaps once we have support for C++ exceptions.
@@ -1521,7 +1524,8 @@ void RewriteInstance::emitFunctions() {
// Map special sections to their addresses in the output image.
//
// TODO: perhaps we should process all the allocated sections here?
std::vector<std::string> Sections = { ".eh_frame", ".gcc_except_table" };
std::vector<std::string> Sections = { ".eh_frame", ".gcc_except_table",
".rodata" };
for (auto &SectionName : Sections) {
auto SMII = EFMM->SectionMapInfo.find(SectionName);
if (SMII != EFMM->SectionMapInfo.end()) {