//===--- BinaryFunction.cpp - Interface for machine-level function --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "ReorderAlgorithm.h"
#include "DataReader.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
#include <queue>
#include <string>
#include <functional>
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"
using namespace llvm;
using namespace bolt;
namespace opts {
extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> PrintDynoStats;
static cl::opt<BinaryFunction::JumpTableSupportLevel>
JumpTables("jump-tables",
cl::desc("jump tables support"),
cl::init(BinaryFunction::JTS_NONE),
cl::values(clEnumValN(BinaryFunction::JTS_NONE, "0",
"do not optimize functions with jump tables"),
clEnumValN(BinaryFunction::JTS_BASIC, "1",
"optimize functions with jump tables"),
clEnumValN(BinaryFunction::JTS_SPLIT, "2",
"split jump tables into hot and cold"),
clEnumValN(BinaryFunction::JTS_AGGRESSIVE, "3",
"aggressively split jump tables (unsafe)"),
clEnumValEnd),
cl::ZeroOrMore);
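// Example: passing "-jump-tables=2" to llvm-bolt optimizes functions with
// jump tables and splits each table into hot and cold parts.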
static cl::opt<bool>
PrintJumpTables("print-jump-tables",
cl::desc("print jump tables"),
cl::ZeroOrMore,
cl::Hidden);
static cl::opt<bool>
AgressiveSplitting("split-all-cold",
cl::desc("outline as many cold basic blocks as possible"),
cl::ZeroOrMore);
static cl::opt<bool>
DotToolTipCode("dot-tooltip-code",
cl::desc("add basic block instructions as tool tips on nodes"),
cl::ZeroOrMore,
cl::Hidden);
static cl::opt<uint32_t>
DynoStatsScale("dyno-stats-scale",
cl::desc("scale to be applied while reporting dyno stats"),
cl::Optional,
cl::init(1));
} // namespace opts
namespace llvm {
namespace bolt {
// Temporary constant.
//
// TODO: move to architecture-specific file together with the code that is
// using it.
constexpr unsigned NoRegister = 0;
constexpr const char *DynoStats::Desc[];
namespace {
/// Gets debug line information for the instruction located at the given
/// address in the original binary. The SMLoc's pointer is used
/// to point to this information, which is represented by a
/// DebugLineTableRowRef. The returned pointer is null if no debug line
/// information for this instruction was found.
SMLoc findDebugLineInformationForInstructionAt(
uint64_t Address,
DWARFUnitLineTable &ULT) {
// We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
// which occupies 64 bits. Thus, we can only proceed if the struct fits into
// the pointer itself.
assert(
sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef) &&
"Cannot fit instruction debug line information into SMLoc's pointer");
SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
auto &LineTable = ULT.second;
if (!LineTable)
return NullResult;
uint32_t RowIndex = LineTable->lookupAddress(Address);
if (RowIndex == LineTable->UnknownRowIndex)
return NullResult;
assert(RowIndex < LineTable->Rows.size() &&
"Line Table lookup returned invalid index.");
decltype(SMLoc().getPointer()) Ptr;
DebugLineTableRowRef *InstructionLocation =
reinterpret_cast<DebugLineTableRowRef *>(&Ptr);
InstructionLocation->DwCompileUnitIndex = ULT.first->getOffset();
InstructionLocation->RowIndex = RowIndex + 1;
return SMLoc::getFromPointer(Ptr);
}
} // namespace
bool DynoStats::operator<(const DynoStats &Other) const {
return std::lexicographical_compare(
&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
&Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]
);
}
bool DynoStats::lessThan(const DynoStats &Other,
ArrayRef<Category> Keys) const {
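// Both ranges iterate over the same Keys; the comparator projects each key
// into this->Stats and Other.Stats, so this is a lexicographical comparison
// of the two stats vectors restricted to Keys, in key order.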
return std::lexicographical_compare(
Keys.begin(), Keys.end(),
Keys.begin(), Keys.end(),
[this,&Other](const Category A, const Category) {
return Stats[A] < Other.Stats[A];
}
);
}
uint64_t BinaryFunction::Count = 0;
BinaryBasicBlock *
BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
if (Offset > Size)
return nullptr;
if (BasicBlockOffsets.empty())
return nullptr;
/*
* This is commented out because it makes BOLT too slow.
* assert(std::is_sorted(BasicBlockOffsets.begin(),
* BasicBlockOffsets.end(),
* CompareBasicBlockOffsets()));
*/
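// upper_bound returns the first block that starts strictly after Offset;
// the block containing Offset is therefore the one right before it.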
auto I = std::upper_bound(BasicBlockOffsets.begin(),
BasicBlockOffsets.end(),
BasicBlockOffset(Offset, nullptr),
CompareBasicBlockOffsets());
assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0");
--I;
return I->second;
}
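// Return the size the basic block occupied in the input binary: the distance
// from its offset to the next block's offset, or to the end of the function
// for the last block.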
size_t
BinaryFunction::getBasicBlockOriginalSize(const BinaryBasicBlock *BB) const {
auto Index = getIndex(BB);
if (Index + 1 == BasicBlocks.size()) {
return Size - BB->getOffset();
} else {
return BasicBlocks[Index + 1]->getOffset() - BB->getOffset();
Update DWARF lexical blocks address ranges. Summary: Updates DWARF lexical blocks address ranges in the output binary after optimizations. This is similar to updating function address ranges except that the ranges representation needs to be more general, since address ranges can begin or end in the middle of a basic block. The following changes were made: - Added a data structure for iterating over the basic blocks that intersect an address range: BasicBlockTable.h - Added some more bookkeeping in BinaryBasicBlock. Basically, I needed to keep track of the block's size in the input binary as well as its address in the output binary. This information is mostly set by BinaryFunction after disassembly. - Added a representation for address ranges relative to basic blocks (BasicBlockOffsetRanges.h). Will also serve for location lists. - Added a representation for Lexical Blocks (LexicalBlock.h) - Small refactorings in DebugArangesWriter: -- Renamed to DebugRangesSectionsWriter since it also writes .debug_ranges -- Refactored it not to depend on BinaryFunction but instead on anything that can be assined an aoffset in .debug_ranges (added an interface for that) - Iterate over the DIE tree during initialization to find lexical blocks in .debug_info (BinaryContext.cpp) - Added patches to .debug_abbrev and .debug_info in RewriteInstance to update lexical blocks attributes (in fact, this part is very similar to what was done to function address ranges and I just refactored/reused that code) - Added small test case (lexical_blocks_address_ranges_debug.test) (cherry picked from FBD3113181)
2016-03-28 17:45:22 -07:00
}
}
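// Mark all blocks invalid, then mark everything reachable from entry points
// and landing pads as valid again. Blocks left invalid can be removed later
// by eraseInvalidBBs().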
void BinaryFunction::markUnreachable() {
std::stack<BinaryBasicBlock *> Stack;
for (auto *BB : layout()) {
BB->markValid(false);
}
// Add all entries and landing pads as roots.
for (auto *BB : BasicBlocks) {
if (BB->isEntryPoint() || BB->isLandingPad()) {
Stack.push(BB);
BB->markValid(true);
}
}
// Determine reachable BBs from the entry point
while (!Stack.empty()) {
auto BB = Stack.top();
Stack.pop();
for (auto Succ : BB->successors()) {
if (Succ->isValid())
continue;
Succ->markValid(true);
Stack.push(Succ);
}
}
}
// Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
// will be cleaned up by fixBranches().
std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
BasicBlockOrderType NewLayout;
unsigned Count = 0;
uint64_t Bytes = 0;
for (auto *BB : layout()) {
assert((!BB->isEntryPoint() || BB->isValid()) &&
"all entry blocks must be valid");
if (BB->isValid()) {
NewLayout.push_back(BB);
} else {
++Count;
Bytes += BC.computeCodeSize(BB->begin(), BB->end());
}
}
BasicBlocksLayout = std::move(NewLayout);
BasicBlockListType NewBasicBlocks;
for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
if ((*I)->isValid()) {
NewBasicBlocks.push_back(*I);
} else {
DeletedBasicBlocks.push_back(*I);
}
}
BasicBlocks = std::move(NewBasicBlocks);
assert(BasicBlocks.size() == BasicBlocksLayout.size());
// Update CFG state if needed
if (Count > 0) {
updateBBIndices(0);
recomputeLandingPads(0, BasicBlocks.size());
BBCFIState = annotateCFIState();
fixCFIState();
}
return std::make_pair(Count, Bytes);
}
void BinaryFunction::dump(std::string Annotation,
bool PrintInstructions) const {
print(dbgs(), Annotation, PrintInstructions);
}
void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
bool PrintInstructions) const {
StringRef SectionName;
Section.getName(SectionName);
OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
if (Names.size() > 1) {
OS << "\n Other names : ";
auto Sep = "";
for (unsigned i = 0; i < Names.size() - 1; ++i) {
OS << Sep << Names[i];
Sep = "\n ";
}
}
OS << "\n Number : " << FunctionNumber
<< "\n State : " << CurrentState
<< "\n Address : 0x" << Twine::utohexstr(Address)
<< "\n Size : 0x" << Twine::utohexstr(Size)
<< "\n MaxSize : 0x" << Twine::utohexstr(MaxSize)
<< "\n Offset : 0x" << Twine::utohexstr(FileOffset)
<< "\n Section : " << SectionName
<< "\n Orc Section : " << getCodeSectionName()
<< "\n LSDA : 0x" << Twine::utohexstr(getLSDAAddress())
<< "\n IsSimple : " << IsSimple
<< "\n IsSplit : " << IsSplit
<< "\n BB Count : " << BasicBlocksLayout.size();
if (FrameInstructions.size()) {
OS << "\n CFI Instrs : " << FrameInstructions.size();
}
if (BasicBlocksLayout.size()) {
OS << "\n BB Layout : ";
auto Sep = "";
for (auto BB : BasicBlocksLayout) {
OS << Sep << BB->getName();
Sep = ", ";
}
}
if (ImageAddress)
OS << "\n Image : 0x" << Twine::utohexstr(ImageAddress);
if (ExecutionCount != COUNT_NO_PROFILE) {
OS << "\n Exec Count : " << ExecutionCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}
if (IdenticalFunctionAddress != Address)
OS << "\n Id Fun Addr : 0x" << Twine::utohexstr(IdenticalFunctionAddress);
if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
OS << '\n';
DynoStats dynoStats = getDynoStats();
OS << dynoStats;
}
OS << "\n}\n";
if (!PrintInstructions || !BC.InstPrinter)
return;
// Offset of the instruction in function.
uint64_t Offset{0};
if (BasicBlocks.empty() && !Instructions.empty()) {
// Print before CFG was built.
for (const auto &II : Instructions) {
Offset = II.first;
// Print label if exists at this offset.
auto LI = Labels.find(Offset);
if (LI != Labels.end())
OS << LI->second->getName() << ":\n";
BC.printInstruction(OS, II.second, Offset, this);
}
}
for (uint32_t I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
auto BB = BasicBlocksLayout[I];
if (I != 0 &&
BB->isCold() != BasicBlocksLayout[I - 1]->isCold())
OS << "------- HOT-COLD SPLIT POINT -------\n\n";
OS << BB->getName() << " ("
<< BB->size() << " instructions, align : "
<< BB->getAlignment() << ")\n";
if (BB->isEntryPoint())
OS << " Entry Point\n";
if (BB->isLandingPad())
OS << " Landing Pad\n";
uint64_t BBExecCount = BB->getExecutionCount();
if (BBExecCount != BinaryBasicBlock::COUNT_NO_PROFILE) {
OS << " Exec Count : " << BBExecCount << "\n";
}
if (!BBCFIState.empty()) {
OS << " CFI State : " << BBCFIState[getIndex(BB)] << '\n';
}
if (!BB->pred_empty()) {
OS << " Predecessors: ";
auto Sep = "";
for (auto Pred : BB->predecessors()) {
OS << Sep << Pred->getName();
Sep = ", ";
}
OS << '\n';
}
if (!BB->throw_empty()) {
OS << " Throwers: ";
auto Sep = "";
for (auto Throw : BB->throwers()) {
OS << Sep << Throw->getName();
Sep = ", ";
}
OS << '\n';
}
Offset = RoundUpToAlignment(Offset, BB->getAlignment());
// Note: offsets are imprecise since this is happening prior to relaxation.
Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);
if (!BB->succ_empty()) {
OS << " Successors: ";
auto BI = BB->branch_info_begin();
auto Sep = "";
for (auto Succ : BB->successors()) {
assert(BI != BB->branch_info_end() && "missing BranchInfo entry");
OS << Sep << Succ->getName();
if (ExecutionCount != COUNT_NO_PROFILE &&
BI->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
OS << " (mispreds: " << BI->MispredictedCount
<< ", count: " << BI->Count << ")";
} else if (ExecutionCount != COUNT_NO_PROFILE &&
BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
OS << " (inferred count: " << BI->Count << ")";
}
Sep = ", ";
++BI;
}
OS << '\n';
}
if (!BB->lp_empty()) {
OS << " Landing Pads: ";
auto Sep = "";
for (auto LP : BB->landing_pads()) {
OS << Sep << LP->getName();
if (ExecutionCount != COUNT_NO_PROFILE) {
OS << " (count: " << LP->getExecutionCount() << ")";
}
Sep = ", ";
}
OS << '\n';
}
OS << '\n';
}
// Dump new exception ranges for the function.
if (!CallSites.empty()) {
OS << "EH table:\n";
for (auto &CSI : CallSites) {
OS << " [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
if (CSI.LP)
OS << *CSI.LP;
else
OS << "0";
OS << ", action : " << CSI.Action << '\n';
}
OS << '\n';
}
// Print all jump tables.
for (auto &JTI : JumpTables) {
JTI.second.print(OS);
}
OS << "DWARF CFI Instructions:\n";
if (OffsetToCFI.size()) {
// Pre-buildCFG information
for (auto &Elmt : OffsetToCFI) {
OS << format(" %08x:\t", Elmt.first);
assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
BinaryContext::printCFI(OS,
FrameInstructions[Elmt.second].getOperation());
OS << "\n";
}
} else {
// Post-buildCFG information
for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
const MCCFIInstruction &CFI = FrameInstructions[I];
OS << format(" %d:\t", I);
BinaryContext::printCFI(OS, CFI.getOperation());
OS << "\n";
}
}
if (FrameInstructions.empty())
OS << " <empty>\n";
OS << "End of Function \"" << *this << "\"\n\n";
}
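// Classify an indirect branch: a jump table (normal or PIC-style), or a
// possible indirect tail call. On recognizing a jump table, register it and
// rewrite the memory operand to reference the jump table's label.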
BinaryFunction::IndirectBranchType
BinaryFunction::analyzeIndirectBranch(MCInst &Instruction,
unsigned Size,
uint64_t Offset) {
auto &MIA = BC.MIA;
IndirectBranchType Type = IndirectBranchType::UNKNOWN;
// An instruction referencing memory used by jump instruction (directly or
// via register). This location could be an array of function pointers
// in case of indirect tail call, or a jump table.
MCInst *MemLocInstr = nullptr;
// Address of the table referenced by MemLocInstr. Could be either an
// array of function pointers, or a jump table.
uint64_t ArrayStart = 0;
auto analyzePICJumpTable =
[&](InstrMapType::reverse_iterator II,
InstrMapType::reverse_iterator IE,
unsigned R1,
unsigned R2) {
// Analyze PIC-style jump table code template:
//
// lea PIC_JUMP_TABLE(%rip), {%r1|%r2} <- MemLocInstr
// mov ({%r1|%r2}, %index, 4), {%r2|%r1}
// add %r2, %r1
// jmp *%r1
//
// (with any irrelevant instructions in-between)
//
// When we call this helper we've already determined %r1 and %r2, and
// reverse instruction iterator \p II is pointing to the ADD instruction.
//
// A PIC jump table looks like the following:
//
// JT: ----------
// E1:| L1 - JT |
// |----------|
// E2:| L2 - JT |
// |----------|
// | |
// ......
// En:| Ln - JT |
// ----------
//
// Where L1, L2, ..., Ln represent labels in the function.
//
// The actual relocations in the table will be of the form:
//
// Ln - JT
// = (Ln - En) + (En - JT)
// = R_X86_64_PC32(Ln) + En - JT
// = R_X86_64_PC32(Ln + offsetof(En))
//
DEBUG(dbgs() << "BOLT-DEBUG: checking for PIC jump table\n");
MCInst *MovInstr = nullptr;
while (++II != IE) {
auto &Instr = II->second;
const auto &InstrDesc = BC.MII->get(Instr.getOpcode());
if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *BC.MRI) &&
!InstrDesc.hasDefOfPhysReg(Instr, R2, *BC.MRI)) {
// Ignore instructions that don't affect R1, R2 registers.
continue;
} else if (!MovInstr) {
// Expect to see MOV instruction.
if (!MIA->isMOVSX64rm32(Instr)) {
DEBUG(dbgs() << "BOLT-DEBUG: MOV instruction expected.\n");
break;
}
// Check if it's setting %r1 or %r2. In canonical form it sets %r2.
// If it sets %r1, rename the registers so that we only have to check
// a single form.
auto MovDestReg = Instr.getOperand(0).getReg();
if (MovDestReg != R2)
std::swap(R1, R2);
if (MovDestReg != R2) {
DEBUG(dbgs() << "BOLT-DEBUG: MOV instruction expected to set %r2\n");
break;
}
// Verify operands for MOV.
unsigned BaseRegNum;
int64_t ScaleValue;
unsigned IndexRegNum;
int64_t DispValue;
unsigned SegRegNum;
if (!MIA->evaluateX86MemoryOperand(Instr, &BaseRegNum,
&ScaleValue, &IndexRegNum,
&DispValue, &SegRegNum))
break;
if (BaseRegNum != R1 ||
ScaleValue != 4 ||
IndexRegNum == bolt::NoRegister ||
DispValue != 0 ||
SegRegNum != bolt::NoRegister)
break;
MovInstr = &Instr;
} else {
assert(MovInstr && "MOV instruction expected to be set");
if (!MIA->isLEA64r(Instr)) {
DEBUG(dbgs() << "BOLT-DEBUG: LEA instruction expected\n");
break;
}
if (Instr.getOperand(0).getReg() != R1) {
DEBUG(dbgs() << "BOLT-DEBUG: LEA instruction expected to set %r1\n");
break;
}
// Verify operands for LEA.
unsigned BaseRegNum;
int64_t ScaleValue;
unsigned IndexRegNum;
const MCExpr *DispExpr = nullptr;
unsigned SegRegNum;
if (!MIA->evaluateX86MemoryOperand(Instr, &BaseRegNum,
&ScaleValue, &IndexRegNum,
nullptr, &SegRegNum, &DispExpr))
break;
if (BaseRegNum != BC.MRI->getProgramCounter() ||
IndexRegNum != bolt::NoRegister ||
SegRegNum != bolt::NoRegister ||
DispExpr == nullptr)
break;
MemLocInstr = &Instr;
break;
}
}
if (!MemLocInstr)
return IndirectBranchType::UNKNOWN;
DEBUG(dbgs() << "BOLT-DEBUG: checking potential PIC jump table\n");
return IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE;
};
// Try to find a (base) memory location from where the address for
// the indirect branch is loaded. For X86-64 the memory will be specified
// in the following format:
//
// {%rip}/{%basereg} + Imm + IndexReg * Scale
//
// We are interested in the cases where Scale == sizeof(uintptr_t) and
// the contents of the memory are presumably a function array.
//
// Normal jump table:
//
// jmp *(JUMP_TABLE, %index, Scale)
//
// or
//
// mov (JUMP_TABLE, %index, Scale), %r1
// ...
// jmp %r1
//
// We handle PIC-style jump tables separately.
//
if (Instruction.getNumOperands() == 1) {
// If the indirect jump is on a register, try to detect if the
// register value is loaded from a memory location.
assert(Instruction.getOperand(0).isReg() && "register operand expected");
const auto R1 = Instruction.getOperand(0).getReg();
// Check if one of the previous instructions defines the jump-on register.
// We will check that this instruction belongs to the same basic block
// in postProcessIndirectBranches().
for (auto PrevII = Instructions.rbegin(); PrevII != Instructions.rend();
++PrevII) {
auto &PrevInstr = PrevII->second;
const auto &PrevInstrDesc = BC.MII->get(PrevInstr.getOpcode());
if (!PrevInstrDesc.hasDefOfPhysReg(PrevInstr, R1, *BC.MRI))
continue;
if (MIA->isMoveMem2Reg(PrevInstr)) {
MemLocInstr = &PrevInstr;
break;
} else if (MIA->isADD64rr(PrevInstr)) {
auto R2 = PrevInstr.getOperand(2).getReg();
if (R1 == R2)
return IndirectBranchType::UNKNOWN;
Type = analyzePICJumpTable(PrevII, Instructions.rend(), R1, R2);
break;
} else {
return IndirectBranchType::UNKNOWN;
}
}
if (!MemLocInstr) {
// No definition seen for the register in this function so far. It could
// be an input parameter, which means it is an external code reference.
// It could also be that the definition happens to be in code that we
// haven't processed yet. Since we have to be conservative, return
// UNKNOWN.
return IndirectBranchType::UNKNOWN;
}
} else {
MemLocInstr = &Instruction;
}
const auto RIPRegister = BC.MRI->getProgramCounter();
auto PtrSize = BC.AsmInfo->getPointerSize();
// Analyze the memory location.
unsigned BaseRegNum;
int64_t ScaleValue;
unsigned IndexRegNum;
int64_t DispValue;
unsigned SegRegNum;
const MCExpr *DispExpr;
if (!MIA->evaluateX86MemoryOperand(*MemLocInstr, &BaseRegNum,
&ScaleValue, &IndexRegNum,
&DispValue, &SegRegNum,
&DispExpr))
return IndirectBranchType::UNKNOWN;
if ((BaseRegNum != bolt::NoRegister && BaseRegNum != RIPRegister) ||
SegRegNum != bolt::NoRegister)
return IndirectBranchType::UNKNOWN;
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE &&
(ScaleValue != 1 || BaseRegNum != RIPRegister))
return IndirectBranchType::UNKNOWN;
if (Type != IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE &&
ScaleValue != PtrSize)
return IndirectBranchType::UNKNOWN;
// RIP-relative addressing should be converted to symbol form by now
// in processed instructions (but not in jump).
if (DispExpr) {
auto SI = BC.GlobalSymbols.find(DispExpr->getSymbol().getName());
assert(SI != BC.GlobalSymbols.end() && "global symbol needs a value");
ArrayStart = SI->second;
} else {
ArrayStart = static_cast<uint64_t>(DispValue);
if (BaseRegNum == RIPRegister)
ArrayStart += getAddress() + Offset + Size;
}
DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
<< Twine::utohexstr(ArrayStart) << '\n');
// Check if there's already a jump table registered at this address.
if (auto *JT = getJumpTableContainingAddress(ArrayStart)) {
auto JTOffset = ArrayStart - JT->Address;
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE && JTOffset != 0) {
// Adjust the size of this jump table and create a new one if necessary.
// We cannot re-use the entries since the offsets are relative to the
// table start.
DEBUG(dbgs() << "BOLT-DEBUG: adjusting size of jump table at 0x"
<< Twine::utohexstr(JT->Address) << '\n');
JT->OffsetEntries.resize(JTOffset / JT->EntrySize);
} else {
// Re-use an existing jump table. Perhaps parts of it.
if (Type != IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
assert(JT->Type == JumpTable::JTT_NORMAL &&
"normal jump table expected");
Type = IndirectBranchType::POSSIBLE_JUMP_TABLE;
} else {
assert(JT->Type == JumpTable::JTT_PIC && "PIC jump table expected");
}
// Get or create a new label for the table.
auto LI = JT->Labels.find(JTOffset);
if (LI == JT->Labels.end()) {
auto *JTStartLabel = BC.Ctx->createTempSymbol("JUMP_TABLE", true);
auto Result = JT->Labels.emplace(JTOffset, JTStartLabel);
assert(Result.second && "error adding jump table label");
LI = Result.first;
}
BC.MIA->replaceMemOperandDisp(*MemLocInstr, LI->second, BC.Ctx.get());
BC.MIA->setJumpTable(Instruction, ArrayStart);
JTSites.emplace_back(Offset, ArrayStart);
return Type;
}
}
auto SectionOrError = BC.getSectionForAddress(ArrayStart);
if (!SectionOrError) {
// No section - possibly an absolute address. Since we don't allow
// internal function addresses to escape the function scope, we
// consider it a tail call.
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: no section for address 0x"
<< Twine::utohexstr(ArrayStart) << " referenced from function "
<< *this << '\n';
}
return IndirectBranchType::POSSIBLE_TAIL_CALL;
}
auto &Section = *SectionOrError;
if (Section.isVirtual()) {
// The contents are filled at runtime.
return IndirectBranchType::POSSIBLE_TAIL_CALL;
}
// Extract the value at the start of the array.
StringRef SectionContents;
Section.getContents(SectionContents);
auto EntrySize =
Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE ? 4 : PtrSize;
DataExtractor DE(SectionContents, BC.AsmInfo->isLittleEndian(), EntrySize);
auto ValueOffset = static_cast<uint32_t>(ArrayStart - Section.getAddress());
uint64_t Value = 0;
std::vector<uint64_t> JTOffsetCandidates;
while (ValueOffset <= Section.getSize() - EntrySize) {
DEBUG(dbgs() << "BOLT-DEBUG: indirect jmp at 0x"
<< Twine::utohexstr(getAddress() + Offset)
<< " is referencing address 0x"
<< Twine::utohexstr(Section.getAddress() + ValueOffset));
// Extract the value and increment the offset.
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
Value = ArrayStart + DE.getSigned(&ValueOffset, 4);
} else {
Value = DE.getAddress(&ValueOffset);
}
DEBUG(dbgs() << ", which contains value "
<< Twine::utohexstr(Value) << '\n');
if (containsAddress(Value) && Value != getAddress()) {
// Is it possible to have a jump table with function start as an entry?
JTOffsetCandidates.push_back(Value - getAddress());
if (Type == IndirectBranchType::UNKNOWN)
Type = IndirectBranchType::POSSIBLE_JUMP_TABLE;
continue;
}
// Potentially a switch table can contain a __builtin_unreachable() entry
// pointing right past the end of the function. In that case we have to
// check the next entry. Otherwise the entry is outside of this function's
// scope, and it's not a switch table.
if (Value == getAddress() + getSize()) {
JTOffsetCandidates.push_back(Value - getAddress());
} else {
break;
}
}
if (Type == IndirectBranchType::POSSIBLE_JUMP_TABLE ||
Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
assert(JTOffsetCandidates.size() > 2 &&
"expected more than 2 jump table entries");
auto *JTStartLabel = BC.Ctx->createTempSymbol("JUMP_TABLE", true);
DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "
<< JTStartLabel->getName()
<< " in function " << *this << " with "
<< JTOffsetCandidates.size() << " entries.\n");
auto JumpTableType =
Type == IndirectBranchType::POSSIBLE_JUMP_TABLE
? JumpTable::JTT_NORMAL
: JumpTable::JTT_PIC;
JumpTables.emplace(ArrayStart, JumpTable{ArrayStart,
EntrySize,
JumpTableType,
std::move(JTOffsetCandidates),
{{0, JTStartLabel}}});
BC.MIA->replaceMemOperandDisp(*MemLocInstr, JTStartLabel, BC.Ctx.get());
BC.MIA->setJumpTable(Instruction, ArrayStart);
JTSites.emplace_back(Offset, ArrayStart);
return Type;
}
BC.InterproceduralReferences.insert(Value);
return IndirectBranchType::POSSIBLE_TAIL_CALL;
}
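// Return a label for the given address inside the function, creating one if
// necessary. An existing global symbol is reused so that its value stays
// up-to-date. With CreatePastEnd, an address one past the end of the function
// maps to the function end label (used for __builtin_unreachable() jump
// table entries).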
MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
bool CreatePastEnd) {
MCSymbol *Result;
// Check if there's already a registered label.
auto Offset = Address - getAddress();
if ((Offset == getSize()) && CreatePastEnd)
return getFunctionEndLabel();
// Check if there's a global symbol registered at given address.
// If so - reuse it since we want to keep the symbol value updated.
if (Offset != 0) {
if (auto *Symbol = BC.getGlobalSymbolAtAddress(Address)) {
Labels[Offset] = Symbol;
return Symbol;
}
}
auto LI = Labels.find(Offset);
if (LI == Labels.end()) {
Result = BC.Ctx->createTempSymbol();
Labels[Offset] = Result;
} else {
Result = LI->second;
}
return Result;
}
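// Disassemble the raw function bytes: build the instruction map, create
// labels for local branch targets, record taken and fall-through branches,
// and detect jump tables. Functions that cannot be handled safely are
// marked non-simple.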
bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
assert(FunctionData.size() == getSize() &&
"function size does not match raw data size");
auto &Ctx = BC.Ctx;
auto &MIA = BC.MIA;
DWARFUnitLineTable ULT = getDWARFUnitLineTable();
// Insert a label at the beginning of the function. This will be our first
// basic block.
Labels[0] = Ctx->createTempSymbol("BB0", false);
addEntryPointAtOffset(0);
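// Replace a RIP-relative memory operand with a symbolic displacement.
// A target inside this function becomes a secondary entry point; a code
// target outside is recorded as an interprocedural reference.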
auto handleRIPOperand =
[&](MCInst &Instruction, uint64_t Address, uint64_t Size) {
uint64_t TargetAddress{0};
MCSymbol *TargetSymbol{nullptr};
if (!MIA->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
Size)) {
DEBUG(dbgs() << "BOLT: rip-relative operand can't be evaluated:\n";
BC.InstPrinter->printInst(&Instruction, dbgs(), "", *BC.STI);
dbgs() << '\n';
Instruction.dump_pretty(dbgs(), BC.InstPrinter.get());
dbgs() << '\n';);
return false;
}
if (TargetAddress == 0) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: rip-relative operand is zero in function "
<< *this << ". Ignoring function.\n";
}
return false;
}
// Note that the address does not necessarily have to reside inside
// a section; it could be an absolute address too.
auto Section = BC.getSectionForAddress(TargetAddress);
if (Section && Section->isText()) {
if (containsAddress(TargetAddress)) {
if (TargetAddress != getAddress()) {
// The address could potentially escape. Mark it as another entry
// point into the function.
DEBUG(dbgs() << "BOLT-DEBUG: potentially escaped address 0x"
<< Twine::utohexstr(TargetAddress) << " in function "
<< *this << '\n');
TargetSymbol = addEntryPointAtOffset(TargetAddress - getAddress());
}
} else {
BC.InterproceduralReferences.insert(TargetAddress);
}
}
if (!TargetSymbol)
TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "DATAat");
MIA->replaceMemOperandDisp(
Instruction, MCOperand::createExpr(MCSymbolRefExpr::create(
TargetSymbol, MCSymbolRefExpr::VK_None, *BC.Ctx)));
return true;
};
for (uint64_t Offset = 0; Offset < getSize(); ) {
MCInst Instruction;
uint64_t Size;
uint64_t AbsoluteInstrAddr = getAddress() + Offset;
if (!BC.DisAsm->getInstruction(Instruction,
Size,
FunctionData.slice(Offset),
AbsoluteInstrAddr,
nulls(),
nulls())) {
// Ignore this function. Skip to the next one.
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: unable to disassemble instruction at offset 0x"
<< Twine::utohexstr(Offset) << " (address 0x"
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
<< *this << '\n';
}
IsSimple = false;
break;
}
if (MIA->hasEVEXEncoding(Instruction)) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: function " << *this << " uses instruction"
" encoded with EVEX (AVX-512) at offset 0x"
<< Twine::utohexstr(Offset) << ". Disassembly could be wrong."
" Skipping further processing.\n";
}
IsSimple = false;
break;
}
// Convert instruction to a shorter version that could be relaxed if needed.
MIA->shortenInstruction(Instruction);
if (MIA->isBranch(Instruction) || MIA->isCall(Instruction)) {
uint64_t TargetAddress = 0;
if (MIA->evaluateBranch(Instruction,
AbsoluteInstrAddr,
Size,
TargetAddress)) {
// Check if the target is within the same function. Otherwise it's
// a call, possibly a tail call.
//
// If the target *is* the function address it could be either a branch
// or a recursive call.
bool IsCall = MIA->isCall(Instruction);
bool IsCondBranch = MIA->isConditionalBranch(Instruction);
MCSymbol *TargetSymbol{nullptr};
if (IsCall && containsAddress(TargetAddress)) {
if (TargetAddress == getAddress()) {
// Recursive call.
TargetSymbol = getSymbol();
} else {
// Possibly old-style PIC code.
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: internal call detected at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< " in function " << *this << ". Skipping.\n";
}
IsSimple = false;
}
}
if (!TargetSymbol) {
// Create either local label or external symbol.
if (containsAddress(TargetAddress)) {
TargetSymbol = getOrCreateLocalLabel(TargetAddress);
} else {
BC.InterproceduralReferences.insert(TargetAddress);
if (opts::Verbosity >= 2 && !IsCall && Size == 2) {
errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< " in function " << *this
<< ". Code size will be increased.\n";
}
assert(!MIA->isTailCall(Instruction) &&
"synthetic tail call instruction found");
// This is a call regardless of the opcode.
// Assign a proper opcode for tail calls, so that they can be
// treated as calls.
if (!IsCall) {
if (!MIA->convertJmpToTailCall(Instruction)) {
assert(IsCondBranch && "unknown tail call instruction");
if (opts::Verbosity >= 2) {
errs() << "BOLT-WARNING: conditional tail call detected in "
<< "function " << *this << " at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
}
}
// TODO: A better way to do this would be using annotations for
// MCInst objects.
TailCallOffsets.emplace(Offset, TargetAddress);
IsCall = true;
}
TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress,
"FUNCat");
if (TargetAddress == 0) {
// We actually see calls to address 0 because of weak symbols from
// libraries. More often than not this is unreachable code, but we
// don't know that and have to emit calls to 0, which make the LLVM
// JIT unhappy.
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: Function " << *this
<< " has a call to address zero. Ignoring function.\n";
}
IsSimple = false;
}
}
}
Instruction.clear();
Instruction.addOperand(
MCOperand::createExpr(
MCSymbolRefExpr::create(TargetSymbol,
MCSymbolRefExpr::VK_None,
*Ctx)));
if (!IsCall) {
// Add taken branch info.
TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
}
if (IsCondBranch) {
// Add fallthrough branch info.
FTBranches.emplace_back(Offset, Offset + Size);
}
} else {
// Could not evaluate branch. Should be an indirect call or an
// indirect branch. Bail out on the latter case.
if (MIA->isIndirectBranch(Instruction)) {
auto Result = analyzeIndirectBranch(Instruction, Size, Offset);
switch (Result) {
default:
llvm_unreachable("unexpected result");
case IndirectBranchType::POSSIBLE_TAIL_CALL:
MIA->convertJmpToTailCall(Instruction);
break;
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
if (opts::JumpTables == JTS_NONE)
IsSimple = false;
break;
case IndirectBranchType::UNKNOWN:
// Keep processing. We'll do more checks and fixes in
// postProcessIndirectBranches().
break;
};
}
// Indirect call. We only need to fix it if the operand is RIP-relative
if (IsSimple && MIA->hasRIPOperand(Instruction)) {
if (!handleRIPOperand(Instruction, AbsoluteInstrAddr, Size)) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: cannot handle RIP operand at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< ". Skipping function " << *this << ".\n";
}
IsSimple = false;
}
}
}
} else {
if (MIA->hasRIPOperand(Instruction)) {
if (!handleRIPOperand(Instruction, AbsoluteInstrAddr, Size)) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: cannot handle RIP operand at 0x"
<< Twine::utohexstr(AbsoluteInstrAddr)
<< ". Skipping function " << *this << ".\n";
}
IsSimple = false;
}
}
}
if (ULT.first && ULT.second) {
Instruction.setLoc(
findDebugLineInformationForInstructionAt(AbsoluteInstrAddr, ULT));
}
addInstruction(Offset, std::move(Instruction));
Offset += Size;
}
postProcessJumpTables();
// TODO: clear memory if not simple function?
// Update state.
updateState(State::Disassembled);
return true;
}
void BinaryFunction::postProcessJumpTables() {
// Create labels for all entries.
for (auto &JTI : JumpTables) {
auto &JT = JTI.second;
for (auto Offset : JT.OffsetEntries) {
auto *Label = getOrCreateLocalLabel(getAddress() + Offset,
/*CreatePastEnd*/ true);
JT.Entries.push_back(Label);
}
}
// Add TakenBranches from JumpTables.
//
// We want to do it after initial processing since we don't know jump tables'
// boundaries until we process them all.
for (auto &JTSite : JTSites) {
auto JTSiteOffset = JTSite.first;
auto JTAddress = JTSite.second;
auto *JT = getJumpTableContainingAddress(JTAddress);
assert(JT && "cannot find jump table for address");
auto EntryOffset = JTAddress - JT->Address;
while (EntryOffset < JT->getSize()) {
auto TargetOffset = JT->OffsetEntries[EntryOffset / JT->EntrySize];
if (TargetOffset < getSize())
TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
EntryOffset += JT->EntrySize;
// A label at the next entry means the end of this jump table.
if (JT->Labels.count(EntryOffset))
break;
}
}
// Free memory used by jump table offsets.
for (auto &JTI : JumpTables) {
auto &JT = JTI.second;
clearList(JT.OffsetEntries);
}
// Remove duplicate branches. We can get a bunch of them from jump tables.
// Without jump table value profiling we have no use for the extra
// (duplicate) branches.
std::sort(TakenBranches.begin(), TakenBranches.end());
auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
TakenBranches.erase(NewEnd, TakenBranches.end());
}
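// Validate the assumptions made while disassembling indirect branches: the
// jump-on register (and, for PIC jump tables, both registers) must be
// defined within the same basic block, and an unclassified register jump in
// a block with epilogue code is treated as a tail call. Return false if the
// CFG cannot be trusted.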
bool BinaryFunction::postProcessIndirectBranches() {
for (auto *BB : layout()) {
for (auto &Instr : *BB) {
if (!BC.MIA->isIndirectBranch(Instr))
continue;
// If there's an indirect branch in a single-block function,
// it must be a tail call.
if (layout_size() == 1) {
BC.MIA->convertJmpToTailCall(Instr);
return true;
}
// Validate the tail call or jump table assumptions.
if (BC.MIA->isTailCall(Instr) || BC.MIA->getJumpTable(Instr)) {
if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
// We have validated memory contents addressed by the jump
// instruction already.
continue;
}
// This is a jump on a register. Just make sure the register is defined
// in the containing basic block. Other assumptions were checked
// earlier.
assert(Instr.getOperand(0).isReg() && "register operand expected");
const auto R1 = Instr.getOperand(0).getReg();
auto PrevInstr = BB->rbegin();
while (PrevInstr != BB->rend()) {
const auto &PrevInstrDesc = BC.MII->get(PrevInstr->getOpcode());
if (PrevInstrDesc.hasDefOfPhysReg(*PrevInstr, R1, *BC.MRI)) {
break;
}
++PrevInstr;
}
if (PrevInstr == BB->rend()) {
if (opts::Verbosity >= 2) {
outs() << "BOLT-INFO: rejected potential "
<< (BC.MIA->isTailCall(Instr) ? "indirect tail call"
: "jump table")
<< " in function " << *this
<< " because the jump-on register was not defined in "
<< " basic block " << BB->getName() << ".\n";
DEBUG(dbgs() << BC.printInstructions(dbgs(), BB->begin(), BB->end(),
BB->getOffset(), this, true));
}
return false;
}
// In case of PIC jump table we need to do more checks.
if (BC.MIA->isMoveMem2Reg(*PrevInstr))
continue;
assert(BC.MIA->isADD64rr(*PrevInstr) && "add instruction expected");
auto R2 = PrevInstr->getOperand(2).getReg();
// Make sure both regs are set in the same basic block prior to ADD.
bool IsR1Set = false;
bool IsR2Set = false;
while ((++PrevInstr != BB->rend()) && !(IsR1Set && IsR2Set)) {
const auto &PrevInstrDesc = BC.MII->get(PrevInstr->getOpcode());
if (PrevInstrDesc.hasDefOfPhysReg(*PrevInstr, R1, *BC.MRI))
IsR1Set = true;
else if (PrevInstrDesc.hasDefOfPhysReg(*PrevInstr, R2, *BC.MRI))
IsR2Set = true;
}
if (!IsR1Set || !IsR2Set)
return false;
continue;
}
// If this block contains epilogue code and has an indirect branch,
// then most likely it's a tail call. Otherwise, we cannot tell for sure
// what it is and conservatively reject the function's CFG.
bool IsEpilogue = false;
for (const auto &Instr : *BB) {
if (BC.MIA->isLeave(Instr) || BC.MIA->isPop(Instr)) {
IsEpilogue = true;
break;
}
}
if (!IsEpilogue) {
if (opts::Verbosity >= 2) {
outs() << "BOLT-INFO: rejected potential indirect tail call in "
<< "function " << *this << " in basic block "
<< BB->getName() << ".\n";
DEBUG(BC.printInstructions(dbgs(), BB->begin(), BB->end(),
BB->getOffset(), this, true));
}
return false;
}
BC.MIA->convertJmpToTailCall(Instr);
}
}
return true;
}
void BinaryFunction::clearLandingPads(const unsigned StartIndex,
const unsigned NumBlocks) {
// Remove all landing pads/throws for the given collection of blocks.
for (auto I = StartIndex; I < StartIndex + NumBlocks; ++I) {
BasicBlocks[I]->clearLandingPads();
}
}
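// For every landing-pad block, add a throw edge from each invoke-containing
// block (recorded in LPToBBIndex) whose index falls in
// [StartIndex, StartIndex + NumBlocks).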
void BinaryFunction::addLandingPads(const unsigned StartIndex,
const unsigned NumBlocks) {
for (auto *BB : BasicBlocks) {
if (LandingPads.find(BB->getLabel()) != LandingPads.end()) {
const MCSymbol *LP = BB->getLabel();
for (unsigned I : LPToBBIndex[LP]) {
assert(I < BasicBlocks.size());
BinaryBasicBlock *ThrowBB = BasicBlocks[I];
const unsigned ThrowBBIndex = getIndex(ThrowBB);
if (ThrowBBIndex >= StartIndex && ThrowBBIndex < StartIndex + NumBlocks)
ThrowBB->addLandingPad(BB);
}
}
}
}
void BinaryFunction::recomputeLandingPads(const unsigned StartIndex,
const unsigned NumBlocks) {
assert(LPToBBIndex.empty());
clearLandingPads(StartIndex, NumBlocks);
for (auto I = StartIndex; I < StartIndex + NumBlocks; ++I) {
auto *BB = BasicBlocks[I];
for (auto &Instr : BB->instructions()) {
// Store info about associated landing pad.
if (BC.MIA->isInvoke(Instr)) {
const MCSymbol *LP;
uint64_t Action;
std::tie(LP, Action) = BC.MIA->getEHInfo(Instr);
if (LP) {
LPToBBIndex[LP].push_back(getIndex(BB));
}
}
}
}
addLandingPads(StartIndex, NumBlocks);
clearList(LPToBBIndex);
}
bool BinaryFunction::buildCFG() {
auto &MIA = BC.MIA;
auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames());
if (!BranchDataOrErr) {
DEBUG(dbgs() << "no branch data found for \"" << *this << "\"\n");
} else {
ExecutionCount = BranchDataOrErr->ExecutionCount;
}
if (!isSimple())
return false;
if (CurrentState != State::Disassembled)
return false;
assert(BasicBlocks.empty() && "basic block list should be empty");
assert((Labels.find(0) != Labels.end()) &&
"first instruction should always have a label");
// Create basic blocks in the original layout order:
//
// * Every instruction with associated label marks
// the beginning of a basic block.
// * Conditional instruction marks the end of a basic block,
// except when the following instruction is an
// unconditional branch, and the unconditional branch is not
// a destination of another branch. In the latter case, the
// basic block will consist of a single unconditional branch
// (missed optimization opportunity?).
//
// Created basic blocks are sorted in layout order since they are
// created in the same order as instructions, and instructions are
// sorted by offsets.
BinaryBasicBlock *InsertBB{nullptr};
BinaryBasicBlock *PrevBB{nullptr};
bool IsLastInstrNop{false};
bool IsPreviousInstrTailCall{false};
const MCInst *PrevInstr{nullptr};
auto addCFIPlaceholders =
[this](uint64_t CFIOffset, BinaryBasicBlock *InsertBB) {
for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
FE = OffsetToCFI.upper_bound(CFIOffset);
FI != FE; ++FI) {
addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
}
};
for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
const uint32_t Offset = I->first;
const auto &Instr = I->second;
auto LI = Labels.find(Offset);
if (LI != Labels.end()) {
// Always create new BB at branch destination.
PrevBB = InsertBB;
InsertBB = addBasicBlock(LI->first, LI->second,
/* DeriveAlignment = */ IsLastInstrNop);
if (hasEntryPointAtOffset(Offset))
InsertBB->setEntryPoint();
}
// Ignore nops. We use nops to derive alignment of the next basic block.
// It will not always work, as some blocks are naturally aligned, but
// it's just part of heuristic for block alignment.
if (MIA->isNoop(Instr)) {
IsLastInstrNop = true;
continue;
}
if (!InsertBB) {
// It must be a fallthrough or unreachable code. Create a new block unless
// we see an unconditional branch following a conditional one.
assert(PrevBB && "no previous basic block for a fall through");
assert(PrevInstr && "no previous instruction for a fall through");
if (MIA->isUnconditionalBranch(Instr) &&
!MIA->isUnconditionalBranch(*PrevInstr) && !IsPreviousInstrTailCall) {
// Temporarily restore inserter basic block.
InsertBB = PrevBB;
} else {
InsertBB = addBasicBlock(Offset,
BC.Ctx->createTempSymbol("FT", true),
/* DeriveAlignment = */ IsLastInstrNop);
}
}
if (Offset == 0) {
// Add associated CFI pseudos in the first offset (0)
addCFIPlaceholders(0, InsertBB);
}
IsLastInstrNop = false;
uint32_t InsertIndex = InsertBB->addInstruction(Instr);
PrevInstr = &Instr;
// Record whether this basic block is terminated with a tail call.
auto TCI = TailCallOffsets.find(Offset);
if (TCI != TailCallOffsets.end()) {
uint64_t TargetAddr = TCI->second;
TailCallTerminatedBlocks.emplace(
InsertBB, TailCallInfo(Offset, InsertIndex, TargetAddr));
IsPreviousInstrTailCall = true;
} else {
IsPreviousInstrTailCall = false;
}
// Add associated CFI instrs. We always add the CFI instruction that is
// located immediately after this instruction, since the next CFI
// instruction reflects the change in state caused by this instruction.
auto NextInstr = std::next(I);
uint64_t CFIOffset;
if (NextInstr != E)
CFIOffset = NextInstr->first;
else
CFIOffset = getSize();
addCFIPlaceholders(CFIOffset, InsertBB);
// Store info about associated landing pad.
if (MIA->isInvoke(Instr)) {
const MCSymbol *LP;
uint64_t Action;
std::tie(LP, Action) = MIA->getEHInfo(Instr);
if (LP) {
LPToBBIndex[LP].push_back(getIndex(InsertBB));
}
}
// How well do we detect tail calls here?
if (MIA->isTerminator(Instr)) {
PrevBB = InsertBB;
InsertBB = nullptr;
}
}
// Intermediate dump.
DEBUG(print(dbgs(), "after creating basic blocks"));
// TODO: properly handle calls to no-return functions,
// e.g. exit(3), etc. Otherwise we'll see false fall-through
// blocks.
// Make sure we can use profile data for this function.
if (BranchDataOrErr)
evaluateProfileData(BranchDataOrErr.get());
for (auto &Branch : TakenBranches) {
DEBUG(dbgs() << "registering branch [0x" << Twine::utohexstr(Branch.first)
<< "] -> [0x" << Twine::utohexstr(Branch.second) << "]\n");
auto *FromBB = getBasicBlockContainingOffset(Branch.first);
assert(FromBB && "cannot find BB containing FROM branch");
auto *ToBB = getBasicBlockAtOffset(Branch.second);
assert(ToBB && "cannot find BB containing TO branch");
if (BranchDataOrErr.getError()) {
FromBB->addSuccessor(ToBB);
} else {
const FuncBranchData &BranchData = BranchDataOrErr.get();
auto BranchInfoOrErr = BranchData.getBranch(Branch.first, Branch.second);
if (BranchInfoOrErr.getError()) {
FromBB->addSuccessor(ToBB);
} else {
const BranchInfo &BInfo = BranchInfoOrErr.get();
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
// Populate profile counts for the jump table.
auto *LastInstr = FromBB->getLastNonPseudo();
if (!LastInstr)
continue;
auto JTAddress = BC.MIA->getJumpTable(*LastInstr);
if (!JTAddress)
continue;
auto *JT = getJumpTableContainingAddress(JTAddress);
if (!JT)
continue;
JT->Count += BInfo.Branches;
if (opts::JumpTables < JTS_AGGRESSIVE)
continue;
if (JT->Counts.empty())
JT->Counts.resize(JT->Entries.size());
auto EI = JT->Entries.begin();
auto Delta = (JTAddress - JT->Address) / JT->EntrySize;
EI += Delta;
while (EI != JT->Entries.end()) {
if (ToBB->getLabel() == *EI) {
JT->Counts[Delta] += BInfo.Branches;
}
++Delta;
++EI;
// A label marks the start of another jump table.
if (JT->Labels.count(Delta * JT->EntrySize))
break;
}
}
}
}
for (auto &Branch : FTBranches) {
DEBUG(dbgs() << "registering fallthrough [0x"
<< Twine::utohexstr(Branch.first) << "] -> [0x"
<< Twine::utohexstr(Branch.second) << "]\n");
auto *FromBB = getBasicBlockContainingOffset(Branch.first);
assert(FromBB && "cannot find BB containing FROM branch");
// Try to find the destination basic block. If the jump instruction was
// followed by a no-op then the destination offset recorded in FTBranches
// will point to that no-op but the destination basic block will start
// after the no-op due to ignoring no-ops when creating basic blocks.
// So we have to skip any no-ops when trying to find the destination
// basic block.
auto *ToBB = getBasicBlockAtOffset(Branch.second);
if (ToBB == nullptr) {
auto I = Instructions.find(Branch.second), E = Instructions.end();
while (ToBB == nullptr && I != E && MIA->isNoop(I->second)) {
++I;
if (I == E)
break;
ToBB = getBasicBlockAtOffset(I->first);
}
if (ToBB == nullptr) {
// We have a fall-through that does not point to another BB, ignore it
// as it may happen in cases where we have a BB finished by two
// branches.
continue;
}
}
// Do not add a successor if we can't find profile data; leave it to the
// inference pass to guess its frequency.
if (BranchDataOrErr) {
const FuncBranchData &BranchData = BranchDataOrErr.get();
auto BranchInfoOrErr = BranchData.getBranch(Branch.first, Branch.second);
if (BranchInfoOrErr) {
const BranchInfo &BInfo = BranchInfoOrErr.get();
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
}
}
}
for (auto &I : TailCallTerminatedBlocks) {
TailCallInfo &TCInfo = I.second;
if (BranchDataOrErr) {
const FuncBranchData &BranchData = BranchDataOrErr.get();
auto BranchInfoOrErr = BranchData.getDirectCallBranch(TCInfo.Offset);
if (BranchInfoOrErr) {
const BranchInfo &BInfo = BranchInfoOrErr.get();
TCInfo.Count = BInfo.Branches;
TCInfo.Mispreds = BInfo.Mispreds;
}
}
}
// Add fall-through branches (except for non-taken conditional branches with
// profile data, which were already accounted for in TakenBranches).
PrevBB = nullptr;
bool IsPrevFT = false; // Is previous block a fall-through.
for (auto BB : BasicBlocks) {
if (IsPrevFT) {
PrevBB->addSuccessor(BB, BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE,
BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE);
}
if (BB->empty()) {
IsPrevFT = true;
PrevBB = BB;
continue;
}
auto LastInstIter = --BB->end();
while (MIA->isCFI(*LastInstIter) && LastInstIter != BB->begin())
--LastInstIter;
// Check if the last instruction is a conditional jump that serves as a tail
// call.
bool IsCondTailCall = MIA->isConditionalBranch(*LastInstIter) &&
TailCallTerminatedBlocks.count(BB);
if (BB->succ_size() == 0) {
if (IsCondTailCall) {
// Conditional tail call without profile data for non-taken branch.
IsPrevFT = true;
} else {
// Unless the last instruction is a terminator, control will fall
// through to the next basic block.
IsPrevFT = !MIA->isTerminator(*LastInstIter);
}
} else if (BB->succ_size() == 1) {
if (IsCondTailCall) {
// Conditional tail call with data for non-taken branch. A fall-through
// edge has already been added in the CFG.
IsPrevFT = false;
} else {
// Fall-through should be added if the last instruction is a conditional
// jump, since there was no profile data for the non-taken branch.
IsPrevFT = MIA->isConditionalBranch(*LastInstIter);
}
} else {
// The block ends with two branches, with an indirect jump, or with a
// conditional branch whose frequency has been inferred from LBR data.
IsPrevFT = false;
}
PrevBB = BB;
}
if (!IsPrevFT) {
// Possibly a call that does not return.
DEBUG(dbgs() << "last block was marked as a fall-through\n");
}
// Add associated landing pad blocks to each basic block.
addLandingPads(0, BasicBlocks.size());
// Infer frequency for non-taken branches
if (hasValidProfile())
inferFallThroughCounts();
// Update CFI information for each BB
BBCFIState = annotateCFIState();
// Convert conditional tail call branches to conditional branches that jump
// to a tail call.
removeConditionalTailCalls();
// Set the basic block layout to the original order.
for (auto BB : BasicBlocks) {
BasicBlocksLayout.emplace_back(BB);
}
// Make any necessary adjustments for indirect branches.
if (!postProcessIndirectBranches())
setSimple(false);
// Fix the possibly corrupted CFI state. CFI state may have been corrupted
// because of the CFG modifications while removing conditional tail calls.
fixCFIState();
// Clean-up memory taken by instructions and labels.
clearList(Instructions);
clearList(TailCallOffsets);
clearList(TailCallTerminatedBlocks);
clearList(OffsetToCFI);
clearList(Labels);
clearList(TakenBranches);
clearList(FTBranches);
clearList(LPToBBIndex);
clearList(EntryOffsets);
// Update the state.
CurrentState = State::CFG;
// Annotate invoke instructions with GNU_args_size data.
propagateGnuArgsSizeInfo();
return true;
}
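// Compare branches recorded in the profile against branches in the
// disassembled function and compute ProfileMatchRatio, the fraction of
// profile branches that match the CFG. Recursive calls and returns from
// recursive calls are excluded since they are not local branches.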
void BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
BranchListType ProfileBranches(BranchData.Data.size());
std::transform(BranchData.Data.begin(),
BranchData.Data.end(),
ProfileBranches.begin(),
[](const BranchInfo &BI) {
return std::make_pair(BI.From.Offset,
BI.To.Name == BI.From.Name ?
BI.To.Offset : -1U);
});
BranchListType LocalProfileBranches;
std::copy_if(ProfileBranches.begin(),
ProfileBranches.end(),
std::back_inserter(LocalProfileBranches),
[](const std::pair<uint32_t, uint32_t> &Branch) {
return Branch.second != -1U;
});
// Until we define a minimal profile, we consider the absence of branch data
// to be a valid profile. It can happen to a function without branches.
if (LocalProfileBranches.empty()) {
ProfileMatchRatio = 1.0f;
return;
}
std::sort(LocalProfileBranches.begin(), LocalProfileBranches.end());
BranchListType FunctionBranches = TakenBranches;
FunctionBranches.insert(FunctionBranches.end(),
FTBranches.begin(),
FTBranches.end());
std::sort(FunctionBranches.begin(), FunctionBranches.end());
BranchListType DiffBranches; // Branches in profile without a match.
std::set_difference(LocalProfileBranches.begin(),
LocalProfileBranches.end(),
FunctionBranches.begin(),
FunctionBranches.end(),
std::back_inserter(DiffBranches));
// Branches without a match in CFG.
BranchListType OrphanBranches;
// Eliminate recursive calls and returns from recursive calls from the list
// of branches that have no match. They are not considered local branches.
auto isRecursiveBranch = [&](std::pair<uint32_t, uint32_t> &Branch) {
auto SrcInstrI = Instructions.find(Branch.first);
if (SrcInstrI == Instructions.end())
return false;
// Check if it is a recursive call.
if (BC.MIA->isCall(SrcInstrI->second) && Branch.second == 0)
return true;
auto DstInstrI = Instructions.find(Branch.second);
if (DstInstrI == Instructions.end())
return false;
// Check if it is a return from a recursive call.
bool IsSrcReturn = BC.MIA->isReturn(SrcInstrI->second);
// "rep ret" is considered to be 2 different instructions.
if (!IsSrcReturn && BC.MIA->isPrefix(SrcInstrI->second)) {
auto SrcInstrSuccessorI = SrcInstrI;
++SrcInstrSuccessorI;
assert(SrcInstrSuccessorI != Instructions.end() &&
"unexpected prefix instruction at the end of function");
IsSrcReturn = BC.MIA->isReturn(SrcInstrSuccessorI->second);
}
if (IsSrcReturn && Branch.second != 0) {
// Make sure the destination follows the call instruction.
auto DstInstrPredecessorI = DstInstrI;
--DstInstrPredecessorI;
assert(DstInstrPredecessorI != Instructions.end() && "invalid iterator");
if (BC.MIA->isCall(DstInstrPredecessorI->second))
return true;
}
return false;
};
std::remove_copy_if(DiffBranches.begin(),
DiffBranches.end(),
std::back_inserter(OrphanBranches),
isRecursiveBranch);
ProfileMatchRatio =
(float) (LocalProfileBranches.size() - OrphanBranches.size()) /
(float) LocalProfileBranches.size();
if (opts::Verbosity >= 1 && !OrphanBranches.empty()) {
errs() << "BOLT-WARNING: profile branches match only "
<< format("%.1f%%", ProfileMatchRatio * 100.0f) << " ("
<< (LocalProfileBranches.size() - OrphanBranches.size()) << '/'
<< LocalProfileBranches.size() << ") for function "
<< *this << '\n';
DEBUG(
for (auto &OBranch : OrphanBranches)
errs() << "\t0x" << Twine::utohexstr(OBranch.first) << " -> 0x"
<< Twine::utohexstr(OBranch.second) << " (0x"
<< Twine::utohexstr(OBranch.first + getAddress()) << " -> 0x"
<< Twine::utohexstr(OBranch.second + getAddress()) << ")\n";
);
}
}
void BinaryFunction::inferFallThroughCounts() {
assert(!BasicBlocks.empty() && "basic block list should not be empty");
auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames());
// Compute preliminary execution time for each basic block
for (auto CurBB : BasicBlocks) {
if (CurBB == *BasicBlocks.begin()) {
CurBB->setExecutionCount(ExecutionCount);
continue;
}
CurBB->ExecutionCount = 0;
}
for (auto CurBB : BasicBlocks) {
auto SuccCount = CurBB->branch_info_begin();
for (auto Succ : CurBB->successors()) {
// Do not update execution count of the entry block (when we have tail
// calls). We already accounted for those when computing the func count.
if (Succ == *BasicBlocks.begin()) {
++SuccCount;
continue;
}
if (SuccCount->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
Succ->setExecutionCount(Succ->getExecutionCount() + SuccCount->Count);
++SuccCount;
}
}
// Update execution counts of landing pad blocks.
if (!BranchDataOrErr.getError()) {
const FuncBranchData &BranchData = BranchDataOrErr.get();
for (const auto &I : BranchData.EntryData) {
BinaryBasicBlock *BB = getBasicBlockAtOffset(I.To.Offset);
if (BB && LandingPads.find(BB->getLabel()) != LandingPads.end()) {
BB->setExecutionCount(BB->getExecutionCount() + I.Branches);
}
}
}
// Work on one basic block at a time, propagating frequency information
// forward. It is important to walk in the layout order.
for (auto CurBB : BasicBlocks) {
uint64_t BBExecCount = CurBB->getExecutionCount();
// Propagate this information to successors, filling in fall-through edges
// with frequency information
if (CurBB->succ_size() == 0)
continue;
// Calculate frequency of outgoing branches from this node according to
// LBR data
uint64_t ReportedBranches = 0;
for (const auto &SuccCount : CurBB->branch_info()) {
if (SuccCount.Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE)
ReportedBranches += SuccCount.Count;
}
// Calculate frequency of outgoing tail calls from this node according to
// LBR data
uint64_t ReportedTailCalls = 0;
auto TCI = TailCallTerminatedBlocks.find(CurBB);
if (TCI != TailCallTerminatedBlocks.end()) {
ReportedTailCalls = TCI->second.Count;
}
// Calculate frequency of throws from this node according to LBR data
// for branching into associated landing pads. Since it is possible
// for a landing pad to be associated with more than one basic block,
// we may overestimate the frequency of throws for such blocks.
uint64_t ReportedThrows = 0;
for (BinaryBasicBlock *LP: CurBB->landing_pads()) {
ReportedThrows += LP->getExecutionCount();
}
uint64_t TotalReportedJumps =
ReportedBranches + ReportedTailCalls + ReportedThrows;
// Infer the frequency of the fall-through edge, representing not taking the
// branch
uint64_t Inferred = 0;
if (BBExecCount > TotalReportedJumps)
Inferred = BBExecCount - TotalReportedJumps;
DEBUG({
if (opts::Verbosity >= 1 && BBExecCount < TotalReportedJumps)
errs()
<< "BOLT-WARNING: Fall-through inference is slightly inconsistent. "
"exec frequency is less than the outgoing edges frequency ("
<< BBExecCount << " < " << ReportedBranches
<< ") for BB at offset 0x"
<< Twine::utohexstr(getAddress() + CurBB->getOffset()) << '\n';
});
if (CurBB->succ_size() <= 2) {
// If there is a fall-through (FT) successor, it will be the last one.
auto &SuccCount = *CurBB->branch_info_rbegin();
auto &Succ = *CurBB->succ_rbegin();
if (SuccCount.Count == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
SuccCount.Count = Inferred;
Succ->ExecutionCount += Inferred;
}
}
} // end for (CurBB : BasicBlocks)
return;
}
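// Rewrite each conditional tail call as a conditional branch to a new basic
// block containing an unconditional tail call, preserving the direction
// (forward/backward) of the original jump.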
void BinaryFunction::removeConditionalTailCalls() {
for (auto &I : TailCallTerminatedBlocks) {
BinaryBasicBlock *BB = I.first;
TailCallInfo &TCInfo = I.second;
// Get the conditional tail call instruction.
MCInst &CondTailCallInst = BB->getInstructionAtIndex(TCInfo.Index);
if (!BC.MIA->isConditionalBranch(CondTailCallInst)) {
// The block is not terminated with a conditional tail call.
continue;
}
// Assert that the tail call does not throw.
const MCSymbol *LP;
uint64_t Action;
std::tie(LP, Action) = BC.MIA->getEHInfo(CondTailCallInst);
assert(!LP && "found tail call with associated landing pad");
// Create the unconditional tail call instruction.
const auto *TailCallTargetLabel = BC.MIA->getTargetSymbol(CondTailCallInst);
assert(TailCallTargetLabel && "symbol expected for direct tail call");
MCInst TailCallInst;
BC.MIA->createTailCall(TailCallInst, TailCallTargetLabel, BC.Ctx.get());
// The way we will remove this conditional tail call depends on the
// direction of the jump when it is taken. We want to preserve this
// direction.
BinaryBasicBlock *TailCallBB = nullptr;
MCSymbol *TCLabel = BC.Ctx->createTempSymbol("TC", true);
if (getAddress() >= TCInfo.TargetAddress) {
// Backward jump: We will reverse the condition of the tail call, change
// its target to the following (currently fall-through) block, and insert
// a new block between them that will contain the unconditional tail call.
// Reverse the condition of the tail call and update its target.
unsigned InsertIdx = getIndex(BB) + 1;
assert(InsertIdx < size() && "no fall-through for conditional tail call");
BinaryBasicBlock *NextBB = BasicBlocks[InsertIdx];
BC.MIA->reverseBranchCondition(
CondTailCallInst, NextBB->getLabel(), BC.Ctx.get());
// Create a basic block containing the unconditional tail call instruction
// and place it between BB and NextBB.
std::vector<std::unique_ptr<BinaryBasicBlock>> TailCallBBs;
TailCallBBs.emplace_back(createBasicBlock(NextBB->getOffset(), TCLabel));
TailCallBBs[0]->addInstruction(TailCallInst);
insertBasicBlocks(BB, std::move(TailCallBBs),
/* UpdateLayout */ false,
/* UpdateCFIState */ false);
TailCallBB = BasicBlocks[InsertIdx];
// Add the correct CFI state for the new block.
BBCFIState.insert(BBCFIState.begin() + InsertIdx, TCInfo.CFIStateBefore);
} else {
// Forward jump: we will create a new basic block at the end of the
// function containing the unconditional tail call and change the target
// of the conditional tail call to this basic block.
// Create a basic block containing the unconditional tail call
// instruction and place it at the end of the function.
// We have to add 1 byte as there's potentially an existing branch past
// the end of the code as a result of __builtin_unreachable().
const BinaryBasicBlock *LastBB = BasicBlocks.back();
uint64_t NewBlockOffset = LastBB->getOffset() +
BC.computeCodeSize(LastBB->begin(), LastBB->end()) + 1;
TailCallBB = addBasicBlock(NewBlockOffset, TCLabel);
TailCallBB->addInstruction(TailCallInst);
// Add the correct CFI state for the new block. It has to be inserted in
// the one before last position (the last position holds the CFI state
// after the last block).
BBCFIState.insert(BBCFIState.begin() + BBCFIState.size() - 1,
TCInfo.CFIStateBefore);
// Replace the target of the conditional tail call with the label of the
// new basic block.
BC.MIA->replaceBranchTarget(CondTailCallInst, TCLabel, BC.Ctx.get());
}
// Add CFG edge with profile info from BB to TailCallBB info and swap
// edges if the TailCallBB corresponds to the taken branch.
BB->addSuccessor(TailCallBB, TCInfo.Count, TCInfo.Mispreds);
if (getAddress() < TCInfo.TargetAddress)
BB->swapConditionalSuccessors();
// Add execution count for the block.
if (hasValidProfile())
TailCallBB->setExecutionCount(TCInfo.Count);
}
}
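// The function score is the sum, over all blocks in the layout, of the
// block's execution count multiplied by its number of non-pseudo
// instructions.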
uint64_t BinaryFunction::getFunctionScore() {
if (FunctionScore != -1)
return FunctionScore;
uint64_t TotalScore = 0ULL;
for (auto BB : layout()) {
uint64_t BBExecCount = BB->getExecutionCount();
if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
continue;
BBExecCount *= BB->getNumNonPseudos();
TotalScore += BBExecCount;
}
FunctionScore = TotalScore;
return FunctionScore;
}
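// Compute the CFI state at the entry of each basic block: state N
// corresponds to having applied the first N entries of FrameInstructions.
// Remember/restore-state pairs are tracked with a stack.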
BinaryFunction::CFIStateVector
BinaryFunction::annotateCFIState(const MCInst *Stop) {
assert(!BasicBlocks.empty() && "basic block list should not be empty");
uint32_t State = 0;
uint32_t HighestState = 0;
std::stack<uint32_t> StateStack;
CFIStateVector CFIState;
for (auto CI = BasicBlocks.begin(), CE = BasicBlocks.end(); CI != CE; ++CI) {
BinaryBasicBlock *CurBB = *CI;
// Annotate this BB entry
CFIState.emplace_back(State);
// While building the CFG, we want to save the CFI state before a tail call
// instruction, so that we can correctly remove conditional tail calls.
auto TCI = TailCallTerminatedBlocks.find(CurBB);
bool SaveState = TCI != TailCallTerminatedBlocks.end();
// Advance state
uint32_t Idx = 0;
for (const auto &Instr : *CurBB) {
auto *CFI = getCFIFor(Instr);
if (CFI == nullptr) {
if (SaveState && Idx == TCI->second.Index)
TCI->second.CFIStateBefore = State;
++Idx;
if (&Instr == Stop) {
CFIState.emplace_back(State);
return CFIState;
}
continue;
}
++HighestState;
if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
StateStack.push(State);
} else if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
assert(!StateStack.empty() && "Corrupt CFI stack");
State = StateStack.top();
StateStack.pop();
} else if (CFI->getOperation() != MCCFIInstruction::OpGnuArgsSize) {
State = HighestState;
}
++Idx;
if (&Instr == Stop) {
CFIState.emplace_back(State);
return CFIState;
}
}
}
// Store the state after the last BB
CFIState.emplace_back(State);
assert(StateStack.empty() && "Corrupt CFI stack");
return CFIState;
}
bool BinaryFunction::fixCFIState() {
auto Sep = "";
DEBUG(dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
<< ": ");
auto replayCFIInstrs =
[this](uint32_t FromState, uint32_t ToState, BinaryBasicBlock *InBB,
BinaryBasicBlock::iterator InsertIt) -> bool {
if (FromState == ToState)
return true;
assert(FromState < ToState);
std::vector<uint32_t> NewCFIs;
uint32_t NestedLevel = 0;
for (uint32_t CurState = FromState; CurState < ToState; ++CurState) {
assert(CurState < FrameInstructions.size());
MCCFIInstruction *Instr = &FrameInstructions[CurState];
if (Instr->getOperation() == MCCFIInstruction::OpRememberState)
++NestedLevel;
if (!NestedLevel)
NewCFIs.push_back(CurState);
if (Instr->getOperation() == MCCFIInstruction::OpRestoreState)
--NestedLevel;
}
// TODO: If in replaying the CFI instructions to reach this state we
// have state stack instructions, we could still work out the logic
// to extract only the necessary instructions to reach this state
// without using the state stack. Not sure if it is worth the effort
// because this happens rarely.
if (NestedLevel != 0) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: CFI rewriter detected nested CFI state"
<< " while replaying CFI instructions for BB "
<< InBB->getName() << " in function " << *this << '\n';
}
return false;
}
for (auto CFI : NewCFIs) {
// Ignore GNU_args_size instructions.
if (FrameInstructions[CFI].getOperation() !=
MCCFIInstruction::OpGnuArgsSize) {
InsertIt = addCFIPseudo(InBB, InsertIt, CFI);
++InsertIt;
}
}
return true;
};
uint32_t State = 0;
auto *FDEStartBB = BasicBlocksLayout[0];
for (uint32_t I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
auto *BB = BasicBlocksLayout[I];
uint32_t BBIndex = getIndex(BB);
// Hot-cold border: check if this is the first BB to be allocated in a cold
// region (a different FDE). If yes, we need to reset the CFI state and
// the FDEStartBB that is used to insert remember_state CFIs (t12863876).
if (I != 0 && BB->isCold() != BasicBlocksLayout[I - 1]->isCold()) {
State = 0;
FDEStartBB = BB;
}
// We need to recover the correct state if it doesn't match the expected
// state at this BB's entry point.
if (BBCFIState[BBIndex] < State) {
// In this case, State is currently higher than what this BB expects it
// to be. To solve this, we need to insert a CFI instruction to remember
// the old state at function entry, then another CFI instruction to
// restore it at the entry of this BB and replay CFI instructions to
// reach the desired state.
uint32_t OldState = BBCFIState[BBIndex];
// Remember state at function entry point (our reference state).
auto InsertIt = FDEStartBB->begin();
while (InsertIt != FDEStartBB->end() && BC.MIA->isCFI(*InsertIt))
++InsertIt;
addCFIPseudo(FDEStartBB, InsertIt, FrameInstructions.size());
FrameInstructions.emplace_back(
MCCFIInstruction::createRememberState(nullptr));
// Restore state
InsertIt = addCFIPseudo(BB, BB->begin(), FrameInstructions.size());
++InsertIt;
FrameInstructions.emplace_back(
MCCFIInstruction::createRestoreState(nullptr));
if (!replayCFIInstrs(0, OldState, BB, InsertIt))
return false;
// Check if we messed up the stack in this process
int StackOffset = 0;
for (BinaryBasicBlock *CurBB : BasicBlocksLayout) {
if (CurBB == BB)
break;
for (auto &Instr : *CurBB) {
if (auto *CFI = getCFIFor(Instr)) {
if (CFI->getOperation() == MCCFIInstruction::OpRememberState)
++StackOffset;
if (CFI->getOperation() == MCCFIInstruction::OpRestoreState)
--StackOffset;
}
}
}
auto Pos = BB->begin();
while (Pos != BB->end() && BC.MIA->isCFI(*Pos)) {
auto CFI = getCFIFor(*Pos);
if (CFI->getOperation() == MCCFIInstruction::OpRememberState)
++StackOffset;
if (CFI->getOperation() == MCCFIInstruction::OpRestoreState)
--StackOffset;
++Pos;
}
if (StackOffset != 0) {
if (opts::Verbosity >= 1) {
errs() << " BOLT-WARNING: not possible to remember/recover state"
<< " without corrupting CFI state stack in function "
<< *this << "\n";
}
return false;
}
} else if (BBCFIState[BBIndex] > State) {
// If BBCFIState[BBIndex] > State, it means we are behind in the
// state. Just emit all instructions to reach this state at the
// beginning of this BB. If this sequence of instructions involves
// remember state or restore state, bail out.
if (!replayCFIInstrs(State, BBCFIState[BBIndex], BB, BB->begin()))
return false;
}
State = BBCFIState[BBIndex + 1];
DEBUG(dbgs() << Sep << State);
DEBUG(Sep = ", ");
}
DEBUG(dbgs() << "\n");
return true;
}
void BinaryFunction::modifyLayout(LayoutType Type, bool MinBranchClusters,
bool Split) {
if (BasicBlocksLayout.empty() || Type == LT_NONE)
return;
BasicBlockOrderType NewLayout;
std::unique_ptr<ReorderAlgorithm> Algo;
// Cannot do optimal layout without profile.
if (Type != LT_REVERSE && !hasValidProfile())
return;
if (Type == LT_REVERSE) {
Algo.reset(new ReverseReorderAlgorithm());
} else if (BasicBlocksLayout.size() <= FUNC_SIZE_THRESHOLD &&
Type != LT_OPTIMIZE_SHUFFLE) {
// Work on the optimal solution if the problem is small enough.
DEBUG(dbgs() << "finding optimal block layout for " << *this << "\n");
Algo.reset(new OptimalReorderAlgorithm());
} else {
DEBUG(dbgs() << "running block layout heuristics on " << *this << "\n");
std::unique_ptr<ClusterAlgorithm> CAlgo;
if (MinBranchClusters)
CAlgo.reset(new MinBranchGreedyClusterAlgorithm());
else
CAlgo.reset(new PHGreedyClusterAlgorithm());
switch (Type) {
case LT_OPTIMIZE:
Algo.reset(new OptimizeReorderAlgorithm(std::move(CAlgo)));
break;
case LT_OPTIMIZE_BRANCH:
Algo.reset(new OptimizeBranchReorderAlgorithm(std::move(CAlgo)));
break;
case LT_OPTIMIZE_CACHE:
Algo.reset(new OptimizeCacheReorderAlgorithm(std::move(CAlgo)));
break;
case LT_OPTIMIZE_SHUFFLE:
Algo.reset(new RandomClusterReorderAlgorithm(std::move(CAlgo)));
break;
default:
llvm_unreachable("unexpected layout type");
}
}
Algo->reorderBasicBlocks(*this, NewLayout);
BasicBlocksLayout.clear();
BasicBlocksLayout.swap(NewLayout);
if (Split)
splitFunction();
}
namespace {
#ifndef MAX_PATH
#define MAX_PATH 255
#endif
std::string constructFilename(std::string Filename,
std::string Annotation,
std::string Suffix) {
std::replace(Filename.begin(), Filename.end(), '/', '-');
if (!Annotation.empty()) {
Annotation.insert(0, "-");
}
if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
assert(Suffix.size() + Annotation.size() <= MAX_PATH);
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: Filename \"" << Filename << Annotation << Suffix
<< "\" exceeds the " << MAX_PATH << " size limit, truncating.\n";
}
Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
}
Filename += Annotation;
Filename += Suffix;
return Filename;
}
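// Escape a string for embedding in a double-quoted DOT attribute: newlines
// become "&#13;" entities and double quotes are dropped.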
std::string formatEscapes(const std::string& Str) {
std::string Result;
for (unsigned I = 0; I < Str.size(); ++I) {
auto C = Str[I];
switch (C) {
case '\n':
Result += "&#13;";
break;
case '"':
break;
default:
Result += C;
break;
}
}
return Result;
}
}
void BinaryFunction::dumpGraph(raw_ostream& OS) const {
OS << "strict digraph \"" << getPrintName() << "\" {\n";
uint64_t Offset = Address;
for (auto *BB : BasicBlocks) {
auto LayoutPos = std::find(BasicBlocksLayout.begin(),
BasicBlocksLayout.end(),
BB);
unsigned Layout = LayoutPos - BasicBlocksLayout.begin();
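// Emit one node per block, labeled with the block name, input offset (O),
// index (I), and layout position (L), e.g. "BB12" [label="BB12\n(O:64,I:3,L5)"]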
OS << format("\"%s\" [label=\"%s\\n(O:%lu,I:%u,L%u)\"]\n",
BB->getName().data(),
BB->getName().data(),
BB->getOffset(),
getIndex(BB),
Layout);
OS << format("\"%s\" [shape=box]\n", BB->getName().data());
if (opts::DotToolTipCode) {
std::string Str;
raw_string_ostream CS(Str);
Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this);
const auto Code = formatEscapes(CS.str());
OS << format("\"%s\" [tooltip=\"%s\"]\n",
BB->getName().data(),
Code.c_str());
}
// analyzeBranch is just used to get the names of the branch
// opcodes.
const MCSymbol *TBB = nullptr;
const MCSymbol *FBB = nullptr;
MCInst *CondBranch = nullptr;
MCInst *UncondBranch = nullptr;
const bool Success = BB->analyzeBranch(TBB,
FBB,
CondBranch,
UncondBranch);
const auto *LastInstr = BB->getLastNonPseudo();
const bool IsJumpTable = LastInstr && BC.MIA->getJumpTable(*LastInstr);
auto BI = BB->branch_info_begin();
for (auto *Succ : BB->successors()) {
std::string Branch;
if (Success) {
if (Succ == BB->getConditionalSuccessor(true)) {
Branch = CondBranch
? BC.InstPrinter->getOpcodeName(CondBranch->getOpcode())
: "TB";
} else if (Succ == BB->getConditionalSuccessor(false)) {
Branch = UncondBranch
? BC.InstPrinter->getOpcodeName(UncondBranch->getOpcode())
: "FB";
} else {
Branch = "FT";
}
}
if (IsJumpTable) {
Branch = "JT";
}
OS << format("\"%s\" -> \"%s\" [label=\"%s",
BB->getName().data(),
Succ->getName().data(),
Branch.c_str());
if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
BI->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
OS << "\\n(M:" << BI->MispredictedCount << ",C:" << BI->Count << ")";
} else if (ExecutionCount != COUNT_NO_PROFILE &&
BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
OS << "\\n(IC:" << BI->Count << ")";
}
OS << "\"]\n";
++BI;
}
for (auto *LP : BB->landing_pads()) {
OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
BB->getName().data(),
LP->getName().data());
}
}
OS << "}\n";
}
void BinaryFunction::viewGraph() const {
SmallString<MAX_PATH> Filename;
if (auto EC = sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
<< " bolt-cfg-XXXXX.dot temporary file.\n";
return;
}
dumpGraphToFile(Filename.str());
if (DisplayGraph(Filename)) {
errs() << "BOLT-ERROR: Can't display " << Filename << " with graphviz.\n";
}
if (auto EC = sys::fs::remove(Filename)) {
errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
<< Filename << "\n";
}
}
void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
auto Filename = constructFilename(getPrintName(), Annotation, ".dot");
outs() << "BOLT-DEBUG: Dumping CFG to " << Filename << "\n";
dumpGraphToFile(Filename);
}
void BinaryFunction::dumpGraphToFile(std::string Filename) const {
std::error_code EC;
raw_fd_ostream of(Filename, EC, sys::fs::F_None);
if (EC) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
<< Filename << " for output.\n";
}
return;
}
dumpGraph(of);
}
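// Adjust branch instructions to match the new basic block layout: drop
// branches to blocks that now fall through and add branches where the
// layout no longer provides a fall-through.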
void BinaryFunction::fixBranches() {
auto &MIA = BC.MIA;
auto *Ctx = BC.Ctx.get();
for (unsigned I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
BinaryBasicBlock *BB = BasicBlocksLayout[I];
const MCSymbol *TBB = nullptr;
const MCSymbol *FBB = nullptr;
MCInst *CondBranch = nullptr;
MCInst *UncondBranch = nullptr;
if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
continue;
// We will create an unconditional branch with the correct destination if
// needed.
if (UncondBranch)
BB->eraseInstruction(UncondBranch);
// Basic block that follows the current one in the final layout.
const BinaryBasicBlock *NextBB = nullptr;
if (I + 1 != E && BB->isCold() == BasicBlocksLayout[I + 1]->isCold())
NextBB = BasicBlocksLayout[I + 1];
if (BB->succ_size() == 1) {
// __builtin_unreachable() could create a conditional branch that
// falls through into the next function - hence the block will have only
// one valid successor. Since the behaviour is undefined, we replace
// the conditional branch with an unconditional one if required.
if (CondBranch)
BB->eraseInstruction(CondBranch);
if (BB->getSuccessor() == NextBB)
continue;
BB->addBranchInstruction(BB->getSuccessor());
} else if (BB->succ_size() == 2) {
assert(CondBranch && "conditional branch expected");
const auto *TSuccessor = BB->getConditionalSuccessor(true);
const auto *FSuccessor = BB->getConditionalSuccessor(false);
if (NextBB && NextBB == TSuccessor) {
std::swap(TSuccessor, FSuccessor);
MIA->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
BB->swapConditionalSuccessors();
} else {
MIA->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
}
if (!NextBB || (NextBB != TSuccessor && NextBB != FSuccessor)) {
BB->addBranchInstruction(FSuccessor);
}
}
// Cases where the number of successors is 0 (block ends with a
// terminator) or more than 2 (switch table) don't require branch
// instruction adjustments.
}
}
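// Mark eligible 0-count blocks as cold, so that they can be emitted
// separately from the hot part of the function.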
void BinaryFunction::splitFunction() {
bool AllCold = true;
for (BinaryBasicBlock *BB : BasicBlocksLayout) {
auto ExecCount = BB->getExecutionCount();
if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
return;
if (ExecCount != 0)
AllCold = false;
}
if (AllCold)
return;
assert(BasicBlocksLayout.size() > 0);
// Never outline the first basic block.
BasicBlocks.front()->setCanOutline(false);
for (auto BB : BasicBlocks) {
if (!BB->canOutline())
continue;
if (BB->getExecutionCount() != 0) {
BB->setCanOutline(false);
continue;
}
if (hasEHRanges()) {
// We cannot move landing pads (or rather entry points for landing
// pads).
if (BB->isLandingPad()) {
BB->setCanOutline(false);
continue;
}
// We cannot move a block that can throw since the exception-handling
// runtime cannot deal with split functions. However, if we can guarantee
// that the block never throws, it is safe to move the block to
// decrease the size of the function.
for (auto &Instr : *BB) {
if (BC.MIA->isInvoke(Instr)) {
BB->setCanOutline(false);
break;
}
}
}
}
if (opts::AgressiveSplitting) {
// All blocks with 0 count that we can move go to the end of the function.
std::stable_sort(BasicBlocksLayout.begin(), BasicBlocksLayout.end(),
[&] (BinaryBasicBlock *A, BinaryBasicBlock *B) {
return A->canOutline() < B->canOutline();
});
} else if (hasEHRanges()) {
// Typically functions with exception handling have landing pads at the end.
// We cannot move the beginning of a landing pad, but we can move 0-count
// blocks comprising landing pads to the end, thus facilitating splitting.
auto FirstLP = BasicBlocksLayout.begin();
while ((*FirstLP)->isLandingPad())
++FirstLP;
std::stable_sort(FirstLP, BasicBlocksLayout.end(),
[&] (BinaryBasicBlock *A, BinaryBasicBlock *B) {
return A->canOutline() < B->canOutline();
});
}
// Separate hot from cold
for (auto I = BasicBlocksLayout.rbegin(), E = BasicBlocksLayout.rend();
I != E; ++I) {
BinaryBasicBlock *BB = *I;
if (!BB->canOutline())
break;
BB->setIsCold(true);
IsSplit = true;
}
}
void BinaryFunction::propagateGnuArgsSizeInfo() {
assert(CurrentState == State::CFG && "unexpected function state");
if (!hasEHRanges() || !usesGnuArgsSize())
return;
// The current value of DW_CFA_GNU_args_size affects all following
// invoke instructions until the next CFI overrides it.
// It is important to iterate basic blocks in the original order when
// assigning the value.
uint64_t CurrentGnuArgsSize = 0;
for (auto BB : BasicBlocks) {
for (auto II = BB->begin(); II != BB->end(); ) {
auto &Instr = *II;
if (BC.MIA->isCFI(Instr)) {
auto CFI = getCFIFor(Instr);
if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
CurrentGnuArgsSize = CFI->getOffset();
// Delete DW_CFA_GNU_args_size instructions and only regenerate them
// during the final code emission. The information is embedded
// inside call instructions.
II = BB->erasePseudoInstruction(II);
} else {
++II;
}
continue;
}
// Add the value of GNU_args_size as an extra operand if the landing pad
// is non-empty.
BC.MIA->addGnuArgsSize(Instr, CurrentGnuArgsSize);
++II;
}
}
}
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
if (!hasValidProfile() || !BF.hasValidProfile())
return;
// Update BF's execution count.
uint64_t MyExecutionCount = getExecutionCount();
if (MyExecutionCount != BinaryFunction::COUNT_NO_PROFILE) {
uint64_t OldExecCount = BF.getExecutionCount();
uint64_t NewExecCount =
OldExecCount == BinaryFunction::COUNT_NO_PROFILE ?
MyExecutionCount :
MyExecutionCount + OldExecCount;
BF.setExecutionCount(NewExecCount);
}
// Update BF's basic block and edge counts.
auto BBMergeI = BF.begin();
for (BinaryBasicBlock *BB : BasicBlocks) {
BinaryBasicBlock *BBMerge = &*BBMergeI;
assert(getIndex(BB) == BF.getIndex(BBMerge));
// Update BF's basic block count.
uint64_t MyBBExecutionCount = BB->getExecutionCount();
if (MyBBExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE) {
uint64_t OldExecCount = BBMerge->getExecutionCount();
uint64_t NewExecCount =
OldExecCount == BinaryBasicBlock::COUNT_NO_PROFILE ?
MyBBExecutionCount :
MyBBExecutionCount + OldExecCount;
BBMerge->setExecutionCount(NewExecCount);
}
// Update BF's edge count for successors of this basic block.
auto BBMergeSI = BBMerge->succ_begin();
auto BII = BB->branch_info_begin();
auto BIMergeI = BBMerge->branch_info_begin();
for (BinaryBasicBlock *BBSucc : BB->successors()) {
BinaryBasicBlock *BBMergeSucc = *BBMergeSI;
assert(getIndex(BBSucc) == BF.getIndex(BBMergeSucc));
if (BII->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
uint64_t OldBranchCount = BIMergeI->Count;
uint64_t NewBranchCount =
OldBranchCount == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE ?
BII->Count :
BII->Count + OldBranchCount;
BIMergeI->Count = NewBranchCount;
}
if (BII->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
uint64_t OldMispredictedCount = BIMergeI->MispredictedCount;
uint64_t NewMispredictedCount =
OldMispredictedCount == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE ?
BII->MispredictedCount :
BII->MispredictedCount + OldMispredictedCount;
BIMergeI->MispredictedCount = NewMispredictedCount;
}
++BBMergeSI;
++BII;
++BIMergeI;
}
assert(BBMergeSI == BBMerge->succ_end());
++BBMergeI;
}
assert(BBMergeI == BF.end());
}
std::pair<bool, unsigned> BinaryFunction::isCalleeEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const {
// The callee operand in a direct call is the first operand. This
// operand should be a symbol corresponding to the callee function.
constexpr unsigned CalleeOpIndex = 0;
// Helper function.
auto getGlobalAddress = [this] (const MCSymbol &Symbol) -> uint64_t {
auto AI = BC.GlobalSymbols.find(Symbol.getName());
assert(AI != BC.GlobalSymbols.end());
return AI->second;
};
const MCOperand &CalleeOp = Inst.getOperand(CalleeOpIndex);
const MCOperand &CalleeOpOther = InstOther.getOperand(CalleeOpIndex);
if (!CalleeOp.isExpr() || !CalleeOpOther.isExpr()) {
// At least one of these is actually an indirect call.
return std::make_pair(false, 0);
}
const MCSymbol &CalleeSymbol = CalleeOp.getExpr()->getSymbol();
uint64_t CalleeAddress = getGlobalAddress(CalleeSymbol);
const MCSymbol &CalleeSymbolOther = CalleeOpOther.getExpr()->getSymbol();
uint64_t CalleeAddressOther = getGlobalAddress(CalleeSymbolOther);
bool BothRecursiveCalls =
CalleeAddress == getAddress() &&
CalleeAddressOther == BF.getAddress();
bool SameCallee = CalleeAddress == CalleeAddressOther;
return std::make_pair(BothRecursiveCalls || SameCallee, CalleeOpIndex);
}
std::pair<bool, unsigned> BinaryFunction::isTargetEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF,
bool AreInvokes) const {
// The target operand in a (non-indirect) jump instruction is the
// first operand.
unsigned TargetOpIndex = 0;
if (AreInvokes) {
// The landing pad operand in an invoke is either the second or the
// sixth operand, depending on the number of operands of the invoke.
TargetOpIndex = 1;
if (Inst.getNumOperands() == 7 || Inst.getNumOperands() == 8)
TargetOpIndex = 5;
}
const MCOperand &TargetOp = Inst.getOperand(TargetOpIndex);
const MCOperand &TargetOpOther = InstOther.getOperand(TargetOpIndex);
if (!TargetOp.isExpr() || !TargetOpOther.isExpr()) {
assert(AreInvokes);
// An invoke without a landing pad operand has no catch handler. As long
// as both invokes have no catch target, we can consider them to have the
// same catch target.
return std::make_pair(!TargetOp.isExpr() && !TargetOpOther.isExpr(),
TargetOpIndex);
}
const MCSymbol &TargetSymbol = TargetOp.getExpr()->getSymbol();
BinaryBasicBlock *TargetBB =
AreInvokes ?
BB.getLandingPad(&TargetSymbol) :
BB.getSuccessor(&TargetSymbol);
const MCSymbol &TargetSymbolOther = TargetOpOther.getExpr()->getSymbol();
BinaryBasicBlock *TargetBBOther =
AreInvokes ?
BBOther.getLandingPad(&TargetSymbolOther) :
BBOther.getSuccessor(&TargetSymbolOther);
if (TargetBB == nullptr || TargetBBOther == nullptr) {
assert(!AreInvokes);
// This is a tail call implemented with a jump that was not
// converted to a call (e.g. conditional jump). Since the
// instructions were not identical, the functions cannot be
// proven identical either.
return std::make_pair(false, 0);
}
return std::make_pair(getIndex(TargetBB) == BF.getIndex(TargetBBOther),
TargetOpIndex);
}
bool BinaryFunction::isInstrEquivalentWith(
const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const {
// First check their opcodes.
if (Inst.getOpcode() != InstOther.getOpcode()) {
return false;
}
// Then check if they have the same number of operands.
unsigned NumOperands = Inst.getNumOperands();
unsigned NumOperandsOther = InstOther.getNumOperands();
if (NumOperands != NumOperandsOther) {
return false;
}
// We are interested in 3 special cases:
//
// a) both instructions are recursive calls.
// b) both instructions are local jumps to basic blocks with the same indices.
// c) both instructions are invokes whose landing pad blocks have the same
//    indices.
//
// In any of these cases the instructions will differ in some operands, but
// given identical CFG of the functions, they can still be considered
// equivalent.
bool BothCalls =
BC.MIA->isCall(Inst) &&
BC.MIA->isCall(InstOther);
bool BothInvokes =
BC.MIA->isInvoke(Inst) &&
BC.MIA->isInvoke(InstOther);
bool BothBranches =
BC.MIA->isBranch(Inst) &&
!BC.MIA->isIndirectBranch(Inst) &&
BC.MIA->isBranch(InstOther) &&
!BC.MIA->isIndirectBranch(InstOther);
if (!BothCalls && !BothInvokes && !BothBranches) {
return Inst.equals(InstOther);
}
// We figure out if both instructions are recursive calls (case a) or else
// if they are calls to the same function.
bool EquivCallees = false;
unsigned CalleeOpIndex = 0;
if (BothCalls) {
std::tie(EquivCallees, CalleeOpIndex) =
isCalleeEquivalentWith(Inst, BB, InstOther, BBOther, BF);
}
// We figure out if both instructions are jumps (case b) or invokes (case c)
// with equivalent jump targets or landing pads respectively.
assert(!(BothInvokes && BothBranches));
bool SameTarget = false;
unsigned TargetOpIndex = 0;
if (BothInvokes || BothBranches) {
std::tie(SameTarget, TargetOpIndex) =
isTargetEquivalentWith(Inst, BB, InstOther, BBOther, BF, BothInvokes);
}
// Compare all operands.
for (unsigned i = 0; i < NumOperands; ++i) {
if (i == CalleeOpIndex && BothCalls && EquivCallees)
continue;
if (i == TargetOpIndex && (BothInvokes || BothBranches) && SameTarget)
continue;
if (!Inst.getOperand(i).equals(InstOther.getOperand(i)))
return false;
}
// The instructions are equal although (some of) their operands
// may differ.
return true;
}
bool BinaryFunction::isIdenticalWith(const BinaryFunction &BF) const {
assert(CurrentState == State::CFG && BF.CurrentState == State::CFG);
// Compare the two functions, one basic block at a time.
// Currently we require two identical basic blocks to have identical
// instruction sequences and the same index in their corresponding
// functions. The latter is important for CFG equality.
// By default, we do not consider functions that differ only in their pseudo
// instruction sequences to be non-identical. However, we print a warning
// when two functions that are otherwise identical have different pseudo
// instruction sequences.
bool PseudosDiffer = false;
if (size() != BF.size())
return false;
auto BBI = BF.begin();
for (const BinaryBasicBlock *BB : BasicBlocks) {
const BinaryBasicBlock *BBOther = &*BBI;
if (getIndex(BB) != BF.getIndex(BBOther))
return false;
// Compare successor basic blocks.
if (BB->succ_size() != BBOther->succ_size())
return false;
auto SuccBBI = BBOther->succ_begin();
for (const BinaryBasicBlock *SuccBB : BB->successors()) {
const BinaryBasicBlock *SuccBBOther = *SuccBBI;
if (getIndex(SuccBB) != BF.getIndex(SuccBBOther))
return false;
++SuccBBI;
}
// Compare landing pads.
if (BB->lp_size() != BBOther->lp_size())
return false;
auto LPI = BBOther->lp_begin();
for (const BinaryBasicBlock *LP : BB->landing_pads()) {
const BinaryBasicBlock *LPOther = *LPI;
if (getIndex(LP) != BF.getIndex(LPOther))
return false;
++LPI;
}
// Compare instructions.
auto I = BB->begin(), E = BB->end();
auto OtherI = BBOther->begin(), OtherE = BBOther->end();
while (I != E && OtherI != OtherE) {
const MCInst &Inst = *I;
const MCInst &InstOther = *OtherI;
bool IsInstPseudo = BC.MII->get(Inst.getOpcode()).isPseudo();
bool IsInstOtherPseudo = BC.MII->get(InstOther.getOpcode()).isPseudo();
if (IsInstPseudo == IsInstOtherPseudo) {
// Either both are pseudos or none is.
bool areEqual =
isInstrEquivalentWith(Inst, *BB, InstOther, *BBOther, BF);
if (!areEqual && IsInstPseudo) {
// Different pseudo instructions.
PseudosDiffer = true;
} else if (!areEqual) {
// Different non-pseudo instructions.
return false;
}
++I; ++OtherI;
} else {
// One instruction is a pseudo while the other is not.
PseudosDiffer = true;
IsInstPseudo ? ++I : ++OtherI;
}
}
// Check for trailing instructions or pseudos in one of the basic blocks.
auto TrailI = I == E ? OtherI : I;
auto TrailE = I == E ? OtherE : E;
while (TrailI != TrailE) {
const MCInst &InstTrail = *TrailI;
if (!BC.MII->get(InstTrail.getOpcode()).isPseudo()) {
// One of the functions has more instructions in this basic block
// than the other, hence not identical.
return false;
}
// There are trailing pseudos only in one of the basic blocks.
PseudosDiffer = true;
++TrailI;
}
++BBI;
}
if (opts::Verbosity >= 1 && PseudosDiffer) {
errs() << "BOLT-WARNING: functions " << *this << " and "
<< BF << " are identical, but have different"
<< " pseudo instruction sequences.\n";
}
return true;
}
std::size_t BinaryFunction::hash() const {
assert(CurrentState == State::CFG);
// The hash is computed by creating a string of all the opcodes
// in the function and hashing that string with std::hash.
std::string Opcodes;
for (const BinaryBasicBlock *BB : BasicBlocks) {
for (const MCInst &Inst : *BB) {
unsigned Opcode = Inst.getOpcode();
if (BC.MII->get(Opcode).isPseudo())
continue;
if (Opcode == 0) {
Opcodes.push_back(0);
continue;
}
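// Serialize the opcode one byte at a time, least-significant byte first.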
while (Opcode) {
uint8_t LSB = Opcode & 0xff;
Opcodes.push_back(LSB);
Opcode = Opcode >> 8;
}
}
}
return std::hash<std::string>{}(Opcodes);
}
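// Insert NewBBs into the function immediately after Start, renumbering the
// blocks that follow and optionally updating the layout and CFI state.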
void BinaryFunction::insertBasicBlocks(
BinaryBasicBlock *Start,
std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
const bool UpdateLayout,
const bool UpdateCFIState) {
const auto StartIndex = getIndex(Start);
const auto NumNewBlocks = NewBBs.size();
BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1,
NumNewBlocks,
nullptr);
auto I = StartIndex + 1;
for (auto &BB : NewBBs) {
assert(!BasicBlocks[I]);
BasicBlocks[I++] = BB.release();
}
updateBBIndices(StartIndex);
recomputeLandingPads(StartIndex, NumNewBlocks + 1);
// Make sure the basic blocks are sorted properly.
assert(std::is_sorted(begin(), end()));
if (UpdateLayout) {
updateLayout(Start, NumNewBlocks);
}
if (UpdateCFIState) {
updateCFIState(Start, NumNewBlocks);
}
}
void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
for (auto I = StartIndex; I < BasicBlocks.size(); ++I) {
BasicBlocks[I]->Index = I;
}
}
void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
const unsigned NumNewBlocks) {
assert(TailCallTerminatedBlocks.empty());
auto PartialCFIState = annotateCFIState(&(*Start->rbegin()));
const auto StartIndex = getIndex(Start);
BBCFIState.insert(BBCFIState.begin() + StartIndex + 1,
NumNewBlocks,
PartialCFIState.back());
assert(BBCFIState.size() == BasicBlocks.size() + 1);
fixCFIState();
}
void BinaryFunction::updateLayout(BinaryBasicBlock* Start,
const unsigned NumNewBlocks) {
// Insert new blocks in the layout immediately after Start.
auto Pos = std::find(layout_begin(), layout_end(), Start);
assert(Pos != layout_end());
auto Begin = &BasicBlocks[getIndex(Start) + 1];
auto End = &BasicBlocks[getIndex(Start) + NumNewBlocks + 1];
BasicBlocksLayout.insert(Pos + 1, Begin, End);
}
void BinaryFunction::updateLayout(LayoutType Type,
bool MinBranchClusters,
bool Split) {
// Recompute layout with original parameters.
BasicBlocksLayout = BasicBlocks;
modifyLayout(Type, MinBranchClusters, Split);
}
bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
uint64_t SymbolSize) const {
// Some symbols are tolerated inside function bodies, others are not.
// The real function boundaries may not be known at this point.
// It's okay to have a zero-sized symbol in the middle of a non-zero-sized
// function.
if (SymbolSize == 0 && containsAddress(*Symbol.getAddress()))
return true;
if (Symbol.getType() != SymbolRef::ST_Unknown)
return false;
if (Symbol.getFlags() & SymbolRef::SF_Global)
return false;
return true;
}
BinaryFunction::~BinaryFunction() {
for (auto BB : BasicBlocks) {
delete BB;
}
for (auto BB : DeletedBasicBlocks) {
delete BB;
}
}
void BinaryFunction::emitJumpTables(MCStreamer *Streamer) {
if (JumpTables.empty())
return;
if (opts::PrintJumpTables) {
outs() << "BOLT-INFO: jump tables for function " << *this << ":\n";
}
for (auto &JTI : JumpTables) {
auto &JT = JTI.second;
if (opts::PrintJumpTables)
JT.print(outs());
JT.emit(Streamer,
BC.MOFI->getReadOnlySection(),
BC.MOFI->getReadOnlyColdSection());
}
}
// TODO (#9806207): based on jump table type (PIC vs non-PIC etc.) we will
// need to emit different references.
uint64_t BinaryFunction::JumpTable::emit(MCStreamer *Streamer,
MCSection *HotSection,
MCSection *ColdSection) {
// Pre-process entries for aggressive splitting.
// Each label represents a separate switch table and gets its own count
// determining its destination section (hot or cold).
std::map<MCSymbol *, uint64_t> LabelCounts;
if (opts::JumpTables > JTS_SPLIT && !Counts.empty()) {
MCSymbol *CurrentLabel = Labels[0];
uint64_t CurrentLabelCount = 0;
for (unsigned Index = 0; Index < Entries.size(); ++Index) {
auto LI = Labels.find(Index * EntrySize);
if (LI != Labels.end()) {
LabelCounts[CurrentLabel] = CurrentLabelCount;
CurrentLabel = LI->second;
CurrentLabelCount = 0;
}
CurrentLabelCount += Counts[Index];
}
LabelCounts[CurrentLabel] = CurrentLabelCount;
} else {
Streamer->SwitchSection(Count > 0 ? HotSection : ColdSection);
Streamer->EmitValueToAlignment(EntrySize);
}
MCSymbol *LastLabel = nullptr;
uint64_t Offset = 0;
for (auto *Entry : Entries) {
auto LI = Labels.find(Offset);
if (LI != Labels.end()) {
DEBUG(dbgs() << "BOLT-DEBUG: emitting jump table "
<< LI->second->getName() << " (originally was at address 0x"
<< Twine::utohexstr(Address + Offset) << ")"
<< (Offset ? " as part of a larger jump table\n" : "\n"));
if (!LabelCounts.empty()) {
DEBUG(dbgs() << "BOLT-DEBUG: jump table count: "
<< LabelCounts[LI->second] << '\n');
if (LabelCounts[LI->second] > 0) {
Streamer->SwitchSection(HotSection);
} else {
Streamer->SwitchSection(ColdSection);
}
Streamer->EmitValueToAlignment(EntrySize);
}
Streamer->EmitLabel(LI->second);
LastLabel = LI->second;
}
if (Type == JTT_NORMAL) {
Streamer->EmitSymbolValue(Entry, EntrySize);
} else {
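// PIC jump table: emit the entry as an offset relative to the start of the
// containing jump table (LastLabel).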
auto JT = MCSymbolRefExpr::create(LastLabel, Streamer->getContext());
auto E = MCSymbolRefExpr::create(Entry, Streamer->getContext());
auto Value = MCBinaryExpr::createSub(E, JT, Streamer->getContext());
Streamer->EmitValue(Value, EntrySize);
}
Offset += EntrySize;
}
return Offset;
}
void BinaryFunction::JumpTable::print(raw_ostream &OS) const {
uint64_t Offset = 0;
for (const auto *Entry : Entries) {
auto LI = Labels.find(Offset);
if (LI != Labels.end()) {
OS << "Jump Table " << LI->second->getName() << " at @0x"
<< Twine::utohexstr(Address+Offset);
if (Offset) {
OS << " (possibly part of larger jump table):\n";
} else {
OS << " with total count of " << Count << ":\n";
}
}
OS << format(" 0x%04" PRIx64 " : ", Offset) << Entry->getName();
if (!Counts.empty())
OS << " : " << Counts[Offset / EntrySize];
OS << '\n';
Offset += EntrySize;
}
OS << "\n\n";
}
void BinaryFunction::calculateLoopInfo() {
// Discover loops.
BinaryDominatorTree DomTree(false);
DomTree.recalculate<BinaryFunction>(*this);
BLI.reset(new BinaryLoopInfo());
BLI->analyze(DomTree);
// Traverse discovered loops and add depth and profile information.
std::stack<BinaryLoop *> St;
for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
St.push(*I);
++BLI->OuterLoops;
}
while (!St.empty()) {
BinaryLoop *L = St.top();
St.pop();
++BLI->TotalLoops;
BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);
// Add nested loops in the stack.
for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
St.push(*I);
}
// Skip if no valid profile is found.
if (!hasValidProfile()) {
L->EntryCount = COUNT_NO_PROFILE;
L->ExitCount = COUNT_NO_PROFILE;
L->TotalBackEdgeCount = COUNT_NO_PROFILE;
continue;
}
// Compute back edge count.
SmallVector<BinaryBasicBlock *, 1> Latches;
L->getLoopLatches(Latches);
for (BinaryBasicBlock *Latch : Latches) {
auto BI = Latch->branch_info_begin();
for (BinaryBasicBlock *Succ : Latch->successors()) {
if (Succ == L->getHeader()) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE &&
"profile data not found");
L->TotalBackEdgeCount += BI->Count;
}
++BI;
}
}
// Compute entry count.
L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;
// Compute exit count.
SmallVector<BinaryLoop::Edge, 1> ExitEdges;
L->getExitEdges(ExitEdges);
for (BinaryLoop::Edge &Exit : ExitEdges) {
const BinaryBasicBlock *Exiting = Exit.first;
const BinaryBasicBlock *ExitTarget = Exit.second;
auto BI = Exiting->branch_info_begin();
for (BinaryBasicBlock *Succ : Exiting->successors()) {
if (Succ == ExitTarget) {
assert(BI->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE &&
"profile data not found");
L->ExitCount += BI->Count;
}
++BI;
}
}
}
}
void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
OS << "Loop Info for Function \"" << *this << "\"";
if (hasValidProfile()) {
OS << " (count: " << getExecutionCount() << ")";
}
OS << "\n";
std::stack<BinaryLoop *> St;
for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
St.push(*I);
}
while (!St.empty()) {
BinaryLoop *L = St.top();
St.pop();
for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
St.push(*I);
}
if (!hasValidProfile())
continue;
OS << (L->getLoopDepth() > 1 ? "Nested" : "Outer") << " loop header: "
<< L->getHeader()->getName();
OS << "\n";
OS << "Loop basic blocks: ";
auto Sep = "";
for (auto BI = L->block_begin(), BE = L->block_end(); BI != BE; ++BI) {
OS << Sep << (*BI)->getName();
Sep = ", ";
}
OS << "\n";
if (hasValidProfile()) {
OS << "Total back edge count: " << L->TotalBackEdgeCount << "\n";
OS << "Loop entry count: " << L->EntryCount << "\n";
OS << "Loop exit count: " << L->ExitCount << "\n";
if (L->EntryCount > 0) {
OS << "Average iters per entry: "
<< format("%.4lf", (double)L->TotalBackEdgeCount / L->EntryCount)
<< "\n";
}
}
OS << "----\n";
}
OS << "Total number of loops: "<< BLI->TotalLoops << "\n";
OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
}
DynoStats BinaryFunction::getDynoStats() const {
DynoStats Stats;
// Return empty stats for functions we don't completely understand.
if (!isSimple() || !hasValidProfile())
return Stats;
// Update the enumeration of basic blocks for correct detection of branch
// direction.
updateLayoutIndices();
for (const auto &BB : layout()) {
// The basic block execution count equals the sum of incoming branch
// frequencies. This may deviate from the sum of outgoing branches of the
// basic block, especially since the block may contain a call to a function
// that does not return or a function that throws an exception.
uint64_t BBExecutionCount = 0;
for (const auto &BI : BB->branch_info())
if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
BBExecutionCount += BI.Count;
// Ignore empty blocks and blocks that were not executed.
if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
continue;
// Count the number of calls by iterating through all instructions.
for (const auto &Instr : *BB) {
if (!BC.MIA->isCall(Instr))
continue;
Stats[DynoStats::FUNCTION_CALLS] += BBExecutionCount;
if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
Stats[DynoStats::INDIRECT_CALLS] += BBExecutionCount;
} else if (const auto *CallSymbol = BC.MIA->getTargetSymbol(Instr)) {
if (BC.getFunctionForSymbol(CallSymbol))
continue;
auto GSI = BC.GlobalSymbols.find(CallSymbol->getName());
if (GSI == BC.GlobalSymbols.end())
continue;
auto Section = BC.getSectionForAddress(GSI->second);
if (!Section)
continue;
StringRef SectionName;
Section->getName(SectionName);
if (SectionName == ".plt") {
Stats[DynoStats::PLT_CALLS] += BBExecutionCount;
}
}
}
Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
// Jump tables.
const auto *LastInstr = BB->getLastNonPseudo();
if (BC.MIA->getJumpTable(*LastInstr)) {
Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
DEBUG(
static uint64_t MostFrequentJT;
if (BBExecutionCount > MostFrequentJT) {
MostFrequentJT = BBExecutionCount;
dbgs() << "BOLT-INFO: most frequently executed jump table is in "
<< "function " << *this << " in basic block " << BB->getName()
<< " executed totally " << BBExecutionCount << " times.\n";
}
);
continue;
}
// Update stats for branches.
const MCSymbol *TBB = nullptr;
const MCSymbol *FBB = nullptr;
MCInst *CondBranch = nullptr;
MCInst *UncondBranch = nullptr;
if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) {
continue;
}
if (!CondBranch && !UncondBranch) {
continue;
}
// Simple unconditional branch.
if (!CondBranch) {
Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
continue;
}
// Conditional branch that could be followed by an unconditional branch.
uint64_t TakenCount = BB->getBranchInfo(true).Count;
if (TakenCount == COUNT_NO_PROFILE)
TakenCount = 0;
uint64_t NonTakenCount = BB->getBranchInfo(false).Count;
if (NonTakenCount == COUNT_NO_PROFILE)
NonTakenCount = 0;
assert(TakenCount + NonTakenCount == BBExecutionCount &&
"internal calculation error");
if (isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
} else {
Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
}
if (UncondBranch) {
Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
}
}
return Stats;
}
void DynoStats::print(raw_ostream &OS, const DynoStats *Other) const {
auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
uint64_t OtherStat) {
OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
if (Other) {
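// Print the percentage delta relative to Other; the +1 avoids division
// by zero.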
OS << format(" (%+.1f%%)",
( (float) Stat - (float) OtherStat ) * 100.0 /
(float) (OtherStat + 1) );
}
OS << '\n';
};
for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
Stat < DynoStats::LAST_DYNO_STAT;
++Stat) {
printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
}
}
void DynoStats::operator+=(const DynoStats &Other) {
for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
Stat < DynoStats::LAST_DYNO_STAT;
++Stat) {
Stats[Stat] += Other[Stat];
}
}
size_t Relocation::getSizeForType(uint64_t Type) {
switch (Type) {
default:
llvm_unreachable("unsupported relocation type");
case ELF::R_X86_64_PC8:
return 1;
case ELF::R_X86_64_PLT32:
case ELF::R_X86_64_PC32:
case ELF::R_X86_64_32S:
case ELF::R_X86_64_32:
case ELF::R_X86_64_GOTPCREL:
case ELF::R_X86_64_GOTTPOFF:
case ELF::R_X86_64_TPOFF32:
return 4;
case ELF::R_X86_64_PC64:
case ELF::R_X86_64_64:
return 8;
}
}
size_t Relocation::emitTo(MCStreamer *Streamer) {
const auto Size = getSizeForType(Type);
auto &Ctx = Streamer->getContext();
switch (Type) {
default:
llvm_unreachable("unsupported relocation type");
case ELF::R_X86_64_PC8:
case ELF::R_X86_64_PC32: {
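// PC-relative relocation: emit the difference between the target symbol
// and a temporary label marking the current location, plus any addend.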
auto *TempLabel = Ctx.createTempSymbol();
Streamer->EmitLabel(TempLabel);
auto Value =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(Symbol, Ctx),
MCSymbolRefExpr::create(TempLabel, Ctx),
Ctx);
if (Addend) {
Value = MCBinaryExpr::createAdd(Value,
MCConstantExpr::create(Addend, Ctx),
Ctx);
}
Streamer->EmitValue(Value, Size);
break;
}
case ELF::R_X86_64_64:
case ELF::R_X86_64_32:
case ELF::R_X86_64_32S:
Streamer->EmitSymbolValue(Symbol, Size);
break;
}
return Size;
}
} // namespace bolt
} // namespace llvm