2015-10-09 17:21:14 -07:00
|
|
|
//===--- BinaryFunction.cpp - Interface for machine-level function --------===//
|
|
|
|
|
//
|
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
|
//
|
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
|
|
|
2015-10-14 15:35:14 -07:00
|
|
|
#include "BinaryBasicBlock.h"
|
|
|
|
|
#include "BinaryFunction.h"
|
|
|
|
|
#include "DataReader.h"
|
2017-02-16 14:57:57 -08:00
|
|
|
#include "Passes/ReorderAlgorithm.h"
|
2015-10-09 17:21:14 -07:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
2016-02-25 16:57:07 -08:00
|
|
|
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
2015-10-09 17:21:14 -07:00
|
|
|
#include "llvm/MC/MCAsmInfo.h"
|
|
|
|
|
#include "llvm/MC/MCContext.h"
|
2015-10-09 21:47:18 -07:00
|
|
|
#include "llvm/MC/MCExpr.h"
|
2015-10-09 17:21:14 -07:00
|
|
|
#include "llvm/MC/MCInst.h"
|
|
|
|
|
#include "llvm/MC/MCInstPrinter.h"
|
2016-09-27 19:09:38 -07:00
|
|
|
#include "llvm/MC/MCSection.h"
|
|
|
|
|
#include "llvm/MC/MCSectionELF.h"
|
2016-09-14 16:45:40 -07:00
|
|
|
#include "llvm/MC/MCStreamer.h"
|
2015-10-09 17:21:14 -07:00
|
|
|
#include "llvm/Object/ObjectFile.h"
|
2016-01-21 14:18:30 -08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2015-10-09 17:21:14 -07:00
|
|
|
#include "llvm/Support/Debug.h"
|
2016-07-01 08:40:56 -07:00
|
|
|
#include "llvm/Support/GraphWriter.h"
|
2015-10-09 17:21:14 -07:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
|
#include <limits>
|
2015-10-13 12:18:54 -07:00
|
|
|
#include <queue>
|
2015-10-09 17:21:14 -07:00
|
|
|
#include <string>
|
2016-06-09 11:36:55 -07:00
|
|
|
#include <functional>
|
2015-10-09 17:21:14 -07:00
|
|
|
|
|
|
|
|
#undef DEBUG_TYPE
|
2016-02-05 14:42:04 -08:00
|
|
|
#define DEBUG_TYPE "bolt"
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2016-09-02 14:15:29 -07:00
|
|
|
using namespace llvm;
|
2016-09-16 15:54:32 -07:00
|
|
|
using namespace bolt;
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2016-01-21 14:18:30 -08:00
|
|
|
namespace opts {
|
|
|
|
|
|
2017-03-28 14:40:20 -07:00
|
|
|
extern cl::OptionCategory BoltCategory;
|
|
|
|
|
extern cl::OptionCategory BoltOptCategory;
|
|
|
|
|
extern cl::OptionCategory BoltRelocCategory;
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
extern bool shouldProcess(const BinaryFunction &);
|
|
|
|
|
|
2016-08-29 21:11:22 -07:00
|
|
|
extern cl::opt<bool> PrintDynoStats;
|
2016-09-27 19:09:38 -07:00
|
|
|
extern cl::opt<bool> Relocs;
|
|
|
|
|
extern cl::opt<bool> UpdateDebugSections;
|
2017-03-08 19:58:33 -08:00
|
|
|
extern cl::opt<IndirectCallPromotionType> IndirectCallPromotion;
|
2016-09-27 19:09:38 -07:00
|
|
|
extern cl::opt<unsigned> Verbosity;
|
|
|
|
|
|
|
|
|
|
// Command-line options defined by this file. Registration order is kept
// exactly as before.

// -split-all-cold
static cl::opt<bool> AggressiveSplitting(
    "split-all-cold",
    cl::desc("outline as many cold basic blocks as possible"),
    cl::ZeroOrMore,
    cl::cat(BoltOptCategory));

// -align-blocks
static cl::opt<bool> AlignBlocks(
    "align-blocks",
    cl::desc("try to align BBs inserting nops"),
    cl::ZeroOrMore,
    cl::cat(BoltOptCategory));

// -dot-tooltip-code (hidden)
static cl::opt<bool> DotToolTipCode(
    "dot-tooltip-code",
    cl::desc("add basic block instructions as tool tips on nodes"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -dyno-stats-scale (hidden), defaults to 1.
static cl::opt<uint32_t> DynoStatsScale(
    "dyno-stats-scale",
    cl::desc("scale to be applied while reporting dyno stats"),
    cl::Optional,
    cl::init(1),
    cl::Hidden,
    cl::cat(BoltCategory));

// -jump-tables, defaults to JTS_BASIC. Intentionally not static.
cl::opt<JumpTableSupportLevel> JumpTables(
    "jump-tables",
    cl::desc("jump tables support (default=basic)"),
    cl::init(JTS_BASIC),
    cl::values(
        clEnumValN(JTS_NONE, "none",
                   "do not optimize functions with jump tables"),
        clEnumValN(JTS_BASIC, "basic",
                   "optimize functions with jump tables"),
        clEnumValN(JTS_MOVE, "move",
                   "move jump tables to a separate section"),
        clEnumValN(JTS_SPLIT, "split",
                   "split jump tables section into hot and cold based on "
                   "function execution frequency"),
        clEnumValN(JTS_AGGRESSIVE, "aggressive",
                   "aggressively split jump tables section based on usage "
                   "of the tables"),
        clEnumValEnd),
    cl::ZeroOrMore,
    cl::cat(BoltOptCategory));

// -print-jump-tables (hidden)
static cl::opt<bool> PrintJumpTables(
    "print-jump-tables",
    cl::desc("print jump tables"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltCategory));

// -print-only=func1,func2,... (hidden); consulted by shouldPrint().
static cl::list<std::string> PrintOnly(
    "print-only",
    cl::CommaSeparated,
    cl::desc("list of functions to print"),
    cl::value_desc("func1,func2,func3,..."),
    cl::Hidden,
    cl::cat(BoltCategory));

// -split-eh (hidden, experimental)
static cl::opt<bool> SplitEH(
    "split-eh",
    cl::desc("split C++ exception handling code (experimental)"),
    cl::ZeroOrMore,
    cl::Hidden,
    cl::cat(BoltOptCategory));
|
2016-08-29 21:11:22 -07:00
|
|
|
|
2017-03-17 19:05:11 -07:00
|
|
|
bool shouldPrint(const BinaryFunction &Function) {
|
|
|
|
|
if (PrintOnly.empty())
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
for (auto &Name : opts::PrintOnly) {
|
|
|
|
|
if (Function.hasName(Name)) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2016-01-21 14:18:30 -08:00
|
|
|
} // namespace opts
|
|
|
|
|
|
2016-09-02 14:15:29 -07:00
|
|
|
namespace llvm {
|
|
|
|
|
namespace bolt {
|
|
|
|
|
|
2016-08-22 14:24:09 -07:00
|
|
|
// Temporary constant.
|
|
|
|
|
//
|
|
|
|
|
// TODO: move to architecture-specific file together with the code that is
|
|
|
|
|
// using it.
|
|
|
|
|
constexpr unsigned NoRegister = 0;
|
|
|
|
|
|
2016-08-29 21:11:22 -07:00
|
|
|
constexpr const char *DynoStats::Desc[];
|
2017-05-19 14:45:46 -07:00
|
|
|
constexpr unsigned BinaryFunction::MinAlign;
|
2017-05-01 16:52:54 -07:00
|
|
|
|
2016-02-25 16:57:07 -08:00
|
|
|
namespace {
|
|
|
|
|
|
2016-05-27 20:19:19 -07:00
|
|
|
/// Gets debug line information for the instruction located at the given
|
|
|
|
|
/// address in the original binary. The SMLoc's pointer is used
|
|
|
|
|
/// to point to this information, which is represented by a
|
|
|
|
|
/// DebugLineTableRowRef. The returned pointer is null if no debug line
|
|
|
|
|
/// information for this instruction was found.
|
|
|
|
|
SMLoc findDebugLineInformationForInstructionAt(
|
|
|
|
|
uint64_t Address,
|
|
|
|
|
DWARFUnitLineTable &ULT) {
|
|
|
|
|
// We use the pointer in SMLoc to store an instance of DebugLineTableRowRef,
|
|
|
|
|
// which occupies 64 bits. Thus, we can only proceed if the struct fits into
|
|
|
|
|
// the pointer itself.
|
|
|
|
|
assert(
|
|
|
|
|
sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef) &&
|
|
|
|
|
"Cannot fit instruction debug line information into SMLoc's pointer");
|
2016-02-25 16:57:07 -08:00
|
|
|
|
2016-05-27 20:19:19 -07:00
|
|
|
SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc();
|
2016-02-25 16:57:07 -08:00
|
|
|
|
2016-05-27 20:19:19 -07:00
|
|
|
auto &LineTable = ULT.second;
|
|
|
|
|
if (!LineTable)
|
|
|
|
|
return NullResult;
|
|
|
|
|
|
|
|
|
|
uint32_t RowIndex = LineTable->lookupAddress(Address);
|
|
|
|
|
if (RowIndex == LineTable->UnknownRowIndex)
|
|
|
|
|
return NullResult;
|
|
|
|
|
|
|
|
|
|
assert(RowIndex < LineTable->Rows.size() &&
|
|
|
|
|
"Line Table lookup returned invalid index.");
|
|
|
|
|
|
|
|
|
|
decltype(SMLoc().getPointer()) Ptr;
|
|
|
|
|
DebugLineTableRowRef *InstructionLocation =
|
|
|
|
|
reinterpret_cast<DebugLineTableRowRef *>(&Ptr);
|
|
|
|
|
|
|
|
|
|
InstructionLocation->DwCompileUnitIndex = ULT.first->getOffset();
|
|
|
|
|
InstructionLocation->RowIndex = RowIndex + 1;
|
|
|
|
|
|
|
|
|
|
return SMLoc::getFromPointer(Ptr);
|
2016-02-25 16:57:07 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
2016-09-20 20:55:49 -07:00
|
|
|
/// Order two DynoStats by lexicographically comparing the elements of Stats
/// in the index range [FIRST_DYNO_STAT, LAST_DYNO_STAT).
bool DynoStats::operator<(const DynoStats &Other) const {
  const auto *LHSBegin = &Stats[FIRST_DYNO_STAT];
  const auto *LHSEnd = &Stats[LAST_DYNO_STAT];
  const auto *RHSBegin = &Other.Stats[FIRST_DYNO_STAT];
  const auto *RHSEnd = &Other.Stats[LAST_DYNO_STAT];
  return std::lexicographical_compare(LHSBegin, LHSEnd, RHSBegin, RHSEnd);
}
|
|
|
|
|
|
2017-03-14 09:03:23 -07:00
|
|
|
/// Two DynoStats are equal when every element of Stats in the index range
/// [FIRST_DYNO_STAT, LAST_DYNO_STAT) matches.
bool DynoStats::operator==(const DynoStats &Other) const {
  const auto *LHSBegin = &Stats[FIRST_DYNO_STAT];
  const auto *LHSEnd = &Stats[LAST_DYNO_STAT];
  const auto *RHSBegin = &Other.Stats[FIRST_DYNO_STAT];
  return std::equal(LHSBegin, LHSEnd, RHSBegin);
}
|
|
|
|
|
|
2016-09-20 20:55:49 -07:00
|
|
|
bool DynoStats::lessThan(const DynoStats &Other,
|
|
|
|
|
ArrayRef<Category> Keys) const {
|
|
|
|
|
return std::lexicographical_compare(
|
|
|
|
|
Keys.begin(), Keys.end(),
|
|
|
|
|
Keys.begin(), Keys.end(),
|
|
|
|
|
[this,&Other](const Category A, const Category) {
|
|
|
|
|
return Stats[A] < Other.Stats[A];
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
2015-12-18 17:00:46 -08:00
|
|
|
uint64_t BinaryFunction::Count = 0;
|
|
|
|
|
|
2015-10-12 12:12:16 -07:00
|
|
|
/// Return the basic block whose original byte range covers \p Offset, or
/// nullptr when \p Offset is beyond the function, no block offsets are
/// recorded, or the offset falls past the end of the closest preceding block.
BinaryBasicBlock *
BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) {
  if (Offset > Size || BasicBlockOffsets.empty())
    return nullptr;

  // The search below relies on BasicBlockOffsets being sorted according to
  // CompareBasicBlockOffsets. The matching std::is_sorted() assertion is
  // intentionally omitted because it makes BOLT too slow.
  auto Next = std::upper_bound(BasicBlockOffsets.begin(),
                               BasicBlockOffsets.end(),
                               BasicBlockOffset(Offset, nullptr),
                               CompareBasicBlockOffsets());
  assert(Next != BasicBlockOffsets.begin() &&
         "first basic block not at offset 0");

  // The candidate is the entry immediately before the upper bound.
  auto *Candidate = (--Next)->second;
  if (Offset < Candidate->getOffset() + Candidate->getOriginalSize())
    return Candidate;
  return nullptr;
}
|
|
|
|
|
|
2016-09-07 18:59:23 -07:00
|
|
|
void BinaryFunction::markUnreachable() {
|
|
|
|
|
std::stack<BinaryBasicBlock *> Stack;
|
|
|
|
|
|
|
|
|
|
for (auto *BB : layout()) {
|
|
|
|
|
BB->markValid(false);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-29 11:19:06 -07:00
|
|
|
// Add all entries and landing pads as roots.
|
2016-09-07 18:59:23 -07:00
|
|
|
for (auto *BB : BasicBlocks) {
|
2016-09-29 11:19:06 -07:00
|
|
|
if (BB->isEntryPoint() || BB->isLandingPad()) {
|
2016-09-07 18:59:23 -07:00
|
|
|
Stack.push(BB);
|
|
|
|
|
BB->markValid(true);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Determine reachable BBs from the entry point
|
|
|
|
|
while (!Stack.empty()) {
|
|
|
|
|
auto BB = Stack.top();
|
|
|
|
|
Stack.pop();
|
|
|
|
|
for (auto Succ : BB->successors()) {
|
|
|
|
|
if (Succ->isValid())
|
|
|
|
|
continue;
|
|
|
|
|
Succ->markValid(true);
|
|
|
|
|
Stack.push(Succ);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
|
|
|
|
|
// will be cleaned up by fixBranches().
|
|
|
|
|
std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
|
2015-10-20 12:47:37 -07:00
|
|
|
BasicBlockOrderType NewLayout;
|
|
|
|
|
unsigned Count = 0;
|
2016-09-07 18:59:23 -07:00
|
|
|
uint64_t Bytes = 0;
|
2016-09-29 11:19:06 -07:00
|
|
|
for (auto *BB : layout()) {
|
|
|
|
|
assert((!BB->isEntryPoint() || BB->isValid()) &&
|
|
|
|
|
"all entry blocks must be valid");
|
|
|
|
|
if (BB->isValid()) {
|
|
|
|
|
NewLayout.push_back(BB);
|
2016-09-07 18:59:23 -07:00
|
|
|
} else {
|
2015-10-20 12:47:37 -07:00
|
|
|
++Count;
|
2016-09-29 11:19:06 -07:00
|
|
|
Bytes += BC.computeCodeSize(BB->begin(), BB->end());
|
2016-09-07 18:59:23 -07:00
|
|
|
}
|
2015-10-20 12:47:37 -07:00
|
|
|
}
|
|
|
|
|
BasicBlocksLayout = std::move(NewLayout);
|
2016-09-07 18:59:23 -07:00
|
|
|
|
|
|
|
|
BasicBlockListType NewBasicBlocks;
|
|
|
|
|
for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) {
|
|
|
|
|
if ((*I)->isValid()) {
|
|
|
|
|
NewBasicBlocks.push_back(*I);
|
|
|
|
|
} else {
|
|
|
|
|
DeletedBasicBlocks.push_back(*I);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
BasicBlocks = std::move(NewBasicBlocks);
|
|
|
|
|
|
|
|
|
|
assert(BasicBlocks.size() == BasicBlocksLayout.size());
|
|
|
|
|
|
|
|
|
|
// Update CFG state if needed
|
|
|
|
|
if (Count > 0) {
|
|
|
|
|
updateBBIndices(0);
|
|
|
|
|
recomputeLandingPads(0, BasicBlocks.size());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return std::make_pair(Count, Bytes);
|
2015-10-20 12:47:37 -07:00
|
|
|
}
|
|
|
|
|
|
2016-09-22 18:08:20 -07:00
|
|
|
bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
|
2017-03-20 22:44:25 -07:00
|
|
|
// This function should work properly before and after function reordering.
|
|
|
|
|
// In order to accomplish this, we use the function index (if it is valid).
|
|
|
|
|
// If the function indices are not valid, we fall back to the original
|
|
|
|
|
// addresses. This should be ok because the functions without valid indices
|
|
|
|
|
// should have been ordered with a stable sort.
|
2016-09-22 18:08:20 -07:00
|
|
|
const auto *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
|
|
|
|
|
if (CalleeBF) {
|
2017-03-20 22:44:25 -07:00
|
|
|
if (hasValidIndex() && CalleeBF->hasValidIndex()) {
|
|
|
|
|
return getIndex() < CalleeBF->getIndex();
|
|
|
|
|
} else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
|
|
|
|
|
return true;
|
|
|
|
|
} else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
|
|
|
|
|
return false;
|
|
|
|
|
} else {
|
|
|
|
|
return getAddress() < CalleeBF->getAddress();
|
|
|
|
|
}
|
2016-09-22 18:08:20 -07:00
|
|
|
} else {
|
|
|
|
|
// Absolute symbol.
|
|
|
|
|
auto const CalleeSI = BC.GlobalSymbols.find(CalleeSymbol->getName());
|
|
|
|
|
assert(CalleeSI != BC.GlobalSymbols.end() && "unregistered symbol found");
|
|
|
|
|
return CalleeSI->second > getAddress();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-08 19:58:33 -08:00
|
|
|
void BinaryFunction::dump(bool PrintInstructions) const {
|
|
|
|
|
print(dbgs(), "", PrintInstructions);
|
2016-07-23 08:01:53 -07:00
|
|
|
}
|
|
|
|
|
|
2015-10-23 15:52:59 -07:00
|
|
|
void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
|
|
|
|
|
bool PrintInstructions) const {
|
2016-09-27 19:09:38 -07:00
|
|
|
// FIXME: remove after #15075512 is done.
|
2017-03-17 19:05:11 -07:00
|
|
|
if (!opts::shouldProcess(*this) || !opts::shouldPrint(*this))
|
2016-09-27 19:09:38 -07:00
|
|
|
return;
|
|
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
StringRef SectionName;
|
|
|
|
|
Section.getName(SectionName);
|
2016-08-07 12:35:23 -07:00
|
|
|
OS << "Binary Function \"" << *this << "\" " << Annotation << " {";
|
2016-06-10 17:13:05 -07:00
|
|
|
if (Names.size() > 1) {
|
|
|
|
|
OS << "\n Other names : ";
|
|
|
|
|
auto Sep = "";
|
|
|
|
|
for (unsigned i = 0; i < Names.size() - 1; ++i) {
|
|
|
|
|
OS << Sep << Names[i];
|
|
|
|
|
Sep = "\n ";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
OS << "\n Number : " << FunctionNumber
|
2015-10-09 17:21:14 -07:00
|
|
|
<< "\n State : " << CurrentState
|
|
|
|
|
<< "\n Address : 0x" << Twine::utohexstr(Address)
|
|
|
|
|
<< "\n Size : 0x" << Twine::utohexstr(Size)
|
|
|
|
|
<< "\n MaxSize : 0x" << Twine::utohexstr(MaxSize)
|
|
|
|
|
<< "\n Offset : 0x" << Twine::utohexstr(FileOffset)
|
|
|
|
|
<< "\n Section : " << SectionName
|
|
|
|
|
<< "\n Orc Section : " << getCodeSectionName()
|
2015-12-16 17:56:49 -08:00
|
|
|
<< "\n LSDA : 0x" << Twine::utohexstr(getLSDAAddress())
|
2015-10-09 17:21:14 -07:00
|
|
|
<< "\n IsSimple : " << IsSimple
|
2015-11-19 17:59:41 -08:00
|
|
|
<< "\n IsSplit : " << IsSplit
|
2015-11-08 12:23:54 -08:00
|
|
|
<< "\n BB Count : " << BasicBlocksLayout.size();
|
2016-12-21 17:13:56 -08:00
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
if (hasCFG()) {
|
2016-12-21 17:13:56 -08:00
|
|
|
OS << "\n Hash : " << Twine::utohexstr(hash());
|
|
|
|
|
}
|
2015-11-08 12:23:54 -08:00
|
|
|
if (FrameInstructions.size()) {
|
|
|
|
|
OS << "\n CFI Instrs : " << FrameInstructions.size();
|
|
|
|
|
}
|
2015-10-23 15:52:59 -07:00
|
|
|
if (BasicBlocksLayout.size()) {
|
|
|
|
|
OS << "\n BB Layout : ";
|
|
|
|
|
auto Sep = "";
|
|
|
|
|
for (auto BB : BasicBlocksLayout) {
|
|
|
|
|
OS << Sep << BB->getName();
|
|
|
|
|
Sep = ", ";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (ImageAddress)
|
|
|
|
|
OS << "\n Image : 0x" << Twine::utohexstr(ImageAddress);
|
2016-06-15 18:36:16 -07:00
|
|
|
if (ExecutionCount != COUNT_NO_PROFILE) {
|
2015-10-12 12:30:47 -07:00
|
|
|
OS << "\n Exec Count : " << ExecutionCount;
|
2016-06-15 18:36:16 -07:00
|
|
|
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
|
|
|
|
|
}
|
2015-10-23 15:52:59 -07:00
|
|
|
|
2016-08-29 21:11:22 -07:00
|
|
|
if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
|
2016-09-16 15:54:32 -07:00
|
|
|
OS << '\n';
|
2016-08-29 21:11:22 -07:00
|
|
|
DynoStats dynoStats = getDynoStats();
|
|
|
|
|
OS << dynoStats;
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-12 12:30:47 -07:00
|
|
|
OS << "\n}\n";
|
2015-10-09 17:21:14 -07:00
|
|
|
|
|
|
|
|
if (!PrintInstructions || !BC.InstPrinter)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
// Offset of the instruction in function.
|
|
|
|
|
uint64_t Offset{0};
|
|
|
|
|
|
|
|
|
|
if (BasicBlocks.empty() && !Instructions.empty()) {
|
|
|
|
|
// Print before CFG was built.
|
|
|
|
|
for (const auto &II : Instructions) {
|
2016-06-15 18:36:16 -07:00
|
|
|
Offset = II.first;
|
2015-10-09 17:21:14 -07:00
|
|
|
|
|
|
|
|
// Print label if exists at this offset.
|
|
|
|
|
auto LI = Labels.find(Offset);
|
|
|
|
|
if (LI != Labels.end())
|
|
|
|
|
OS << LI->second->getName() << ":\n";
|
|
|
|
|
|
2016-07-23 08:01:53 -07:00
|
|
|
BC.printInstruction(OS, II.second, Offset, this);
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-19 17:59:41 -08:00
|
|
|
for (uint32_t I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
|
|
|
|
|
auto BB = BasicBlocksLayout[I];
|
|
|
|
|
if (I != 0 &&
|
2016-09-13 17:12:00 -07:00
|
|
|
BB->isCold() != BasicBlocksLayout[I - 1]->isCold())
|
2015-11-19 17:59:41 -08:00
|
|
|
OS << "------- HOT-COLD SPLIT POINT -------\n\n";
|
|
|
|
|
|
2015-10-16 09:49:04 -07:00
|
|
|
OS << BB->getName() << " ("
|
2016-12-21 17:13:56 -08:00
|
|
|
<< BB->size() << " instructions, align : " << BB->getAlignment()
|
|
|
|
|
<< ")\n";
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2016-09-29 11:19:06 -07:00
|
|
|
if (BB->isEntryPoint())
|
|
|
|
|
OS << " Entry Point\n";
|
|
|
|
|
|
|
|
|
|
if (BB->isLandingPad())
|
2015-11-12 18:56:58 -08:00
|
|
|
OS << " Landing Pad\n";
|
|
|
|
|
|
2015-10-16 09:49:04 -07:00
|
|
|
uint64_t BBExecCount = BB->getExecutionCount();
|
2016-12-21 17:13:56 -08:00
|
|
|
if (hasValidProfile()) {
|
2015-10-13 10:25:45 -07:00
|
|
|
OS << " Exec Count : " << BBExecCount << "\n";
|
|
|
|
|
}
|
2017-02-24 21:59:33 -08:00
|
|
|
if (BB->getCFIState() >= 0) {
|
|
|
|
|
OS << " CFI State : " << BB->getCFIState() << '\n';
|
2016-01-16 14:58:22 -08:00
|
|
|
}
|
2016-09-13 17:12:00 -07:00
|
|
|
if (!BB->pred_empty()) {
|
2015-10-09 17:21:14 -07:00
|
|
|
OS << " Predecessors: ";
|
|
|
|
|
auto Sep = "";
|
2016-09-13 17:12:00 -07:00
|
|
|
for (auto Pred : BB->predecessors()) {
|
2015-10-09 17:21:14 -07:00
|
|
|
OS << Sep << Pred->getName();
|
|
|
|
|
Sep = ", ";
|
|
|
|
|
}
|
|
|
|
|
OS << '\n';
|
|
|
|
|
}
|
2016-09-13 17:12:00 -07:00
|
|
|
if (!BB->throw_empty()) {
|
2016-05-26 15:10:09 -07:00
|
|
|
OS << " Throwers: ";
|
|
|
|
|
auto Sep = "";
|
2016-09-13 17:12:00 -07:00
|
|
|
for (auto Throw : BB->throwers()) {
|
2016-05-26 15:10:09 -07:00
|
|
|
OS << Sep << Throw->getName();
|
|
|
|
|
Sep = ", ";
|
|
|
|
|
}
|
|
|
|
|
OS << '\n';
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2015-10-16 09:49:04 -07:00
|
|
|
Offset = RoundUpToAlignment(Offset, BB->getAlignment());
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2016-07-23 08:01:53 -07:00
|
|
|
// Note: offsets are imprecise since this is happening prior to relaxation.
|
|
|
|
|
Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this);
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2016-09-13 17:12:00 -07:00
|
|
|
if (!BB->succ_empty()) {
|
2015-10-09 17:21:14 -07:00
|
|
|
OS << " Successors: ";
|
2016-09-13 17:12:00 -07:00
|
|
|
auto BI = BB->branch_info_begin();
|
2015-10-09 17:21:14 -07:00
|
|
|
auto Sep = "";
|
2016-09-13 17:12:00 -07:00
|
|
|
for (auto Succ : BB->successors()) {
|
|
|
|
|
assert(BI != BB->branch_info_end() && "missing BranchInfo entry");
|
2015-10-13 10:25:45 -07:00
|
|
|
OS << Sep << Succ->getName();
|
|
|
|
|
if (ExecutionCount != COUNT_NO_PROFILE &&
|
2016-12-21 17:13:56 -08:00
|
|
|
BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
|
2015-10-13 10:25:45 -07:00
|
|
|
OS << " (mispreds: " << BI->MispredictedCount
|
|
|
|
|
<< ", count: " << BI->Count << ")";
|
|
|
|
|
} else if (ExecutionCount != COUNT_NO_PROFILE &&
|
2016-12-21 17:13:56 -08:00
|
|
|
BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
|
2015-10-13 10:25:45 -07:00
|
|
|
OS << " (inferred count: " << BI->Count << ")";
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
Sep = ", ";
|
2015-10-12 12:30:47 -07:00
|
|
|
++BI;
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
OS << '\n';
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-13 17:12:00 -07:00
|
|
|
if (!BB->lp_empty()) {
|
2016-05-26 15:10:09 -07:00
|
|
|
OS << " Landing Pads: ";
|
|
|
|
|
auto Sep = "";
|
2016-09-13 17:12:00 -07:00
|
|
|
for (auto LP : BB->landing_pads()) {
|
2016-05-26 15:10:09 -07:00
|
|
|
OS << Sep << LP->getName();
|
|
|
|
|
if (ExecutionCount != COUNT_NO_PROFILE) {
|
2016-09-13 17:12:00 -07:00
|
|
|
OS << " (count: " << LP->getExecutionCount() << ")";
|
2016-05-26 15:10:09 -07:00
|
|
|
}
|
|
|
|
|
Sep = ", ";
|
|
|
|
|
}
|
|
|
|
|
OS << '\n';
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
// In CFG_Finalized state we can miscalculate CFI state at exit.
|
|
|
|
|
if (CurrentState == State::CFG) {
|
|
|
|
|
const auto CFIStateAtExit = BB->getCFIStateAtExit();
|
|
|
|
|
if (CFIStateAtExit >= 0)
|
|
|
|
|
OS << " CFI State: " << CFIStateAtExit << '\n';
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
OS << '\n';
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-13 14:18:45 -08:00
|
|
|
// Dump new exception ranges for the function.
|
|
|
|
|
if (!CallSites.empty()) {
|
|
|
|
|
OS << "EH table:\n";
|
|
|
|
|
for (auto &CSI : CallSites) {
|
|
|
|
|
OS << " [" << *CSI.Start << ", " << *CSI.End << ") landing pad : ";
|
|
|
|
|
if (CSI.LP)
|
|
|
|
|
OS << *CSI.LP;
|
|
|
|
|
else
|
|
|
|
|
OS << "0";
|
|
|
|
|
OS << ", action : " << CSI.Action << '\n';
|
|
|
|
|
}
|
|
|
|
|
OS << '\n';
|
2015-11-04 16:48:47 -08:00
|
|
|
}
|
|
|
|
|
|
2016-09-16 15:54:32 -07:00
|
|
|
// Print all jump tables.
|
|
|
|
|
for (auto &JTI : JumpTables) {
|
|
|
|
|
JTI.second.print(OS);
|
2016-09-14 16:45:40 -07:00
|
|
|
}
|
|
|
|
|
|
2015-11-08 12:23:54 -08:00
|
|
|
OS << "DWARF CFI Instructions:\n";
|
|
|
|
|
if (OffsetToCFI.size()) {
|
|
|
|
|
// Pre-buildCFG information
|
|
|
|
|
for (auto &Elmt : OffsetToCFI) {
|
|
|
|
|
OS << format(" %08x:\t", Elmt.first);
|
|
|
|
|
assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
|
2017-05-01 16:52:54 -07:00
|
|
|
BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
|
2015-11-08 12:23:54 -08:00
|
|
|
OS << "\n";
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// Post-buildCFG information
|
|
|
|
|
for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
|
|
|
|
|
const MCCFIInstruction &CFI = FrameInstructions[I];
|
|
|
|
|
OS << format(" %d:\t", I);
|
2017-05-01 16:52:54 -07:00
|
|
|
BinaryContext::printCFI(OS, CFI);
|
2015-11-08 12:23:54 -08:00
|
|
|
OS << "\n";
|
2015-11-04 16:48:47 -08:00
|
|
|
}
|
|
|
|
|
}
|
2015-11-08 12:23:54 -08:00
|
|
|
if (FrameInstructions.empty())
|
|
|
|
|
OS << " <empty>\n";
|
2015-11-04 16:48:47 -08:00
|
|
|
|
2016-08-07 12:35:23 -07:00
|
|
|
OS << "End of Function \"" << *this << "\"\n\n";
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
BinaryFunction::IndirectBranchType
|
|
|
|
|
BinaryFunction::analyzeIndirectBranch(MCInst &Instruction,
|
|
|
|
|
unsigned Size,
|
|
|
|
|
uint64_t Offset) {
|
|
|
|
|
auto &MIA = BC.MIA;
|
|
|
|
|
|
|
|
|
|
IndirectBranchType Type = IndirectBranchType::UNKNOWN;
|
|
|
|
|
|
|
|
|
|
// An instruction referencing memory used by jump instruction (directly or
|
|
|
|
|
// via register). This location could be an array of function pointers
|
|
|
|
|
// in case of indirect tail call, or a jump table.
|
|
|
|
|
MCInst *MemLocInstr = nullptr;
|
|
|
|
|
|
|
|
|
|
// Address of the table referenced by MemLocInstr. Could be either an
|
|
|
|
|
// array of function pointers, or a jump table.
|
|
|
|
|
uint64_t ArrayStart = 0;
|
|
|
|
|
|
|
|
|
|
auto analyzePICJumpTable =
|
|
|
|
|
[&](InstrMapType::reverse_iterator II,
|
|
|
|
|
InstrMapType::reverse_iterator IE,
|
|
|
|
|
unsigned R1,
|
|
|
|
|
unsigned R2) {
|
|
|
|
|
// Analyze PIC-style jump table code template:
|
|
|
|
|
//
|
|
|
|
|
// lea PIC_JUMP_TABLE(%rip), {%r1|%r2} <- MemLocInstr
|
|
|
|
|
// mov ({%r1|%r2}, %index, 4), {%r2|%r1}
|
|
|
|
|
// add %r2, %r1
|
|
|
|
|
// jmp *%r1
|
|
|
|
|
//
|
|
|
|
|
// (with any irrelevant instructions in-between)
|
|
|
|
|
//
|
2016-09-29 11:19:06 -07:00
|
|
|
// When we call this helper we've already determined %r1 and %r2, and
|
|
|
|
|
// reverse instruction iterator \p II is pointing to the ADD instruction.
|
2016-09-27 19:09:38 -07:00
|
|
|
//
|
|
|
|
|
// PIC jump table looks like following:
|
|
|
|
|
//
|
|
|
|
|
// JT: ----------
|
|
|
|
|
// E1:| L1 - JT |
|
|
|
|
|
// |----------|
|
|
|
|
|
// E2:| L2 - JT |
|
|
|
|
|
// |----------|
|
|
|
|
|
// | |
|
|
|
|
|
// ......
|
|
|
|
|
// En:| Ln - JT |
|
|
|
|
|
// ----------
|
|
|
|
|
//
|
|
|
|
|
// Where L1, L2, ..., Ln represent labels in the function.
|
|
|
|
|
//
|
|
|
|
|
// The actual relocations in the table will be of the form:
|
|
|
|
|
//
|
|
|
|
|
// Ln - JT
|
|
|
|
|
// = (Ln - En) + (En - JT)
|
|
|
|
|
// = R_X86_64_PC32(Ln) + En - JT
|
|
|
|
|
// = R_X86_64_PC32(Ln + offsetof(En))
|
|
|
|
|
//
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: checking for PIC jump table\n");
|
|
|
|
|
MCInst *MovInstr = nullptr;
|
|
|
|
|
while (++II != IE) {
|
|
|
|
|
auto &Instr = II->second;
|
|
|
|
|
const auto &InstrDesc = BC.MII->get(Instr.getOpcode());
|
|
|
|
|
if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *BC.MRI) &&
|
|
|
|
|
!InstrDesc.hasDefOfPhysReg(Instr, R2, *BC.MRI)) {
|
|
|
|
|
// Ignore instructions that don't affect R1, R2 registers.
|
|
|
|
|
continue;
|
|
|
|
|
} else if (!MovInstr) {
|
|
|
|
|
// Expect to see MOV instruction.
|
|
|
|
|
if (!MIA->isMOVSX64rm32(Instr)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: MOV instruction expected.\n");
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check if it's setting %r1 or %r2. In canonical form it sets %r2.
|
|
|
|
|
// If it sets %r1 - rename the registers so we have to only check
|
|
|
|
|
// a single form.
|
|
|
|
|
auto MovDestReg = Instr.getOperand(0).getReg();
|
|
|
|
|
if (MovDestReg != R2)
|
|
|
|
|
std::swap(R1, R2);
|
|
|
|
|
if (MovDestReg != R2) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: MOV instruction expected to set %r2\n");
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Verify operands for MOV.
|
|
|
|
|
unsigned BaseRegNum;
|
|
|
|
|
int64_t ScaleValue;
|
|
|
|
|
unsigned IndexRegNum;
|
|
|
|
|
int64_t DispValue;
|
|
|
|
|
unsigned SegRegNum;
|
|
|
|
|
if (!MIA->evaluateX86MemoryOperand(Instr, &BaseRegNum,
|
|
|
|
|
&ScaleValue, &IndexRegNum,
|
|
|
|
|
&DispValue, &SegRegNum))
|
|
|
|
|
break;
|
|
|
|
|
if (BaseRegNum != R1 ||
|
|
|
|
|
ScaleValue != 4 ||
|
|
|
|
|
IndexRegNum == bolt::NoRegister ||
|
|
|
|
|
DispValue != 0 ||
|
|
|
|
|
SegRegNum != bolt::NoRegister)
|
|
|
|
|
break;
|
|
|
|
|
MovInstr = &Instr;
|
|
|
|
|
} else {
|
|
|
|
|
assert(MovInstr && "MOV instruction expected to be set");
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *BC.MRI))
|
|
|
|
|
continue;
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!MIA->isLEA64r(Instr)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: LEA instruction expected\n");
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (Instr.getOperand(0).getReg() != R1) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: LEA instruction expected to set %r1\n");
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Verify operands for LEA.
|
|
|
|
|
unsigned BaseRegNum;
|
|
|
|
|
int64_t ScaleValue;
|
|
|
|
|
unsigned IndexRegNum;
|
|
|
|
|
const MCExpr *DispExpr = nullptr;
|
|
|
|
|
unsigned SegRegNum;
|
|
|
|
|
if (!MIA->evaluateX86MemoryOperand(Instr, &BaseRegNum,
|
|
|
|
|
&ScaleValue, &IndexRegNum,
|
|
|
|
|
nullptr, &SegRegNum, &DispExpr))
|
|
|
|
|
break;
|
|
|
|
|
if (BaseRegNum != BC.MRI->getProgramCounter() ||
|
|
|
|
|
IndexRegNum != bolt::NoRegister ||
|
|
|
|
|
SegRegNum != bolt::NoRegister ||
|
|
|
|
|
DispExpr == nullptr)
|
|
|
|
|
break;
|
|
|
|
|
MemLocInstr = &Instr;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!MemLocInstr)
|
|
|
|
|
return IndirectBranchType::UNKNOWN;
|
|
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: checking potential PIC jump table\n");
|
|
|
|
|
return IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Try to find a (base) memory location from where the address for
|
|
|
|
|
// the indirect branch is loaded. For X86-64 the memory will be specified
|
|
|
|
|
// in the following format:
|
|
|
|
|
//
|
|
|
|
|
// {%rip}/{%basereg} + Imm + IndexReg * Scale
|
|
|
|
|
//
|
|
|
|
|
// We are interested in the cases where Scale == sizeof(uintptr_t) and
|
|
|
|
|
// the contents of the memory are presumably a function array.
|
|
|
|
|
//
|
|
|
|
|
// Normal jump table:
|
|
|
|
|
//
|
|
|
|
|
// jmp *(JUMP_TABLE, %index, Scale)
|
|
|
|
|
//
|
|
|
|
|
// or
|
|
|
|
|
//
|
|
|
|
|
// mov (JUMP_TABLE, %index, Scale), %r1
|
|
|
|
|
// ...
|
|
|
|
|
// jmp %r1
|
|
|
|
|
//
|
|
|
|
|
// We handle PIC-style jump tables separately.
|
|
|
|
|
//
|
|
|
|
|
if (Instruction.getNumOperands() == 1) {
|
|
|
|
|
// If the indirect jump is on register - try to detect if the
|
|
|
|
|
// register value is loaded from a memory location.
|
|
|
|
|
assert(Instruction.getOperand(0).isReg() && "register operand expected");
|
|
|
|
|
const auto R1 = Instruction.getOperand(0).getReg();
|
|
|
|
|
// Check if one of the previous instructions defines the jump-on register.
|
|
|
|
|
// We will check that this instruction belongs to the same basic block
|
|
|
|
|
// in postProcessIndirectBranches().
|
|
|
|
|
for (auto PrevII = Instructions.rbegin(); PrevII != Instructions.rend();
|
|
|
|
|
++PrevII) {
|
|
|
|
|
auto &PrevInstr = PrevII->second;
|
|
|
|
|
const auto &PrevInstrDesc = BC.MII->get(PrevInstr.getOpcode());
|
|
|
|
|
|
|
|
|
|
if (!PrevInstrDesc.hasDefOfPhysReg(PrevInstr, R1, *BC.MRI))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (MIA->isMoveMem2Reg(PrevInstr)) {
|
|
|
|
|
MemLocInstr = &PrevInstr;
|
|
|
|
|
break;
|
|
|
|
|
} else if (MIA->isADD64rr(PrevInstr)) {
|
|
|
|
|
auto R2 = PrevInstr.getOperand(2).getReg();
|
|
|
|
|
if (R1 == R2)
|
|
|
|
|
return IndirectBranchType::UNKNOWN;
|
|
|
|
|
Type = analyzePICJumpTable(PrevII, Instructions.rend(), R1, R2);
|
|
|
|
|
break;
|
|
|
|
|
} else {
|
|
|
|
|
return IndirectBranchType::UNKNOWN;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!MemLocInstr) {
|
|
|
|
|
// No definition seen for the register in this function so far. Could be
|
|
|
|
|
// an input parameter - which means it is an external code reference.
|
|
|
|
|
// It also could be that the definition happens to be in the code that
|
|
|
|
|
// we haven't processed yet. Since we have to be conservative, return
|
|
|
|
|
// as UNKNOWN case.
|
|
|
|
|
return IndirectBranchType::UNKNOWN;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
MemLocInstr = &Instruction;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const auto RIPRegister = BC.MRI->getProgramCounter();
|
|
|
|
|
auto PtrSize = BC.AsmInfo->getPointerSize();
|
|
|
|
|
|
|
|
|
|
// Analyze the memory location.
|
|
|
|
|
unsigned BaseRegNum;
|
|
|
|
|
int64_t ScaleValue;
|
|
|
|
|
unsigned IndexRegNum;
|
|
|
|
|
int64_t DispValue;
|
|
|
|
|
unsigned SegRegNum;
|
|
|
|
|
const MCExpr *DispExpr;
|
|
|
|
|
if (!MIA->evaluateX86MemoryOperand(*MemLocInstr, &BaseRegNum,
|
|
|
|
|
&ScaleValue, &IndexRegNum,
|
|
|
|
|
&DispValue, &SegRegNum,
|
|
|
|
|
&DispExpr))
|
|
|
|
|
return IndirectBranchType::UNKNOWN;
|
|
|
|
|
|
[BOLT] Optimize jump tables with hot entries
Summary:
This diff is similar to Bill's diff for optimizing jump tables
(and is built on top of it), but it differs in the strategy used to
optimize the jump table. The previous approach loads the target address
from the jump table and compares it to check if it is a hot target. This
accomplishes branch misprediction reduction by promoting the indirect jmp
to a (more predictable) direct jmp.
load %r10, JMPTABLE
cmp %r10, HOTTARGET
je HOTTARGET
ijmp [JMPTABLE + %index * scale]
The idea in this diff is instead to make dcache better by avoiding the
load of the jump table, leaving branch mispredictions as a secondary
target. To do this we compare the index used in the indirect jmp and if
it matches a known hot entry, it performs a direct jump to the target.
cmp %index, HOTINDEX
je CORRESPONDING_TARGET
ijmp [JMPTABLE + %index * scale]
The downside of this approach is that we may have multiple indices
associated with a single target, but we only have profiling to show
which targets are hot and we have no clue about which indices are hot.
INDEX TARGET
0 4004f8
8 4004f8
10 4003d0
18 4004f8
Profiling data:
TARGET COUNT
4004f8 10020
4003d0 17
In this example, we know 4004f8 is hot, but to make a direct call to it
we need to check for indices 0, 8 and 18 -- 3 comparisons instead of 1.
Therefore, once we know a target is hot, we must generate code to
compare against all possible indices associated with this target because
we don't know which index is the hot one (IF there's a hotter index).
cmp %index, 0
je 4004f8
cmp %index, 8
je 4004f8
cmp %index, 18
je 4004f8
(... up to N comparisons as in --indirect-call-promotion-topn=N )
ijmp [JMPTABLE + %index * scale]
(cherry picked from FBD5005620)
2017-05-01 14:04:40 -07:00
|
|
|
// Do not annotate with index reg if address was precomputed earlier
|
|
|
|
|
// and reg may not be live at the jump site.
|
|
|
|
|
if (MemLocInstr != &Instruction)
|
|
|
|
|
IndexRegNum = 0;
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if ((BaseRegNum != bolt::NoRegister && BaseRegNum != RIPRegister) ||
|
|
|
|
|
SegRegNum != bolt::NoRegister)
|
|
|
|
|
return IndirectBranchType::UNKNOWN;
|
|
|
|
|
|
|
|
|
|
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE &&
|
|
|
|
|
(ScaleValue != 1 || BaseRegNum != RIPRegister))
|
|
|
|
|
return IndirectBranchType::UNKNOWN;
|
|
|
|
|
|
|
|
|
|
if (Type != IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE &&
|
|
|
|
|
ScaleValue != PtrSize)
|
|
|
|
|
return IndirectBranchType::UNKNOWN;
|
|
|
|
|
|
|
|
|
|
// RIP-relative addressing should be converted to symbol form by now
|
|
|
|
|
// in processed instructions (but not in jump).
|
|
|
|
|
if (DispExpr) {
|
|
|
|
|
auto SI = BC.GlobalSymbols.find(DispExpr->getSymbol().getName());
|
|
|
|
|
assert(SI != BC.GlobalSymbols.end() && "global symbol needs a value");
|
|
|
|
|
ArrayStart = SI->second;
|
|
|
|
|
} else {
|
|
|
|
|
ArrayStart = static_cast<uint64_t>(DispValue);
|
|
|
|
|
if (BaseRegNum == RIPRegister)
|
|
|
|
|
ArrayStart += getAddress() + Offset + Size;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
|
|
|
|
|
<< Twine::utohexstr(ArrayStart) << '\n');
|
|
|
|
|
|
|
|
|
|
// Check if there's already a jump table registered at this address.
|
|
|
|
|
if (auto *JT = getJumpTableContainingAddress(ArrayStart)) {
|
[BOLT] Optimize jump tables with hot entries
Summary:
This diff is similar to Bill's diff for optimizing jump tables
(and is built on top of it), but it differs in the strategy used to
optimize the jump table. The previous approach loads the target address
from the jump table and compares it to check if it is a hot target. This
accomplishes branch misprediction reduction by promoting the indirect jmp
to a (more predictable) direct jmp.
load %r10, JMPTABLE
cmp %r10, HOTTARGET
je HOTTARGET
ijmp [JMPTABLE + %index * scale]
The idea in this diff is instead to make dcache better by avoiding the
load of the jump table, leaving branch mispredictions as a secondary
target. To do this we compare the index used in the indirect jmp and if
it matches a known hot entry, it performs a direct jump to the target.
cmp %index, HOTINDEX
je CORRESPONDING_TARGET
ijmp [JMPTABLE + %index * scale]
The downside of this approach is that we may have multiple indices
associated with a single target, but we only have profiling to show
which targets are hot and we have no clue about which indices are hot.
INDEX TARGET
0 4004f8
8 4004f8
10 4003d0
18 4004f8
Profiling data:
TARGET COUNT
4004f8 10020
4003d0 17
In this example, we know 4004f8 is hot, but to make a direct call to it
we need to check for indices 0, 8 and 18 -- 3 comparisons instead of 1.
Therefore, once we know a target is hot, we must generate code to
compare against all possible indices associated with this target because
we don't know which index is the hot one (IF there's a hotter index).
cmp %index, 0
je 4004f8
cmp %index, 8
je 4004f8
cmp %index, 18
je 4004f8
(... up to N comparisons as in --indirect-call-promotion-topn=N )
ijmp [JMPTABLE + %index * scale]
(cherry picked from FBD5005620)
2017-05-01 14:04:40 -07:00
|
|
|
auto JTOffset = ArrayStart - JT->Address;
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE && JTOffset != 0) {
|
|
|
|
|
// Adjust the size of this jump table and create a new one if necessary.
|
|
|
|
|
// We cannot re-use the entries since the offsets are relative to the
|
|
|
|
|
// table start.
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: adjusting size of jump table at 0x"
|
|
|
|
|
<< Twine::utohexstr(JT->Address) << '\n');
|
|
|
|
|
JT->OffsetEntries.resize(JTOffset / JT->EntrySize);
|
|
|
|
|
} else {
|
|
|
|
|
// Re-use an existing jump table. Perhaps parts of it.
|
|
|
|
|
if (Type != IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
|
2016-09-29 11:19:06 -07:00
|
|
|
assert(JT->Type == JumpTable::JTT_NORMAL &&
|
|
|
|
|
"normal jump table expected");
|
2016-09-27 19:09:38 -07:00
|
|
|
Type = IndirectBranchType::POSSIBLE_JUMP_TABLE;
|
|
|
|
|
} else {
|
|
|
|
|
assert(JT->Type == JumpTable::JTT_PIC && "PIC jump table expected");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Get or create a new label for the table.
|
|
|
|
|
auto LI = JT->Labels.find(JTOffset);
|
|
|
|
|
if (LI == JT->Labels.end()) {
|
2016-09-27 19:09:38 -07:00
|
|
|
auto *JTStartLabel = BC.getOrCreateGlobalSymbol(ArrayStart,
|
|
|
|
|
"JUMP_TABLEat");
|
2016-09-27 19:09:38 -07:00
|
|
|
auto Result = JT->Labels.emplace(JTOffset, JTStartLabel);
|
|
|
|
|
assert(Result.second && "error adding jump table label");
|
|
|
|
|
LI = Result.first;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
BC.MIA->replaceMemOperandDisp(*MemLocInstr, LI->second, BC.Ctx.get());
|
[BOLT] Optimize jump tables with hot entries
Summary:
This diff is similar to Bill's diff for optimizing jump tables
(and is built on top of it), but it differs in the strategy used to
optimize the jump table. The previous approach loads the target address
from the jump table and compares it to check if it is a hot target. This
accomplishes branch misprediction reduction by promoting the indirect jmp
to a (more predictable) direct jmp.
load %r10, JMPTABLE
cmp %r10, HOTTARGET
je HOTTARGET
ijmp [JMPTABLE + %index * scale]
The idea in this diff is instead to make dcache better by avoiding the
load of the jump table, leaving branch mispredictions as a secondary
target. To do this we compare the index used in the indirect jmp and if
it matches a known hot entry, it performs a direct jump to the target.
cmp %index, HOTINDEX
je CORRESPONDING_TARGET
ijmp [JMPTABLE + %index * scale]
The downside of this approach is that we may have multiple indices
associated with a single target, but we only have profiling to show
which targets are hot and we have no clue about which indices are hot.
INDEX TARGET
0 4004f8
8 4004f8
10 4003d0
18 4004f8
Profiling data:
TARGET COUNT
4004f8 10020
4003d0 17
In this example, we know 4004f8 is hot, but to make a direct call to it
we need to check for indices 0, 8 and 18 -- 3 comparisons instead of 1.
Therefore, once we know a target is hot, we must generate code to
compare against all possible indices associated with this target because
we don't know which index is the hot one (IF there's a hotter index).
cmp %index, 0
je 4004f8
cmp %index, 8
je 4004f8
cmp %index, 18
je 4004f8
(... up to N comparisons as in --indirect-call-promotion-topn=N )
ijmp [JMPTABLE + %index * scale]
(cherry picked from FBD5005620)
2017-05-01 14:04:40 -07:00
|
|
|
BC.MIA->setJumpTable(BC.Ctx.get(), Instruction, ArrayStart, IndexRegNum);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
JTSites.emplace_back(Offset, ArrayStart);
|
|
|
|
|
|
|
|
|
|
return Type;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto SectionOrError = BC.getSectionForAddress(ArrayStart);
|
|
|
|
|
if (!SectionOrError) {
|
|
|
|
|
// No section - possibly an absolute address. Since we don't allow
|
|
|
|
|
// internal function addresses to escape the function scope - we
|
|
|
|
|
// consider it a tail call.
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
errs() << "BOLT-WARNING: no section for address 0x"
|
|
|
|
|
<< Twine::utohexstr(ArrayStart) << " referenced from function "
|
|
|
|
|
<< *this << '\n';
|
|
|
|
|
}
|
|
|
|
|
return IndirectBranchType::POSSIBLE_TAIL_CALL;
|
|
|
|
|
}
|
|
|
|
|
auto &Section = *SectionOrError;
|
|
|
|
|
if (Section.isVirtual()) {
|
|
|
|
|
// The contents are filled at runtime.
|
|
|
|
|
return IndirectBranchType::POSSIBLE_TAIL_CALL;
|
|
|
|
|
}
|
|
|
|
|
// Extract the value at the start of the array.
|
|
|
|
|
StringRef SectionContents;
|
|
|
|
|
Section.getContents(SectionContents);
|
|
|
|
|
auto EntrySize =
|
|
|
|
|
Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE ? 4 : PtrSize;
|
|
|
|
|
DataExtractor DE(SectionContents, BC.AsmInfo->isLittleEndian(), EntrySize);
|
|
|
|
|
auto ValueOffset = static_cast<uint32_t>(ArrayStart - Section.getAddress());
|
|
|
|
|
uint64_t Value = 0;
|
|
|
|
|
std::vector<uint64_t> JTOffsetCandidates;
|
|
|
|
|
while (ValueOffset <= Section.getSize() - EntrySize) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: indirect jmp at 0x"
|
|
|
|
|
<< Twine::utohexstr(getAddress() + Offset)
|
|
|
|
|
<< " is referencing address 0x"
|
|
|
|
|
<< Twine::utohexstr(Section.getAddress() + ValueOffset));
|
|
|
|
|
// Extract the value and increment the offset.
|
|
|
|
|
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
|
|
|
|
|
Value = ArrayStart + DE.getSigned(&ValueOffset, 4);
|
|
|
|
|
} else {
|
|
|
|
|
Value = DE.getAddress(&ValueOffset);
|
|
|
|
|
}
|
|
|
|
|
DEBUG(dbgs() << ", which contains value "
|
|
|
|
|
<< Twine::utohexstr(Value) << '\n');
|
|
|
|
|
if (containsAddress(Value) && Value != getAddress()) {
|
|
|
|
|
// Is it possible to have a jump table with function start as an entry?
|
|
|
|
|
JTOffsetCandidates.push_back(Value - getAddress());
|
|
|
|
|
if (Type == IndirectBranchType::UNKNOWN)
|
|
|
|
|
Type = IndirectBranchType::POSSIBLE_JUMP_TABLE;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// Potentially a switch table can contain __builtin_unreachable() entry
|
|
|
|
|
// pointing just right after the function. In this case we have to check
|
|
|
|
|
// another entry. Otherwise the entry is outside of this function scope
|
|
|
|
|
// and it's not a switch table.
|
|
|
|
|
if (Value == getAddress() + getSize()) {
|
|
|
|
|
JTOffsetCandidates.push_back(Value - getAddress());
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (Type == IndirectBranchType::POSSIBLE_JUMP_TABLE ||
|
|
|
|
|
Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
|
|
|
|
|
assert(JTOffsetCandidates.size() > 2 &&
|
|
|
|
|
"expected more than 2 jump table entries");
|
2016-09-27 19:09:38 -07:00
|
|
|
auto *JTStartLabel = BC.getOrCreateGlobalSymbol(ArrayStart, "JUMP_TABLEat");
|
2016-09-27 19:09:38 -07:00
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "
|
|
|
|
|
<< JTStartLabel->getName()
|
|
|
|
|
<< " in function " << *this << " with "
|
|
|
|
|
<< JTOffsetCandidates.size() << " entries.\n");
|
|
|
|
|
auto JumpTableType =
|
|
|
|
|
Type == IndirectBranchType::POSSIBLE_JUMP_TABLE
|
|
|
|
|
? JumpTable::JTT_NORMAL
|
|
|
|
|
: JumpTable::JTT_PIC;
|
|
|
|
|
JumpTables.emplace(ArrayStart, JumpTable{ArrayStart,
|
|
|
|
|
EntrySize,
|
|
|
|
|
JumpTableType,
|
|
|
|
|
std::move(JTOffsetCandidates),
|
|
|
|
|
{{0, JTStartLabel}}});
|
|
|
|
|
BC.MIA->replaceMemOperandDisp(*MemLocInstr, JTStartLabel, BC.Ctx.get());
|
[BOLT] Optimize jump tables with hot entries
Summary:
This diff is similar to Bill's diff for optimizing jump tables
(and is built on top of it), but it differs in the strategy used to
optimize the jump table. The previous approach loads the target address
from the jump table and compare it to check if it is a hot target. This
accomplishes branch misprediction reduction by promote the indirect jmp
to a (more predictable) direct jmp.
load %r10, JMPTABLE
cmp %r10, HOTTARGET
je HOTTARGET
ijmp [JMPTABLE + %index * scale]
The idea in this diff is instead to make dcache better by avoiding the
load of the jump table, leaving branch mispredictions as a secondary
target. To do this we compare the index used in the indirect jmp and if
it matches a known hot entry, it performs a direct jump to the target.
cmp %index, HOTINDEX
je CORRESPONDING_TARGET
ijmp [JMPTABLE + %index * scale]
The downside of this approach is that we may have multiple indices
associated with a single target, but we only have profiling to show
which targets are hot and we have no clue about which indices are hot.
INDEX TARGET
0 4004f8
8 4004f8
10 4003d0
18 4004f8
Profiling data:
TARGET COUNT
4004f8 10020
4003d0 17
In this example, we know 4004f8 is hot, but to make a direct call to it
we need to check for indices 0, 8 and 18 -- 3 comparisons instead of 1.
Therefore, once we know a target is hot, we must generate code to
compare against all possible indices associated with this target because
we don't know which index is the hot one (IF there's a hotter index).
cmp %index, 0
je 4004f8
cmp %index, 8
je 4004f8
cmp %index, 18
je 4004f8
(... up to N comparisons as in --indirect-call-promotion-topn=N )
ijmp [JMPTABLE + %index * scale]
(cherry picked from FBD5005620)
2017-05-01 14:04:40 -07:00
|
|
|
BC.MIA->setJumpTable(BC.Ctx.get(), Instruction, ArrayStart, IndexRegNum);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
JTSites.emplace_back(Offset, ArrayStart);
|
|
|
|
|
|
|
|
|
|
return Type;
|
|
|
|
|
}
|
|
|
|
|
BC.InterproceduralReferences.insert(Value);
|
|
|
|
|
return IndirectBranchType::POSSIBLE_TAIL_CALL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Return the label that marks \p Address inside this function, registering
// a new one in Labels when no label is known for that offset yet.
//
// Lookup order:
//   1. If \p Address is exactly one past the end of the function and
//      \p CreatePastEnd is set, the function end label is returned.
//   2. For any non-zero offset, a global symbol already registered at
//      \p Address is recorded in Labels and returned.
//   3. Otherwise the label previously stored for the offset is returned, or
//      a fresh temporary symbol is created, stored and returned.
MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
                                                bool CreatePastEnd) {
  // Labels are keyed by the offset from the function start.
  const auto Offset = Address - getAddress();

  if ((Offset == getSize()) && CreatePastEnd)
    return getFunctionEndLabel();

  // Reuse a global symbol registered at this address, if there is one, since
  // we want to keep the symbol value updated. Offset zero is excluded from
  // this lookup.
  if (Offset != 0)
    if (auto *GlobalSymbol = BC.getGlobalSymbolAtAddress(Address)) {
      Labels[Offset] = GlobalSymbol;
      return GlobalSymbol;
    }

  // Fall back to the per-function label map.
  const auto Known = Labels.find(Offset);
  if (Known != Labels.end())
    return Known->second;

  // Nothing registered yet: create a temporary symbol and remember it.
  MCSymbol *Fresh = BC.Ctx->createTempSymbol();
  Labels[Offset] = Fresh;
  return Fresh;
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
2015-10-09 17:21:14 -07:00
|
|
|
assert(FunctionData.size() == getSize() &&
|
|
|
|
|
"function size does not match raw data size");
|
|
|
|
|
|
|
|
|
|
auto &Ctx = BC.Ctx;
|
|
|
|
|
auto &MIA = BC.MIA;
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames());
|
2016-02-25 16:57:07 -08:00
|
|
|
|
2016-05-27 20:19:19 -07:00
|
|
|
DWARFUnitLineTable ULT = getDWARFUnitLineTable();
|
2015-10-09 17:21:14 -07:00
|
|
|
|
|
|
|
|
// Insert a label at the beginning of the function. This will be our first
|
|
|
|
|
// basic block.
|
|
|
|
|
Labels[0] = Ctx->createTempSymbol("BB0", false);
|
2016-09-29 11:19:06 -07:00
|
|
|
addEntryPointAtOffset(0);
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2015-11-02 09:46:50 -08:00
|
|
|
auto handleRIPOperand =
|
2016-08-07 12:35:23 -07:00
|
|
|
[&](MCInst &Instruction, uint64_t Address, uint64_t Size) {
|
|
|
|
|
uint64_t TargetAddress{0};
|
|
|
|
|
MCSymbol *TargetSymbol{nullptr};
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!MIA->evaluateMemOperandTarget(Instruction, TargetAddress, Address,
|
|
|
|
|
Size)) {
|
2016-09-27 19:09:38 -07:00
|
|
|
errs() << "BOLT-ERROR: rip-relative operand can't be evaluated:\n";
|
|
|
|
|
BC.InstPrinter->printInst(&Instruction, errs(), "", *BC.STI);
|
|
|
|
|
errs() << '\n';
|
|
|
|
|
Instruction.dump_pretty(errs(), BC.InstPrinter.get());
|
|
|
|
|
errs() << '\n';;
|
2016-08-07 12:35:23 -07:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (TargetAddress == 0) {
|
2016-09-02 14:15:29 -07:00
|
|
|
if (opts::Verbosity >= 1) {
|
2016-09-27 19:09:38 -07:00
|
|
|
outs() << "BOLT-INFO: rip-relative operand is zero in function "
|
|
|
|
|
<< *this << ".\n";
|
2016-09-02 14:15:29 -07:00
|
|
|
}
|
2016-08-07 12:35:23 -07:00
|
|
|
}
|
2016-08-22 14:24:09 -07:00
|
|
|
|
|
|
|
|
// Note that the address does not necessarily have to reside inside
|
|
|
|
|
// a section, it could be an absolute address too.
|
|
|
|
|
auto Section = BC.getSectionForAddress(TargetAddress);
|
|
|
|
|
if (Section && Section->isText()) {
|
|
|
|
|
if (containsAddress(TargetAddress)) {
|
2016-09-29 11:19:06 -07:00
|
|
|
if (TargetAddress != getAddress()) {
|
|
|
|
|
// The address could potentially escape. Mark it as another entry
|
|
|
|
|
// point into the function.
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: potentially escaped address 0x"
|
|
|
|
|
<< Twine::utohexstr(TargetAddress) << " in function "
|
|
|
|
|
<< *this << '\n');
|
2016-09-27 19:09:38 -07:00
|
|
|
TargetSymbol = getOrCreateLocalLabel(TargetAddress);
|
|
|
|
|
addEntryPointAtOffset(TargetAddress - getAddress());
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
2016-08-22 14:24:09 -07:00
|
|
|
} else {
|
|
|
|
|
BC.InterproceduralReferences.insert(TargetAddress);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!TargetSymbol)
|
|
|
|
|
TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "DATAat");
|
2016-09-27 19:09:38 -07:00
|
|
|
MIA->replaceMemOperandDisp(
|
2016-08-07 12:35:23 -07:00
|
|
|
Instruction, MCOperand::createExpr(MCSymbolRefExpr::create(
|
|
|
|
|
TargetSymbol, MCSymbolRefExpr::VK_None, *BC.Ctx)));
|
|
|
|
|
return true;
|
|
|
|
|
};
|
2015-11-02 09:46:50 -08:00
|
|
|
|
2017-03-03 11:35:41 -08:00
|
|
|
uint64_t Size = 0; // instruction size
|
|
|
|
|
for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
|
2015-10-09 17:21:14 -07:00
|
|
|
MCInst Instruction;
|
2016-09-27 19:09:38 -07:00
|
|
|
const uint64_t AbsoluteInstrAddr = getAddress() + Offset;
|
2016-02-25 16:57:07 -08:00
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
if (!BC.DisAsm->getInstruction(Instruction,
|
|
|
|
|
Size,
|
|
|
|
|
FunctionData.slice(Offset),
|
2016-02-25 16:57:07 -08:00
|
|
|
AbsoluteInstrAddr,
|
2015-10-09 17:21:14 -07:00
|
|
|
nulls(),
|
|
|
|
|
nulls())) {
|
2017-02-08 09:14:10 -08:00
|
|
|
// Functions with "soft" boundaries, e.g. coming from assembly source,
|
|
|
|
|
// can have 0-byte padding at the end.
|
|
|
|
|
bool IsZeroPadding = true;
|
|
|
|
|
for (auto I = Offset; I < getSize(); ++I) {
|
|
|
|
|
if (FunctionData[I] != 0) {
|
|
|
|
|
IsZeroPadding = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!IsZeroPadding) {
|
|
|
|
|
// Ignore this function. Skip to the next one in non-relocs mode.
|
|
|
|
|
errs() << "BOLT-ERROR: unable to disassemble instruction at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(Offset) << " (address 0x"
|
|
|
|
|
<< Twine::utohexstr(AbsoluteInstrAddr) << ") in function "
|
|
|
|
|
<< *this << '\n';
|
|
|
|
|
IsSimple = false;
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Cannot process functions with AVX-512 instructions.
|
2016-10-16 18:56:56 -07:00
|
|
|
if (MIA->hasEVEXEncoding(Instruction)) {
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
errs() << "BOLT-WARNING: function " << *this << " uses instruction"
|
|
|
|
|
" encoded with EVEX (AVX-512) at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(Offset) << ". Disassembly could be wrong."
|
|
|
|
|
" Skipping further processing.\n";
|
|
|
|
|
}
|
|
|
|
|
IsSimple = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Check if there's a relocation associated with this instruction.
|
|
|
|
|
if (!Relocations.empty()) {
|
|
|
|
|
auto RI = Relocations.lower_bound(Offset);
|
|
|
|
|
if (RI != Relocations.end() && RI->first < Offset + Size) {
|
|
|
|
|
const auto &Relocation = RI->second;
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: replacing immediate with relocation"
|
|
|
|
|
" against " << Relocation.Symbol->getName()
|
|
|
|
|
<< " in function " << *this
|
|
|
|
|
<< " for instruction at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(Offset) << '\n');
|
|
|
|
|
int64_t Value;
|
|
|
|
|
const auto Result =
|
|
|
|
|
BC.MIA->replaceImmWithSymbol(Instruction, Relocation.Symbol,
|
|
|
|
|
Relocation.Addend, BC.Ctx.get(), Value);
|
2017-05-25 10:29:38 -07:00
|
|
|
(void)Result;
|
2016-09-27 19:09:38 -07:00
|
|
|
assert(Result && "cannot replace immediate with relocation");
|
|
|
|
|
|
|
|
|
|
// Make sure we replaced the correct immediate (instruction
|
|
|
|
|
// can have multiple immediate operands).
|
|
|
|
|
assert(static_cast<uint64_t>(Value) == Relocation.Value &&
|
|
|
|
|
"immediate value mismatch in function");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-19 11:19:18 -07:00
|
|
|
// Convert instruction to a shorter version that could be relaxed if needed.
|
|
|
|
|
MIA->shortenInstruction(Instruction);
|
|
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
if (MIA->isBranch(Instruction) || MIA->isCall(Instruction)) {
|
2016-08-22 14:24:09 -07:00
|
|
|
uint64_t TargetAddress = 0;
|
2015-10-09 17:21:14 -07:00
|
|
|
if (MIA->evaluateBranch(Instruction,
|
|
|
|
|
AbsoluteInstrAddr,
|
|
|
|
|
Size,
|
2016-08-22 14:24:09 -07:00
|
|
|
TargetAddress)) {
|
2015-10-09 17:21:14 -07:00
|
|
|
// Check if the target is within the same function. Otherwise it's
|
|
|
|
|
// a call, possibly a tail call.
|
|
|
|
|
//
|
|
|
|
|
// If the target *is* the function address it could be either a branch
|
|
|
|
|
// or a recursive call.
|
|
|
|
|
bool IsCall = MIA->isCall(Instruction);
|
2016-09-27 19:09:38 -07:00
|
|
|
const bool IsCondBranch = MIA->isConditionalBranch(Instruction);
|
2015-10-09 17:21:14 -07:00
|
|
|
MCSymbol *TargetSymbol{nullptr};
|
|
|
|
|
|
2016-08-22 14:24:09 -07:00
|
|
|
if (IsCall && containsAddress(TargetAddress)) {
|
|
|
|
|
if (TargetAddress == getAddress()) {
|
2015-10-09 17:21:14 -07:00
|
|
|
// Recursive call.
|
2016-08-07 12:35:23 -07:00
|
|
|
TargetSymbol = getSymbol();
|
2015-10-09 17:21:14 -07:00
|
|
|
} else {
|
|
|
|
|
// Possibly an old-style PIC code
|
2017-02-21 14:18:09 -08:00
|
|
|
errs() << "BOLT-WARNING: internal call detected at 0x"
|
2016-09-27 19:09:38 -07:00
|
|
|
<< Twine::utohexstr(AbsoluteInstrAddr)
|
|
|
|
|
<< " in function " << *this << ". Skipping.\n";
|
2015-10-09 17:21:14 -07:00
|
|
|
IsSimple = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!TargetSymbol) {
|
|
|
|
|
// Create either local label or external symbol.
|
2016-08-22 14:24:09 -07:00
|
|
|
if (containsAddress(TargetAddress)) {
|
|
|
|
|
TargetSymbol = getOrCreateLocalLabel(TargetAddress);
|
2015-10-09 17:21:14 -07:00
|
|
|
} else {
|
2017-03-03 11:35:41 -08:00
|
|
|
if (TargetAddress == getAddress() + getSize() &&
|
|
|
|
|
TargetAddress < getAddress() + getMaxSize()) {
|
|
|
|
|
// Result of __builtin_unreachable().
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x"
|
|
|
|
|
<< Twine::utohexstr(AbsoluteInstrAddr)
|
|
|
|
|
<< " in function " << *this
|
|
|
|
|
<< " : replacing with nop.\n");
|
|
|
|
|
BC.MIA->createNoop(Instruction);
|
|
|
|
|
if (IsCondBranch) {
|
2017-04-18 23:32:11 -07:00
|
|
|
// Register branch function profile validation.
|
|
|
|
|
IgnoredBranches.emplace_back(Offset, Offset + Size);
|
2017-03-03 11:35:41 -08:00
|
|
|
}
|
|
|
|
|
goto add_instruction;
|
|
|
|
|
}
|
2016-08-22 14:24:09 -07:00
|
|
|
BC.InterproceduralReferences.insert(TargetAddress);
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !opts::Relocs) {
|
2016-02-05 14:42:04 -08:00
|
|
|
errs() << "BOLT-WARNING: relaxed tail call detected at 0x"
|
2015-10-20 10:51:17 -07:00
|
|
|
<< Twine::utohexstr(AbsoluteInstrAddr)
|
2016-08-22 14:24:09 -07:00
|
|
|
<< " in function " << *this
|
2015-10-20 10:51:17 -07:00
|
|
|
<< ". Code size will be increased.\n";
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
assert(!MIA->isTailCall(Instruction) &&
|
|
|
|
|
"synthetic tail call instruction found");
|
|
|
|
|
|
2015-11-12 18:56:58 -08:00
|
|
|
// This is a call regardless of the opcode.
|
|
|
|
|
// Assign proper opcode for tail calls, so that they could be
|
|
|
|
|
// treated as calls.
|
|
|
|
|
if (!IsCall) {
|
2016-09-06 13:19:26 -07:00
|
|
|
if (!MIA->convertJmpToTailCall(Instruction) &&
|
|
|
|
|
opts::Verbosity >= 2) {
|
2016-07-13 18:57:40 -07:00
|
|
|
assert(IsCondBranch && "unknown tail call instruction");
|
|
|
|
|
errs() << "BOLT-WARNING: conditional tail call detected in "
|
2016-08-07 12:35:23 -07:00
|
|
|
<< "function " << *this << " at 0x"
|
2016-07-13 18:57:40 -07:00
|
|
|
<< Twine::utohexstr(AbsoluteInstrAddr) << ".\n";
|
|
|
|
|
}
|
|
|
|
|
// TODO: A better way to do this would be using annotations for
|
|
|
|
|
// MCInst objects.
|
2016-08-07 12:35:23 -07:00
|
|
|
TailCallOffsets.emplace(std::make_pair(Offset,
|
2016-08-22 14:24:09 -07:00
|
|
|
TargetAddress));
|
2015-11-12 18:56:58 -08:00
|
|
|
IsCall = true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-08-22 14:24:09 -07:00
|
|
|
TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress,
|
2015-10-14 15:35:14 -07:00
|
|
|
"FUNCat");
|
2016-08-22 14:24:09 -07:00
|
|
|
if (TargetAddress == 0) {
|
2016-09-27 19:09:38 -07:00
|
|
|
// We actually see calls to address 0 in presence of weak symbols
|
|
|
|
|
// originating from libraries. This code is never meant to be
|
|
|
|
|
// executed.
|
|
|
|
|
if (opts::Verbosity >= 2) {
|
|
|
|
|
outs() << "BOLT-INFO: Function " << *this
|
|
|
|
|
<< " has a call to address zero.\n";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (opts::Relocs) {
|
|
|
|
|
// Check if we need to create relocation to move this function's
|
|
|
|
|
// code without re-assembly.
|
|
|
|
|
size_t RelSize = (Size < 5) ? 1 : 4;
|
|
|
|
|
auto RelOffset = Offset + Size - RelSize;
|
|
|
|
|
auto RI = MoveRelocations.find(RelOffset);
|
|
|
|
|
if (RI == MoveRelocations.end()) {
|
|
|
|
|
uint64_t RelType = (RelSize == 1) ? ELF::R_X86_64_PC8
|
|
|
|
|
: ELF::R_X86_64_PC32;
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: creating relocation for static"
|
|
|
|
|
<< " function call to " << TargetSymbol->getName()
|
|
|
|
|
<< " at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(RelOffset)
|
|
|
|
|
<< " with size " << RelSize
|
|
|
|
|
<< " for function " << *this << '\n');
|
|
|
|
|
addRelocation(getAddress() + RelOffset, TargetSymbol, RelType,
|
|
|
|
|
-RelSize, 0);
|
|
|
|
|
}
|
|
|
|
|
auto OI = PCRelativeRelocationOffsets.find(RelOffset);
|
|
|
|
|
if (OI != PCRelativeRelocationOffsets.end()) {
|
|
|
|
|
PCRelativeRelocationOffsets.erase(OI);
|
2016-09-02 14:15:29 -07:00
|
|
|
}
|
2015-11-02 09:46:50 -08:00
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!IsCall) {
|
2016-06-15 18:36:16 -07:00
|
|
|
// Add taken branch info.
|
2016-09-14 16:45:40 -07:00
|
|
|
TakenBranches.emplace_back(Offset, TargetAddress - getAddress());
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
2016-05-24 09:26:25 -07:00
|
|
|
if (IsCondBranch) {
|
2015-10-26 15:00:56 -07:00
|
|
|
// Add fallthrough branch info.
|
2016-09-14 16:45:40 -07:00
|
|
|
FTBranches.emplace_back(Offset, Offset + Size);
|
2015-10-26 15:00:56 -07:00
|
|
|
}
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
|
|
|
|
|
const bool isIndirect =
|
|
|
|
|
((IsCall || !IsCondBranch) && MIA->isIndirectBranch(Instruction));
|
|
|
|
|
|
|
|
|
|
Instruction.clear();
|
|
|
|
|
Instruction.addOperand(
|
|
|
|
|
MCOperand::createExpr(
|
|
|
|
|
MCSymbolRefExpr::create(TargetSymbol,
|
|
|
|
|
MCSymbolRefExpr::VK_None,
|
|
|
|
|
*Ctx)));
|
|
|
|
|
|
2017-03-03 11:35:41 -08:00
|
|
|
if (BranchDataOrErr) {
|
|
|
|
|
if (IsCall) {
|
|
|
|
|
MIA->addAnnotation(Ctx.get(), Instruction, "EdgeCountData", Offset);
|
|
|
|
|
}
|
|
|
|
|
if (isIndirect) {
|
|
|
|
|
MIA->addAnnotation(Ctx.get(), Instruction, "IndirectBranchData",
|
|
|
|
|
Offset);
|
|
|
|
|
}
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
} else {
|
2016-08-22 14:24:09 -07:00
|
|
|
// Could not evaluate branch. Should be an indirect call or an
|
|
|
|
|
// indirect branch. Bail out on the latter case.
|
2017-03-03 11:35:41 -08:00
|
|
|
bool MaybeEdgeCountData = false;
|
2015-11-02 09:46:50 -08:00
|
|
|
if (MIA->isIndirectBranch(Instruction)) {
|
2016-08-22 14:24:09 -07:00
|
|
|
auto Result = analyzeIndirectBranch(Instruction, Size, Offset);
|
|
|
|
|
switch (Result) {
|
|
|
|
|
default:
|
|
|
|
|
llvm_unreachable("unexpected result");
|
|
|
|
|
case IndirectBranchType::POSSIBLE_TAIL_CALL:
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
{
|
|
|
|
|
auto Result = MIA->convertJmpToTailCall(Instruction);
|
2017-05-25 10:29:38 -07:00
|
|
|
(void)Result;
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
assert(Result);
|
|
|
|
|
if (BranchDataOrErr) {
|
|
|
|
|
MIA->addAnnotation(Ctx.get(), Instruction, "IndirectBranchData",
|
|
|
|
|
Offset);
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-08-22 14:24:09 -07:00
|
|
|
break;
|
2016-09-14 16:45:40 -07:00
|
|
|
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
|
2016-09-27 19:09:38 -07:00
|
|
|
case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
|
2016-09-16 15:54:32 -07:00
|
|
|
if (opts::JumpTables == JTS_NONE)
|
2016-09-14 16:45:40 -07:00
|
|
|
IsSimple = false;
|
2017-03-03 11:35:41 -08:00
|
|
|
MaybeEdgeCountData = true;
|
2016-08-22 14:24:09 -07:00
|
|
|
break;
|
|
|
|
|
case IndirectBranchType::UNKNOWN:
|
|
|
|
|
// Keep processing. We'll do more checks and fixes in
|
|
|
|
|
// postProcessIndirectBranches().
|
2017-03-03 11:35:41 -08:00
|
|
|
MaybeEdgeCountData = true;
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
if (BranchDataOrErr) {
|
|
|
|
|
MIA->addAnnotation(Ctx.get(),
|
|
|
|
|
Instruction,
|
|
|
|
|
"MaybeIndirectBranchData",
|
|
|
|
|
Offset);
|
|
|
|
|
}
|
2016-08-22 14:24:09 -07:00
|
|
|
break;
|
|
|
|
|
};
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
} else if (MIA->isCall(Instruction)) {
|
|
|
|
|
if (BranchDataOrErr) {
|
|
|
|
|
MIA->addAnnotation(Ctx.get(), Instruction, "IndirectBranchData",
|
|
|
|
|
Offset);
|
|
|
|
|
}
|
2015-11-02 09:46:50 -08:00
|
|
|
}
|
2017-03-03 11:35:41 -08:00
|
|
|
if (BranchDataOrErr) {
|
|
|
|
|
const char* AttrName =
|
|
|
|
|
MaybeEdgeCountData ? "MaybeEdgeCountData" : "EdgeCountData";
|
|
|
|
|
MIA->addAnnotation(Ctx.get(), Instruction, AttrName, Offset);
|
|
|
|
|
}
|
2015-11-02 09:46:50 -08:00
|
|
|
// Indirect call. We only need to fix it if the operand is RIP-relative
|
2016-08-22 14:24:09 -07:00
|
|
|
if (IsSimple && MIA->hasRIPOperand(Instruction)) {
|
2015-11-02 09:46:50 -08:00
|
|
|
if (!handleRIPOperand(Instruction, AbsoluteInstrAddr, Size)) {
|
2016-09-27 19:09:38 -07:00
|
|
|
errs() << "BOLT-ERROR: cannot handle RIP operand at 0x"
|
|
|
|
|
<< Twine::utohexstr(AbsoluteInstrAddr)
|
|
|
|
|
<< ". Skipping function " << *this << ".\n";
|
2015-11-02 09:46:50 -08:00
|
|
|
IsSimple = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
if (MIA->hasRIPOperand(Instruction)) {
|
2015-11-02 09:46:50 -08:00
|
|
|
if (!handleRIPOperand(Instruction, AbsoluteInstrAddr, Size)) {
|
2016-09-27 19:09:38 -07:00
|
|
|
errs() << "BOLT-ERROR: cannot handle RIP operand at 0x"
|
|
|
|
|
<< Twine::utohexstr(AbsoluteInstrAddr)
|
|
|
|
|
<< ". Skipping function " << *this << ".\n";
|
2015-10-09 21:47:18 -07:00
|
|
|
IsSimple = false;
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-03 11:35:41 -08:00
|
|
|
add_instruction:
|
2016-05-27 20:19:19 -07:00
|
|
|
if (ULT.first && ULT.second) {
|
2016-02-25 16:57:07 -08:00
|
|
|
Instruction.setLoc(
|
2016-05-27 20:19:19 -07:00
|
|
|
findDebugLineInformationForInstructionAt(AbsoluteInstrAddr, ULT));
|
2016-02-25 16:57:07 -08:00
|
|
|
}
|
|
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
addInstruction(Offset, std::move(Instruction));
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
postProcessJumpTables();
|
|
|
|
|
|
|
|
|
|
// Update state.
|
|
|
|
|
updateState(State::Disassembled);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryFunction::postProcessJumpTables() {
|
|
|
|
|
// Create labels for all entries.
|
|
|
|
|
for (auto &JTI : JumpTables) {
|
|
|
|
|
auto &JT = JTI.second;
|
|
|
|
|
for (auto Offset : JT.OffsetEntries) {
|
|
|
|
|
auto *Label = getOrCreateLocalLabel(getAddress() + Offset,
|
|
|
|
|
/*CreatePastEnd*/ true);
|
|
|
|
|
JT.Entries.push_back(Label);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add TakenBranches from JumpTables.
|
2016-09-16 15:54:32 -07:00
|
|
|
//
|
2016-09-27 19:09:38 -07:00
|
|
|
// We want to do it after initial processing since we don't know jump tables'
|
2016-09-16 15:54:32 -07:00
|
|
|
// boundaries until we process them all.
|
|
|
|
|
for (auto &JTSite : JTSites) {
|
2016-09-27 19:09:38 -07:00
|
|
|
const auto JTSiteOffset = JTSite.first;
|
|
|
|
|
const auto JTAddress = JTSite.second;
|
|
|
|
|
const auto *JT = getJumpTableContainingAddress(JTAddress);
|
2016-09-16 15:54:32 -07:00
|
|
|
assert(JT && "cannot find jump table for address");
|
2016-09-27 19:09:38 -07:00
|
|
|
auto EntryOffset = JTAddress - JT->Address;
|
|
|
|
|
while (EntryOffset < JT->getSize()) {
|
|
|
|
|
auto TargetOffset = JT->OffsetEntries[EntryOffset / JT->EntrySize];
|
2016-09-16 15:54:32 -07:00
|
|
|
if (TargetOffset < getSize())
|
|
|
|
|
TakenBranches.emplace_back(JTSiteOffset, TargetOffset);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-02-21 16:15:15 -08:00
|
|
|
// Take ownership of jump table relocations.
|
2017-01-17 15:49:59 -08:00
|
|
|
if (opts::Relocs)
|
2017-02-21 16:15:15 -08:00
|
|
|
BC.removeRelocationAt(JT->Address + EntryOffset);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
EntryOffset += JT->EntrySize;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2016-09-16 15:54:32 -07:00
|
|
|
// A label at the next entry means the end of this jump table.
|
2016-09-27 19:09:38 -07:00
|
|
|
if (JT->Labels.count(EntryOffset))
|
2016-09-16 15:54:32 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Free memory used by jump table offsets.
|
|
|
|
|
for (auto &JTI : JumpTables) {
|
|
|
|
|
auto &JT = JTI.second;
|
|
|
|
|
clearList(JT.OffsetEntries);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Remove duplicates branches. We can get a bunch of them from jump tables.
|
|
|
|
|
// Without doing jump table value profiling we don't have use for extra
|
|
|
|
|
// (duplicate) branches.
|
|
|
|
|
std::sort(TakenBranches.begin(), TakenBranches.end());
|
|
|
|
|
auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
|
|
|
|
|
TakenBranches.erase(NewEnd, TakenBranches.end());
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
|
2016-08-22 14:24:09 -07:00
|
|
|
bool BinaryFunction::postProcessIndirectBranches() {
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When an candiate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
requency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames());
|
|
|
|
|
|
2016-08-22 14:24:09 -07:00
|
|
|
for (auto *BB : layout()) {
|
|
|
|
|
for (auto &Instr : *BB) {
|
|
|
|
|
if (!BC.MIA->isIndirectBranch(Instr))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// If there's an indirect branch in a single-block function -
|
|
|
|
|
// it must be a tail call.
|
|
|
|
|
if (layout_size() == 1) {
|
|
|
|
|
BC.MIA->convertJmpToTailCall(Instr);
|
2017-03-03 11:35:41 -08:00
|
|
|
BC.MIA->renameAnnotation(Instr,
|
|
|
|
|
"MaybeEdgeCountData",
|
|
|
|
|
"EdgeCountData");
|
|
|
|
|
BC.MIA->renameAnnotation(Instr,
|
|
|
|
|
"MaybeIndirectBranchData",
|
|
|
|
|
"IndirectBranchData");
|
2016-08-22 14:24:09 -07:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Validate the tail call or jump table assumptions.
|
2016-09-16 15:54:32 -07:00
|
|
|
if (BC.MIA->isTailCall(Instr) || BC.MIA->getJumpTable(Instr)) {
|
2016-09-14 16:45:40 -07:00
|
|
|
if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
|
|
|
|
|
// We have validated memory contents addressed by the jump
|
|
|
|
|
// instruction already.
|
2016-08-22 14:24:09 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// This is jump on register. Just make sure the register is defined
|
|
|
|
|
// in the containing basic block. Other assumptions were checked
|
|
|
|
|
// earlier.
|
|
|
|
|
assert(Instr.getOperand(0).isReg() && "register operand expected");
|
2016-09-27 19:09:38 -07:00
|
|
|
const auto R1 = Instr.getOperand(0).getReg();
|
|
|
|
|
auto PrevInstr = BB->rbegin();
|
|
|
|
|
while (PrevInstr != BB->rend()) {
|
|
|
|
|
const auto &PrevInstrDesc = BC.MII->get(PrevInstr->getOpcode());
|
|
|
|
|
if (PrevInstrDesc.hasDefOfPhysReg(*PrevInstr, R1, *BC.MRI)) {
|
2016-08-22 14:24:09 -07:00
|
|
|
break;
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
++PrevInstr;
|
2016-08-22 14:24:09 -07:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
if (PrevInstr == BB->rend()) {
|
|
|
|
|
if (opts::Verbosity >= 2) {
|
|
|
|
|
outs() << "BOLT-INFO: rejected potential "
|
|
|
|
|
<< (BC.MIA->isTailCall(Instr) ? "indirect tail call"
|
|
|
|
|
: "jump table")
|
|
|
|
|
<< " in function " << *this
|
|
|
|
|
<< " because the jump-on register was not defined in "
|
|
|
|
|
<< " basic block " << BB->getName() << ".\n";
|
|
|
|
|
DEBUG(dbgs() << BC.printInstructions(dbgs(), BB->begin(), BB->end(),
|
|
|
|
|
BB->getOffset(), this, true));
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
// In case of PIC jump table we need to do more checks.
|
|
|
|
|
if (BC.MIA->isMoveMem2Reg(*PrevInstr))
|
2016-08-22 14:24:09 -07:00
|
|
|
continue;
|
2016-09-27 19:09:38 -07:00
|
|
|
assert(BC.MIA->isADD64rr(*PrevInstr) && "add instruction expected");
|
|
|
|
|
auto R2 = PrevInstr->getOperand(2).getReg();
|
|
|
|
|
// Make sure both regs are set in the same basic block prior to ADD.
|
|
|
|
|
bool IsR1Set = false;
|
|
|
|
|
bool IsR2Set = false;
|
|
|
|
|
while ((++PrevInstr != BB->rend()) && !(IsR1Set && IsR2Set)) {
|
|
|
|
|
const auto &PrevInstrDesc = BC.MII->get(PrevInstr->getOpcode());
|
|
|
|
|
if (PrevInstrDesc.hasDefOfPhysReg(*PrevInstr, R1, *BC.MRI))
|
|
|
|
|
IsR1Set = true;
|
|
|
|
|
else if (PrevInstrDesc.hasDefOfPhysReg(*PrevInstr, R2, *BC.MRI))
|
|
|
|
|
IsR2Set = true;
|
2016-09-14 16:45:40 -07:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
if (!IsR1Set || !IsR2Set)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
continue;
|
2016-08-22 14:24:09 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If this block contains an epilogue code and has an indirect branch,
|
|
|
|
|
// then most likely it's a tail call. Otherwise, we cannot tell for sure
|
|
|
|
|
// what it is and conservatively reject the function's CFG.
|
|
|
|
|
bool IsEpilogue = false;
|
|
|
|
|
for (const auto &Instr : *BB) {
|
|
|
|
|
if (BC.MIA->isLeave(Instr) || BC.MIA->isPop(Instr)) {
|
|
|
|
|
IsEpilogue = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!IsEpilogue) {
|
2016-09-14 16:45:40 -07:00
|
|
|
if (opts::Verbosity >= 2) {
|
|
|
|
|
outs() << "BOLT-INFO: rejected potential indirect tail call in "
|
|
|
|
|
<< "function " << *this << " in basic block "
|
|
|
|
|
<< BB->getName() << ".\n";
|
|
|
|
|
DEBUG(BC.printInstructions(dbgs(), BB->begin(), BB->end(),
|
2016-09-27 19:09:38 -07:00
|
|
|
BB->getOffset(), this, true));
|
2016-09-14 16:45:40 -07:00
|
|
|
}
|
2016-08-22 14:24:09 -07:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
BC.MIA->convertJmpToTailCall(Instr);
|
2017-03-03 11:35:41 -08:00
|
|
|
BC.MIA->renameAnnotation(Instr,
|
|
|
|
|
"MaybeEdgeCountData",
|
|
|
|
|
"EdgeCountData");
|
|
|
|
|
BC.MIA->renameAnnotation(Instr,
|
|
|
|
|
"MaybeIndirectBranchData",
|
|
|
|
|
"IndirectBranchData");
|
2016-08-22 14:24:09 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-23 12:50:34 -07:00
|
|
|
void BinaryFunction::clearLandingPads(const unsigned StartIndex,
|
|
|
|
|
const unsigned NumBlocks) {
|
|
|
|
|
// remove all landing pads/throws for the given collection of blocks
|
|
|
|
|
for (auto I = StartIndex; I < StartIndex + NumBlocks; ++I) {
|
2016-09-13 17:12:00 -07:00
|
|
|
BasicBlocks[I]->clearLandingPads();
|
2016-07-23 12:50:34 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryFunction::addLandingPads(const unsigned StartIndex,
|
|
|
|
|
const unsigned NumBlocks) {
|
2016-07-29 14:17:06 -07:00
|
|
|
for (auto *BB : BasicBlocks) {
|
2016-07-23 12:50:34 -07:00
|
|
|
if (LandingPads.find(BB->getLabel()) != LandingPads.end()) {
|
2016-08-07 12:35:23 -07:00
|
|
|
const MCSymbol *LP = BB->getLabel();
|
2016-07-29 14:17:06 -07:00
|
|
|
for (unsigned I : LPToBBIndex[LP]) {
|
2016-07-23 12:50:34 -07:00
|
|
|
assert(I < BasicBlocks.size());
|
|
|
|
|
BinaryBasicBlock *ThrowBB = BasicBlocks[I];
|
2016-07-29 14:17:06 -07:00
|
|
|
const unsigned ThrowBBIndex = getIndex(ThrowBB);
|
|
|
|
|
if (ThrowBBIndex >= StartIndex && ThrowBBIndex < StartIndex + NumBlocks)
|
|
|
|
|
ThrowBB->addLandingPad(BB);
|
2016-07-23 12:50:34 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryFunction::recomputeLandingPads(const unsigned StartIndex,
|
|
|
|
|
const unsigned NumBlocks) {
|
|
|
|
|
assert(LPToBBIndex.empty());
|
|
|
|
|
|
|
|
|
|
clearLandingPads(StartIndex, NumBlocks);
|
2016-07-13 18:57:40 -07:00
|
|
|
|
2016-07-23 12:50:34 -07:00
|
|
|
for (auto I = StartIndex; I < StartIndex + NumBlocks; ++I) {
|
|
|
|
|
auto *BB = BasicBlocks[I];
|
2016-09-13 17:12:00 -07:00
|
|
|
for (auto &Instr : BB->instructions()) {
|
2016-07-23 12:50:34 -07:00
|
|
|
// Store info about associated landing pad.
|
|
|
|
|
if (BC.MIA->isInvoke(Instr)) {
|
|
|
|
|
const MCSymbol *LP;
|
|
|
|
|
uint64_t Action;
|
|
|
|
|
std::tie(LP, Action) = BC.MIA->getEHInfo(Instr);
|
|
|
|
|
if (LP) {
|
2016-09-13 17:12:00 -07:00
|
|
|
LPToBBIndex[LP].push_back(getIndex(BB));
|
2016-07-23 12:50:34 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
addLandingPads(StartIndex, NumBlocks);
|
|
|
|
|
|
|
|
|
|
clearList(LPToBBIndex);
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2016-07-23 12:50:34 -07:00
|
|
|
bool BinaryFunction::buildCFG() {
|
2015-10-09 17:21:14 -07:00
|
|
|
auto &MIA = BC.MIA;
|
|
|
|
|
|
2016-06-10 17:13:05 -07:00
|
|
|
auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames());
|
2016-06-15 18:36:16 -07:00
|
|
|
if (!BranchDataOrErr) {
|
2016-08-07 12:35:23 -07:00
|
|
|
DEBUG(dbgs() << "no branch data found for \"" << *this << "\"\n");
|
2015-10-12 12:30:47 -07:00
|
|
|
} else {
|
2016-06-15 18:36:16 -07:00
|
|
|
ExecutionCount = BranchDataOrErr->ExecutionCount;
|
2015-10-12 12:30:47 -07:00
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!isSimple()) {
|
|
|
|
|
assert(!opts::Relocs &&
|
|
|
|
|
"cannot process file with non-simple function in relocs mode");
|
2015-10-09 17:21:14 -07:00
|
|
|
return false;
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
|
|
|
|
|
if (!(CurrentState == State::Disassembled))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
assert(BasicBlocks.empty() && "basic block list should be empty");
|
|
|
|
|
assert((Labels.find(0) != Labels.end()) &&
|
|
|
|
|
"first instruction should always have a label");
|
|
|
|
|
|
|
|
|
|
// Create basic blocks in the original layout order:
|
|
|
|
|
//
|
|
|
|
|
// * Every instruction with associated label marks
|
|
|
|
|
// the beginning of a basic block.
|
|
|
|
|
// * Conditional instruction marks the end of a basic block,
|
|
|
|
|
// except when the following instruction is an
|
|
|
|
|
// unconditional branch, and the unconditional branch is not
|
|
|
|
|
// a destination of another branch. In the latter case, the
|
|
|
|
|
// basic block will consist of a single unconditional branch
|
|
|
|
|
// (missed optimization opportunity?).
|
|
|
|
|
//
|
|
|
|
|
// Created basic blocks are sorted in layout order since they are
|
|
|
|
|
// created in the same order as instructions, and instructions are
|
|
|
|
|
// sorted by offsets.
|
|
|
|
|
BinaryBasicBlock *InsertBB{nullptr};
|
|
|
|
|
BinaryBasicBlock *PrevBB{nullptr};
|
2016-07-13 18:57:40 -07:00
|
|
|
bool IsLastInstrNop{false};
|
|
|
|
|
bool IsPreviousInstrTailCall{false};
|
|
|
|
|
const MCInst *PrevInstr{nullptr};
|
2015-11-08 12:23:54 -08:00
|
|
|
|
|
|
|
|
auto addCFIPlaceholders =
|
|
|
|
|
[this](uint64_t CFIOffset, BinaryBasicBlock *InsertBB) {
|
|
|
|
|
for (auto FI = OffsetToCFI.lower_bound(CFIOffset),
|
|
|
|
|
FE = OffsetToCFI.upper_bound(CFIOffset);
|
|
|
|
|
FI != FE; ++FI) {
|
|
|
|
|
addCFIPseudo(InsertBB, InsertBB->end(), FI->second);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
|
2016-07-13 18:57:40 -07:00
|
|
|
const uint32_t Offset = I->first;
|
|
|
|
|
const auto &Instr = I->second;
|
|
|
|
|
|
|
|
|
|
auto LI = Labels.find(Offset);
|
2015-10-09 17:21:14 -07:00
|
|
|
if (LI != Labels.end()) {
|
|
|
|
|
// Always create new BB at branch destination.
|
|
|
|
|
PrevBB = InsertBB;
|
2015-10-20 10:51:17 -07:00
|
|
|
InsertBB = addBasicBlock(LI->first, LI->second,
|
|
|
|
|
/* DeriveAlignment = */ IsLastInstrNop);
|
2016-09-29 11:19:06 -07:00
|
|
|
if (hasEntryPointAtOffset(Offset))
|
|
|
|
|
InsertBB->setEntryPoint();
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
2016-01-19 00:20:06 -08:00
|
|
|
// Ignore nops. We use nops to derive alignment of the next basic block.
|
|
|
|
|
// It will not always work, as some blocks are naturally aligned, but
|
|
|
|
|
// it's just part of heuristic for block alignment.
|
2016-07-13 18:57:40 -07:00
|
|
|
if (MIA->isNoop(Instr)) {
|
2016-01-19 00:20:06 -08:00
|
|
|
IsLastInstrNop = true;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
if (!InsertBB) {
|
2015-10-20 10:17:38 -07:00
|
|
|
// It must be a fallthrough or unreachable code. Create a new block unless
|
|
|
|
|
// we see an unconditional branch following a conditional one.
|
2015-10-09 17:21:14 -07:00
|
|
|
assert(PrevBB && "no previous basic block for a fall through");
|
2015-10-20 10:17:38 -07:00
|
|
|
assert(PrevInstr && "no previous instruction for a fall through");
|
2016-07-13 18:57:40 -07:00
|
|
|
if (MIA->isUnconditionalBranch(Instr) &&
|
|
|
|
|
!MIA->isUnconditionalBranch(*PrevInstr) && !IsPreviousInstrTailCall) {
|
2015-10-09 17:21:14 -07:00
|
|
|
// Temporarily restore inserter basic block.
|
|
|
|
|
InsertBB = PrevBB;
|
|
|
|
|
} else {
|
2016-07-13 18:57:40 -07:00
|
|
|
InsertBB = addBasicBlock(Offset,
|
2015-10-20 10:51:17 -07:00
|
|
|
BC.Ctx->createTempSymbol("FT", true),
|
|
|
|
|
/* DeriveAlignment = */ IsLastInstrNop);
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
}
|
2016-07-13 18:57:40 -07:00
|
|
|
if (Offset == 0) {
|
2015-11-08 12:23:54 -08:00
|
|
|
// Add associated CFI pseudos in the first offset (0)
|
|
|
|
|
addCFIPlaceholders(0, InsertBB);
|
|
|
|
|
}
|
2015-10-20 10:51:17 -07:00
|
|
|
|
|
|
|
|
IsLastInstrNop = false;
|
2016-07-13 18:57:40 -07:00
|
|
|
uint32_t InsertIndex = InsertBB->addInstruction(Instr);
|
|
|
|
|
PrevInstr = &Instr;
|
|
|
|
|
|
|
|
|
|
// Record whether this basic block is terminated with a tail call.
|
|
|
|
|
auto TCI = TailCallOffsets.find(Offset);
|
|
|
|
|
if (TCI != TailCallOffsets.end()) {
|
|
|
|
|
uint64_t TargetAddr = TCI->second;
|
|
|
|
|
TailCallTerminatedBlocks.emplace(
|
|
|
|
|
std::make_pair(InsertBB,
|
|
|
|
|
TailCallInfo(Offset, InsertIndex, TargetAddr)));
|
|
|
|
|
IsPreviousInstrTailCall = true;
|
|
|
|
|
} else {
|
|
|
|
|
IsPreviousInstrTailCall = false;
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-08 12:23:54 -08:00
|
|
|
// Add associated CFI instrs. We always add the CFI instruction that is
|
|
|
|
|
// located immediately after this instruction, since the next CFI
|
|
|
|
|
// instruction reflects the change in state caused by this instruction.
|
2016-08-29 21:11:22 -07:00
|
|
|
auto NextInstr = std::next(I);
|
2015-11-08 12:23:54 -08:00
|
|
|
uint64_t CFIOffset;
|
|
|
|
|
if (NextInstr != E)
|
|
|
|
|
CFIOffset = NextInstr->first;
|
|
|
|
|
else
|
|
|
|
|
CFIOffset = getSize();
|
|
|
|
|
addCFIPlaceholders(CFIOffset, InsertBB);
|
2015-10-09 17:21:14 -07:00
|
|
|
|
2016-05-24 09:26:25 -07:00
|
|
|
// Store info about associated landing pad.
|
2016-07-13 18:57:40 -07:00
|
|
|
if (MIA->isInvoke(Instr)) {
|
2016-05-24 09:26:25 -07:00
|
|
|
const MCSymbol *LP;
|
|
|
|
|
uint64_t Action;
|
2016-07-13 18:57:40 -07:00
|
|
|
std::tie(LP, Action) = MIA->getEHInfo(Instr);
|
2016-05-24 09:26:25 -07:00
|
|
|
if (LP) {
|
|
|
|
|
LPToBBIndex[LP].push_back(getIndex(InsertBB));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
// How well do we detect tail calls here?
|
2016-07-13 18:57:40 -07:00
|
|
|
if (MIA->isTerminator(Instr)) {
|
2015-10-09 17:21:14 -07:00
|
|
|
PrevBB = InsertBB;
|
|
|
|
|
InsertBB = nullptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-12 11:30:05 -07:00
|
|
|
if (BasicBlocks.empty()) {
|
|
|
|
|
setSimple(false);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
// Intermediate dump.
|
2015-10-23 15:52:59 -07:00
|
|
|
DEBUG(print(dbgs(), "after creating basic blocks"));
|
2015-10-09 17:21:14 -07:00
|
|
|
|
|
|
|
|
// TODO: handle properly calls to no-return functions,
|
|
|
|
|
// e.g. exit(3), etc. Otherwise we'll see a false fall-through
|
|
|
|
|
// blocks.
|
|
|
|
|
|
2016-06-15 18:36:16 -07:00
|
|
|
// Make sure we can use profile data for this function.
|
|
|
|
|
if (BranchDataOrErr)
|
|
|
|
|
evaluateProfileData(BranchDataOrErr.get());
|
|
|
|
|
|
|
|
|
|
for (auto &Branch : TakenBranches) {
|
2015-10-09 17:21:14 -07:00
|
|
|
DEBUG(dbgs() << "registering branch [0x" << Twine::utohexstr(Branch.first)
|
|
|
|
|
<< "] -> [0x" << Twine::utohexstr(Branch.second) << "]\n");
|
2016-09-07 18:59:23 -07:00
|
|
|
auto *FromBB = getBasicBlockContainingOffset(Branch.first);
|
2015-10-09 17:21:14 -07:00
|
|
|
assert(FromBB && "cannot find BB containing FROM branch");
|
2016-09-07 18:59:23 -07:00
|
|
|
auto *ToBB = getBasicBlockAtOffset(Branch.second);
|
2015-10-09 17:21:14 -07:00
|
|
|
assert(ToBB && "cannot find BB containing TO branch");
|
|
|
|
|
|
2015-10-26 15:00:56 -07:00
|
|
|
if (BranchDataOrErr.getError()) {
|
2015-10-12 12:30:47 -07:00
|
|
|
FromBB->addSuccessor(ToBB);
|
|
|
|
|
} else {
|
|
|
|
|
const FuncBranchData &BranchData = BranchDataOrErr.get();
|
|
|
|
|
auto BranchInfoOrErr = BranchData.getBranch(Branch.first, Branch.second);
|
2015-10-26 15:00:56 -07:00
|
|
|
if (BranchInfoOrErr.getError()) {
|
2015-10-12 12:30:47 -07:00
|
|
|
FromBB->addSuccessor(ToBB);
|
|
|
|
|
} else {
|
|
|
|
|
const BranchInfo &BInfo = BranchInfoOrErr.get();
|
|
|
|
|
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
|
2016-09-16 15:54:32 -07:00
|
|
|
// Populate profile counts for the jump table.
|
2017-02-23 18:09:10 -08:00
|
|
|
auto *LastInstr = FromBB->getLastNonPseudoInstr();
|
2016-09-16 15:54:32 -07:00
|
|
|
if (!LastInstr)
|
|
|
|
|
continue;
|
|
|
|
|
auto JTAddress = BC.MIA->getJumpTable(*LastInstr);
|
|
|
|
|
if (!JTAddress)
|
|
|
|
|
continue;
|
|
|
|
|
auto *JT = getJumpTableContainingAddress(JTAddress);
|
|
|
|
|
if (!JT)
|
|
|
|
|
continue;
|
|
|
|
|
JT->Count += BInfo.Branches;
|
2017-03-08 19:58:33 -08:00
|
|
|
if (opts::IndirectCallPromotion < ICP_JUMP_TABLES &&
|
|
|
|
|
opts::JumpTables < JTS_AGGRESSIVE)
|
2016-09-16 15:54:32 -07:00
|
|
|
continue;
|
|
|
|
|
if (JT->Counts.empty())
|
|
|
|
|
JT->Counts.resize(JT->Entries.size());
|
|
|
|
|
auto EI = JT->Entries.begin();
|
|
|
|
|
auto Delta = (JTAddress - JT->Address) / JT->EntrySize;
|
|
|
|
|
EI += Delta;
|
|
|
|
|
while (EI != JT->Entries.end()) {
|
|
|
|
|
if (ToBB->getLabel() == *EI) {
|
2017-03-08 19:58:33 -08:00
|
|
|
assert(Delta < JT->Counts.size());
|
|
|
|
|
JT->Counts[Delta].Mispreds += BInfo.Mispreds;
|
|
|
|
|
JT->Counts[Delta].Count += BInfo.Branches;
|
2016-09-16 15:54:32 -07:00
|
|
|
}
|
|
|
|
|
++Delta;
|
|
|
|
|
++EI;
|
|
|
|
|
// A label marks the start of another jump table.
|
|
|
|
|
if (JT->Labels.count(Delta * JT->EntrySize))
|
|
|
|
|
break;
|
|
|
|
|
}
|
2015-10-12 12:30:47 -07:00
|
|
|
}
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
|
2015-10-26 15:00:56 -07:00
|
|
|
for (auto &Branch : FTBranches) {
|
|
|
|
|
DEBUG(dbgs() << "registering fallthrough [0x"
|
|
|
|
|
<< Twine::utohexstr(Branch.first) << "] -> [0x"
|
|
|
|
|
<< Twine::utohexstr(Branch.second) << "]\n");
|
2016-09-07 18:59:23 -07:00
|
|
|
auto *FromBB = getBasicBlockContainingOffset(Branch.first);
|
2015-10-26 15:00:56 -07:00
|
|
|
assert(FromBB && "cannot find BB containing FROM branch");
|
2016-06-27 14:51:38 -07:00
|
|
|
// Try to find the destination basic block. If the jump instruction was
|
|
|
|
|
// followed by a no-op then the destination offset recorded in FTBranches
|
|
|
|
|
// will point to that no-op but the destination basic block will start
|
2017-03-03 11:35:41 -08:00
|
|
|
// after the no-op due to ignoring no-ops when creating basic blocks.
|
2016-06-27 14:51:38 -07:00
|
|
|
// So we have to skip any no-ops when trying to find the destination
|
|
|
|
|
// basic block.
|
2016-09-07 18:59:23 -07:00
|
|
|
auto *ToBB = getBasicBlockAtOffset(Branch.second);
|
2016-06-27 14:51:38 -07:00
|
|
|
if (ToBB == nullptr) {
|
|
|
|
|
auto I = Instructions.find(Branch.second), E = Instructions.end();
|
|
|
|
|
while (ToBB == nullptr && I != E && MIA->isNoop(I->second)) {
|
|
|
|
|
++I;
|
|
|
|
|
if (I == E)
|
|
|
|
|
break;
|
|
|
|
|
ToBB = getBasicBlockAtOffset(I->first);
|
|
|
|
|
}
|
|
|
|
|
if (ToBB == nullptr) {
|
|
|
|
|
// We have a fall-through that does not point to another BB, ignore it
|
|
|
|
|
// as it may happen in cases where we have a BB finished by two
|
|
|
|
|
// branches.
|
2017-03-03 11:35:41 -08:00
|
|
|
// This can also happen when we delete a branch past the end of a
|
|
|
|
|
// function in case of a call to __builtin_unreachable().
|
2016-06-27 14:51:38 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-10-26 15:00:56 -07:00
|
|
|
|
|
|
|
|
// Does not add a successor if we can't find profile data, leave it to the
|
|
|
|
|
// inference pass to guess its frequency
|
2016-07-13 18:57:40 -07:00
|
|
|
if (BranchDataOrErr) {
|
2015-10-26 15:00:56 -07:00
|
|
|
const FuncBranchData &BranchData = BranchDataOrErr.get();
|
|
|
|
|
auto BranchInfoOrErr = BranchData.getBranch(Branch.first, Branch.second);
|
2016-07-13 18:57:40 -07:00
|
|
|
if (BranchInfoOrErr) {
|
2015-10-26 15:00:56 -07:00
|
|
|
const BranchInfo &BInfo = BranchInfoOrErr.get();
|
|
|
|
|
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
for (auto &I : TailCallTerminatedBlocks) {
|
|
|
|
|
TailCallInfo &TCInfo = I.second;
|
|
|
|
|
if (BranchDataOrErr) {
|
|
|
|
|
const FuncBranchData &BranchData = BranchDataOrErr.get();
|
|
|
|
|
auto BranchInfoOrErr = BranchData.getDirectCallBranch(TCInfo.Offset);
|
|
|
|
|
if (BranchInfoOrErr) {
|
|
|
|
|
const BranchInfo &BInfo = BranchInfoOrErr.get();
|
|
|
|
|
TCInfo.Count = BInfo.Branches;
|
|
|
|
|
TCInfo.Mispreds = BInfo.Mispreds;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-26 15:00:56 -07:00
|
|
|
// Add fall-through branches (except for non-taken conditional branches with
|
2016-06-15 18:36:16 -07:00
|
|
|
// profile data, which were already accounted for in TakenBranches).
|
2015-10-09 17:21:14 -07:00
|
|
|
PrevBB = nullptr;
|
|
|
|
|
bool IsPrevFT = false; // Is previous block a fall-through.
|
2016-06-07 16:27:52 -07:00
|
|
|
for (auto BB : BasicBlocks) {
|
2015-10-09 17:21:14 -07:00
|
|
|
if (IsPrevFT) {
|
2016-12-21 17:13:56 -08:00
|
|
|
PrevBB->addSuccessor(BB, BinaryBasicBlock::COUNT_NO_PROFILE,
|
|
|
|
|
BinaryBasicBlock::COUNT_INFERRED);
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
2016-06-07 16:27:52 -07:00
|
|
|
if (BB->empty()) {
|
2015-10-20 10:51:17 -07:00
|
|
|
IsPrevFT = true;
|
2016-06-07 16:27:52 -07:00
|
|
|
PrevBB = BB;
|
2015-11-08 12:23:54 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-07 16:27:52 -07:00
|
|
|
auto LastInstIter = --BB->end();
|
|
|
|
|
while (MIA->isCFI(*LastInstIter) && LastInstIter != BB->begin())
|
2015-11-08 12:23:54 -08:00
|
|
|
--LastInstIter;
|
2016-07-13 18:57:40 -07:00
|
|
|
|
|
|
|
|
// Check if the last instruction is a conditional jump that serves as a tail
|
|
|
|
|
// call.
|
|
|
|
|
bool IsCondTailCall = MIA->isConditionalBranch(*LastInstIter) &&
|
|
|
|
|
TailCallTerminatedBlocks.count(BB);
|
|
|
|
|
|
2016-06-07 16:27:52 -07:00
|
|
|
if (BB->succ_size() == 0) {
|
2016-07-13 18:57:40 -07:00
|
|
|
if (IsCondTailCall) {
|
|
|
|
|
// Conditional tail call without profile data for non-taken branch.
|
|
|
|
|
IsPrevFT = true;
|
|
|
|
|
} else {
|
|
|
|
|
// Unless the last instruction is a terminator, control will fall
|
|
|
|
|
// through to the next basic block.
|
|
|
|
|
IsPrevFT = MIA->isTerminator(*LastInstIter) ? false : true;
|
|
|
|
|
}
|
2016-06-07 16:27:52 -07:00
|
|
|
} else if (BB->succ_size() == 1) {
|
2016-07-13 18:57:40 -07:00
|
|
|
if (IsCondTailCall) {
|
|
|
|
|
// Conditional tail call with data for non-taken branch. A fall-through
|
|
|
|
|
// edge has already been added in the CFG.
|
|
|
|
|
IsPrevFT = false;
|
|
|
|
|
} else {
|
|
|
|
|
// Fall-through should be added if the last instruction is a conditional
|
|
|
|
|
// jump, since there was no profile data for the non-taken branch.
|
|
|
|
|
IsPrevFT = MIA->isConditionalBranch(*LastInstIter) ? true : false;
|
|
|
|
|
}
|
2015-10-09 17:21:14 -07:00
|
|
|
} else {
|
2015-10-26 15:00:56 -07:00
|
|
|
// Ends with 2 branches, with an indirect jump or it is a conditional
|
2016-07-13 18:57:40 -07:00
|
|
|
// branch whose frequency has been inferred from LBR.
|
2015-10-09 17:21:14 -07:00
|
|
|
IsPrevFT = false;
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-07 16:27:52 -07:00
|
|
|
PrevBB = BB;
|
2015-10-09 17:21:14 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!IsPrevFT) {
|
|
|
|
|
// Possibly a call that does not return.
|
|
|
|
|
DEBUG(dbgs() << "last block was marked as a fall-through\n");
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-24 09:26:25 -07:00
|
|
|
// Add associated landing pad blocks to each basic block.
|
2016-07-23 12:50:34 -07:00
|
|
|
addLandingPads(0, BasicBlocks.size());
|
2016-05-24 09:26:25 -07:00
|
|
|
|
2015-10-13 10:25:45 -07:00
|
|
|
// Infer frequency for non-taken branches
|
2016-06-15 18:36:16 -07:00
|
|
|
if (hasValidProfile())
|
2015-10-13 10:25:45 -07:00
|
|
|
inferFallThroughCounts();
|
2016-12-21 17:13:56 -08:00
|
|
|
else
|
|
|
|
|
clearProfile();
|
2015-10-13 10:25:45 -07:00
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
// Assign CFI information to each BB entry.
|
|
|
|
|
annotateCFIState();
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
// Convert conditional tail call branches to conditional branches that jump
|
|
|
|
|
// to a tail call.
|
|
|
|
|
removeConditionalTailCalls();
|
|
|
|
|
|
|
|
|
|
// Set the basic block layout to the original order.
|
2017-05-16 09:27:34 -07:00
|
|
|
PrevBB = nullptr;
|
2016-07-13 18:57:40 -07:00
|
|
|
for (auto BB : BasicBlocks) {
|
|
|
|
|
BasicBlocksLayout.emplace_back(BB);
|
2017-05-16 09:27:34 -07:00
|
|
|
if (PrevBB)
|
|
|
|
|
PrevBB->setEndOffset(BB->getOffset());
|
|
|
|
|
PrevBB = BB;
|
2016-07-13 18:57:40 -07:00
|
|
|
}
|
2017-05-16 09:27:34 -07:00
|
|
|
PrevBB->setEndOffset(getSize());
|
2016-07-13 18:57:40 -07:00
|
|
|
|
2016-08-22 14:24:09 -07:00
|
|
|
// Make any necessary adjustments for indirect branches.
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!postProcessIndirectBranches()) {
|
|
|
|
|
if (opts::Verbosity) {
|
|
|
|
|
errs() << "BOLT-WARNING: failed to post-process indirect branches for "
|
|
|
|
|
<< *this << '\n';
|
|
|
|
|
}
|
|
|
|
|
// In relocation mode we want to keep processing the function but avoid
|
|
|
|
|
// optimizing it.
|
2016-08-22 14:24:09 -07:00
|
|
|
setSimple(false);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2016-08-22 14:24:09 -07:00
|
|
|
|
2017-03-03 11:35:41 -08:00
|
|
|
// Eliminate inconsistencies between branch instructions and CFG.
|
|
|
|
|
postProcessBranches();
|
|
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
// Clean-up memory taken by instructions and labels.
|
2016-09-27 19:09:38 -07:00
|
|
|
//
|
|
|
|
|
// NB: don't clear Labels list as we may need them if we mark the function
|
|
|
|
|
// as non-simple later in the process of discovering extra entry points.
|
2016-06-15 18:36:16 -07:00
|
|
|
clearList(Instructions);
|
2016-07-13 18:57:40 -07:00
|
|
|
clearList(TailCallOffsets);
|
|
|
|
|
clearList(TailCallTerminatedBlocks);
|
2016-06-15 18:36:16 -07:00
|
|
|
clearList(OffsetToCFI);
|
|
|
|
|
clearList(TakenBranches);
|
|
|
|
|
clearList(FTBranches);
|
2017-04-18 23:32:11 -07:00
|
|
|
clearList(IgnoredBranches);
|
2016-06-15 18:36:16 -07:00
|
|
|
clearList(LPToBBIndex);
|
2016-09-29 11:19:06 -07:00
|
|
|
clearList(EntryOffsets);
|
2015-10-09 17:21:14 -07:00
|
|
|
|
|
|
|
|
// Update the state.
|
|
|
|
|
CurrentState = State::CFG;
|
|
|
|
|
|
2016-04-19 22:00:29 -07:00
|
|
|
// Annotate invoke instructions with GNU_args_size data.
|
|
|
|
|
propagateGnuArgsSizeInfo();
|
|
|
|
|
|
2017-02-27 21:44:38 -08:00
|
|
|
assert(validateCFG() && "Invalid CFG detected after disassembly");
|
|
|
|
|
|
2015-10-09 17:21:14 -07:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
void BinaryFunction::addEntryPoint(uint64_t Address) {
|
|
|
|
|
assert(containsAddress(Address) && "address does not belong to the function");
|
|
|
|
|
|
|
|
|
|
auto Offset = Address - getAddress();
|
|
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "BOLT-INFO: adding external entry point to function " << *this
|
|
|
|
|
<< " at offset 0x" << Twine::utohexstr(Address - getAddress())
|
|
|
|
|
<< '\n');
|
|
|
|
|
|
|
|
|
|
auto *EntrySymbol = BC.getGlobalSymbolAtAddress(Address);
|
|
|
|
|
|
|
|
|
|
// If we haven't disassembled the function yet we can add a new entry point
|
|
|
|
|
// even if it doesn't have an associated entry in the symbol table.
|
|
|
|
|
if (CurrentState == State::Empty) {
|
|
|
|
|
if (!EntrySymbol) {
|
|
|
|
|
DEBUG(dbgs() << "creating local label\n");
|
|
|
|
|
EntrySymbol = getOrCreateLocalLabel(Address);
|
|
|
|
|
} else {
|
|
|
|
|
DEBUG(dbgs() << "using global symbol " << EntrySymbol->getName() << '\n');
|
|
|
|
|
}
|
|
|
|
|
addEntryPointAtOffset(Address - getAddress());
|
|
|
|
|
Labels.emplace(Offset, EntrySymbol);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(EntrySymbol && "expected symbol at address");
|
|
|
|
|
|
|
|
|
|
if (isSimple()) {
|
|
|
|
|
// Find basic block corresponding to the address and substitute label.
|
|
|
|
|
auto *BB = getBasicBlockAtOffset(Offset);
|
|
|
|
|
if (!BB) {
|
|
|
|
|
// TODO #14762450: split basic block and process function.
|
|
|
|
|
if (opts::Verbosity || opts::Relocs) {
|
|
|
|
|
errs() << "BOLT-WARNING: no basic block at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(Offset) << " in function " << *this
|
|
|
|
|
<< ". Marking non-simple.\n";
|
|
|
|
|
}
|
|
|
|
|
setSimple(false);
|
|
|
|
|
} else {
|
|
|
|
|
BB->setLabel(EntrySymbol);
|
|
|
|
|
BB->setEntryPoint(true);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Fix/append labels list.
|
|
|
|
|
auto LI = Labels.find(Offset);
|
|
|
|
|
if (LI != Labels.end()) {
|
|
|
|
|
LI->second = EntrySymbol;
|
|
|
|
|
} else {
|
|
|
|
|
Labels.emplace(Offset, EntrySymbol);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-15 18:36:16 -07:00
|
|
|
void BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
|
|
|
|
|
BranchListType ProfileBranches(BranchData.Data.size());
|
|
|
|
|
std::transform(BranchData.Data.begin(),
|
|
|
|
|
BranchData.Data.end(),
|
|
|
|
|
ProfileBranches.begin(),
|
|
|
|
|
[](const BranchInfo &BI) {
|
|
|
|
|
return std::make_pair(BI.From.Offset,
|
|
|
|
|
BI.To.Name == BI.From.Name ?
|
|
|
|
|
BI.To.Offset : -1U);
|
|
|
|
|
});
|
|
|
|
|
BranchListType LocalProfileBranches;
|
|
|
|
|
std::copy_if(ProfileBranches.begin(),
|
|
|
|
|
ProfileBranches.end(),
|
|
|
|
|
std::back_inserter(LocalProfileBranches),
|
|
|
|
|
[](const std::pair<uint32_t, uint32_t> &Branch) {
|
|
|
|
|
return Branch.second != -1U;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Until we define a minimal profile, we consider no branch data to be a valid
|
|
|
|
|
// profile. It could happen to a function without branches.
|
|
|
|
|
if (LocalProfileBranches.empty()) {
|
|
|
|
|
ProfileMatchRatio = 1.0f;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::sort(LocalProfileBranches.begin(), LocalProfileBranches.end());
|
|
|
|
|
|
|
|
|
|
BranchListType FunctionBranches = TakenBranches;
|
|
|
|
|
FunctionBranches.insert(FunctionBranches.end(),
|
|
|
|
|
FTBranches.begin(),
|
|
|
|
|
FTBranches.end());
|
2017-04-18 23:32:11 -07:00
|
|
|
FunctionBranches.insert(FunctionBranches.end(),
|
|
|
|
|
IgnoredBranches.begin(),
|
|
|
|
|
IgnoredBranches.end());
|
2016-06-15 18:36:16 -07:00
|
|
|
std::sort(FunctionBranches.begin(), FunctionBranches.end());
|
|
|
|
|
|
|
|
|
|
BranchListType DiffBranches; // Branches in profile without a match.
|
|
|
|
|
std::set_difference(LocalProfileBranches.begin(),
|
|
|
|
|
LocalProfileBranches.end(),
|
|
|
|
|
FunctionBranches.begin(),
|
|
|
|
|
FunctionBranches.end(),
|
|
|
|
|
std::back_inserter(DiffBranches));
|
|
|
|
|
|
|
|
|
|
// Branches without a match in CFG.
|
|
|
|
|
BranchListType OrphanBranches;
|
|
|
|
|
|
|
|
|
|
// Eliminate recursive calls and returns from recursive calls from the list
|
|
|
|
|
// of branches that have no match. They are not considered local branches.
|
|
|
|
|
auto isRecursiveBranch = [&](std::pair<uint32_t, uint32_t> &Branch) {
|
|
|
|
|
auto SrcInstrI = Instructions.find(Branch.first);
|
|
|
|
|
if (SrcInstrI == Instructions.end())
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// Check if it is a recursive call.
|
|
|
|
|
if (BC.MIA->isCall(SrcInstrI->second) && Branch.second == 0)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
auto DstInstrI = Instructions.find(Branch.second);
|
|
|
|
|
if (DstInstrI == Instructions.end())
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// Check if it is a return from a recursive call.
|
|
|
|
|
bool IsSrcReturn = BC.MIA->isReturn(SrcInstrI->second);
|
|
|
|
|
// "rep ret" is considered to be 2 different instructions.
|
|
|
|
|
if (!IsSrcReturn && BC.MIA->isPrefix(SrcInstrI->second)) {
|
|
|
|
|
auto SrcInstrSuccessorI = SrcInstrI;
|
|
|
|
|
++SrcInstrSuccessorI;
|
|
|
|
|
assert(SrcInstrSuccessorI != Instructions.end() &&
|
|
|
|
|
"unexpected prefix instruction at the end of function");
|
|
|
|
|
IsSrcReturn = BC.MIA->isReturn(SrcInstrSuccessorI->second);
|
|
|
|
|
}
|
|
|
|
|
if (IsSrcReturn && Branch.second != 0) {
|
|
|
|
|
// Make sure the destination follows the call instruction.
|
|
|
|
|
auto DstInstrPredecessorI = DstInstrI;
|
|
|
|
|
--DstInstrPredecessorI;
|
|
|
|
|
assert(DstInstrPredecessorI != Instructions.end() && "invalid iterator");
|
|
|
|
|
if (BC.MIA->isCall(DstInstrPredecessorI->second))
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
};
|
|
|
|
|
std::remove_copy_if(DiffBranches.begin(),
|
|
|
|
|
DiffBranches.end(),
|
|
|
|
|
std::back_inserter(OrphanBranches),
|
|
|
|
|
isRecursiveBranch);
|
|
|
|
|
|
|
|
|
|
ProfileMatchRatio =
|
|
|
|
|
(float) (LocalProfileBranches.size() - OrphanBranches.size()) /
|
|
|
|
|
(float) LocalProfileBranches.size();
|
|
|
|
|
|
2016-09-16 15:54:32 -07:00
|
|
|
if (opts::Verbosity >= 1 && !OrphanBranches.empty()) {
|
2016-06-15 18:36:16 -07:00
|
|
|
errs() << "BOLT-WARNING: profile branches match only "
|
|
|
|
|
<< format("%.1f%%", ProfileMatchRatio * 100.0f) << " ("
|
|
|
|
|
<< (LocalProfileBranches.size() - OrphanBranches.size()) << '/'
|
|
|
|
|
<< LocalProfileBranches.size() << ") for function "
|
2016-08-07 12:35:23 -07:00
|
|
|
<< *this << '\n';
|
2016-06-15 18:36:16 -07:00
|
|
|
DEBUG(
|
|
|
|
|
for (auto &OBranch : OrphanBranches)
|
|
|
|
|
errs() << "\t0x" << Twine::utohexstr(OBranch.first) << " -> 0x"
|
|
|
|
|
<< Twine::utohexstr(OBranch.second) << " (0x"
|
|
|
|
|
<< Twine::utohexstr(OBranch.first + getAddress()) << " -> 0x"
|
2016-09-16 15:54:32 -07:00
|
|
|
<< Twine::utohexstr(OBranch.second + getAddress()) << ")\n";
|
2016-06-15 18:36:16 -07:00
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
void BinaryFunction::clearProfile() {
|
|
|
|
|
// Keep function execution profile the same. Only clear basic block and edge
|
|
|
|
|
// counts.
|
|
|
|
|
for (auto *BB : BasicBlocks) {
|
|
|
|
|
BB->ExecutionCount = 0;
|
|
|
|
|
for (auto &BI : BB->branch_info()) {
|
|
|
|
|
BI.Count = 0;
|
|
|
|
|
BI.MispredictedCount = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-10-13 10:25:45 -07:00
|
|
|
void BinaryFunction::inferFallThroughCounts() {
|
|
|
|
|
assert(!BasicBlocks.empty() && "basic block list should not be empty");
|
|
|
|
|
|
2016-06-10 17:13:05 -07:00
|
|
|
auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames());
|
2016-05-23 16:16:25 -07:00
|
|
|
|
2015-10-13 10:25:45 -07:00
|
|
|
// Compute preliminary execution time for each basic block
|
2016-06-07 16:27:52 -07:00
|
|
|
for (auto CurBB : BasicBlocks) {
|
|
|
|
|
CurBB->ExecutionCount = 0;
|
2015-10-13 10:25:45 -07:00
|
|
|
}
|
2016-12-21 17:13:56 -08:00
|
|
|
BasicBlocks.front()->setExecutionCount(ExecutionCount);
|
2015-10-13 10:25:45 -07:00
|
|
|
|
2016-06-07 16:27:52 -07:00
|
|
|
for (auto CurBB : BasicBlocks) {
|
2016-09-13 17:12:00 -07:00
|
|
|
auto SuccCount = CurBB->branch_info_begin();
|
2016-06-07 16:27:52 -07:00
|
|
|
for (auto Succ : CurBB->successors()) {
|
2015-10-20 16:48:54 -07:00
|
|
|
// Do not update execution count of the entry block (when we have tail
|
|
|
|
|
// calls). We already accounted for those when computing the func count.
|
2016-12-21 17:13:56 -08:00
|
|
|
if (Succ == BasicBlocks.front()) {
|
2016-07-03 21:30:35 -07:00
|
|
|
++SuccCount;
|
2015-10-20 16:48:54 -07:00
|
|
|
continue;
|
2016-07-03 21:30:35 -07:00
|
|
|
}
|
2016-12-21 17:13:56 -08:00
|
|
|
if (SuccCount->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
|
2016-09-13 17:12:00 -07:00
|
|
|
Succ->setExecutionCount(Succ->getExecutionCount() + SuccCount->Count);
|
2015-10-13 10:25:45 -07:00
|
|
|
++SuccCount;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-15 18:36:16 -07:00
|
|
|
// Update execution counts of landing pad blocks.
|
2016-05-23 16:16:25 -07:00
|
|
|
if (!BranchDataOrErr.getError()) {
|
|
|
|
|
const FuncBranchData &BranchData = BranchDataOrErr.get();
|
|
|
|
|
for (const auto &I : BranchData.EntryData) {
|
|
|
|
|
BinaryBasicBlock *BB = getBasicBlockAtOffset(I.To.Offset);
|
|
|
|
|
if (BB && LandingPads.find(BB->getLabel()) != LandingPads.end()) {
|
2016-09-13 17:12:00 -07:00
|
|
|
BB->setExecutionCount(BB->getExecutionCount() + I.Branches);
|
2016-05-23 16:16:25 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
// Work on a basic block at a time, propagating frequency information
|
|
|
|
|
// forwards.
|
|
|
|
|
// It is important to walk in the layout order.
|
2016-06-07 16:27:52 -07:00
|
|
|
for (auto CurBB : BasicBlocks) {
|
|
|
|
|
uint64_t BBExecCount = CurBB->getExecutionCount();
|
2015-10-13 10:25:45 -07:00
|
|
|
|
|
|
|
|
// Propagate this information to successors, filling in fall-through edges
|
|
|
|
|
// with frequency information
|
2016-06-07 16:27:52 -07:00
|
|
|
if (CurBB->succ_size() == 0)
|
2015-10-13 10:25:45 -07:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Calculate frequency of outgoing branches from this node according to
|
2016-12-21 17:13:56 -08:00
|
|
|
// LBR data.
|
2015-10-13 10:25:45 -07:00
|
|
|
uint64_t ReportedBranches = 0;
|
2016-09-15 10:24:22 -07:00
|
|
|
for (const auto &SuccCount : CurBB->branch_info()) {
|
2016-12-21 17:13:56 -08:00
|
|
|
if (SuccCount.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
|
2015-10-13 10:25:45 -07:00
|
|
|
ReportedBranches += SuccCount.Count;
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
// Calculate frequency of outgoing tail calls from this node according to
|
2016-12-21 17:13:56 -08:00
|
|
|
// LBR data.
|
2016-07-13 18:57:40 -07:00
|
|
|
uint64_t ReportedTailCalls = 0;
|
|
|
|
|
auto TCI = TailCallTerminatedBlocks.find(CurBB);
|
|
|
|
|
if (TCI != TailCallTerminatedBlocks.end()) {
|
|
|
|
|
ReportedTailCalls = TCI->second.Count;
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-24 09:26:25 -07:00
|
|
|
// Calculate frequency of throws from this node according to LBR data
|
|
|
|
|
// for branching into associated landing pads. Since it is possible
|
|
|
|
|
// for a landing pad to be associated with more than one basic blocks,
|
|
|
|
|
// we may overestimate the frequency of throws for such blocks.
|
|
|
|
|
uint64_t ReportedThrows = 0;
|
2016-09-13 17:12:00 -07:00
|
|
|
for (BinaryBasicBlock *LP: CurBB->landing_pads()) {
|
|
|
|
|
ReportedThrows += LP->getExecutionCount();
|
2016-05-24 09:26:25 -07:00
|
|
|
}
|
|
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
uint64_t TotalReportedJumps =
|
|
|
|
|
ReportedBranches + ReportedTailCalls + ReportedThrows;
|
2016-05-24 09:26:25 -07:00
|
|
|
|
2015-10-13 10:25:45 -07:00
|
|
|
// Infer the frequency of the fall-through edge, representing not taking the
|
2016-12-21 17:13:56 -08:00
|
|
|
// branch.
|
2015-10-13 10:25:45 -07:00
|
|
|
uint64_t Inferred = 0;
|
2016-05-24 09:26:25 -07:00
|
|
|
if (BBExecCount > TotalReportedJumps)
|
|
|
|
|
Inferred = BBExecCount - TotalReportedJumps;
|
2015-11-08 12:23:54 -08:00
|
|
|
|
|
|
|
|
DEBUG({
|
2016-09-02 14:15:29 -07:00
|
|
|
if (opts::Verbosity >= 1 && BBExecCount < TotalReportedJumps)
|
|
|
|
|
errs()
|
2016-02-05 14:42:04 -08:00
|
|
|
<< "BOLT-WARNING: Fall-through inference is slightly inconsistent. "
|
2015-11-08 12:23:54 -08:00
|
|
|
"exec frequency is less than the outgoing edges frequency ("
|
|
|
|
|
<< BBExecCount << " < " << ReportedBranches
|
|
|
|
|
<< ") for BB at offset 0x"
|
2016-06-07 16:27:52 -07:00
|
|
|
<< Twine::utohexstr(getAddress() + CurBB->getOffset()) << '\n';
|
2015-11-08 12:23:54 -08:00
|
|
|
});
|
2015-10-13 10:25:45 -07:00
|
|
|
|
2016-09-14 16:45:40 -07:00
|
|
|
if (CurBB->succ_size() <= 2) {
|
|
|
|
|
// If there is an FT it will be the last successor.
|
|
|
|
|
auto &SuccCount = *CurBB->branch_info_rbegin();
|
|
|
|
|
auto &Succ = *CurBB->succ_rbegin();
|
2016-12-21 17:13:56 -08:00
|
|
|
if (SuccCount.Count == BinaryBasicBlock::COUNT_NO_PROFILE) {
|
2016-09-14 16:45:40 -07:00
|
|
|
SuccCount.Count = Inferred;
|
|
|
|
|
Succ->ExecutionCount += Inferred;
|
|
|
|
|
}
|
2015-10-13 10:25:45 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // end for (CurBB : BasicBlocks)
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
void BinaryFunction::removeConditionalTailCalls() {
|
|
|
|
|
for (auto &I : TailCallTerminatedBlocks) {
|
|
|
|
|
BinaryBasicBlock *BB = I.first;
|
|
|
|
|
TailCallInfo &TCInfo = I.second;
|
|
|
|
|
|
|
|
|
|
// Get the conditional tail call instruction.
|
|
|
|
|
MCInst &CondTailCallInst = BB->getInstructionAtIndex(TCInfo.Index);
|
|
|
|
|
if (!BC.MIA->isConditionalBranch(CondTailCallInst)) {
|
|
|
|
|
// The block is not terminated with a conditional tail call.
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Assert that the tail call does not throw.
|
|
|
|
|
const MCSymbol *LP;
|
|
|
|
|
uint64_t Action;
|
|
|
|
|
std::tie(LP, Action) = BC.MIA->getEHInfo(CondTailCallInst);
|
|
|
|
|
assert(!LP && "found tail call with associated landing pad");
|
|
|
|
|
|
2016-08-29 21:11:22 -07:00
|
|
|
// Create the unconditional tail call instruction.
|
2016-09-08 14:52:26 -07:00
|
|
|
const auto *TailCallTargetLabel = BC.MIA->getTargetSymbol(CondTailCallInst);
|
|
|
|
|
assert(TailCallTargetLabel && "symbol expected for direct tail call");
|
2016-07-13 18:57:40 -07:00
|
|
|
MCInst TailCallInst;
|
2016-09-08 14:52:26 -07:00
|
|
|
BC.MIA->createTailCall(TailCallInst, TailCallTargetLabel, BC.Ctx.get());
|
2016-07-13 18:57:40 -07:00
|
|
|
|
|
|
|
|
// The way we will remove this conditional tail call depends on the
|
|
|
|
|
// direction of the jump when it is taken. We want to preserve this
|
|
|
|
|
// direction.
|
|
|
|
|
BinaryBasicBlock *TailCallBB = nullptr;
|
2016-08-29 21:11:22 -07:00
|
|
|
MCSymbol *TCLabel = BC.Ctx->createTempSymbol("TC", true);
|
|
|
|
|
if (getAddress() >= TCInfo.TargetAddress) {
|
2016-07-13 18:57:40 -07:00
|
|
|
// Backward jump: We will reverse the condition of the tail call, change
|
|
|
|
|
// its target to the following (currently fall-through) block, and insert
|
2016-08-29 21:11:22 -07:00
|
|
|
// a new block between them that will contain the unconditional tail call.
|
2016-07-13 18:57:40 -07:00
|
|
|
|
|
|
|
|
// Reverse the condition of the tail call and update its target.
|
|
|
|
|
unsigned InsertIdx = getIndex(BB) + 1;
|
2016-08-29 21:11:22 -07:00
|
|
|
assert(InsertIdx < size() && "no fall-through for conditional tail call");
|
2016-09-13 20:32:12 -07:00
|
|
|
BinaryBasicBlock *NextBB = BasicBlocks[InsertIdx];
|
2016-08-29 21:11:22 -07:00
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
BC.MIA->reverseBranchCondition(
|
|
|
|
|
CondTailCallInst, NextBB->getLabel(), BC.Ctx.get());
|
|
|
|
|
|
|
|
|
|
// Create a basic block containing the unconditional tail call instruction
|
|
|
|
|
// and place it between BB and NextBB.
|
|
|
|
|
std::vector<std::unique_ptr<BinaryBasicBlock>> TailCallBBs;
|
|
|
|
|
TailCallBBs.emplace_back(createBasicBlock(NextBB->getOffset(), TCLabel));
|
|
|
|
|
TailCallBBs[0]->addInstruction(TailCallInst);
|
2016-09-07 18:59:23 -07:00
|
|
|
insertBasicBlocks(BB, std::move(TailCallBBs),
|
|
|
|
|
/* UpdateLayout */ false,
|
|
|
|
|
/* UpdateCFIState */ false);
|
2016-09-13 20:32:12 -07:00
|
|
|
TailCallBB = BasicBlocks[InsertIdx];
|
2016-07-13 18:57:40 -07:00
|
|
|
|
|
|
|
|
// Add the correct CFI state for the new block.
|
2017-02-24 21:59:33 -08:00
|
|
|
TailCallBB->setCFIState(TCInfo.CFIStateBefore);
|
2016-07-13 18:57:40 -07:00
|
|
|
} else {
|
|
|
|
|
// Forward jump: we will create a new basic block at the end of the
|
2016-08-29 21:11:22 -07:00
|
|
|
// function containing the unconditional tail call and change the target
|
|
|
|
|
// of the conditional tail call to this basic block.
|
2016-07-13 18:57:40 -07:00
|
|
|
|
|
|
|
|
// Create a basic block containing the unconditional tail call
|
|
|
|
|
// instruction and place it at the end of the function.
|
2016-09-14 16:45:40 -07:00
|
|
|
// We have to add 1 byte as there's potentially an existing branch past
|
|
|
|
|
// the end of the code as a result of __builtin_unreachable().
|
2016-07-13 18:57:40 -07:00
|
|
|
const BinaryBasicBlock *LastBB = BasicBlocks.back();
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When an candiate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
requency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
uint64_t NewBlockOffset =
|
2017-02-24 21:59:33 -08:00
|
|
|
LastBB->getOffset()
|
|
|
|
|
+ BC.computeCodeSize(LastBB->begin(), LastBB->end()) + 1;
|
2016-07-13 18:57:40 -07:00
|
|
|
TailCallBB = addBasicBlock(NewBlockOffset, TCLabel);
|
|
|
|
|
TailCallBB->addInstruction(TailCallInst);
|
|
|
|
|
|
|
|
|
|
// Add the correct CFI state for the new block. It has to be inserted in
|
|
|
|
|
// the one before last position (the last position holds the CFI state
|
|
|
|
|
// after the last block).
|
2017-02-24 21:59:33 -08:00
|
|
|
TailCallBB->setCFIState(TCInfo.CFIStateBefore);
|
2016-07-13 18:57:40 -07:00
|
|
|
|
|
|
|
|
// Replace the target of the conditional tail call with the label of the
|
|
|
|
|
// new basic block.
|
|
|
|
|
BC.MIA->replaceBranchTarget(CondTailCallInst, TCLabel, BC.Ctx.get());
|
|
|
|
|
}
|
|
|
|
|
|
2016-08-29 21:11:22 -07:00
|
|
|
// Add CFG edge with profile info from BB to TailCallBB info and swap
|
|
|
|
|
// edges if the TailCallBB corresponds to the taken branch.
|
2016-07-13 18:57:40 -07:00
|
|
|
BB->addSuccessor(TailCallBB, TCInfo.Count, TCInfo.Mispreds);
|
2016-08-29 21:11:22 -07:00
|
|
|
if (getAddress() < TCInfo.TargetAddress)
|
|
|
|
|
BB->swapConditionalSuccessors();
|
2016-07-13 18:57:40 -07:00
|
|
|
|
|
|
|
|
// Add execution count for the block.
|
|
|
|
|
if (hasValidProfile())
|
2016-09-13 17:12:00 -07:00
|
|
|
TailCallBB->setExecutionCount(TCInfo.Count);
|
2016-07-13 18:57:40 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-13 15:27:59 -08:00
|
|
|
uint64_t BinaryFunction::getFunctionScore() {
|
2015-11-19 17:59:41 -08:00
|
|
|
if (FunctionScore != -1)
|
|
|
|
|
return FunctionScore;
|
|
|
|
|
|
2015-11-13 15:27:59 -08:00
|
|
|
uint64_t TotalScore = 0ULL;
|
|
|
|
|
for (auto BB : layout()) {
|
|
|
|
|
uint64_t BBExecCount = BB->getExecutionCount();
|
|
|
|
|
if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
|
|
|
|
|
continue;
|
2016-09-09 14:42:35 -07:00
|
|
|
BBExecCount *= BB->getNumNonPseudos();
|
2015-11-13 15:27:59 -08:00
|
|
|
TotalScore += BBExecCount;
|
|
|
|
|
}
|
2015-11-19 17:59:41 -08:00
|
|
|
FunctionScore = TotalScore;
|
|
|
|
|
return FunctionScore;
|
2015-11-13 15:27:59 -08:00
|
|
|
}
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
void BinaryFunction::annotateCFIState() {
|
|
|
|
|
assert(CurrentState == State::Disassembled && "unexpected function state");
|
2015-11-08 12:23:54 -08:00
|
|
|
assert(!BasicBlocks.empty() && "basic block list should not be empty");
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
// This is an index of the last processed CFI in FDE CFI program.
|
|
|
|
|
int32_t State = 0;
|
|
|
|
|
|
|
|
|
|
// This is an index of RememberState CFI reflecting effective state right
|
|
|
|
|
// after execution of RestoreState CFI.
|
|
|
|
|
//
|
|
|
|
|
// It differs from State iff the CFI at (State-1)
|
|
|
|
|
// was RestoreState (modulo GNU_args_size CFIs, which are ignored).
|
|
|
|
|
//
|
|
|
|
|
// This allows us to generate shorter replay sequences when producing new
|
|
|
|
|
// CFI programs.
|
|
|
|
|
int32_t EffectiveState = 0;
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
// For tracking RememberState/RestoreState sequences.
|
|
|
|
|
std::stack<int32_t> StateStack;
|
|
|
|
|
|
|
|
|
|
for (auto *BB : BasicBlocks) {
|
|
|
|
|
BB->setCFIState(EffectiveState);
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
// While building the CFG, we want to save the CFI state before a tail call
|
2017-02-24 21:59:33 -08:00
|
|
|
// instruction, so that we can correctly remove conditional tail calls.
|
|
|
|
|
auto TCI = TailCallTerminatedBlocks.find(BB);
|
2016-07-13 18:57:40 -07:00
|
|
|
bool SaveState = TCI != TailCallTerminatedBlocks.end();
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
uint32_t Idx = 0; // instruction index in a current basic block
|
|
|
|
|
for (const auto &Instr : *BB) {
|
|
|
|
|
++Idx;
|
|
|
|
|
if (SaveState && Idx == TCI->second.Index) {
|
|
|
|
|
TCI->second.CFIStateBefore = EffectiveState;
|
|
|
|
|
SaveState = false;
|
2016-07-13 18:57:40 -07:00
|
|
|
}
|
2017-02-24 21:59:33 -08:00
|
|
|
|
|
|
|
|
const auto *CFI = getCFIFor(Instr);
|
|
|
|
|
if (!CFI)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
++State;
|
|
|
|
|
|
2015-11-08 12:23:54 -08:00
|
|
|
if (CFI->getOperation() == MCCFIInstruction::OpRememberState) {
|
2017-02-24 21:59:33 -08:00
|
|
|
StateStack.push(EffectiveState);
|
2016-04-19 22:00:29 -07:00
|
|
|
} else if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) {
|
2017-02-24 21:59:33 -08:00
|
|
|
assert(!StateStack.empty() && "corrupt CFI stack");
|
|
|
|
|
EffectiveState = StateStack.top();
|
2015-11-08 12:23:54 -08:00
|
|
|
StateStack.pop();
|
2016-04-19 22:00:29 -07:00
|
|
|
} else if (CFI->getOperation() != MCCFIInstruction::OpGnuArgsSize) {
|
2017-02-24 21:59:33 -08:00
|
|
|
// OpGnuArgsSize CFIs do not affect the CFI state.
|
|
|
|
|
EffectiveState = State;
|
2016-09-07 18:59:23 -07:00
|
|
|
}
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
assert(StateStack.empty() && "corrupt CFI stack");
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool BinaryFunction::fixCFIState() {
|
|
|
|
|
DEBUG(dbgs() << "Trying to fix CFI states for each BB after reordering.\n");
|
2016-08-07 12:35:23 -07:00
|
|
|
DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this
|
2015-11-08 12:23:54 -08:00
|
|
|
<< ": ");
|
|
|
|
|
|
|
|
|
|
auto replayCFIInstrs =
|
2017-02-24 21:59:33 -08:00
|
|
|
[this](int32_t FromState, int32_t ToState, BinaryBasicBlock *InBB,
|
|
|
|
|
BinaryBasicBlock::iterator InsertIt) -> bool {
|
|
|
|
|
if (FromState == ToState)
|
|
|
|
|
return true;
|
|
|
|
|
assert(FromState < ToState && "can only replay CFIs forward");
|
|
|
|
|
|
|
|
|
|
std::vector<uint32_t> NewCFIs;
|
|
|
|
|
uint32_t NestedLevel = 0;
|
|
|
|
|
for (auto CurState = FromState; CurState < ToState; ++CurState) {
|
|
|
|
|
MCCFIInstruction *Instr = &FrameInstructions[CurState];
|
|
|
|
|
if (Instr->getOperation() == MCCFIInstruction::OpRememberState)
|
|
|
|
|
++NestedLevel;
|
|
|
|
|
if (!NestedLevel)
|
|
|
|
|
NewCFIs.push_back(CurState);
|
|
|
|
|
if (Instr->getOperation() == MCCFIInstruction::OpRestoreState)
|
|
|
|
|
--NestedLevel;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TODO: If in replaying the CFI instructions to reach this state we
|
|
|
|
|
// have state stack instructions, we could still work out the logic
|
|
|
|
|
// to extract only the necessary instructions to reach this state
|
|
|
|
|
// without using the state stack. Not sure if it is worth the effort
|
|
|
|
|
// because this happens rarely.
|
|
|
|
|
if (NestedLevel != 0) {
|
|
|
|
|
errs() << "BOLT-WARNING: CFI rewriter detected nested CFI state"
|
|
|
|
|
<< " while replaying CFI instructions for BB "
|
|
|
|
|
<< InBB->getName() << " in function " << *this << '\n';
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2016-01-16 14:58:22 -08:00
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
for (auto CFI : NewCFIs) {
|
|
|
|
|
// Ignore GNU_args_size instructions.
|
|
|
|
|
if (FrameInstructions[CFI].getOperation() !=
|
|
|
|
|
MCCFIInstruction::OpGnuArgsSize) {
|
|
|
|
|
InsertIt = addCFIPseudo(InBB, InsertIt, CFI);
|
|
|
|
|
++InsertIt;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
return true;
|
|
|
|
|
};
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
int32_t State = 0;
|
2016-08-24 14:25:33 -07:00
|
|
|
auto *FDEStartBB = BasicBlocksLayout[0];
|
2017-02-24 21:59:33 -08:00
|
|
|
bool SeenCold = false;
|
2017-05-25 10:29:38 -07:00
|
|
|
auto Sep = "";
|
|
|
|
|
(void)Sep;
|
2017-02-24 21:59:33 -08:00
|
|
|
for (auto *BB : BasicBlocksLayout) {
|
|
|
|
|
const auto CFIStateAtExit = BB->getCFIStateAtExit();
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2015-11-19 17:59:41 -08:00
|
|
|
// Hot-cold border: check if this is the first BB to be allocated in a cold
|
2017-02-24 21:59:33 -08:00
|
|
|
// region (with a different FDE). If yes, we need to reset the CFI state and
|
|
|
|
|
// the FDEStartBB that is used to insert remember_state CFIs.
|
|
|
|
|
if (!SeenCold && BB->isCold()) {
|
2015-11-19 17:59:41 -08:00
|
|
|
State = 0;
|
2016-08-24 14:25:33 -07:00
|
|
|
FDEStartBB = BB;
|
2017-02-24 21:59:33 -08:00
|
|
|
SeenCold = true;
|
2016-08-24 14:25:33 -07:00
|
|
|
}
|
2015-11-19 17:59:41 -08:00
|
|
|
|
2016-01-16 14:58:22 -08:00
|
|
|
// We need to recover the correct state if it doesn't match expected
|
|
|
|
|
// state at BB entry point.
|
2017-02-24 21:59:33 -08:00
|
|
|
if (BB->getCFIState() < State) {
|
2016-01-16 14:58:22 -08:00
|
|
|
// In this case, State is currently higher than what this BB expect it
|
|
|
|
|
// to be. To solve this, we need to insert a CFI instruction to remember
|
|
|
|
|
// the old state at function entry, then another CFI instruction to
|
|
|
|
|
// restore it at the entry of this BB and replay CFI instructions to
|
|
|
|
|
// reach the desired state.
|
2017-02-24 21:59:33 -08:00
|
|
|
int32_t OldState = BB->getCFIState();
|
2016-01-16 14:58:22 -08:00
|
|
|
// Remember state at function entry point (our reference state).
|
2016-11-28 17:45:25 -08:00
|
|
|
auto InsertIt = FDEStartBB->begin();
|
2016-08-24 14:25:33 -07:00
|
|
|
while (InsertIt != FDEStartBB->end() && BC.MIA->isCFI(*InsertIt))
|
2015-11-08 12:23:54 -08:00
|
|
|
++InsertIt;
|
2016-08-24 14:25:33 -07:00
|
|
|
addCFIPseudo(FDEStartBB, InsertIt, FrameInstructions.size());
|
2016-01-16 14:58:22 -08:00
|
|
|
FrameInstructions.emplace_back(
|
|
|
|
|
MCCFIInstruction::createRememberState(nullptr));
|
|
|
|
|
// Restore state
|
|
|
|
|
InsertIt = addCFIPseudo(BB, BB->begin(), FrameInstructions.size());
|
|
|
|
|
++InsertIt;
|
|
|
|
|
FrameInstructions.emplace_back(
|
|
|
|
|
MCCFIInstruction::createRestoreState(nullptr));
|
|
|
|
|
if (!replayCFIInstrs(0, OldState, BB, InsertIt))
|
|
|
|
|
return false;
|
|
|
|
|
// Check if we messed up the stack in this process
|
|
|
|
|
int StackOffset = 0;
|
|
|
|
|
for (BinaryBasicBlock *CurBB : BasicBlocksLayout) {
|
|
|
|
|
if (CurBB == BB)
|
|
|
|
|
break;
|
|
|
|
|
for (auto &Instr : *CurBB) {
|
2016-07-23 08:01:53 -07:00
|
|
|
if (auto *CFI = getCFIFor(Instr)) {
|
2016-01-16 14:58:22 -08:00
|
|
|
if (CFI->getOperation() == MCCFIInstruction::OpRememberState)
|
|
|
|
|
++StackOffset;
|
|
|
|
|
if (CFI->getOperation() == MCCFIInstruction::OpRestoreState)
|
|
|
|
|
--StackOffset;
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
|
|
|
|
}
|
2016-01-16 14:58:22 -08:00
|
|
|
}
|
|
|
|
|
auto Pos = BB->begin();
|
2016-01-19 00:20:06 -08:00
|
|
|
while (Pos != BB->end() && BC.MIA->isCFI(*Pos)) {
|
|
|
|
|
auto CFI = getCFIFor(*Pos);
|
2016-01-16 14:58:22 -08:00
|
|
|
if (CFI->getOperation() == MCCFIInstruction::OpRememberState)
|
|
|
|
|
++StackOffset;
|
|
|
|
|
if (CFI->getOperation() == MCCFIInstruction::OpRestoreState)
|
|
|
|
|
--StackOffset;
|
2016-01-19 00:20:06 -08:00
|
|
|
++Pos;
|
2016-01-16 14:58:22 -08:00
|
|
|
}
|
2015-11-08 12:23:54 -08:00
|
|
|
|
2016-01-16 14:58:22 -08:00
|
|
|
if (StackOffset != 0) {
|
2017-02-24 21:59:33 -08:00
|
|
|
errs() << "BOLT-WARNING: not possible to remember/recover state"
|
|
|
|
|
<< " without corrupting CFI state stack in function "
|
|
|
|
|
<< *this << " @ " << BB->getName() << "\n";
|
2016-01-16 14:58:22 -08:00
|
|
|
return false;
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
2017-02-24 21:59:33 -08:00
|
|
|
} else if (BB->getCFIState() > State) {
|
|
|
|
|
// If BB's CFI state is greater than State, it means we are behind in the
|
2016-01-16 14:58:22 -08:00
|
|
|
// state. Just emit all instructions to reach this state at the
|
|
|
|
|
// beginning of this BB. If this sequence of instructions involve
|
|
|
|
|
// remember state or restore state, bail out.
|
2017-02-24 21:59:33 -08:00
|
|
|
if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin()))
|
2016-01-16 14:58:22 -08:00
|
|
|
return false;
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
|
|
|
|
|
2017-02-24 21:59:33 -08:00
|
|
|
State = CFIStateAtExit;
|
|
|
|
|
DEBUG(dbgs() << Sep << State; Sep = ", ");
|
2015-11-08 12:23:54 -08:00
|
|
|
}
|
|
|
|
|
DEBUG(dbgs() << "\n");
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-15 16:11:30 -07:00
|
|
|
/// Reorder the basic blocks of this function according to \p Type and
/// optionally split the function afterwards.
///
/// Nothing happens when the layout is empty, when \p Type is LT_NONE, or
/// when a profile-driven layout is requested but no valid profile exists.
///
/// \param Type              requested layout algorithm.
/// \param MinBranchClusters selects MinBranchGreedyClusterAlgorithm instead
///                          of PHGreedyClusterAlgorithm for the
///                          cluster-based algorithms.
/// \param Split             when true, splitFunction() is called after the
///                          new layout is installed.
void BinaryFunction::modifyLayout(LayoutType Type, bool MinBranchClusters,
                                  bool Split) {
  if (Type == LT_NONE || BasicBlocksLayout.empty())
    return;

  // Every layout except plain reversal needs profile data.
  const bool IsReverse = (Type == LT_REVERSE);
  if (!IsReverse && !hasValidProfile())
    return;

  std::unique_ptr<ReorderAlgorithm> Reorderer;
  if (IsReverse) {
    Reorderer.reset(new ReverseReorderAlgorithm());
  } else if (Type != LT_OPTIMIZE_SHUFFLE &&
             BasicBlocksLayout.size() <= FUNC_SIZE_THRESHOLD) {
    // The function is small enough to look for the optimal solution.
    DEBUG(dbgs() << "finding optimal block layout for " << *this << "\n");
    Reorderer.reset(new OptimalReorderAlgorithm());
  } else {
    DEBUG(dbgs() << "running block layout heuristics on " << *this << "\n");

    std::unique_ptr<ClusterAlgorithm> Clusterer;
    if (!MinBranchClusters)
      Clusterer.reset(new PHGreedyClusterAlgorithm());
    else
      Clusterer.reset(new MinBranchGreedyClusterAlgorithm());

    switch (Type) {
    case LT_OPTIMIZE:
      Reorderer.reset(new OptimizeReorderAlgorithm(std::move(Clusterer)));
      break;

    case LT_OPTIMIZE_BRANCH:
      Reorderer.reset(
          new OptimizeBranchReorderAlgorithm(std::move(Clusterer)));
      break;

    case LT_OPTIMIZE_CACHE:
      Reorderer.reset(new OptimizeCacheReorderAlgorithm(std::move(Clusterer)));
      break;

    case LT_OPTIMIZE_SHUFFLE:
      Reorderer.reset(new RandomClusterReorderAlgorithm(std::move(Clusterer)));
      break;

    default:
      llvm_unreachable("unexpected layout type");
    }
  }

  // Compute the new order and install it as the current layout.
  BasicBlockOrderType Reordered;
  Reorderer->reorderBasicBlocks(*this, Reordered);
  BasicBlocksLayout.clear();
  BasicBlocksLayout.swap(Reordered);

  if (Split)
    splitFunction();
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
/// Emit the basic blocks of either the hot or the cold part of this function
/// to \p Streamer, in layout order.
///
/// \param Streamer     streamer that receives labels, CFIs and instructions.
/// \param EmitColdPart when true only cold blocks are emitted, otherwise
///                     only non-cold blocks.
void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart) {
  // Value of the most recently emitted GNU_args_size CFI.
  int64_t LastGnuArgsSize = 0;

  for (auto Block : layout()) {
    // Skip blocks that belong to the other part of the function.
    if (Block->isCold() != EmitColdPart)
      continue;

    if (opts::AlignBlocks) {
      if (Block->getAlignment() > 1)
        Streamer.EmitCodeAlignment(Block->getAlignment());
    }
    Streamer.EmitLabel(Block->getLabel());

    // Remember if last instruction emitted was a prefix.
    // NOTE(review): this flag is written below but never read in this
    // function.
    bool LastIsPrefix = false;
    SMLoc PrevLoc;
    for (auto &Inst : *Block) {
      // Pseudo instructions: EH labels become plain labels ...
      if (BC.MIA->isEHLabel(Inst)) {
        const auto *EHLabel = BC.MIA->getTargetSymbol(Inst);
        assert(Inst.getNumOperands() == 1 && EHLabel &&
               "bad EH_LABEL instruction");
        Streamer.EmitLabel(const_cast<MCSymbol *>(EHLabel));
        continue;
      }
      // ... and CFI pseudos become the CFI instructions they refer to.
      if (BC.MIA->isCFI(Inst)) {
        Streamer.EmitCFIInstruction(*getCFIFor(Inst));
        continue;
      }

      if (opts::UpdateDebugSections && UnitLineTable.first)
        PrevLoc = emitLineInfo(Inst.getLoc(), PrevLoc);

      // Emit GNU_args_size CFIs as necessary: only before invokes, and only
      // when the value differs from the last one emitted.
      if (usesGnuArgsSize() && BC.MIA->isInvoke(Inst)) {
        auto WantedGnuArgsSize = BC.MIA->getGnuArgsSize(Inst);
        assert(WantedGnuArgsSize >= 0 && "expected non-negative GNU_args_size");
        if (WantedGnuArgsSize != LastGnuArgsSize) {
          LastGnuArgsSize = WantedGnuArgsSize;
          Streamer.EmitCFIGnuArgsSize(LastGnuArgsSize);
        }
      }

      Streamer.EmitInstruction(Inst, *BC.STI);
      LastIsPrefix = BC.MIA->isPrefix(Inst);
    }
  }
}
|
|
|
|
|
|
|
|
|
|
/// Emit the original bytes of this function to \p Streamer, interleaved with
/// the labels from Labels and the relocations from MoveRelocations at their
/// recorded offsets.
///
/// Currently disabled in assert-enabled builds: the function starts with an
/// unconditional assertion failure.
void BinaryFunction::emitBodyRaw(MCStreamer *Streamer) {

  // #14998851: Fix gold linker's '--emit-relocs'.
  assert(false &&
         "cannot emit raw body unless relocation accuracy is guaranteed");

  // Raw contents of the section holding the function.
  StringRef SectionBytes;
  Section.getContents(SectionBytes);

  // Raw contents of the function itself.
  StringRef FunctionBytes =
      SectionBytes.substr(getAddress() - Section.getAddress(), getSize());

  if (opts::Verbosity)
    outs() << "BOLT-INFO: emitting function " << *this << " in raw ("
           << getSize() << " bytes).\n";

  // The function blob is cut into pieces at every label and relocation
  // offset; the label or relocation is emitted between the pieces.
  uint64_t Cursor = 0;
  auto LabelIt = Labels.begin();
  auto RelocIt = MoveRelocations.begin();
  while (LabelIt != Labels.end() || RelocIt != MoveRelocations.end()) {
    // Next offset of interest; exhausted lists count as "end of function".
    uint64_t LabelStop = getSize();
    if (LabelIt != Labels.end())
      LabelStop = LabelIt->first;
    uint64_t RelocStop = getSize();
    if (RelocIt != MoveRelocations.end())
      RelocStop = RelocIt->first;
    auto NextStop = std::min(LabelStop, RelocStop);
    assert(NextStop <= getSize() && "internal overflow error");

    // Emit the raw bytes leading up to the next stop.
    if (Cursor < NextStop) {
      Streamer->EmitBytes(FunctionBytes.slice(Cursor, NextStop));
      Cursor = NextStop;
    }

    if (LabelIt != Labels.end() && Cursor == LabelIt->first) {
      Streamer->EmitLabel(LabelIt->second);
      DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << LabelIt->second->getName()
                   << " at offset 0x" << Twine::utohexstr(LabelIt->first)
                   << '\n');
      ++LabelIt;
    }

    if (RelocIt != MoveRelocations.end() && Cursor == RelocIt->first) {
      // The relocation replaces RelocationSize bytes of the original blob.
      auto RelocationSize = RelocIt->second.emit(Streamer);
      DEBUG(dbgs() << "BOLT-DEBUG: emitted relocation for symbol "
                   << RelocIt->second.Symbol->getName() << " at offset 0x"
                   << Twine::utohexstr(RelocIt->first)
                   << " with size " << RelocationSize << '\n');
      Cursor += RelocationSize;
      ++RelocIt;
    }
  }

  // Emit whatever is left after the last label/relocation.
  assert(Cursor <= getSize() && "overflow error");
  if (Cursor < getSize())
    Streamer->EmitBytes(FunctionBytes.substr(Cursor));
}
|
|
|
|
|
|
2016-07-01 08:40:56 -07:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
#ifndef MAX_PATH
|
|
|
|
|
#define MAX_PATH 255
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
std::string constructFilename(std::string Filename,
|
|
|
|
|
std::string Annotation,
|
|
|
|
|
std::string Suffix) {
|
|
|
|
|
std::replace(Filename.begin(), Filename.end(), '/', '-');
|
|
|
|
|
if (!Annotation.empty()) {
|
|
|
|
|
Annotation.insert(0, "-");
|
|
|
|
|
}
|
|
|
|
|
if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {
|
|
|
|
|
assert(Suffix.size() + Annotation.size() <= MAX_PATH);
|
2016-09-02 14:15:29 -07:00
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
errs() << "BOLT-WARNING: Filename \"" << Filename << Annotation << Suffix
|
|
|
|
|
<< "\" exceeds the " << MAX_PATH << " size limit, truncating.\n";
|
|
|
|
|
}
|
2016-07-01 08:40:56 -07:00
|
|
|
Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size()));
|
|
|
|
|
}
|
|
|
|
|
Filename += Annotation;
|
|
|
|
|
Filename += Suffix;
|
|
|
|
|
return Filename;
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-29 19:18:37 -07:00
|
|
|
/// Return a copy of \p Str in which every newline is replaced by the
/// replacement string below and every double-quote character is dropped.
/// All other characters are copied unchanged.
std::string formatEscapes(const std::string& Str) {
  std::string Cleaned;
  for (const char Ch : Str) {
    if (Ch == '"')
      continue; // quotes are removed
    if (Ch == '\n')
      Cleaned += " ";
    else
      Cleaned += Ch;
  }
  return Cleaned;
}
|
|
|
|
|
|
2016-07-01 08:40:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryFunction::dumpGraph(raw_ostream& OS) const {
|
2016-07-29 19:18:37 -07:00
|
|
|
OS << "strict digraph \"" << getPrintName() << "\" {\n";
|
|
|
|
|
uint64_t Offset = Address;
|
2016-07-01 08:40:56 -07:00
|
|
|
for (auto *BB : BasicBlocks) {
|
2016-07-29 19:18:37 -07:00
|
|
|
auto LayoutPos = std::find(BasicBlocksLayout.begin(),
|
|
|
|
|
BasicBlocksLayout.end(),
|
|
|
|
|
BB);
|
|
|
|
|
unsigned Layout = LayoutPos - BasicBlocksLayout.begin();
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
const char* ColdStr = BB->isCold() ? " (cold)" : "";
|
|
|
|
|
OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u:CFI:%u)\"]\n",
|
2016-07-29 19:18:37 -07:00
|
|
|
BB->getName().data(),
|
|
|
|
|
BB->getName().data(),
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
ColdStr,
|
|
|
|
|
(BB->ExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE
|
|
|
|
|
? BB->ExecutionCount
|
|
|
|
|
: 0),
|
2016-07-29 19:18:37 -07:00
|
|
|
BB->getOffset(),
|
2016-09-13 17:12:00 -07:00
|
|
|
getIndex(BB),
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
Layout,
|
2017-02-24 21:59:33 -08:00
|
|
|
BB->getCFIState());
|
2016-07-29 19:18:37 -07:00
|
|
|
OS << format("\"%s\" [shape=box]\n", BB->getName().data());
|
|
|
|
|
if (opts::DotToolTipCode) {
|
|
|
|
|
std::string Str;
|
|
|
|
|
raw_string_ostream CS(Str);
|
|
|
|
|
Offset = BC.printInstructions(CS, BB->begin(), BB->end(), Offset, this);
|
|
|
|
|
const auto Code = formatEscapes(CS.str());
|
|
|
|
|
OS << format("\"%s\" [tooltip=\"%s\"]\n",
|
|
|
|
|
BB->getName().data(),
|
|
|
|
|
Code.c_str());
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-13 20:32:12 -07:00
|
|
|
// analyzeBranch is just used to get the names of the branch
|
|
|
|
|
// opcodes.
|
2016-07-29 19:18:37 -07:00
|
|
|
const MCSymbol *TBB = nullptr;
|
|
|
|
|
const MCSymbol *FBB = nullptr;
|
|
|
|
|
MCInst *CondBranch = nullptr;
|
|
|
|
|
MCInst *UncondBranch = nullptr;
|
2016-09-13 17:12:00 -07:00
|
|
|
const bool Success = BB->analyzeBranch(TBB,
|
|
|
|
|
FBB,
|
|
|
|
|
CondBranch,
|
|
|
|
|
UncondBranch);
|
2016-07-29 19:18:37 -07:00
|
|
|
|
2017-02-23 18:09:10 -08:00
|
|
|
const auto *LastInstr = BB->getLastNonPseudoInstr();
|
2016-09-16 15:54:32 -07:00
|
|
|
const bool IsJumpTable = LastInstr && BC.MIA->getJumpTable(*LastInstr);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2016-09-13 17:12:00 -07:00
|
|
|
auto BI = BB->branch_info_begin();
|
2016-07-01 08:40:56 -07:00
|
|
|
for (auto *Succ : BB->successors()) {
|
2016-07-29 19:18:37 -07:00
|
|
|
std::string Branch;
|
|
|
|
|
if (Success) {
|
2016-09-13 20:32:12 -07:00
|
|
|
if (Succ == BB->getConditionalSuccessor(true)) {
|
|
|
|
|
Branch = CondBranch
|
|
|
|
|
? BC.InstPrinter->getOpcodeName(CondBranch->getOpcode())
|
|
|
|
|
: "TB";
|
|
|
|
|
} else if (Succ == BB->getConditionalSuccessor(false)) {
|
|
|
|
|
Branch = UncondBranch
|
|
|
|
|
? BC.InstPrinter->getOpcodeName(UncondBranch->getOpcode())
|
|
|
|
|
: "FB";
|
2016-07-29 19:18:37 -07:00
|
|
|
} else {
|
|
|
|
|
Branch = "FT";
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-09-13 20:32:12 -07:00
|
|
|
if (IsJumpTable) {
|
|
|
|
|
Branch = "JT";
|
|
|
|
|
}
|
2016-07-29 19:18:37 -07:00
|
|
|
OS << format("\"%s\" -> \"%s\" [label=\"%s",
|
|
|
|
|
BB->getName().data(),
|
|
|
|
|
Succ->getName().data(),
|
|
|
|
|
Branch.c_str());
|
|
|
|
|
|
2016-09-13 17:12:00 -07:00
|
|
|
if (BB->getExecutionCount() != COUNT_NO_PROFILE &&
|
2016-12-21 17:13:56 -08:00
|
|
|
BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")";
|
2016-07-29 19:18:37 -07:00
|
|
|
} else if (ExecutionCount != COUNT_NO_PROFILE &&
|
2016-12-21 17:13:56 -08:00
|
|
|
BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
|
2016-09-13 17:12:00 -07:00
|
|
|
OS << "\\n(IC:" << BI->Count << ")";
|
2016-07-29 19:18:37 -07:00
|
|
|
}
|
|
|
|
|
OS << "\"]\n";
|
|
|
|
|
|
2016-09-13 17:12:00 -07:00
|
|
|
++BI;
|
2016-07-29 19:18:37 -07:00
|
|
|
}
|
2016-09-13 17:12:00 -07:00
|
|
|
for (auto *LP : BB->landing_pads()) {
|
2016-07-29 19:18:37 -07:00
|
|
|
OS << format("\"%s\" -> \"%s\" [constraint=false style=dashed]\n",
|
|
|
|
|
BB->getName().data(),
|
|
|
|
|
LP->getName().data());
|
2016-07-01 08:40:56 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
OS << "}\n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryFunction::viewGraph() const {
|
|
|
|
|
SmallString<MAX_PATH> Filename;
|
|
|
|
|
if (auto EC = sys::fs::createTemporaryFile("bolt-cfg", "dot", Filename)) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: " << EC.message() << ", unable to create "
|
2016-07-01 08:40:56 -07:00
|
|
|
<< " bolt-cfg-XXXXX.dot temporary file.\n";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
dumpGraphToFile(Filename.str());
|
|
|
|
|
if (DisplayGraph(Filename)) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-ERROR: Can't display " << Filename << " with graphviz.\n";
|
2016-07-01 08:40:56 -07:00
|
|
|
}
|
|
|
|
|
if (auto EC = sys::fs::remove(Filename)) {
|
2016-09-02 14:15:29 -07:00
|
|
|
errs() << "BOLT-WARNING: " << EC.message() << ", failed to remove "
|
|
|
|
|
<< Filename << "\n";
|
2016-07-01 08:40:56 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
|
2016-08-07 12:35:23 -07:00
|
|
|
auto Filename = constructFilename(getPrintName(), Annotation, ".dot");
|
2016-09-02 14:15:29 -07:00
|
|
|
outs() << "BOLT-DEBUG: Dumping CFG to " << Filename << "\n";
|
2016-07-23 08:01:53 -07:00
|
|
|
dumpGraphToFile(Filename);
|
2016-07-01 08:40:56 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryFunction::dumpGraphToFile(std::string Filename) const {
|
|
|
|
|
std::error_code EC;
|
|
|
|
|
raw_fd_ostream of(Filename, EC, sys::fs::F_None);
|
|
|
|
|
if (EC) {
|
2016-09-02 14:15:29 -07:00
|
|
|
if (opts::Verbosity >= 1) {
|
|
|
|
|
errs() << "BOLT-WARNING: " << EC.message() << ", unable to open "
|
|
|
|
|
<< Filename << " for output.\n";
|
|
|
|
|
}
|
2016-07-01 08:40:56 -07:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
dumpGraph(of);
|
|
|
|
|
}
|
|
|
|
|
|
2017-02-27 21:44:38 -08:00
|
|
|
bool BinaryFunction::validateCFG() const {
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When an candiate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
requency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
bool Valid = true;
|
|
|
|
|
for (auto *BB : BasicBlocks) {
|
|
|
|
|
Valid &= BB->validateSuccessorInvariants();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!Valid)
|
|
|
|
|
return Valid;
|
|
|
|
|
|
|
|
|
|
for (auto *BB : BasicBlocks) {
|
|
|
|
|
std::set<BinaryBasicBlock *> Seen;
|
|
|
|
|
for (auto *LPBlock : BB->LandingPads) {
|
|
|
|
|
Valid &= Seen.count(LPBlock) == 0;
|
|
|
|
|
if (!Valid) {
|
2017-02-27 21:44:38 -08:00
|
|
|
errs() << "BOLT-WARNING: Duplicate LP seen " << LPBlock->getName()
|
|
|
|
|
<< "in " << *this << "\n";
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When an candiate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
requency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
Seen.insert(LPBlock);
|
|
|
|
|
auto count = LPBlock->Throwers.count(BB);
|
|
|
|
|
Valid &= (count == 1);
|
|
|
|
|
if (!Valid) {
|
2017-02-27 21:44:38 -08:00
|
|
|
errs() << "BOLT-WARNING: Inconsistent landing pad detected in "
|
|
|
|
|
<< *this << ": " << LPBlock->getName()
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When an candiate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
requency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
<< " is in LandingPads but not in " << BB->getName()
|
|
|
|
|
<< "->Throwers\n";
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return Valid;
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-16 09:49:04 -07:00
|
|
|
void BinaryFunction::fixBranches() {
|
|
|
|
|
auto &MIA = BC.MIA;
|
2016-08-29 21:11:22 -07:00
|
|
|
auto *Ctx = BC.Ctx.get();
|
2015-10-16 09:49:04 -07:00
|
|
|
|
|
|
|
|
for (unsigned I = 0, E = BasicBlocksLayout.size(); I != E; ++I) {
|
|
|
|
|
BinaryBasicBlock *BB = BasicBlocksLayout[I];
|
|
|
|
|
const MCSymbol *TBB = nullptr;
|
|
|
|
|
const MCSymbol *FBB = nullptr;
|
|
|
|
|
MCInst *CondBranch = nullptr;
|
|
|
|
|
MCInst *UncondBranch = nullptr;
|
2016-09-13 17:12:00 -07:00
|
|
|
if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
|
2015-10-16 09:49:04 -07:00
|
|
|
continue;
|
|
|
|
|
|
2016-08-29 21:11:22 -07:00
|
|
|
// We will create unconditional branch with correct destination if needed.
|
|
|
|
|
if (UncondBranch)
|
|
|
|
|
BB->eraseInstruction(UncondBranch);
|
2015-10-16 09:49:04 -07:00
|
|
|
|
2016-08-29 21:11:22 -07:00
|
|
|
// Basic block that follows the current one in the final layout.
|
|
|
|
|
const BinaryBasicBlock *NextBB = nullptr;
|
2016-09-13 17:12:00 -07:00
|
|
|
if (I + 1 != E && BB->isCold() == BasicBlocksLayout[I + 1]->isCold())
|
2016-08-29 21:11:22 -07:00
|
|
|
NextBB = BasicBlocksLayout[I + 1];
|
|
|
|
|
|
|
|
|
|
if (BB->succ_size() == 1) {
|
|
|
|
|
// __builtin_unreachable() could create a conditional branch that
|
|
|
|
|
// falls-through into the next function - hence the block will have only
|
2016-09-27 19:09:38 -07:00
|
|
|
// one valid successor. Since behaviour is undefined - we replace
|
2016-08-29 21:11:22 -07:00
|
|
|
// the conditional branch with an unconditional if required.
|
|
|
|
|
if (CondBranch)
|
|
|
|
|
BB->eraseInstruction(CondBranch);
|
|
|
|
|
if (BB->getSuccessor() == NextBB)
|
2015-10-16 09:49:04 -07:00
|
|
|
continue;
|
2016-08-29 21:11:22 -07:00
|
|
|
BB->addBranchInstruction(BB->getSuccessor());
|
|
|
|
|
} else if (BB->succ_size() == 2) {
|
|
|
|
|
assert(CondBranch && "conditional branch expected");
|
|
|
|
|
const auto *TSuccessor = BB->getConditionalSuccessor(true);
|
|
|
|
|
const auto *FSuccessor = BB->getConditionalSuccessor(false);
|
|
|
|
|
if (NextBB && NextBB == TSuccessor) {
|
|
|
|
|
std::swap(TSuccessor, FSuccessor);
|
|
|
|
|
MIA->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
|
|
|
|
|
BB->swapConditionalSuccessors();
|
|
|
|
|
} else {
|
|
|
|
|
MIA->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
|
2015-10-16 09:49:04 -07:00
|
|
|
}
|
2017-03-20 22:44:25 -07:00
|
|
|
if (TSuccessor == FSuccessor) {
|
|
|
|
|
BB->removeDuplicateConditionalSuccessor(CondBranch);
|
|
|
|
|
}
|
2016-08-29 21:11:22 -07:00
|
|
|
if (!NextBB || (NextBB != TSuccessor && NextBB != FSuccessor)) {
|
|
|
|
|
BB->addBranchInstruction(FSuccessor);
|
2016-07-23 12:50:34 -07:00
|
|
|
}
|
|
|
|
|
}
|
2016-08-29 21:11:22 -07:00
|
|
|
// Cases where the number of successors is 0 (block ends with a
|
|
|
|
|
// terminator) or more than 2 (switch table) don't require branch
|
|
|
|
|
// instruction adjustments.
|
2016-07-23 12:50:34 -07:00
|
|
|
}
|
2017-02-27 21:44:38 -08:00
|
|
|
assert(validateCFG() && "Invalid CFG detected after fixing branches");
|
2016-07-23 12:50:34 -07:00
|
|
|
}
|
|
|
|
|
|
2015-11-19 17:59:41 -08:00
|
|
|
void BinaryFunction::splitFunction() {
|
|
|
|
|
bool AllCold = true;
|
|
|
|
|
for (BinaryBasicBlock *BB : BasicBlocksLayout) {
|
|
|
|
|
auto ExecCount = BB->getExecutionCount();
|
|
|
|
|
if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
|
|
|
|
|
return;
|
|
|
|
|
if (ExecCount != 0)
|
|
|
|
|
AllCold = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (AllCold)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
assert(BasicBlocksLayout.size() > 0);
|
2016-01-22 16:45:39 -08:00
|
|
|
|
2016-04-20 15:31:11 -07:00
|
|
|
// Never outline the first basic block.
|
2016-09-13 17:12:00 -07:00
|
|
|
BasicBlocks.front()->setCanOutline(false);
|
2016-06-07 16:27:52 -07:00
|
|
|
for (auto BB : BasicBlocks) {
|
2016-09-13 17:12:00 -07:00
|
|
|
if (!BB->canOutline())
|
2016-04-20 15:31:11 -07:00
|
|
|
continue;
|
2016-06-07 16:27:52 -07:00
|
|
|
if (BB->getExecutionCount() != 0) {
|
2016-09-13 17:12:00 -07:00
|
|
|
BB->setCanOutline(false);
|
2016-04-20 15:31:11 -07:00
|
|
|
continue;
|
2016-01-22 16:45:39 -08:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
if (hasEHRanges() && !opts::SplitEH) {
|
2016-04-20 15:31:11 -07:00
|
|
|
// We cannot move landing pads (or rather entry points for landing
|
|
|
|
|
// pads).
|
2016-09-13 17:12:00 -07:00
|
|
|
if (BB->isLandingPad()) {
|
|
|
|
|
BB->setCanOutline(false);
|
2016-01-22 16:45:39 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
2016-04-20 15:31:11 -07:00
|
|
|
// We cannot move a block that can throw since exception-handling
|
|
|
|
|
// runtime cannot deal with split functions. However, if we can guarantee
|
|
|
|
|
// that the block never throws, it is safe to move the block to
|
|
|
|
|
// decrease the size of the function.
|
2016-06-07 16:27:52 -07:00
|
|
|
for (auto &Instr : *BB) {
|
2016-01-22 16:45:39 -08:00
|
|
|
if (BC.MIA->isInvoke(Instr)) {
|
2016-09-13 17:12:00 -07:00
|
|
|
BB->setCanOutline(false);
|
2016-01-22 16:45:39 -08:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-04-20 15:31:11 -07:00
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::AggressiveSplitting) {
|
2016-04-20 15:31:11 -07:00
|
|
|
// All blocks with 0 count that we can move go to the end of the function.
|
2016-09-27 19:09:38 -07:00
|
|
|
// Even if they were natural to cluster formation and were seen in-between
|
|
|
|
|
// hot basic blocks.
|
2016-01-22 16:45:39 -08:00
|
|
|
std::stable_sort(BasicBlocksLayout.begin(), BasicBlocksLayout.end(),
|
|
|
|
|
[&] (BinaryBasicBlock *A, BinaryBasicBlock *B) {
|
2016-01-26 16:03:58 -08:00
|
|
|
return A->canOutline() < B->canOutline();
|
2016-01-22 16:45:39 -08:00
|
|
|
});
|
2016-09-27 19:09:38 -07:00
|
|
|
} else if (hasEHRanges() && !opts::SplitEH) {
|
2016-04-20 15:31:11 -07:00
|
|
|
// Typically functions with exception handling have landing pads at the end.
|
|
|
|
|
// We cannot move beginning of landing pads, but we can move 0-count blocks
|
2016-09-27 19:09:38 -07:00
|
|
|
// comprising landing pads to the end and thus facilitate splitting.
|
2016-04-20 15:31:11 -07:00
|
|
|
auto FirstLP = BasicBlocksLayout.begin();
|
2016-09-13 17:12:00 -07:00
|
|
|
while ((*FirstLP)->isLandingPad())
|
2016-04-20 15:31:11 -07:00
|
|
|
++FirstLP;
|
|
|
|
|
|
|
|
|
|
std::stable_sort(FirstLP, BasicBlocksLayout.end(),
|
|
|
|
|
[&] (BinaryBasicBlock *A, BinaryBasicBlock *B) {
|
|
|
|
|
return A->canOutline() < B->canOutline();
|
|
|
|
|
});
|
|
|
|
|
}
|
2016-01-22 16:45:39 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Separate hot from cold starting from the bottom.
|
2016-04-20 15:31:11 -07:00
|
|
|
for (auto I = BasicBlocksLayout.rbegin(), E = BasicBlocksLayout.rend();
|
|
|
|
|
I != E; ++I) {
|
|
|
|
|
BinaryBasicBlock *BB = *I;
|
|
|
|
|
if (!BB->canOutline())
|
|
|
|
|
break;
|
2016-09-13 17:12:00 -07:00
|
|
|
BB->setIsCold(true);
|
2016-04-20 15:31:11 -07:00
|
|
|
IsSplit = true;
|
2015-11-19 17:59:41 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-19 22:00:29 -07:00
|
|
|
void BinaryFunction::propagateGnuArgsSizeInfo() {
|
|
|
|
|
assert(CurrentState == State::CFG && "unexpected function state");
|
|
|
|
|
|
|
|
|
|
if (!hasEHRanges() || !usesGnuArgsSize())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
// The current value of DW_CFA_GNU_args_size affects all following
|
2016-04-20 15:31:11 -07:00
|
|
|
// invoke instructions until the next CFI overrides it.
|
2016-04-19 22:00:29 -07:00
|
|
|
// It is important to iterate basic blocks in the original order when
|
|
|
|
|
// assigning the value.
|
|
|
|
|
uint64_t CurrentGnuArgsSize = 0;
|
2016-06-07 16:27:52 -07:00
|
|
|
for (auto BB : BasicBlocks) {
|
|
|
|
|
for (auto II = BB->begin(); II != BB->end(); ) {
|
2016-04-19 22:00:29 -07:00
|
|
|
auto &Instr = *II;
|
|
|
|
|
if (BC.MIA->isCFI(Instr)) {
|
|
|
|
|
auto CFI = getCFIFor(Instr);
|
|
|
|
|
if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) {
|
|
|
|
|
CurrentGnuArgsSize = CFI->getOffset();
|
|
|
|
|
// Delete DW_CFA_GNU_args_size instructions and only regenerate
|
|
|
|
|
// during the final code emission. The information is embedded
|
|
|
|
|
// inside call instructions.
|
2016-08-29 21:11:22 -07:00
|
|
|
II = BB->erasePseudoInstruction(II);
|
2016-09-27 19:09:38 -07:00
|
|
|
continue;
|
2016-04-19 22:00:29 -07:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
} else if (BC.MIA->isInvoke(Instr)) {
|
|
|
|
|
// Add the value of GNU_args_size as an extra operand to invokes.
|
|
|
|
|
BC.MIA->addGnuArgsSize(Instr, CurrentGnuArgsSize);
|
2016-04-19 22:00:29 -07:00
|
|
|
}
|
|
|
|
|
++II;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-03 11:35:41 -08:00
|
|
|
void BinaryFunction::postProcessBranches() {
|
|
|
|
|
if (!isSimple())
|
|
|
|
|
return;
|
|
|
|
|
for (auto *BB : BasicBlocksLayout) {
|
|
|
|
|
auto LastInstrRI = BB->getLastNonPseudo();
|
|
|
|
|
if (BB->succ_size() == 1) {
|
|
|
|
|
if (LastInstrRI != BB->rend() &&
|
|
|
|
|
BC.MIA->isConditionalBranch(*LastInstrRI)) {
|
|
|
|
|
// __builtin_unreachable() could create a conditional branch that
|
|
|
|
|
// falls-through into the next function - hence the block will have only
|
|
|
|
|
// one valid successor. Such behaviour is undefined and thus we remove
|
|
|
|
|
// the conditional branch while leaving a valid successor.
|
|
|
|
|
assert(BB == BasicBlocksLayout.back() && "last basic block expected");
|
|
|
|
|
BB->eraseInstruction(std::next(LastInstrRI.base()));
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in "
|
|
|
|
|
<< BB->getName() << " in function " << *this << '\n');
|
|
|
|
|
}
|
|
|
|
|
} else if (BB->succ_size() == 0) {
|
|
|
|
|
// Ignore unreachable basic blocks.
|
|
|
|
|
if (BB->pred_size() == 0 || BB->isLandingPad())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// If it's the basic block that does not end up with a terminator - we
|
|
|
|
|
// insert a return instruction unless it's a call instruction.
|
|
|
|
|
if (LastInstrRI == BB->rend()) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: at least one instruction expected in BB "
|
|
|
|
|
<< BB->getName() << " in function " << *this << '\n');
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (!BC.MIA->isTerminator(*LastInstrRI) &&
|
|
|
|
|
!BC.MIA->isCall(*LastInstrRI)) {
|
|
|
|
|
DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block "
|
|
|
|
|
<< BB->getName() << " in function " << *this << '\n');
|
|
|
|
|
MCInst ReturnInstr;
|
|
|
|
|
BC.MIA->createReturn(ReturnInstr);
|
|
|
|
|
BB->addInstruction(ReturnInstr);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert(validateCFG() && "invalid CFG");
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-09 11:36:55 -07:00
|
|
|
// Add the profile data of this function (function count, basic block counts
// and per-edge branch counts) to the corresponding entities of \p BF.
//
// The two functions are walked in lock-step; assertions check that blocks and
// successors at the same position have matching indices. Nothing is merged
// when this function has no valid profile.
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());

  // Since we are merging a valid profile, the new profile should be valid too.
  // It has either already been valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts.
  auto TargetBBIt = BF.begin();
  for (BinaryBasicBlock *SrcBB : BasicBlocks) {
    BinaryBasicBlock *TargetBB = &*TargetBBIt;
    assert(getIndex(SrcBB) == BF.getIndex(TargetBB));

    // Update basic block count.
    if (SrcBB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
      TargetBB->setExecutionCount(TargetBB->getKnownExecutionCount() +
                                  SrcBB->getExecutionCount());

    // Update edge count for successors of this basic block.
    auto TargetSuccIt = TargetBB->succ_begin();
    auto TargetInfoIt = TargetBB->branch_info_begin();
    auto SrcInfoIt = SrcBB->branch_info_begin();
    for (const auto *SrcSucc : SrcBB->successors()) {
      (void)SrcSucc;
      assert(getIndex(SrcSucc) == BF.getIndex(*TargetSuccIt));

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(SrcInfoIt->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(TargetInfoIt->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      TargetInfoIt->Count += SrcInfoIt->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      const bool AnyInferred =
          SrcInfoIt->MispredictedCount == BinaryBasicBlock::COUNT_INFERRED ||
          TargetInfoIt->MispredictedCount == BinaryBasicBlock::COUNT_INFERRED;
      if (AnyInferred)
        TargetInfoIt->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      else
        TargetInfoIt->MispredictedCount += SrcInfoIt->MispredictedCount;

      ++TargetSuccIt;
      ++SrcInfoIt;
      ++TargetInfoIt;
    }
    assert(TargetSuccIt == TargetBB->succ_end());

    ++TargetBBIt;
  }
  assert(TargetBBIt == BF.end());
}
|
|
|
|
|
|
2017-05-25 10:29:38 -07:00
|
|
|
// Return the basic blocks of this function in the order they are visited by
// an iterative depth-first traversal that starts from the entry points.
//
// Side effect: the layout index of every block in the layout is first reset
// to BinaryBasicBlock::InvalidIndex; each visited block then receives its
// position in the returned order as its layout index. Blocks that are never
// reached keep InvalidIndex.
BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
  BasicBlockOrderType Order;
  unsigned NextIndex = 0;
  std::stack<BinaryBasicBlock *> Worklist;

  // Push entry points to the stack in reverse order.
  //
  // NB: we rely on the original order of entries to match.
  auto RevIt = layout_rbegin();
  while (RevIt != layout_rend()) {
    auto *Block = *RevIt;
    if (Block->isEntryPoint())
      Worklist.push(Block);
    // InvalidIndex doubles as the "not visited yet" marker below.
    Block->setLayoutIndex(BinaryBasicBlock::InvalidIndex);
    ++RevIt;
  }

  while (!Worklist.empty()) {
    auto *Block = Worklist.top();
    Worklist.pop();

    // Only blocks that have not been assigned an index yet are processed.
    if (Block->getLayoutIndex() == BinaryBasicBlock::InvalidIndex) {
      Block->setLayoutIndex(NextIndex++);
      Order.push_back(Block);

      // Landing pads are pushed before regular successors, so regular
      // successors are popped (and therefore visited) first.
      for (auto *LPBlock : Block->landing_pads())
        Worklist.push(LPBlock);

      for (auto *SuccBlock : Block->successors())
        Worklist.push(SuccBlock);
    }
  }

  return Order;
}
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
bool BinaryFunction::isIdenticalWith(const BinaryFunction &OtherBF,
|
|
|
|
|
bool IgnoreSymbols,
|
|
|
|
|
bool UseDFS) const {
|
2017-02-24 21:59:33 -08:00
|
|
|
assert(hasCFG() && OtherBF.hasCFG() && "both functions should have CFG");
|
2016-06-09 11:36:55 -07:00
|
|
|
|
|
|
|
|
// Compare the two functions, one basic block at a time.
|
|
|
|
|
// Currently we require two identical basic blocks to have identical
|
|
|
|
|
// instruction sequences and the same index in their corresponding
|
|
|
|
|
// functions. The latter is important for CFG equality.
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
if (layout_size() != OtherBF.layout_size())
|
|
|
|
|
return false;
|
2016-06-09 11:36:55 -07:00
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
// Comparing multi-entry functions could be non-trivial.
|
|
|
|
|
if (isMultiEntry() || OtherBF.isMultiEntry())
|
2016-06-09 11:36:55 -07:00
|
|
|
return false;
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
// Process both functions in either DFS or existing order.
|
|
|
|
|
const auto &Order = UseDFS ? dfs() : BasicBlocksLayout;
|
|
|
|
|
const auto &OtherOrder = UseDFS ? OtherBF.dfs() : OtherBF.BasicBlocksLayout;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
auto BBI = OtherOrder.begin();
|
|
|
|
|
for (const auto *BB : Order) {
|
2016-09-27 19:09:38 -07:00
|
|
|
const auto *OtherBB = *BBI;
|
2016-06-09 11:36:55 -07:00
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
|
|
|
|
|
return false;
|
|
|
|
|
|
2016-06-09 11:36:55 -07:00
|
|
|
// Compare successor basic blocks.
|
2016-12-21 17:13:56 -08:00
|
|
|
// NOTE: the comparison for jump tables is only partially verified here.
|
2016-09-27 19:09:38 -07:00
|
|
|
if (BB->succ_size() != OtherBB->succ_size())
|
2016-06-09 11:36:55 -07:00
|
|
|
return false;
|
2016-12-21 17:13:56 -08:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
auto SuccBBI = OtherBB->succ_begin();
|
|
|
|
|
for (const auto *SuccBB : BB->successors()) {
|
|
|
|
|
const auto *SuccOtherBB = *SuccBBI;
|
|
|
|
|
if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
|
2016-06-09 11:36:55 -07:00
|
|
|
return false;
|
|
|
|
|
++SuccBBI;
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
// Compare all instructions including pseudos.
|
2016-06-09 11:36:55 -07:00
|
|
|
auto I = BB->begin(), E = BB->end();
|
2016-09-27 19:09:38 -07:00
|
|
|
auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
|
2016-06-09 11:36:55 -07:00
|
|
|
while (I != E && OtherI != OtherE) {
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
bool Identical;
|
|
|
|
|
if (IgnoreSymbols) {
|
|
|
|
|
Identical =
|
|
|
|
|
isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, OtherBF,
|
|
|
|
|
[](const MCSymbol *A, const MCSymbol *B) {
|
|
|
|
|
return true;
|
|
|
|
|
});
|
|
|
|
|
} else {
|
|
|
|
|
// Compare symbols.
|
|
|
|
|
auto AreSymbolsIdentical = [&] (const MCSymbol *A, const MCSymbol *B) {
|
|
|
|
|
if (A == B)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
// All local symbols are considered identical since they affect a
|
|
|
|
|
// control flow and we check the control flow separately.
|
|
|
|
|
// If a local symbol is escaped, then the function (potentially) has
|
|
|
|
|
// multiple entry points and we exclude such functions from
|
|
|
|
|
// comparison.
|
|
|
|
|
if (A->isTemporary() && B->isTemporary())
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
// Compare symbols as functions.
|
|
|
|
|
const auto *FunctionA = BC.getFunctionForSymbol(A);
|
|
|
|
|
const auto *FunctionB = BC.getFunctionForSymbol(B);
|
|
|
|
|
if (FunctionA && FunctionB) {
|
|
|
|
|
// Self-referencing functions and recursive calls.
|
|
|
|
|
if (FunctionA == this && FunctionB == &OtherBF)
|
|
|
|
|
return true;
|
|
|
|
|
return FunctionA == FunctionB;
|
|
|
|
|
}
|
2016-06-09 11:36:55 -07:00
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
// Check if symbols are jump tables.
|
|
|
|
|
auto SIA = BC.GlobalSymbols.find(A->getName());
|
|
|
|
|
if (SIA == BC.GlobalSymbols.end())
|
|
|
|
|
return false;
|
|
|
|
|
auto SIB = BC.GlobalSymbols.find(B->getName());
|
|
|
|
|
if (SIB == BC.GlobalSymbols.end())
|
|
|
|
|
return false;
|
2016-06-09 11:36:55 -07:00
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
assert((SIA->second != SIB->second) &&
|
|
|
|
|
"different symbols should not have the same value");
|
2016-06-09 11:36:55 -07:00
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
const auto *JumpTableA = getJumpTableContainingAddress(SIA->second);
|
|
|
|
|
if (!JumpTableA)
|
|
|
|
|
return false;
|
|
|
|
|
const auto *JumpTableB =
|
|
|
|
|
OtherBF.getJumpTableContainingAddress(SIB->second);
|
|
|
|
|
if (!JumpTableB)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if ((SIA->second - JumpTableA->Address) !=
|
|
|
|
|
(SIB->second - JumpTableB->Address))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return equalJumpTables(JumpTableA, JumpTableB, OtherBF);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Identical =
|
|
|
|
|
isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, OtherBF,
|
|
|
|
|
AreSymbolsIdentical);
|
2016-06-09 11:36:55 -07:00
|
|
|
}
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
if (!Identical)
|
2016-06-09 11:36:55 -07:00
|
|
|
return false;
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
++I; ++OtherI;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// One of the identical blocks may have a trailing unconditional jump that
|
|
|
|
|
// is ignored for CFG purposes.
|
|
|
|
|
auto *TrailingInstr = (I != E ? &(*I)
|
|
|
|
|
: (OtherI != OtherE ? &(*OtherI) : 0));
|
|
|
|
|
if (TrailingInstr && !BC.MIA->isUnconditionalBranch(*TrailingInstr)) {
|
|
|
|
|
return false;
|
2016-06-09 11:36:55 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
++BBI;
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool BinaryFunction::equalJumpTables(const JumpTable *JumpTableA,
|
|
|
|
|
const JumpTable *JumpTableB,
|
|
|
|
|
const BinaryFunction &BFB) const {
|
|
|
|
|
if (JumpTableA->EntrySize != JumpTableB->EntrySize)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (JumpTableA->Type != JumpTableB->Type)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (JumpTableA->getSize() != JumpTableB->getSize())
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
for (uint64_t Index = 0; Index < JumpTableA->Entries.size(); ++Index) {
|
|
|
|
|
const auto *LabelA = JumpTableA->Entries[Index];
|
|
|
|
|
const auto *LabelB = JumpTableB->Entries[Index];
|
|
|
|
|
|
|
|
|
|
const auto *TargetA = getBasicBlockForLabel(LabelA);
|
|
|
|
|
const auto *TargetB = BFB.getBasicBlockForLabel(LabelB);
|
|
|
|
|
|
|
|
|
|
if (!TargetA || !TargetB) {
|
|
|
|
|
assert((TargetA || LabelA == getFunctionEndLabel()) &&
|
|
|
|
|
"no target basic block found");
|
|
|
|
|
assert((TargetB || LabelB == BFB.getFunctionEndLabel()) &&
|
|
|
|
|
"no target basic block found");
|
|
|
|
|
|
|
|
|
|
if (TargetA != TargetB)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(TargetA && TargetB && "cannot locate target block(s)");
|
|
|
|
|
|
|
|
|
|
if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
|
|
|
|
|
return false;
|
2016-06-09 11:36:55 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
std::size_t BinaryFunction::hash(bool Recompute, bool UseDFS) const {
|
2017-02-24 21:59:33 -08:00
|
|
|
assert(hasCFG() && "function is expected to have CFG");
|
2016-06-09 11:36:55 -07:00
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
if (!Recompute)
|
|
|
|
|
return Hash;
|
|
|
|
|
|
|
|
|
|
const auto &Order = UseDFS ? dfs() : BasicBlocksLayout;
|
|
|
|
|
|
2016-06-09 11:36:55 -07:00
|
|
|
// The hash is computed by creating a string of all the opcodes
|
|
|
|
|
// in the function and hashing that string with std::hash.
|
|
|
|
|
std::string Opcodes;
|
2016-12-21 17:13:56 -08:00
|
|
|
for (const auto *BB : Order) {
|
2016-09-27 19:09:38 -07:00
|
|
|
for (const auto &Inst : *BB) {
|
2016-06-09 11:36:55 -07:00
|
|
|
unsigned Opcode = Inst.getOpcode();
|
|
|
|
|
|
|
|
|
|
if (BC.MII->get(Opcode).isPseudo())
|
|
|
|
|
continue;
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
// Ignore unconditional jumps since we check CFG consistency by processing
|
|
|
|
|
// basic blocks in order and do not rely on branches to be in-sync with
|
|
|
|
|
// CFG. Note that we still use condition code of conditional jumps.
|
|
|
|
|
if (BC.MIA->isUnconditionalBranch(Inst))
|
2016-09-27 19:09:38 -07:00
|
|
|
continue;
|
|
|
|
|
|
2016-06-09 11:36:55 -07:00
|
|
|
if (Opcode == 0) {
|
|
|
|
|
Opcodes.push_back(0);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (Opcode) {
|
|
|
|
|
uint8_t LSB = Opcode & 0xff;
|
|
|
|
|
Opcodes.push_back(LSB);
|
|
|
|
|
Opcode = Opcode >> 8;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
return Hash = std::hash<std::string>{}(Opcodes);
|
2016-06-09 11:36:55 -07:00
|
|
|
}
|
|
|
|
|
|
2016-07-23 12:50:34 -07:00
|
|
|
void BinaryFunction::insertBasicBlocks(
|
|
|
|
|
BinaryBasicBlock *Start,
|
2016-07-13 18:57:40 -07:00
|
|
|
std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
|
2016-09-07 18:59:23 -07:00
|
|
|
const bool UpdateLayout,
|
|
|
|
|
const bool UpdateCFIState) {
|
2016-07-23 12:50:34 -07:00
|
|
|
const auto StartIndex = getIndex(Start);
|
|
|
|
|
const auto NumNewBlocks = NewBBs.size();
|
|
|
|
|
|
|
|
|
|
BasicBlocks.insert(BasicBlocks.begin() + StartIndex + 1,
|
|
|
|
|
NumNewBlocks,
|
|
|
|
|
nullptr);
|
|
|
|
|
|
|
|
|
|
auto I = StartIndex + 1;
|
|
|
|
|
for (auto &BB : NewBBs) {
|
|
|
|
|
assert(!BasicBlocks[I]);
|
|
|
|
|
BasicBlocks[I++] = BB.release();
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-07 18:59:23 -07:00
|
|
|
updateBBIndices(StartIndex);
|
|
|
|
|
|
|
|
|
|
recomputeLandingPads(StartIndex, NumNewBlocks + 1);
|
|
|
|
|
|
|
|
|
|
// Make sure the basic blocks are sorted properly.
|
|
|
|
|
assert(std::is_sorted(begin(), end()));
|
|
|
|
|
|
|
|
|
|
if (UpdateLayout) {
|
|
|
|
|
updateLayout(Start, NumNewBlocks);
|
2016-07-23 12:50:34 -07:00
|
|
|
}
|
|
|
|
|
|
2016-07-13 18:57:40 -07:00
|
|
|
if (UpdateCFIState) {
|
2016-09-07 18:59:23 -07:00
|
|
|
updateCFIState(Start, NumNewBlocks);
|
2016-07-13 18:57:40 -07:00
|
|
|
}
|
2016-09-07 18:59:23 -07:00
|
|
|
}
|
2016-07-23 12:50:34 -07:00
|
|
|
|
2016-09-07 18:59:23 -07:00
|
|
|
/// Make the stored Index of every block from position \p StartIndex to the
/// end of BasicBlocks equal to its position in that list.
void BinaryFunction::updateBBIndices(const unsigned StartIndex) {
  auto Position = StartIndex;
  while (Position < BasicBlocks.size()) {
    BasicBlocks[Position]->Index = Position;
    ++Position;
  }
}
|
2016-07-23 12:50:34 -07:00
|
|
|
|
2016-09-07 18:59:23 -07:00
|
|
|
/// Assign the CFI state that \p Start has at exit to the \p NumNewBlocks
/// blocks that directly follow \p Start in BasicBlocks.
void BinaryFunction::updateCFIState(BinaryBasicBlock *Start,
                                    const unsigned NumNewBlocks) {
  assert(TailCallTerminatedBlocks.empty());

  const auto ExitState = Start->getCFIStateAtExit();
  const auto FirstNewIndex = getIndex(Start) + 1;

  for (unsigned Done = 0; Done != NumNewBlocks; ++Done) {
    auto *NewBlock = BasicBlocks[FirstNewIndex + Done];
    NewBlock->setCFIState(ExitState);
  }
}
|
|
|
|
|
|
|
|
|
|
/// Insert into the layout, immediately after \p Start, the \p NumNewBlocks
/// blocks that follow \p Start in BasicBlocks, then refresh layout indices.
///
/// \param Start         block that must already be present in the layout.
/// \param NumNewBlocks  number of blocks following \p Start in BasicBlocks
///                      that are to be added to the layout.
void BinaryFunction::updateLayout(BinaryBasicBlock* Start,
                                  const unsigned NumNewBlocks) {
  // Insert new blocks in the layout immediately after Start.
  auto Pos = std::find(layout_begin(), layout_end(), Start);
  assert(Pos != layout_end());

  // Use iterator arithmetic rather than &BasicBlocks[...]: when the new
  // blocks are the last ones in the list, the end of the range is one past
  // the last element, which must not be reached through operator[].
  const auto Begin = BasicBlocks.begin() + getIndex(Start) + 1;
  const auto End = Begin + NumNewBlocks;
  BasicBlocksLayout.insert(Pos + 1, Begin, End);
  updateLayoutIndices();
}
|
|
|
|
|
|
|
|
|
|
/// Rebuild the layout from scratch: start from the order of BasicBlocks,
/// run modifyLayout() with the given parameters, and refresh the layout
/// indices afterwards.
void BinaryFunction::updateLayout(LayoutType Type,
                                  bool MinBranchClusters,
                                  bool Split) {
  // Recompute layout with original parameters.
  BasicBlocksLayout.assign(BasicBlocks.begin(), BasicBlocks.end());
  modifyLayout(Type, MinBranchClusters, Split);

  updateLayoutIndices();
}
|
|
|
|
|
|
2017-05-01 16:52:54 -07:00
|
|
|
/// If \p BB ends in an indirect branch, rewrite the entries of the jump
/// table it uses so that entries equal to the label of \p OldDest point to
/// the label of \p NewDest instead.
///
/// \returns false when the last non-pseudo instruction of \p BB is missing
///          or is not an indirect branch; true otherwise.
bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
                                             BinaryBasicBlock *OldDest,
                                             BinaryBasicBlock *NewDest) {
  auto *LastInstr = BB->getLastNonPseudoInstr();
  if (!LastInstr)
    return false;
  if (!BC.MIA->isIndirectBranch(*LastInstr))
    return false;

  auto TableAddress = BC.MIA->getJumpTable(*LastInstr);
  assert(TableAddress && "Invalid jump table address");

  auto *Table = getJumpTableContainingAddress(TableAddress);
  assert(Table && "No jump table structure for this indirect branch");

  const bool Patched = Table->replaceDestination(TableAddress,
                                                 OldDest->getLabel(),
                                                 NewDest->getLabel());
  assert(Patched && "Invalid entry to be replaced in jump table");
  // Only consumed by the assertion above.
  (void)Patched;
  return true;
}
|
|
|
|
|
|
|
|
|
|
/// Split the CFG edge \p From -> \p To by routing it through a freshly
/// created basic block, which is inserted right after \p From.
///
/// The new block inherits the edge's count and misprediction count, uses
/// the edge count as its execution count, and copies the cold flag of
/// \p From.
///
/// \returns the newly created intermediate block.
BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
                                            BinaryBasicBlock *To) {
  // Create intermediate BB
  MCSymbol *Label = BC.Ctx->createTempSymbol("SplitEdge", true);
  auto Middle = createBasicBlock(0, Label);
  auto *MiddlePtr = Middle.get();

  // Update "From" BB
  // Locate the edge and its branch info, advancing both in lockstep.
  auto SuccI = From->succ_begin();
  auto InfoI = From->branch_info_begin();
  while (SuccI != From->succ_end() && *SuccI != To) {
    ++SuccI;
    ++InfoI;
  }
  assert(SuccI != From->succ_end() && "Invalid CFG edge in splitEdge!");

  const uint64_t EdgeCount = InfoI->Count;
  const uint64_t EdgeMispreds = InfoI->MispredictedCount;
  replaceJumpTableEntryIn(From, To, MiddlePtr);
  From->replaceSuccessor(To, MiddlePtr, EdgeCount, EdgeMispreds);

  MiddlePtr->addSuccessor(To, EdgeCount, EdgeMispreds);
  MiddlePtr->setExecutionCount(EdgeCount);
  MiddlePtr->setIsCold(From->isCold());

  // Update CFI and BB layout with new intermediate BB
  std::vector<std::unique_ptr<BinaryBasicBlock>> Inserted;
  Inserted.emplace_back(std::move(Middle));
  insertBasicBlocks(From, std::move(Inserted), true, true);
  return MiddlePtr;
}
|
|
|
|
|
|
2016-09-29 11:19:06 -07:00
|
|
|
bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
|
|
|
|
|
uint64_t SymbolSize) const {
|
|
|
|
|
// Some symbols are tolerated inside function bodies, others are not.
|
|
|
|
|
// The real function boundaries may not be known at this point.
|
|
|
|
|
|
|
|
|
|
// It's okay to have a zero-sized symbol in the middle of non-zero-sized
|
|
|
|
|
// function.
|
|
|
|
|
if (SymbolSize == 0 && containsAddress(*Symbol.getAddress()))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (Symbol.getType() != SymbolRef::ST_Unknown)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (Symbol.getFlags() & SymbolRef::SF_Global)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-03 16:24:26 -07:00
|
|
|
/// Set the current DWARF location in the MC context from the line-table row
/// referenced by \p NewLoc.
///
/// \param NewLoc   location carrying a DebugLineTableRowRef.
/// \param PrevLoc  location for which line info was emitted last.
/// \returns \p PrevLoc when nothing was emitted (null row reference, or
///          \p NewLoc has the same pointer as \p PrevLoc); \p NewLoc
///          otherwise.
SMLoc BinaryFunction::emitLineInfo(SMLoc NewLoc, SMLoc PrevLoc) const {
  auto *FunctionCU = UnitLineTable.first;
  const auto *FunctionLineTable = UnitLineTable.second;
  assert(FunctionCU && "cannot emit line info for function without CU");

  auto RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc);

  // Check if no new line info needs to be emitted.
  if (RowReference == DebugLineTableRowRef::NULL_ROW)
    return PrevLoc;
  if (NewLoc.getPointer() == PrevLoc.getPointer())
    return PrevLoc;

  // Row references are 1-based; the line table rows are indexed from 0.
  const auto RowSlot = RowReference.RowIndex - 1;

  // If the CU id from the current instruction location does not
  // match the CU id from the current function, it means that we
  // have come across some inlined code. We must look up the CU
  // for the instruction's original function and get the line table
  // from that.
  const auto FunctionUnitIndex = FunctionCU->getOffset();
  const auto CurrentUnitIndex = RowReference.DwCompileUnitIndex;

  const auto *LineTable = FunctionLineTable;
  unsigned Filenum = 0;
  if (CurrentUnitIndex != FunctionUnitIndex) {
    LineTable = BC.DwCtx->getLineTableForUnit(
        BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex));
    // Add filename from the inlined function to the current CU.
    Filenum =
      BC.addDebugFilenameToUnit(FunctionUnitIndex, CurrentUnitIndex,
                                LineTable->Rows[RowSlot].File);
  }

  const auto &Row = LineTable->Rows[RowSlot];
  // A zero file number means none was assigned above; use the row's own.
  if (!Filenum)
    Filenum = Row.File;

  const auto Flags =
      (DWARF2_FLAG_IS_STMT * Row.IsStmt) |
      (DWARF2_FLAG_BASIC_BLOCK * Row.BasicBlock) |
      (DWARF2_FLAG_PROLOGUE_END * Row.PrologueEnd) |
      (DWARF2_FLAG_EPILOGUE_BEGIN * Row.EpilogueBegin);

  BC.Ctx->setCurrentDwarfLoc(Filenum, Row.Line, Row.Column, Flags, Row.Isa,
                             Row.Discriminator);
  BC.Ctx->setDwarfCompileUnitID(FunctionUnitIndex);

  return NewLoc;
}
|
|
|
|
|
|
2016-06-07 16:27:52 -07:00
|
|
|
/// Free every basic block held by this function: the ones in BasicBlocks
/// first, then the ones in DeletedBasicBlocks.
BinaryFunction::~BinaryFunction() {
  for (auto *LiveBlock : BasicBlocks)
    delete LiveBlock;

  for (auto *RemovedBlock : DeletedBasicBlocks)
    delete RemovedBlock;
}
|
|
|
|
|
|
2016-09-14 16:45:40 -07:00
|
|
|
/// Emit (or update in place) every jump table of this function.
///
/// With basic jump table support in relocation mode the original table is
/// updated through JumpTable::updateOriginal(). Otherwise the table is
/// emitted via \p Streamer: into a per-table ".local.JUMP_TABLEat0x<addr>"
/// section in basic mode, or into the read-only hot/cold sections
/// otherwise. Tables are printed first when opts::PrintJumpTables is set.
void BinaryFunction::emitJumpTables(MCStreamer *Streamer) {
  if (JumpTables.empty())
    return;

  const bool ShouldPrint = opts::PrintJumpTables;
  if (ShouldPrint)
    outs() << "BOLT-INFO: jump tables for function " << *this << ":\n";

  for (auto &AddressAndTable : JumpTables) {
    auto &Table = AddressAndTable.second;
    if (ShouldPrint)
      Table.print(outs());

    const bool BasicMode = opts::JumpTables == JTS_BASIC;
    if (BasicMode && opts::Relocs) {
      Table.updateOriginal(BC);
      continue;
    }

    MCSection *HotSection = nullptr;
    MCSection *ColdSection = nullptr;
    if (BasicMode) {
      // One dedicated section per table; hot and cold are the same.
      Table.SectionName =
          ".local.JUMP_TABLEat0x" + Twine::utohexstr(Table.Address).str();
      HotSection = BC.Ctx->getELFSection(Table.SectionName,
                                         ELF::SHT_PROGBITS,
                                         ELF::SHF_ALLOC);
      ColdSection = HotSection;
    } else {
      HotSection = BC.MOFI->getReadOnlySection();
      ColdSection = BC.MOFI->getReadOnlyColdSection();
    }
    Table.emit(Streamer, HotSection, ColdSection);
  }
}
|
|
|
|
|
|
2017-03-08 19:58:33 -08:00
|
|
|
std::pair<size_t, size_t>
|
|
|
|
|
BinaryFunction::JumpTable::getEntriesForAddress(const uint64_t Addr) const {
|
|
|
|
|
const uint64_t InstOffset = Addr - Address;
|
|
|
|
|
size_t StartIndex = 0, EndIndex = 0;
|
|
|
|
|
uint64_t Offset = 0;
|
|
|
|
|
|
|
|
|
|
for (size_t I = 0; I < Entries.size(); ++I) {
|
|
|
|
|
auto LI = Labels.find(Offset);
|
|
|
|
|
if (LI != Labels.end()) {
|
|
|
|
|
const auto NextLI = std::next(LI);
|
|
|
|
|
const auto NextOffset =
|
|
|
|
|
NextLI == Labels.end() ? getSize() : NextLI->first;
|
|
|
|
|
if (InstOffset >= LI->first && InstOffset < NextOffset) {
|
|
|
|
|
StartIndex = I;
|
|
|
|
|
EndIndex = I;
|
|
|
|
|
while (Offset < NextOffset) {
|
|
|
|
|
++EndIndex;
|
|
|
|
|
Offset += EntrySize;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Offset += EntrySize;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return std::make_pair(StartIndex, EndIndex);
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-01 16:52:54 -07:00
|
|
|
bool BinaryFunction::JumpTable::replaceDestination(uint64_t JTAddress,
|
|
|
|
|
const MCSymbol *OldDest,
|
|
|
|
|
MCSymbol *NewDest) {
|
|
|
|
|
bool Patched{false};
|
|
|
|
|
const auto Range = getEntriesForAddress(JTAddress);
|
|
|
|
|
for (auto I = &Entries[Range.first], E = &Entries[Range.second];
|
|
|
|
|
I != E; ++I) {
|
|
|
|
|
auto &Entry = *I;
|
|
|
|
|
if (Entry == OldDest) {
|
|
|
|
|
Patched = true;
|
|
|
|
|
Entry = NewDest;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return Patched;
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
/// Register one relocation per entry against the section that contains the
/// jump table at its original address.
///
/// JTT_NORMAL tables get R_X86_64_64 relocations with a zero addend; other
/// tables get R_X86_64_PC32 relocations whose addend is the entry's offset
/// from the start of the table.
void BinaryFunction::JumpTable::updateOriginal(BinaryContext &BC) {
  // In non-relocation mode we have to emit jump tables in local sections.
  // This way we only overwrite them when a corresponding function is
  // overwritten.
  assert(opts::Relocs && "relocation mode expected");

  auto SectionOrError = BC.getSectionForAddress(Address);
  assert(SectionOrError && "section not found for jump table");
  auto Section = SectionOrError.get();

  // Offset of the table inside its section; also the base for PIC addends.
  const uint64_t TableOffset = Address - Section.getAddress();

  StringRef SectionName;
  Section.getName(SectionName);

  const bool IsNormal = (Type == JTT_NORMAL);
  const auto RelType = IsNormal ? ELF::R_X86_64_64
                                : ELF::R_X86_64_PC32;

  uint64_t Offset = TableOffset;
  for (auto *Entry : Entries) {
    const uint64_t RelAddend = IsNormal ? 0 : Offset - TableOffset;
    DEBUG(dbgs() << "adding relocation to section " << SectionName
                 << " at offset " << Twine::utohexstr(Offset) << " for symbol "
                 << Entry->getName() << " with addend "
                 << Twine::utohexstr(RelAddend) << '\n');
    BC.addSectionRelocation(Section, Offset, Entry, RelType, RelAddend);
    Offset += EntrySize;
  }
}
|
|
|
|
|
|
2016-09-16 15:54:32 -07:00
|
|
|
/// Emit the jump table through \p Streamer.
///
/// When opts::JumpTables is above JTS_SPLIT and per-entry counts are
/// available, every labelled sub-table is placed individually: into
/// \p HotSection if the sum of its entry counts is positive, into
/// \p ColdSection otherwise. In all other cases the whole table goes into
/// \p HotSection when Count is positive and into \p ColdSection otherwise.
///
/// JTT_NORMAL entries are emitted as symbol values; other (PIC) entries as
/// the difference between the entry symbol and the most recently emitted
/// table label.
///
/// \returns the number of bytes covered by the emitted entries.
uint64_t BinaryFunction::JumpTable::emit(MCStreamer *Streamer,
                                         MCSection *HotSection,
                                         MCSection *ColdSection) {
  // Pre-process entries for aggressive splitting.
  // Each label represents a separate switch table and gets its own count
  // determining its destination.
  std::map<MCSymbol *, uint64_t> LabelCounts;
  if (opts::JumpTables > JTS_SPLIT && !Counts.empty()) {
    // NOTE(review): this presumes a label exists at offset 0; operator[]
    // would otherwise insert a null label into Labels - confirm.
    MCSymbol *CurrentLabel = Labels[0];
    uint64_t CurrentLabelCount = 0;
    for (unsigned Index = 0; Index < Entries.size(); ++Index) {
      auto LI = Labels.find(Index * EntrySize);
      if (LI != Labels.end()) {
        // A new sub-table starts here: flush the running total.
        LabelCounts[CurrentLabel] = CurrentLabelCount;
        CurrentLabel = LI->second;
        CurrentLabelCount = 0;
      }
      CurrentLabelCount += Counts[Index].Count;
    }
    LabelCounts[CurrentLabel] = CurrentLabelCount;
  } else {
    Streamer->SwitchSection(Count > 0 ? HotSection : ColdSection);
    Streamer->EmitValueToAlignment(EntrySize);
  }

  MCSymbol *LastLabel = nullptr;
  uint64_t Offset = 0;
  for (auto *Entry : Entries) {
    auto LI = Labels.find(Offset);
    if (LI != Labels.end()) {
      DEBUG(dbgs() << "BOLT-DEBUG: emitting jump table "
                   << LI->second->getName() << " (originally was at address 0x"
                   << Twine::utohexstr(Address + Offset)
                   << (Offset ? ") as part of larger jump table\n" : ")\n"));
      if (!LabelCounts.empty()) {
        const uint64_t LabelCount = LabelCounts[LI->second];
        DEBUG(dbgs() << "BOLT-DEBUG: jump table count: "
                     << LabelCount << '\n');
        Streamer->SwitchSection(LabelCount > 0 ? HotSection : ColdSection);
        Streamer->EmitValueToAlignment(EntrySize);
      }
      Streamer->EmitLabel(LI->second);
      LastLabel = LI->second;
    }
    if (Type == JTT_NORMAL) {
      Streamer->EmitSymbolValue(Entry, EntrySize);
    } else { // JTT_PIC
      // PIC entries are relative to the label of the enclosing table, so a
      // label must have been emitted before the first entry.
      assert(LastLabel && "PIC jump table entry emitted before any label");
      auto JT = MCSymbolRefExpr::create(LastLabel, Streamer->getContext());
      auto E = MCSymbolRefExpr::create(Entry, Streamer->getContext());
      auto Value = MCBinaryExpr::createSub(E, JT, Streamer->getContext());
      Streamer->EmitValue(Value, EntrySize);
    }
    Offset += EntrySize;
  }

  return Offset;
}
|
|
|
|
|
|
|
|
|
|
/// Print the jump table to \p OS: a header line for every labelled
/// sub-table, followed by one line per entry showing its offset, target
/// symbol and, when counts are available, mispredictions/count.
void BinaryFunction::JumpTable::print(raw_ostream &OS) const {
  uint64_t Offset = 0;
  for (const auto *Entry : Entries) {
    const auto LabelI = Labels.find(Offset);
    if (LabelI != Labels.end()) {
      OS << "Jump Table " << LabelI->second->getName() << " at @0x"
         << Twine::utohexstr(Address+Offset);
      // Only the label at offset zero reports the total count.
      if (Offset)
        OS << " (possibly part of larger jump table):\n";
      else
        OS << " with total count of " << Count << ":\n";
    }

    OS << format(" 0x%04" PRIx64 " : ", Offset) << Entry->getName();
    if (!Counts.empty()) {
      const auto &EntryCounts = Counts[Offset / EntrySize];
      OS << " : " << EntryCounts.Mispreds
         << "/" << EntryCounts.Count;
    }
    OS << '\n';

    Offset += EntrySize;
  }
  OS << "\n\n";
}
|
|
|
|
|
|
2016-05-26 10:58:01 -07:00
|
|
|
void BinaryFunction::calculateLoopInfo() {
|
|
|
|
|
// Discover loops.
|
|
|
|
|
BinaryDominatorTree DomTree(false);
|
|
|
|
|
DomTree.recalculate<BinaryFunction>(*this);
|
|
|
|
|
BLI.reset(new BinaryLoopInfo());
|
|
|
|
|
BLI->analyze(DomTree);
|
|
|
|
|
|
|
|
|
|
// Traverse discovered loops and add depth and profile information.
|
|
|
|
|
std::stack<BinaryLoop *> St;
|
|
|
|
|
for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) {
|
|
|
|
|
St.push(*I);
|
|
|
|
|
++BLI->OuterLoops;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (!St.empty()) {
|
|
|
|
|
BinaryLoop *L = St.top();
|
|
|
|
|
St.pop();
|
|
|
|
|
++BLI->TotalLoops;
|
|
|
|
|
BLI->MaximumDepth = std::max(L->getLoopDepth(), BLI->MaximumDepth);
|
|
|
|
|
|
|
|
|
|
// Add nested loops in the stack.
|
|
|
|
|
for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
|
|
|
|
|
St.push(*I);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Skip if no valid profile is found.
|
|
|
|
|
if (!hasValidProfile()) {
|
|
|
|
|
L->EntryCount = COUNT_NO_PROFILE;
|
|
|
|
|
L->ExitCount = COUNT_NO_PROFILE;
|
|
|
|
|
L->TotalBackEdgeCount = COUNT_NO_PROFILE;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Compute back edge count.
|
|
|
|
|
SmallVector<BinaryBasicBlock *, 1> Latches;
|
|
|
|
|
L->getLoopLatches(Latches);
|
|
|
|
|
|
|
|
|
|
for (BinaryBasicBlock *Latch : Latches) {
|
2016-09-13 17:12:00 -07:00
|
|
|
auto BI = Latch->branch_info_begin();
|
2016-05-26 10:58:01 -07:00
|
|
|
for (BinaryBasicBlock *Succ : Latch->successors()) {
|
|
|
|
|
if (Succ == L->getHeader()) {
|
2016-12-21 17:13:56 -08:00
|
|
|
assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
|
2016-05-26 10:58:01 -07:00
|
|
|
"profile data not found");
|
|
|
|
|
L->TotalBackEdgeCount += BI->Count;
|
|
|
|
|
}
|
|
|
|
|
++BI;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Compute entry count.
|
|
|
|
|
L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount;
|
|
|
|
|
|
|
|
|
|
// Compute exit count.
|
|
|
|
|
SmallVector<BinaryLoop::Edge, 1> ExitEdges;
|
|
|
|
|
L->getExitEdges(ExitEdges);
|
|
|
|
|
for (BinaryLoop::Edge &Exit : ExitEdges) {
|
|
|
|
|
const BinaryBasicBlock *Exiting = Exit.first;
|
|
|
|
|
const BinaryBasicBlock *ExitTarget = Exit.second;
|
2016-09-13 17:12:00 -07:00
|
|
|
auto BI = Exiting->branch_info_begin();
|
2016-05-26 10:58:01 -07:00
|
|
|
for (BinaryBasicBlock *Succ : Exiting->successors()) {
|
|
|
|
|
if (Succ == ExitTarget) {
|
2016-12-21 17:13:56 -08:00
|
|
|
assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
|
2016-05-26 10:58:01 -07:00
|
|
|
"profile data not found");
|
|
|
|
|
L->ExitCount += BI->Count;
|
|
|
|
|
}
|
|
|
|
|
++BI;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-16 09:27:34 -07:00
|
|
|
/// Return the address ranges occupied by this function in the output.
///
/// The first range always spans [getOutputAddress(), getOutputAddress() +
/// getOutputSize()). If the function is split, a second range covering the
/// cold fragment (cold().getAddress() plus cold().getImageSize()) is appended.
DWARFAddressRangesVector BinaryFunction::getOutputAddressRanges() const {
  DWARFAddressRangesVector Ranges;

  const auto MainStart = getOutputAddress();
  Ranges.emplace_back(MainStart, MainStart + getOutputSize());

  if (isSplit()) {
    assert(isEmitted() && "split function should be emitted");
    const auto ColdStart = cold().getAddress();
    Ranges.emplace_back(ColdStart, ColdStart + cold().getImageSize());
  }

  return Ranges;
}
|
|
|
|
|
|
2017-05-31 09:36:49 -07:00
|
|
|
uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const {
|
|
|
|
|
// If the function hasn't changed return the same address.
|
|
|
|
|
if (!isEmitted() && !opts::Relocs)
|
|
|
|
|
return Address;
|
|
|
|
|
|
|
|
|
|
if (Address < getAddress())
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
// FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay
|
|
|
|
|
// intact. Instead we can use pseudo instructions and/or annotations.
|
|
|
|
|
const auto Offset = Address - getAddress();
|
|
|
|
|
const auto *BB = getBasicBlockContainingOffset(Offset);
|
|
|
|
|
if (!BB) {
|
|
|
|
|
// Special case for address immediately past the end of the function.
|
|
|
|
|
if (Offset == getSize())
|
|
|
|
|
return getOutputAddress() + getOutputSize();
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return std::min(BB->getOutputAddressRange().first + Offset - BB->getOffset(),
|
|
|
|
|
BB->getOutputAddressRange().second);
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-16 09:27:34 -07:00
|
|
|
/// Translate a set of input address ranges into output address ranges.
///
/// \p InputRanges is returned as-is when the function was not emitted and
/// relocation mode is off. Otherwise each input range that starts inside the
/// function is walked basic block by basic block, using BasicBlockOffsets to
/// locate the starting block, and the piece covered by every block with a
/// non-zero output start address is emitted. Ranges that do not start inside
/// the function, or that hit an offset not covered by the current block, are
/// reported under DEBUG and dropped (the latter from that point on). The
/// result is sorted and overlapping or touching ranges are merged.
DWARFAddressRangesVector BinaryFunction::translateInputToOutputRanges(
    const DWARFAddressRangesVector &InputRanges) const {
  // If the function hasn't changed return the same ranges.
  if (!(isEmitted() || opts::Relocs))
    return InputRanges;

  // Even though we will merge ranges in a post-processing pass, we attempt to
  // merge them in a main processing loop as it improves the processing time.
  uint64_t LastEndAddress = 0;
  DWARFAddressRangesVector Translated;
  for (const auto &Range : InputRanges) {
    // Shared diagnostic for both rejection paths below.
    const auto ReportInvalidRange = [&] {
      DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                   << *this << " : [0x" << Twine::utohexstr(Range.first)
                   << ", 0x" << Twine::utohexstr(Range.second) << "]\n");
    };

    if (!containsAddress(Range.first)) {
      ReportInvalidRange();
      LastEndAddress = 0;
      continue;
    }

    auto CurOffset = Range.first - getAddress();
    const auto EndOffset = std::min(Range.second - getAddress(), getSize());

    // upper_bound yields the first entry past CurOffset; step back once to
    // land on the candidate block for CurOffset.
    auto BlockIt = std::upper_bound(BasicBlockOffsets.begin(),
                                    BasicBlockOffsets.end(),
                                    BasicBlockOffset(CurOffset, nullptr),
                                    CompareBasicBlockOffsets());
    --BlockIt;
    do {
      const auto *Block = BlockIt->second;
      const bool InsideBlock = CurOffset >= Block->getOffset() &&
                               CurOffset < Block->getEndOffset();
      if (!InsideBlock) {
        ReportInvalidRange();
        LastEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted.
      if (const auto BlockOutputStart = Block->getOutputAddressRange().first) {
        const auto StartAddress =
            BlockOutputStart + CurOffset - Block->getOffset();
        auto EndAddress = Block->getOutputAddressRange().second;
        // The input range ends inside this block: cut the output piece short.
        if (EndOffset < Block->getEndOffset())
          EndAddress = StartAddress + EndOffset - CurOffset;

        if (StartAddress != LastEndAddress) {
          Translated.emplace_back(StartAddress,
                                  std::max(StartAddress, EndAddress));
        } else {
          // Contiguous with the previously emitted piece: extend it.
          Translated.back().second =
              std::max(Translated.back().second, EndAddress);
        }
        LastEndAddress = Translated.back().second;
      }

      CurOffset = Block->getEndOffset();
      ++BlockIt;
    } while (CurOffset < EndOffset);
  }

  // Post-processing pass to sort and merge ranges.
  std::sort(Translated.begin(), Translated.end());
  DWARFAddressRangesVector Merged;
  LastEndAddress = 0;
  for (const auto &Range : Translated) {
    if (Range.first > LastEndAddress) {
      Merged.emplace_back(Range.first, Range.second);
    } else {
      Merged.back().second = std::max(Merged.back().second, Range.second);
    }
    LastEndAddress = Merged.back().second;
  }

  return Merged;
}
|
|
|
|
|
|
|
|
|
|
/// Translate a DWARF location list from input addresses to output addresses.
///
/// \param InputLL     location list whose entry addresses are relative to
///                    \p BaseAddress.
/// \param BaseAddress value added to every entry's Begin/End before they are
///                    interpreted as input addresses.
/// \returns the translated list. When the function was not emitted and
///          relocation mode is off, the entries are only rebased by
///          \p BaseAddress (or returned unchanged if it is zero). Otherwise
///          each entry that starts inside the function is split along basic
///          block boundaries, pieces in blocks with a zero output start
///          address are dropped, and the result is sorted by start address
///          with adjacent/overlapping entries of identical location merged.
DWARFDebugLoc::LocationList BinaryFunction::translateInputToOutputLocationList(
    const DWARFDebugLoc::LocationList &InputLL,
    uint64_t BaseAddress) const {
  // If the function wasn't changed - there's nothing to update.
  if (!isEmitted() && !opts::Relocs) {
    if (!BaseAddress)
      return InputLL;

    // InputLL is a const reference, so this is necessarily a copy; it is
    // spelled as one instead of a std::move that could never actually move.
    auto OutputLL = InputLL;
    for (auto &Entry : OutputLL.Entries) {
      Entry.Begin += BaseAddress;
      Entry.End += BaseAddress;
    }
    return OutputLL;
  }

  uint64_t PrevEndAddress = 0;
  // Location of the most recently emitted entry; null until one exists.
  SmallVectorImpl<unsigned char> *PrevLoc = nullptr;
  DWARFDebugLoc::LocationList OutputLL;
  for (auto &Entry : InputLL.Entries) {
    const auto Start = Entry.Begin + BaseAddress;
    const auto End = Entry.End + BaseAddress;
    if (!containsAddress(Start)) {
      DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                   << *this << " : [0x" << Twine::utohexstr(Start)
                   << ", 0x" << Twine::utohexstr(End) << "]\n");
      continue;
    }
    auto InputOffset = Start - getAddress();
    const auto InputEndOffset = std::min(End - getAddress(), getSize());
    // upper_bound yields the first entry past InputOffset; step back once to
    // land on the candidate block for InputOffset.
    auto BBI = std::upper_bound(BasicBlockOffsets.begin(),
                                BasicBlockOffsets.end(),
                                BasicBlockOffset(InputOffset, nullptr),
                                CompareBasicBlockOffsets());
    --BBI;
    do {
      const auto *BB = BBI->second;
      if (InputOffset < BB->getOffset() || InputOffset >= BB->getEndOffset()) {
        DEBUG(dbgs() << "BOLT-DEBUG: invalid debug address range detected for "
                     << *this << " : [0x" << Twine::utohexstr(Start)
                     << ", 0x" << Twine::utohexstr(End) << "]\n");
        PrevEndAddress = 0;
        break;
      }

      // Skip the range if the block was deleted.
      if (const auto OutputStart = BB->getOutputAddressRange().first) {
        const auto StartAddress = OutputStart + InputOffset - BB->getOffset();
        auto EndAddress = BB->getOutputAddressRange().second;
        // The input entry ends inside this block: cut the output piece short.
        if (InputEndOffset < BB->getEndOffset())
          EndAddress = StartAddress + InputEndOffset - InputOffset;

        // PrevLoc is checked explicitly so that the dereference never relies
        // on StartAddress being non-zero while no entry has been emitted yet.
        if (StartAddress == PrevEndAddress && PrevLoc &&
            Entry.Loc == *PrevLoc) {
          OutputLL.Entries.back().End = std::max(OutputLL.Entries.back().End,
                                                 EndAddress);
        } else {
          OutputLL.Entries.emplace_back(
              DWARFDebugLoc::Entry{StartAddress,
                                   std::max(StartAddress, EndAddress),
                                   Entry.Loc});
        }
        PrevEndAddress = OutputLL.Entries.back().End;
        // Re-taken after every insertion, so it always refers to the current
        // last element.
        PrevLoc = &OutputLL.Entries.back().Loc;
      }

      ++BBI;
      InputOffset = BB->getEndOffset();
    } while (InputOffset < InputEndOffset);
  }

  // Sort and merge adjacent entries with identical location.
  std::stable_sort(OutputLL.Entries.begin(), OutputLL.Entries.end(),
      [] (const DWARFDebugLoc::Entry &A, const DWARFDebugLoc::Entry &B) {
        return A.Begin < B.Begin;
      });
  DWARFDebugLoc::LocationList MergedLL;
  PrevEndAddress = 0;
  PrevLoc = nullptr;
  for (const auto &Entry : OutputLL.Entries) {
    // Same null guard as above: nothing to merge with before the first entry.
    if (Entry.Begin <= PrevEndAddress && PrevLoc && *PrevLoc == Entry.Loc) {
      MergedLL.Entries.back().End = std::max(Entry.End,
                                             MergedLL.Entries.back().End);
    } else {
      // An entry overlapping the previous one with a different location is
      // trimmed to start where the previous entry ends.
      const auto Begin = std::max(Entry.Begin, PrevEndAddress);
      const auto End = std::max(Begin, Entry.End);
      MergedLL.Entries.emplace_back(DWARFDebugLoc::Entry{Begin,
                                                         End,
                                                         Entry.Loc});
    }
    PrevEndAddress = MergedLL.Entries.back().End;
    PrevLoc = &MergedLL.Entries.back().Loc;
  }

  return MergedLL;
}
|
|
|
|
|
|
2016-05-26 10:58:01 -07:00
|
|
|
/// Print a textual summary of the loop information stored in BLI to \p OS.
///
/// A header line with the function name (and its execution count when a valid
/// profile exists) is always printed, followed by per-loop details and the
/// aggregate counters held in BLI. Per-loop details are emitted only when the
/// function has a valid profile.
/// NOTE(review): BLI is dereferenced unconditionally - presumably
/// calculateLoopInfo() must have run first; confirm with callers.
void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
  OS << "Loop Info for Function \"" << *this << "\"";
  if (hasValidProfile())
    OS << " (count: " << getExecutionCount() << ")";
  OS << "\n";

  // Depth-first walk over all loops using an explicit stack.
  std::stack<BinaryLoop *> Pending;
  for (BinaryLoop *TopLevelLoop : *BLI)
    Pending.push(TopLevelLoop);

  while (!Pending.empty()) {
    BinaryLoop *CurLoop = Pending.top();
    Pending.pop();

    for (BinaryLoop *InnerLoop : *CurLoop)
      Pending.push(InnerLoop);

    // Loops are still traversed without a profile, but nothing is printed.
    if (!hasValidProfile())
      continue;

    const char *LoopKind = CurLoop->getLoopDepth() > 1 ? "Nested" : "Outer";
    OS << LoopKind << " loop header: " << CurLoop->getHeader()->getName();
    OS << "\n";

    OS << "Loop basic blocks: ";
    const char *Separator = "";
    for (auto BlockIt = CurLoop->block_begin(), BlockEnd = CurLoop->block_end();
         BlockIt != BlockEnd; ++BlockIt) {
      OS << Separator << (*BlockIt)->getName();
      Separator = ", ";
    }
    OS << "\n";

    // Kept from the original; the early 'continue' above already filters out
    // the no-profile case before this point.
    if (hasValidProfile()) {
      OS << "Total back edge count: " << CurLoop->TotalBackEdgeCount << "\n";
      OS << "Loop entry count: " << CurLoop->EntryCount << "\n";
      OS << "Loop exit count: " << CurLoop->ExitCount << "\n";
      if (CurLoop->EntryCount > 0) {
        OS << "Average iters per entry: "
           << format("%.4lf",
                     static_cast<double>(CurLoop->TotalBackEdgeCount) /
                         CurLoop->EntryCount)
           << "\n";
      }
    }
    OS << "----\n";
  }

  OS << "Total number of loops: "<< BLI->TotalLoops << "\n";
  OS << "Number of outer loops: " << BLI->OuterLoops << "\n";
  OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
}
|
|
|
|
|
|
2016-08-29 21:11:22 -07:00
|
|
|
/// Compute dynamic instruction statistics for this function from its profile.
///
/// Empty stats are returned for functions that are not simple, have no valid
/// profile, or were folded. For every basic block in the layout that has
/// non-pseudo instructions and a non-zero known execution count, the block's
/// count is added to the counters for stores, loads, calls (with indirect and
/// PLT calls tracked separately), executed instructions, jump table branches,
/// unconditional branches, and forward/backward conditional branches
/// (total and taken).
DynoStats BinaryFunction::getDynoStats() const {
  DynoStats Stats;

  // Return empty-stats about the function we don't completely understand.
  if (!(isSimple() && hasValidProfile()))
    return Stats;

  // If the function was folded in non-relocation mode we keep its profile
  // for optimization. However, it should be excluded from the dyno stats.
  if (isFolded())
    return Stats;

  // Update enumeration of basic blocks for correct detection of branch'
  // direction.
  updateLayoutIndices();

  // True when the call target is not a known function and its global symbol
  // address lies in a section named ".plt".
  const auto IsPLTTarget = [this](const MCSymbol *CallSymbol) -> bool {
    if (BC.getFunctionForSymbol(CallSymbol))
      return false;
    auto GSI = BC.GlobalSymbols.find(CallSymbol->getName());
    if (GSI == BC.GlobalSymbols.end())
      return false;
    auto Section = BC.getSectionForAddress(GSI->second);
    if (!Section)
      return false;
    StringRef SectionName;
    Section->getName(SectionName);
    return SectionName == ".plt";
  };

  for (const auto &BB : layout()) {
    // The basic block execution count equals to the sum of incoming branch
    // frequencies. This may deviate from the sum of outgoing branches of the
    // basic block especially since the block may contain a function that
    // does not return or a function that throws an exception.
    const uint64_t ExecCount = BB->getKnownExecutionCount();

    // Ignore empty blocks and blocks that were not executed.
    if (BB->getNumNonPseudos() == 0 || ExecCount == 0)
      continue;

    // Count memory accesses and calls by iterating through all instructions.
    for (const auto &Instr : *BB) {
      if (BC.MIA->isStore(Instr))
        Stats[DynoStats::STORES] += ExecCount;
      if (BC.MIA->isLoad(Instr))
        Stats[DynoStats::LOADS] += ExecCount;

      if (!BC.MIA->isCall(Instr))
        continue;

      Stats[DynoStats::FUNCTION_CALLS] += ExecCount;
      if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
        Stats[DynoStats::INDIRECT_CALLS] += ExecCount;
      } else if (const auto *CallSymbol = BC.MIA->getTargetSymbol(Instr)) {
        if (IsPLTTarget(CallSymbol))
          Stats[DynoStats::PLT_CALLS] += ExecCount;
      }
    }

    Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * ExecCount;

    // Jump tables.
    const auto *LastInstr = BB->getLastNonPseudoInstr();
    if (BC.MIA->getJumpTable(*LastInstr)) {
      Stats[DynoStats::JUMP_TABLE_BRANCHES] += ExecCount;
      DEBUG(
        static uint64_t MostFrequentJT;
        if (ExecCount > MostFrequentJT) {
          MostFrequentJT = ExecCount;
          dbgs() << "BOLT-INFO: most frequently executed jump table is in "
                 << "function " << *this << " in basic block " << BB->getName()
                 << " executed totally " << ExecCount << " times.\n";
        }
      );
      continue;
    }

    // Update stats for branches.
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
      continue;

    // No conditional branch: either nothing to record, or a simple
    // unconditional branch.
    if (!CondBranch) {
      if (UncondBranch)
        Stats[DynoStats::UNCOND_BRANCHES] += ExecCount;
      continue;
    }

    // Conditional branch that could be followed by an unconditional branch.
    uint64_t TakenCount;
    uint64_t NonTakenCount;
    bool IsForwardBranch;
    if (BB->succ_size() == 2) {
      TakenCount = BB->getBranchInfo(true).Count;
      NonTakenCount = BB->getBranchInfo(false).Count;
      IsForwardBranch = isForwardBranch(BB, BB->getConditionalSuccessor(true));
    } else {
      // SCTC breaks the CFG invariant so we have to make some affordances
      // here if we want dyno stats after running it.
      TakenCount = BB->branch_info_begin()->Count;
      NonTakenCount =
          TakenCount != COUNT_NO_PROFILE ? ExecCount - TakenCount : 0;

      // If succ_size == 0 then we are branching to a function
      // rather than a BB label.
      IsForwardBranch = BB->succ_size() == 0
          ? isForwardCall(BC.MIA->getTargetSymbol(*CondBranch))
          : isForwardBranch(BB, BB->getFallthrough());
    }

    // Missing profile on an edge is treated as a zero count.
    if (TakenCount == COUNT_NO_PROFILE)
      TakenCount = 0;
    if (NonTakenCount == COUNT_NO_PROFILE)
      NonTakenCount = 0;

    if (!IsForwardBranch) {
      Stats[DynoStats::BACKWARD_COND_BRANCHES] += ExecCount;
      Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
    } else {
      Stats[DynoStats::FORWARD_COND_BRANCHES] += ExecCount;
      Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
    }

    // The trailing unconditional branch runs whenever the conditional one is
    // not taken.
    if (UncondBranch)
      Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
  }

  return Stats;
}
|
|
|
|
|
|
|
|
|
|
/// Print every statistic between FIRST_DYNO_STAT and LAST_DYNO_STAT
/// (exclusive) to \p OS, one per line, scaled by opts::DynoStatsScale.
///
/// When \p Other is non-null each line is followed by the relative change
/// versus the corresponding value in \p Other, or by " (=)" when the two
/// values are equal.
void DynoStats::print(raw_ostream &OS, const DynoStats *Other) const {
  auto PrintLine = [&](const std::string &Label, uint64_t Value,
                       uint64_t Baseline) {
    OS << format("%'20lld : ", Value * opts::DynoStatsScale) << Label;
    if (Other) {
      if (Value == Baseline) {
        OS << " (=)";
      } else {
        // Dividing by Baseline + 1 keeps the denominator non-zero when the
        // baseline value is 0.
        OS << format(" (%+.1f%%)",
                     (static_cast<float>(Value) -
                      static_cast<float>(Baseline)) * 100.0 /
                         static_cast<float>(Baseline + 1));
      }
    }
    OS << '\n';
  };

  for (auto Idx = DynoStats::FIRST_DYNO_STAT + 1;
       Idx < DynoStats::LAST_DYNO_STAT;
       ++Idx) {
    PrintLine(Desc[Idx], Stats[Idx], Other ? (*Other)[Idx] : 0);
  }
}
|
|
|
|
|
|
|
|
|
|
/// Add every statistic of \p Other (all indices strictly between
/// FIRST_DYNO_STAT and LAST_DYNO_STAT) to the corresponding statistic here.
void DynoStats::operator+=(const DynoStats &Other) {
  auto Idx = DynoStats::FIRST_DYNO_STAT + 1;
  while (Idx < DynoStats::LAST_DYNO_STAT) {
    Stats[Idx] += Other[Idx];
    ++Idx;
  }
}
|
|
|
|
|
|
2016-02-05 14:42:04 -08:00
|
|
|
} // namespace bolt
|
2015-10-09 17:21:14 -07:00
|
|
|
} // namespace llvm
|