mirror of
https://github.com/intel/llvm.git
synced 2026-01-13 19:08:21 +08:00
[BOLT][NFC] Move DynoStats out of BinaryFunction
Summary: Move DynoStats into separate source files. (cherry picked from FBD15138883)
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryFunction.h"
|
||||
#include "DataReader.h"
|
||||
#include "DynoStats.h"
|
||||
#include "MCPlusBuilder.h"
|
||||
#include "llvm/ADT/edit_distance.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
@@ -90,14 +91,6 @@ DotToolTipCode("dot-tooltip-code",
|
||||
cl::Hidden,
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
static cl::opt<uint32_t>
|
||||
DynoStatsScale("dyno-stats-scale",
|
||||
cl::desc("scale to be applied while reporting dyno stats"),
|
||||
cl::Optional,
|
||||
cl::init(1),
|
||||
cl::Hidden,
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
cl::opt<JumpTableSupportLevel>
|
||||
JumpTables("jump-tables",
|
||||
cl::desc("jump tables support (default=basic)"),
|
||||
@@ -193,7 +186,6 @@ bool shouldPrint(const BinaryFunction &Function) {
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
constexpr const char *DynoStats::Desc[];
|
||||
constexpr unsigned BinaryFunction::MinAlign;
|
||||
const char BinaryFunction::TimerGroupName[] = "buildfuncs";
|
||||
const char BinaryFunction::TimerGroupDesc[] = "Build Binary Functions";
|
||||
@@ -245,31 +237,6 @@ SMLoc findDebugLineInformationForInstructionAt(
|
||||
|
||||
} // namespace
|
||||
|
||||
bool DynoStats::operator<(const DynoStats &Other) const {
|
||||
return std::lexicographical_compare(
|
||||
&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
|
||||
&Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]
|
||||
);
|
||||
}
|
||||
|
||||
bool DynoStats::operator==(const DynoStats &Other) const {
|
||||
return std::equal(
|
||||
&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
|
||||
&Other.Stats[FIRST_DYNO_STAT]
|
||||
);
|
||||
}
|
||||
|
||||
bool DynoStats::lessThan(const DynoStats &Other,
|
||||
ArrayRef<Category> Keys) const {
|
||||
return std::lexicographical_compare(
|
||||
Keys.begin(), Keys.end(),
|
||||
Keys.begin(), Keys.end(),
|
||||
[this,&Other](const Category A, const Category) {
|
||||
return Stats[A] < Other.Stats[A];
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
uint64_t BinaryFunction::Count = 0;
|
||||
|
||||
const std::string *
|
||||
@@ -493,7 +460,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
|
||||
|
||||
if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) {
|
||||
OS << '\n';
|
||||
DynoStats dynoStats = getDynoStats();
|
||||
DynoStats dynoStats = getDynoStats(*this);
|
||||
OS << dynoStats;
|
||||
}
|
||||
|
||||
@@ -4284,145 +4251,6 @@ void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
|
||||
OS << "Maximum nested loop depth: " << BLI->MaximumDepth << "\n\n";
|
||||
}
|
||||
|
||||
DynoStats BinaryFunction::getDynoStats() const {
|
||||
DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
|
||||
|
||||
// Return empty-stats about the function we don't completely understand.
|
||||
if (!isSimple() || !hasValidProfile())
|
||||
return Stats;
|
||||
|
||||
// If the function was folded in non-relocation mode we keep its profile
|
||||
// for optimization. However, it should be excluded from the dyno stats.
|
||||
if (isFolded())
|
||||
return Stats;
|
||||
|
||||
// Update enumeration of basic blocks for correct detection of branch'
|
||||
// direction.
|
||||
updateLayoutIndices();
|
||||
|
||||
for (const auto &BB : layout()) {
|
||||
// The basic block execution count equals to the sum of incoming branch
|
||||
// frequencies. This may deviate from the sum of outgoing branches of the
|
||||
// basic block especially since the block may contain a function that
|
||||
// does not return or a function that throws an exception.
|
||||
const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
|
||||
|
||||
// Ignore empty blocks and blocks that were not executed.
|
||||
if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
|
||||
continue;
|
||||
|
||||
// Count AArch64 linker-inserted veneers
|
||||
if(isAArch64Veneer())
|
||||
Stats[DynoStats::VENEER_CALLS_AARCH64] += getKnownExecutionCount();
|
||||
|
||||
// Count the number of calls by iterating through all instructions.
|
||||
for (const auto &Instr : *BB) {
|
||||
if (BC.MIB->isStore(Instr)) {
|
||||
Stats[DynoStats::STORES] += BBExecutionCount;
|
||||
}
|
||||
if (BC.MIB->isLoad(Instr)) {
|
||||
Stats[DynoStats::LOADS] += BBExecutionCount;
|
||||
}
|
||||
|
||||
if (!BC.MIB->isCall(Instr))
|
||||
continue;
|
||||
|
||||
uint64_t CallFreq = BBExecutionCount;
|
||||
if (BC.MIB->getConditionalTailCall(Instr)) {
|
||||
CallFreq =
|
||||
BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
|
||||
}
|
||||
Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
|
||||
if (BC.MIB->isIndirectCall(Instr)) {
|
||||
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
|
||||
} else if (const auto *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
|
||||
const auto *BF = BC.getFunctionForSymbol(CallSymbol);
|
||||
if (BF && BF->isPLTFunction()) {
|
||||
Stats[DynoStats::PLT_CALLS] += CallFreq;
|
||||
|
||||
// We don't process PLT functions and hence have to adjust relevant
|
||||
// dynostats here for:
|
||||
//
|
||||
// jmp *GOT_ENTRY(%rip)
|
||||
//
|
||||
// NOTE: this is arch-specific.
|
||||
Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
|
||||
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
|
||||
Stats[DynoStats::LOADS] += CallFreq;
|
||||
Stats[DynoStats::INSTRUCTIONS] += CallFreq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
|
||||
|
||||
// Jump tables.
|
||||
const auto *LastInstr = BB->getLastNonPseudoInstr();
|
||||
if (BC.MIB->getJumpTable(*LastInstr)) {
|
||||
Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
|
||||
DEBUG(
|
||||
static uint64_t MostFrequentJT;
|
||||
if (BBExecutionCount > MostFrequentJT) {
|
||||
MostFrequentJT = BBExecutionCount;
|
||||
dbgs() << "BOLT-INFO: most frequently executed jump table is in "
|
||||
<< "function " << *this << " in basic block " << BB->getName()
|
||||
<< " executed totally " << BBExecutionCount << " times.\n";
|
||||
}
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Update stats for branches.
|
||||
const MCSymbol *TBB = nullptr;
|
||||
const MCSymbol *FBB = nullptr;
|
||||
MCInst *CondBranch = nullptr;
|
||||
MCInst *UncondBranch = nullptr;
|
||||
if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!CondBranch && !UncondBranch) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Simple unconditional branch.
|
||||
if (!CondBranch) {
|
||||
Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
|
||||
continue;
|
||||
}
|
||||
|
||||
// CTCs
|
||||
if (BC.MIB->getConditionalTailCall(*CondBranch)) {
|
||||
if (BB->branch_info_begin() != BB->branch_info_end())
|
||||
Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Conditional branch that could be followed by an unconditional branch.
|
||||
auto TakenCount = BB->getTakenBranchInfo().Count;
|
||||
if (TakenCount == COUNT_NO_PROFILE)
|
||||
TakenCount = 0;
|
||||
|
||||
auto NonTakenCount = BB->getFallthroughBranchInfo().Count;
|
||||
if (NonTakenCount == COUNT_NO_PROFILE)
|
||||
NonTakenCount = 0;
|
||||
|
||||
if (isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
|
||||
Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
|
||||
Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
|
||||
} else {
|
||||
Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
|
||||
Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
|
||||
}
|
||||
|
||||
if (UncondBranch) {
|
||||
Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
|
||||
}
|
||||
}
|
||||
|
||||
return Stats;
|
||||
}
|
||||
|
||||
bool BinaryFunction::isAArch64Veneer() const {
|
||||
if (BasicBlocks.size() != 1)
|
||||
return false;
|
||||
@@ -4439,41 +4267,5 @@ bool BinaryFunction::isAArch64Veneer() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void DynoStats::print(raw_ostream &OS, const DynoStats *Other) const {
|
||||
auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
|
||||
uint64_t OtherStat) {
|
||||
OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
|
||||
if (Other) {
|
||||
if (Stat != OtherStat) {
|
||||
OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
|
||||
OS << format(" (%+.1f%%)",
|
||||
( (float) Stat - (float) OtherStat ) * 100.0 /
|
||||
(float) (OtherStat) );
|
||||
} else {
|
||||
OS << " (=)";
|
||||
}
|
||||
}
|
||||
OS << '\n';
|
||||
};
|
||||
|
||||
for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
|
||||
Stat < DynoStats::LAST_DYNO_STAT;
|
||||
++Stat) {
|
||||
|
||||
if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
|
||||
continue;
|
||||
|
||||
printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
|
||||
}
|
||||
}
|
||||
|
||||
void DynoStats::operator+=(const DynoStats &Other) {
|
||||
for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
|
||||
Stat < DynoStats::LAST_DYNO_STAT;
|
||||
++Stat) {
|
||||
Stats[Stat] += Other[Stat];
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
@@ -54,108 +54,6 @@ namespace bolt {
|
||||
using DWARFUnitLineTable = std::pair<DWARFUnit *,
|
||||
const DWARFDebugLine::LineTable *>;
|
||||
|
||||
/// Class encapsulating runtime statistics about an execution unit.
|
||||
class DynoStats {
|
||||
|
||||
#define DYNO_STATS\
|
||||
D(FIRST_DYNO_STAT, "<reserved>", Fn)\
|
||||
D(FORWARD_COND_BRANCHES, "executed forward branches", Fn)\
|
||||
D(FORWARD_COND_BRANCHES_TAKEN, "taken forward branches", Fn)\
|
||||
D(BACKWARD_COND_BRANCHES, "executed backward branches", Fn)\
|
||||
D(BACKWARD_COND_BRANCHES_TAKEN, "taken backward branches", Fn)\
|
||||
D(UNCOND_BRANCHES, "executed unconditional branches", Fn)\
|
||||
D(FUNCTION_CALLS, "all function calls", Fn)\
|
||||
D(INDIRECT_CALLS, "indirect calls", Fn)\
|
||||
D(PLT_CALLS, "PLT calls", Fn)\
|
||||
D(INSTRUCTIONS, "executed instructions", Fn)\
|
||||
D(LOADS, "executed load instructions", Fn)\
|
||||
D(STORES, "executed store instructions", Fn)\
|
||||
D(JUMP_TABLE_BRANCHES, "taken jump table branches", Fn)\
|
||||
D(ALL_BRANCHES, "total branches",\
|
||||
Fadd(ALL_CONDITIONAL, UNCOND_BRANCHES))\
|
||||
D(ALL_TAKEN, "taken branches",\
|
||||
Fadd(TAKEN_CONDITIONAL, UNCOND_BRANCHES))\
|
||||
D(NONTAKEN_CONDITIONAL, "non-taken conditional branches",\
|
||||
Fsub(ALL_CONDITIONAL, TAKEN_CONDITIONAL))\
|
||||
D(TAKEN_CONDITIONAL, "taken conditional branches",\
|
||||
Fadd(FORWARD_COND_BRANCHES_TAKEN, BACKWARD_COND_BRANCHES_TAKEN))\
|
||||
D(ALL_CONDITIONAL, "all conditional branches",\
|
||||
Fadd(FORWARD_COND_BRANCHES, BACKWARD_COND_BRANCHES))\
|
||||
D(VENEER_CALLS_AARCH64, "linker-inserted veneer calls", Fn)\
|
||||
D(LAST_DYNO_STAT, "<reserved>", 0)
|
||||
|
||||
public:
|
||||
#define D(name, ...) name,
|
||||
enum Category : uint8_t { DYNO_STATS };
|
||||
#undef D
|
||||
|
||||
|
||||
private:
|
||||
uint64_t Stats[LAST_DYNO_STAT+1];
|
||||
bool PrintAArch64Stats;
|
||||
|
||||
#define D(name, desc, ...) desc,
|
||||
static constexpr const char *Desc[] = { DYNO_STATS };
|
||||
#undef D
|
||||
|
||||
public:
|
||||
DynoStats(bool PrintAArch64Stats ) {
|
||||
this->PrintAArch64Stats = PrintAArch64Stats;
|
||||
for (auto Stat = FIRST_DYNO_STAT + 0; Stat < LAST_DYNO_STAT; ++Stat)
|
||||
Stats[Stat] = 0;
|
||||
}
|
||||
|
||||
uint64_t &operator[](size_t I) {
|
||||
assert(I > FIRST_DYNO_STAT && I < LAST_DYNO_STAT &&
|
||||
"index out of bounds");
|
||||
return Stats[I];
|
||||
}
|
||||
|
||||
uint64_t operator[](size_t I) const {
|
||||
switch (I) {
|
||||
#define D(name, desc, func) \
|
||||
case name: \
|
||||
return func;
|
||||
#define Fn Stats[I]
|
||||
#define Fadd(a, b) operator[](a) + operator[](b)
|
||||
#define Fsub(a, b) operator[](a) - operator[](b)
|
||||
#define F(a) operator[](a)
|
||||
#define Radd(a, b) (a + b)
|
||||
#define Rsub(a, b) (a - b)
|
||||
DYNO_STATS
|
||||
#undef Rsub
|
||||
#undef Radd
|
||||
#undef F
|
||||
#undef Fsub
|
||||
#undef Fadd
|
||||
#undef Fn
|
||||
#undef D
|
||||
default:
|
||||
llvm_unreachable("index out of bounds");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void print(raw_ostream &OS, const DynoStats *Other = nullptr) const;
|
||||
|
||||
void operator+=(const DynoStats &Other);
|
||||
bool operator<(const DynoStats &Other) const;
|
||||
bool operator==(const DynoStats &Other) const;
|
||||
bool operator!=(const DynoStats &Other) const { return !operator==(Other); }
|
||||
bool lessThan(const DynoStats &Other, ArrayRef<Category> Keys) const;
|
||||
|
||||
static const char* Description(const Category C) {
|
||||
return Desc[C];
|
||||
}
|
||||
};
|
||||
|
||||
inline raw_ostream &operator<<(raw_ostream &OS, const DynoStats &Stats) {
|
||||
Stats.print(OS, nullptr);
|
||||
return OS;
|
||||
}
|
||||
|
||||
DynoStats operator+(const DynoStats &A, const DynoStats &B);
|
||||
|
||||
/// Types of macro-fusion alignment corrections.
|
||||
enum MacroFusionType {
|
||||
MFT_NONE,
|
||||
@@ -927,13 +825,6 @@ public:
|
||||
/// Attempt to validate CFG invariants.
|
||||
bool validateCFG() const;
|
||||
|
||||
/// Return dynostats for the function.
|
||||
///
|
||||
/// The function relies on branch instructions being in-sync with CFG for
|
||||
/// branch instructions stats. Thus it is better to call it after
|
||||
/// fixBranches().
|
||||
DynoStats getDynoStats() const;
|
||||
|
||||
BinaryBasicBlock *getBasicBlockForLabel(const MCSymbol *Label) {
|
||||
auto I = LabelToBB.find(Label);
|
||||
return I == LabelToBB.end() ? nullptr : I->second;
|
||||
@@ -2377,48 +2268,6 @@ public:
|
||||
const FragmentInfo &cold() const { return ColdFragment; }
|
||||
};
|
||||
|
||||
/// Return program-wide dynostats.
|
||||
template <typename FuncsType>
|
||||
inline DynoStats getDynoStats(const FuncsType &Funcs) {
|
||||
bool IsAArch64 = Funcs.begin()->second.getBinaryContext().isAArch64();
|
||||
DynoStats dynoStats(IsAArch64);
|
||||
for (auto &BFI : Funcs) {
|
||||
auto &BF = BFI.second;
|
||||
if (BF.isSimple()) {
|
||||
dynoStats += BF.getDynoStats();
|
||||
}
|
||||
}
|
||||
return dynoStats;
|
||||
}
|
||||
|
||||
/// Call a function with optional before and after dynostats printing.
|
||||
template <typename FnType, typename FuncsType>
|
||||
inline void
|
||||
callWithDynoStats(FnType &&Func,
|
||||
const FuncsType &Funcs,
|
||||
StringRef Phase,
|
||||
const bool Flag) {
|
||||
bool IsAArch64 = Funcs.begin()->second.getBinaryContext().isAArch64();
|
||||
DynoStats DynoStatsBefore(IsAArch64);
|
||||
if (Flag) {
|
||||
DynoStatsBefore = getDynoStats(Funcs);
|
||||
}
|
||||
|
||||
Func();
|
||||
|
||||
if (Flag) {
|
||||
const auto DynoStatsAfter = getDynoStats(Funcs);
|
||||
const auto Changed = (DynoStatsAfter != DynoStatsBefore);
|
||||
outs() << "BOLT-INFO: program-wide dynostats after running "
|
||||
<< Phase << (Changed ? "" : " (no change)") << ":\n\n"
|
||||
<< DynoStatsBefore << '\n';
|
||||
if (Changed) {
|
||||
DynoStatsAfter.print(outs(), &DynoStatsBefore);
|
||||
}
|
||||
outs() << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
inline raw_ostream &operator<<(raw_ostream &OS,
|
||||
const BinaryFunction &Function) {
|
||||
OS << Function.getPrintName();
|
||||
|
||||
@@ -76,6 +76,7 @@ add_llvm_tool(llvm-bolt
|
||||
DataReader.cpp
|
||||
DebugData.cpp
|
||||
DWARFRewriter.cpp
|
||||
DynoStats.cpp
|
||||
Exceptions.cpp
|
||||
ExecutableFileMemoryManager.cpp
|
||||
Heatmap.cpp
|
||||
|
||||
253
bolt/src/DynoStats.cpp
Normal file
253
bolt/src/DynoStats.cpp
Normal file
@@ -0,0 +1,253 @@
|
||||
//===--- DynoStats.cpp ----------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "DynoStats.h"
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryFunction.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "bolt"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace bolt;
|
||||
|
||||
namespace opts {
|
||||
|
||||
extern cl::OptionCategory BoltCategory;
|
||||
|
||||
/// Hidden command-line option "dyno-stats-scale": a multiplier (default 1)
/// applied to each stat value when dyno stats are printed.
static cl::opt<uint32_t> DynoStatsScale(
    "dyno-stats-scale",
    cl::desc("scale to be applied while reporting dyno stats"),
    cl::init(1),
    cl::Optional,
    cl::Hidden,
    cl::cat(BoltCategory));
|
||||
|
||||
} // namespace opts
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
// Namespace-scope definition of the static constexpr description table that
// is declared, together with its initializer, inside class DynoStats.
constexpr const char *DynoStats::Desc[];
|
||||
|
||||
/// Lexicographically compare the stored counters of two stat sets over the
/// index range [FIRST_DYNO_STAT, LAST_DYNO_STAT).
bool DynoStats::operator<(const DynoStats &Other) const {
  const uint64_t *const LHSBegin = Stats + FIRST_DYNO_STAT;
  const uint64_t *const LHSEnd = Stats + LAST_DYNO_STAT;
  const uint64_t *const RHSBegin = Other.Stats + FIRST_DYNO_STAT;
  const uint64_t *const RHSEnd = Other.Stats + LAST_DYNO_STAT;
  return std::lexicographical_compare(LHSBegin, LHSEnd, RHSBegin, RHSEnd);
}
|
||||
|
||||
/// Two stat sets are equal when every stored counter in the index range
/// [FIRST_DYNO_STAT, LAST_DYNO_STAT) matches. The PrintAArch64Stats flag is
/// not part of the comparison.
bool DynoStats::operator==(const DynoStats &Other) const {
  const uint64_t *const Begin = Stats + FIRST_DYNO_STAT;
  const uint64_t *const End = Stats + LAST_DYNO_STAT;
  return std::equal(Begin, End, Other.Stats + FIRST_DYNO_STAT);
}
|
||||
|
||||
bool DynoStats::lessThan(const DynoStats &Other,
|
||||
ArrayRef<Category> Keys) const {
|
||||
return std::lexicographical_compare(
|
||||
Keys.begin(), Keys.end(),
|
||||
Keys.begin(), Keys.end(),
|
||||
[this,&Other](const Category A, const Category) {
|
||||
return Stats[A] < Other.Stats[A];
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
/// Print one line per statistic to \p OS: the stored counter multiplied by
/// the "dyno-stats-scale" option, followed by its description.
///
/// When \p Other is non-null each line also carries the change relative to
/// \p Other: " (=)" if the values match, otherwise a signed percentage. The
/// AArch64 veneer statistic is skipped unless PrintAArch64Stats is set.
///
/// NOTE(review): the value printed for this object is the stored counter,
/// while the baseline is read through Other's const operator[] (which derives
/// some categories from others) - confirm the asymmetry is intended.
void DynoStats::print(raw_ostream &OS, const DynoStats *Other) const {
  for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
       Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
    if (Stat == DynoStats::VENEER_CALLS_AARCH64 && !PrintAArch64Stats)
      continue;

    const uint64_t Value = Stats[Stat];
    OS << format("%'20lld : ", Value * opts::DynoStatsScale) << Desc[Stat];

    if (Other) {
      uint64_t Baseline = (*Other)[Stat];
      if (Value == Baseline) {
        OS << " (=)";
      } else {
        // Clamp the baseline to at least 1 to prevent a divide by zero.
        Baseline = std::max(Baseline, uint64_t(1));
        OS << format(" (%+.1f%%)",
                     ((float)Value - (float)Baseline) * 100.0 /
                         (float)(Baseline));
      }
    }

    OS << '\n';
  }
}
|
||||
|
||||
/// Accumulate \p Other into this stat set. For every index strictly between
/// FIRST_DYNO_STAT and LAST_DYNO_STAT, the value read through Other's const
/// operator[] is added to the corresponding stored counter.
void DynoStats::operator+=(const DynoStats &Other) {
  auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
  while (Stat < DynoStats::LAST_DYNO_STAT) {
    Stats[Stat] += Other[Stat];
    ++Stat;
  }
}
|
||||
|
||||
/// Compute dyno stats for a single function from its profile counts.
///
/// Returns all-zero stats when the function is not simple, has no valid
/// profile, or was folded. Otherwise walks the blocks in layout order and
/// accumulates instruction, load/store, call and branch counters weighted by
/// the execution counts recorded on the blocks and branches.
DynoStats getDynoStats(const BinaryFunction &BF) {
  auto &BC = BF.getBinaryContext();

  DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());

  // Return empty stats for a function we don't completely understand. A
  // function folded in non-relocation mode keeps its profile for
  // optimization, but it is excluded from the dyno stats as well.
  if (!BF.isSimple() || !BF.hasValidProfile() || BF.isFolded())
    return Stats;

  // Update the enumeration of basic blocks so that branch direction is
  // detected correctly.
  BF.updateLayoutIndices();

  for (const auto &Block : BF.layout()) {
    // The basic block execution count equals the sum of incoming branch
    // frequencies. This may deviate from the sum of outgoing branches of the
    // basic block, especially since the block may contain a call to a
    // function that does not return or that throws an exception.
    const uint64_t ExecCount = Block->getKnownExecutionCount();

    // Ignore empty blocks and blocks that were not executed.
    if (Block->getNumNonPseudos() == 0 || ExecCount == 0)
      continue;

    // Count AArch64 linker-inserted veneers.
    if (BF.isAArch64Veneer())
      Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();

    // Count loads, stores and calls by iterating through all instructions.
    for (const auto &Instr : *Block) {
      if (BC.MIB->isStore(Instr))
        Stats[DynoStats::STORES] += ExecCount;
      if (BC.MIB->isLoad(Instr))
        Stats[DynoStats::LOADS] += ExecCount;

      if (!BC.MIB->isCall(Instr))
        continue;

      // A conditional tail call is weighted by its own taken count instead
      // of the block execution count.
      uint64_t CallFreq = ExecCount;
      if (BC.MIB->getConditionalTailCall(Instr))
        CallFreq =
            BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");

      Stats[DynoStats::FUNCTION_CALLS] += CallFreq;

      if (BC.MIB->isIndirectCall(Instr)) {
        Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
        continue;
      }

      const auto *CallSymbol = BC.MIB->getTargetSymbol(Instr);
      if (!CallSymbol)
        continue;

      const auto *Callee = BC.getFunctionForSymbol(CallSymbol);
      if (!Callee || !Callee->isPLTFunction())
        continue;

      Stats[DynoStats::PLT_CALLS] += CallFreq;

      // We don't process PLT functions and hence have to adjust relevant
      // dynostats here for:
      //
      //   jmp *GOT_ENTRY(%rip)
      //
      // NOTE: this is arch-specific.
      Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
      Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
      Stats[DynoStats::LOADS] += CallFreq;
      Stats[DynoStats::INSTRUCTIONS] += CallFreq;
    }

    Stats[DynoStats::INSTRUCTIONS] += Block->getNumNonPseudos() * ExecCount;

    // Jump tables.
    const auto *Terminator = Block->getLastNonPseudoInstr();
    if (BC.MIB->getJumpTable(*Terminator)) {
      Stats[DynoStats::JUMP_TABLE_BRANCHES] += ExecCount;
      DEBUG(
        static uint64_t MostFrequentJT;
        if (ExecCount > MostFrequentJT) {
          MostFrequentJT = ExecCount;
          dbgs() << "BOLT-INFO: most frequently executed jump table is in "
                 << "function " << BF << " in basic block " << Block->getName()
                 << " executed totally " << ExecCount << " times.\n";
        }
      );
      continue;
    }

    // Update stats for branches.
    const MCSymbol *TBB = nullptr;
    const MCSymbol *FBB = nullptr;
    MCInst *CondBranch = nullptr;
    MCInst *UncondBranch = nullptr;
    if (!Block->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
      continue;

    // No branch at the end of the block: nothing more to count.
    if (!CondBranch && !UncondBranch)
      continue;

    // Simple unconditional branch.
    if (!CondBranch) {
      Stats[DynoStats::UNCOND_BRANCHES] += ExecCount;
      continue;
    }

    // Conditional tail calls: counted through the first branch-info entry
    // of the block, when there is one.
    if (BC.MIB->getConditionalTailCall(*CondBranch)) {
      if (Block->branch_info_begin() != Block->branch_info_end())
        Stats[DynoStats::UNCOND_BRANCHES] += Block->branch_info_begin()->Count;
      continue;
    }

    // Conditional branch that could be followed by an unconditional branch.
    // Counts without profile data are treated as zero.
    auto TakenCount = Block->getTakenBranchInfo().Count;
    if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      TakenCount = 0;

    auto NonTakenCount = Block->getFallthroughBranchInfo().Count;
    if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      NonTakenCount = 0;

    const bool IsForward =
        BF.isForwardBranch(Block, Block->getConditionalSuccessor(true));
    if (IsForward) {
      Stats[DynoStats::FORWARD_COND_BRANCHES] += ExecCount;
      Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
    } else {
      Stats[DynoStats::BACKWARD_COND_BRANCHES] += ExecCount;
      Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
    }

    // The trailing unconditional branch executes whenever the conditional
    // branch is not taken.
    if (UncondBranch)
      Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
  }

  return Stats;
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
178
bolt/src/DynoStats.h
Normal file
178
bolt/src/DynoStats.h
Normal file
@@ -0,0 +1,178 @@
|
||||
//===--- DynoStats.h ------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_BOLT_DYNO_STATS_H
|
||||
#define LLVM_TOOLS_LLVM_BOLT_DYNO_STATS_H
|
||||
|
||||
#include "BinaryFunction.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace bolt {
|
||||
|
||||
/// Class encapsulating runtime statistics about an execution unit.
|
||||
class DynoStats {
|
||||
|
||||
#define DYNO_STATS\
|
||||
D(FIRST_DYNO_STAT, "<reserved>", Fn)\
|
||||
D(FORWARD_COND_BRANCHES, "executed forward branches", Fn)\
|
||||
D(FORWARD_COND_BRANCHES_TAKEN, "taken forward branches", Fn)\
|
||||
D(BACKWARD_COND_BRANCHES, "executed backward branches", Fn)\
|
||||
D(BACKWARD_COND_BRANCHES_TAKEN, "taken backward branches", Fn)\
|
||||
D(UNCOND_BRANCHES, "executed unconditional branches", Fn)\
|
||||
D(FUNCTION_CALLS, "all function calls", Fn)\
|
||||
D(INDIRECT_CALLS, "indirect calls", Fn)\
|
||||
D(PLT_CALLS, "PLT calls", Fn)\
|
||||
D(INSTRUCTIONS, "executed instructions", Fn)\
|
||||
D(LOADS, "executed load instructions", Fn)\
|
||||
D(STORES, "executed store instructions", Fn)\
|
||||
D(JUMP_TABLE_BRANCHES, "taken jump table branches", Fn)\
|
||||
D(ALL_BRANCHES, "total branches",\
|
||||
Fadd(ALL_CONDITIONAL, UNCOND_BRANCHES))\
|
||||
D(ALL_TAKEN, "taken branches",\
|
||||
Fadd(TAKEN_CONDITIONAL, UNCOND_BRANCHES))\
|
||||
D(NONTAKEN_CONDITIONAL, "non-taken conditional branches",\
|
||||
Fsub(ALL_CONDITIONAL, TAKEN_CONDITIONAL))\
|
||||
D(TAKEN_CONDITIONAL, "taken conditional branches",\
|
||||
Fadd(FORWARD_COND_BRANCHES_TAKEN, BACKWARD_COND_BRANCHES_TAKEN))\
|
||||
D(ALL_CONDITIONAL, "all conditional branches",\
|
||||
Fadd(FORWARD_COND_BRANCHES, BACKWARD_COND_BRANCHES))\
|
||||
D(VENEER_CALLS_AARCH64, "linker-inserted veneer calls", Fn)\
|
||||
D(LAST_DYNO_STAT, "<reserved>", 0)
|
||||
|
||||
public:
|
||||
#define D(name, ...) name,
|
||||
enum Category : uint8_t { DYNO_STATS };
|
||||
#undef D
|
||||
|
||||
|
||||
private:
|
||||
uint64_t Stats[LAST_DYNO_STAT+1];
|
||||
bool PrintAArch64Stats;
|
||||
|
||||
#define D(name, desc, ...) desc,
|
||||
static constexpr const char *Desc[] = { DYNO_STATS };
|
||||
#undef D
|
||||
|
||||
public:
|
||||
DynoStats(bool PrintAArch64Stats) {
|
||||
this->PrintAArch64Stats = PrintAArch64Stats;
|
||||
for (auto Stat = FIRST_DYNO_STAT + 0; Stat < LAST_DYNO_STAT; ++Stat)
|
||||
Stats[Stat] = 0;
|
||||
}
|
||||
|
||||
uint64_t &operator[](size_t I) {
|
||||
assert(I > FIRST_DYNO_STAT && I < LAST_DYNO_STAT &&
|
||||
"index out of bounds");
|
||||
return Stats[I];
|
||||
}
|
||||
|
||||
uint64_t operator[](size_t I) const {
|
||||
switch (I) {
|
||||
#define D(name, desc, func) \
|
||||
case name: \
|
||||
return func;
|
||||
#define Fn Stats[I]
|
||||
#define Fadd(a, b) operator[](a) + operator[](b)
|
||||
#define Fsub(a, b) operator[](a) - operator[](b)
|
||||
#define F(a) operator[](a)
|
||||
#define Radd(a, b) (a + b)
|
||||
#define Rsub(a, b) (a - b)
|
||||
DYNO_STATS
|
||||
#undef Rsub
|
||||
#undef Radd
|
||||
#undef F
|
||||
#undef Fsub
|
||||
#undef Fadd
|
||||
#undef Fn
|
||||
#undef D
|
||||
default:
|
||||
llvm_unreachable("index out of bounds");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void print(raw_ostream &OS, const DynoStats *Other = nullptr) const;
|
||||
|
||||
void operator+=(const DynoStats &Other);
|
||||
bool operator<(const DynoStats &Other) const;
|
||||
bool operator==(const DynoStats &Other) const;
|
||||
bool operator!=(const DynoStats &Other) const { return !operator==(Other); }
|
||||
bool lessThan(const DynoStats &Other, ArrayRef<Category> Keys) const;
|
||||
|
||||
static const char* Description(const Category C) {
|
||||
return Desc[C];
|
||||
}
|
||||
};
|
||||
|
||||
/// Stream all statistics of \p Stats to \p OS without a comparison baseline.
inline raw_ostream &operator<<(raw_ostream &OS, const DynoStats &Stats) {
  const DynoStats *const NoBaseline = nullptr;
  Stats.print(OS, NoBaseline);
  return OS;
}
|
||||
|
||||
/// Presumably the element-wise sum of \p A and \p B.
/// NOTE(review): no definition of this operator is visible alongside the other
/// DynoStats members - confirm it is defined somewhere before using it.
DynoStats operator+(const DynoStats &A, const DynoStats &B);
|
||||
|
||||
/// Return dynostats for the function.
|
||||
///
|
||||
/// The function relies on branch instructions being in-sync with CFG for
|
||||
/// branch instructions stats. Thus it is better to call it after
|
||||
/// fixBranches().
|
||||
DynoStats getDynoStats(const BinaryFunction &BF);
|
||||
|
||||
/// Return program-wide dynostats.
|
||||
template <typename FuncsType>
|
||||
inline DynoStats getDynoStats(const FuncsType &Funcs) {
|
||||
bool IsAArch64 = Funcs.begin()->second.getBinaryContext().isAArch64();
|
||||
DynoStats dynoStats(IsAArch64);
|
||||
for (auto &BFI : Funcs) {
|
||||
auto &BF = BFI.second;
|
||||
if (BF.isSimple()) {
|
||||
dynoStats += getDynoStats(BF);
|
||||
}
|
||||
}
|
||||
return dynoStats;
|
||||
}
|
||||
|
||||
/// Call a function with optional before and after dynostats printing.
|
||||
template <typename FnType, typename FuncsType>
|
||||
inline void
|
||||
callWithDynoStats(FnType &&Func,
|
||||
const FuncsType &Funcs,
|
||||
StringRef Phase,
|
||||
const bool Flag) {
|
||||
bool IsAArch64 = Funcs.begin()->second.getBinaryContext().isAArch64();
|
||||
DynoStats DynoStatsBefore(IsAArch64);
|
||||
if (Flag) {
|
||||
DynoStatsBefore = getDynoStats(Funcs);
|
||||
}
|
||||
|
||||
Func();
|
||||
|
||||
if (Flag) {
|
||||
const auto DynoStatsAfter = getDynoStats(Funcs);
|
||||
const auto Changed = (DynoStatsAfter != DynoStatsBefore);
|
||||
outs() << "BOLT-INFO: program-wide dynostats after running "
|
||||
<< Phase << (Changed ? "" : " (no change)") << ":\n\n"
|
||||
<< DynoStatsBefore << '\n';
|
||||
if (Changed) {
|
||||
DynoStatsAfter.print(outs(), &DynoStatsBefore);
|
||||
}
|
||||
outs() << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
@@ -1331,7 +1331,7 @@ PrintProgramStats::runOnFunctions(BinaryContext &BC) {
|
||||
const auto &BF = BFI.second;
|
||||
if (shouldOptimize(BF) && BF.hasValidProfile()) {
|
||||
Functions.push_back(&BF);
|
||||
Stats.emplace(&BF, BF.getDynoStats());
|
||||
Stats.emplace(&BF, getDynoStats(BF));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1383,7 +1383,7 @@ PrintProgramStats::runOnFunctions(BinaryContext &BC) {
|
||||
outs() << " are:\n";
|
||||
auto SFI = Functions.begin();
|
||||
for (unsigned I = 0; I < 100 && SFI != Functions.end(); ++SFI, ++I) {
|
||||
const auto Stats = (*SFI)->getDynoStats();
|
||||
const auto Stats = getDynoStats(**SFI);
|
||||
outs() << " " << **SFI;
|
||||
if (!SortAll) {
|
||||
outs() << " (";
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include "BinaryContext.h"
|
||||
#include "BinaryFunction.h"
|
||||
#include "DynoStats.h"
|
||||
#include "HFSort.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user