Reject profile data for functions that do not match.

Summary:
Verify profile data for a function and reject if there are branches
that don't correspond to any branches in the function CFG. Note that
we have to ignore branches resulting from recursive calls.

Fix printing instruction offsets in disassembled state.

Allow function to have non-zero execution count even if we don't
have branch information.

(cherry picked from FBD3451596)
This commit is contained in:
Maksim Panchenko
2016-06-15 18:36:16 -07:00
parent 88ac5d9d0e
commit 70f82d9371
3 changed files with 181 additions and 72 deletions

View File

@@ -166,8 +166,10 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
}
if (ImageAddress)
OS << "\n Image : 0x" << Twine::utohexstr(ImageAddress);
if (ExecutionCount != COUNT_NO_PROFILE)
if (ExecutionCount != COUNT_NO_PROFILE) {
OS << "\n Exec Count : " << ExecutionCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}
OS << "\n}\n";
@@ -262,7 +264,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
if (BasicBlocks.empty() && !Instructions.empty()) {
// Print before CFG was built.
for (const auto &II : Instructions) {
auto Offset = II.first;
Offset = II.first;
// Print label if exists at this offset.
auto LI = Labels.find(Offset);
@@ -549,8 +551,8 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
MCSymbolRefExpr::VK_None,
*Ctx)));
if (!IsCall) {
// Add local branch info.
LocalBranches.push_back({Offset, TargetOffset});
// Add taken branch info.
TakenBranches.push_back({Offset, TargetOffset});
}
if (IsCondBranch) {
// Add fallthrough branch info.
@@ -611,11 +613,10 @@ bool BinaryFunction::buildCFG() {
auto &MIA = BC.MIA;
auto BranchDataOrErr = BC.DR.getFuncBranchData(getName());
if (std::error_code EC = BranchDataOrErr.getError()) {
if (!BranchDataOrErr) {
DEBUG(dbgs() << "no branch data found for \"" << getName() << "\"\n");
} else {
if (!BranchDataOrErr.get().Data.empty())
ExecutionCount = BranchDataOrErr.get().ExecutionCount;
ExecutionCount = BranchDataOrErr->ExecutionCount;
}
if (!isSimple())
@@ -736,7 +737,11 @@ bool BinaryFunction::buildCFG() {
// e.g. exit(3), etc. Otherwise we'll see a false fall-through
// blocks.
for (auto &Branch : LocalBranches) {
// Make sure we can use profile data for this function.
if (BranchDataOrErr)
evaluateProfileData(BranchDataOrErr.get());
for (auto &Branch : TakenBranches) {
DEBUG(dbgs() << "registering branch [0x" << Twine::utohexstr(Branch.first)
<< "] -> [0x" << Twine::utohexstr(Branch.second) << "]\n");
BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Branch.first);
@@ -783,7 +788,7 @@ bool BinaryFunction::buildCFG() {
}
// Add fall-through branches (except for non-taken conditional branches with
// profile data, which were already accounted for in LocalBranches).
// profile data, which were already accounted for in TakenBranches).
PrevBB = nullptr;
bool IsPrevFT = false; // Is previous block a fall-through.
for (auto BB : BasicBlocks) {
@@ -830,20 +835,19 @@ bool BinaryFunction::buildCFG() {
}
// Infer frequency for non-taken branches
if (ExecutionCount != COUNT_NO_PROFILE && !BranchDataOrErr.getError()) {
if (hasValidProfile())
inferFallThroughCounts();
}
// Update CFI information for each BB
annotateCFIState();
// Clean-up memory taken by instructions and labels.
clearInstructions();
clearCFIOffsets();
clearLabels();
clearLocalBranches();
clearFTBranches();
clearLPToBBIndex();
clearList(Instructions);
clearList(OffsetToCFI);
clearList(Labels);
clearList(TakenBranches);
clearList(FTBranches);
clearList(LPToBBIndex);
// Update the state.
CurrentState = State::CFG;
@@ -854,6 +858,108 @@ bool BinaryFunction::buildCFG() {
return true;
}
void BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
BranchListType ProfileBranches(BranchData.Data.size());
std::transform(BranchData.Data.begin(),
BranchData.Data.end(),
ProfileBranches.begin(),
[](const BranchInfo &BI) {
return std::make_pair(BI.From.Offset,
BI.To.Name == BI.From.Name ?
BI.To.Offset : -1U);
});
BranchListType LocalProfileBranches;
std::copy_if(ProfileBranches.begin(),
ProfileBranches.end(),
std::back_inserter(LocalProfileBranches),
[](const std::pair<uint32_t, uint32_t> &Branch) {
return Branch.second != -1U;
});
// Until we define a minimal profile, we consider no branch data to be a valid
// profile. It could happen to a function without branches.
if (LocalProfileBranches.empty()) {
ProfileMatchRatio = 1.0f;
return;
}
std::sort(LocalProfileBranches.begin(), LocalProfileBranches.end());
BranchListType FunctionBranches = TakenBranches;
FunctionBranches.insert(FunctionBranches.end(),
FTBranches.begin(),
FTBranches.end());
std::sort(FunctionBranches.begin(), FunctionBranches.end());
BranchListType DiffBranches; // Branches in profile without a match.
std::set_difference(LocalProfileBranches.begin(),
LocalProfileBranches.end(),
FunctionBranches.begin(),
FunctionBranches.end(),
std::back_inserter(DiffBranches));
// Branches without a match in CFG.
BranchListType OrphanBranches;
// Eliminate recursive calls and returns from recursive calls from the list
// of branches that have no match. They are not considered local branches.
auto isRecursiveBranch = [&](std::pair<uint32_t, uint32_t> &Branch) {
auto SrcInstrI = Instructions.find(Branch.first);
if (SrcInstrI == Instructions.end())
return false;
// Check if it is a recursive call.
if (BC.MIA->isCall(SrcInstrI->second) && Branch.second == 0)
return true;
auto DstInstrI = Instructions.find(Branch.second);
if (DstInstrI == Instructions.end())
return false;
// Check if it is a return from a recursive call.
bool IsSrcReturn = BC.MIA->isReturn(SrcInstrI->second);
// "rep ret" is considered to be 2 different instructions.
if (!IsSrcReturn && BC.MIA->isPrefix(SrcInstrI->second)) {
auto SrcInstrSuccessorI = SrcInstrI;
++SrcInstrSuccessorI;
assert(SrcInstrSuccessorI != Instructions.end() &&
"unexpected prefix instruction at the end of function");
IsSrcReturn = BC.MIA->isReturn(SrcInstrSuccessorI->second);
}
if (IsSrcReturn && Branch.second != 0) {
// Make sure the destination follows the call instruction.
auto DstInstrPredecessorI = DstInstrI;
--DstInstrPredecessorI;
assert(DstInstrPredecessorI != Instructions.end() && "invalid iterator");
if (BC.MIA->isCall(DstInstrPredecessorI->second))
return true;
}
return false;
};
std::remove_copy_if(DiffBranches.begin(),
DiffBranches.end(),
std::back_inserter(OrphanBranches),
isRecursiveBranch);
ProfileMatchRatio =
(float) (LocalProfileBranches.size() - OrphanBranches.size()) /
(float) LocalProfileBranches.size();
if (!OrphanBranches.empty()) {
errs() << "BOLT-WARNING: profile branches match only "
<< format("%.1f%%", ProfileMatchRatio * 100.0f) << " ("
<< (LocalProfileBranches.size() - OrphanBranches.size()) << '/'
<< LocalProfileBranches.size() << ") for function "
<< getName() << '\n';
DEBUG(
for (auto &OBranch : OrphanBranches)
errs() << "\t0x" << Twine::utohexstr(OBranch.first) << " -> 0x"
<< Twine::utohexstr(OBranch.second) << " (0x"
<< Twine::utohexstr(OBranch.first + getAddress()) << " -> 0x"
);
}
}
void BinaryFunction::inferFallThroughCounts() {
assert(!BasicBlocks.empty() && "basic block list should not be empty");
@@ -881,7 +987,7 @@ void BinaryFunction::inferFallThroughCounts() {
}
}
// Udate execution counts of landing pad blocks.
// Update execution counts of landing pad blocks.
if (!BranchDataOrErr.getError()) {
const FuncBranchData &BranchData = BranchDataOrErr.get();
for (const auto &I : BranchData.EntryData) {
@@ -893,7 +999,7 @@ void BinaryFunction::inferFallThroughCounts() {
}
// Work on a basic block at a time, propagating frequency information forwards
// It is important to walk in the layour order
// It is important to walk in the layout order
for (auto CurBB : BasicBlocks) {
uint64_t BBExecCount = CurBB->getExecutionCount();
@@ -1036,7 +1142,7 @@ bool BinaryFunction::fixCFIState() {
// without using the state stack. Not sure if it is worth the effort
// because this happens rarely.
if (NestedLevel != 0) {
errs() << "BOLT-WARNING: CFI rewriter detected nested CFI state while "
errs() << "BOLT-WARNING: CFI rewriter detected nested CFI state while"
<< " replaying CFI instructions for BB " << InBB->getName()
<< " in function " << getName() << '\n';
return false;
@@ -1157,7 +1263,7 @@ void BinaryFunction::modifyLayout(LayoutType Type, bool Split) {
}
// Cannot do optimal layout without profile.
if (getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
if (!hasValidProfile())
return;
// Work on optimal solution if problem is small enough

View File

@@ -19,6 +19,7 @@
#include "BinaryBasicBlock.h"
#include "BinaryContext.h"
#include "DataReader.h"
#include "DebugData.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
@@ -158,6 +159,9 @@ private:
/// The profile data for the number of times the function was executed.
uint64_t ExecutionCount{COUNT_NO_PROFILE};
/// Profile match ration.
float ProfileMatchRatio{0.0};
/// Score of the function (estimated number of instructions executed,
/// according to profile data). -1 if the score has not been calculated yet.
int64_t FunctionScore{-1};
@@ -177,44 +181,10 @@ private:
/// the output binary.
uint32_t AddressRangesOffset{-1U};
/// Release storage used by instructions.
BinaryFunction &clearInstructions() {
InstrMapType TempMap;
Instructions.swap(TempMap);
return *this;
}
/// Release storage used by CFI offsets map.
BinaryFunction &clearCFIOffsets() {
std::multimap<uint32_t, uint32_t> TempMap;
OffsetToCFI.swap(TempMap);
return *this;
}
/// Release storage used by instructions.
BinaryFunction &clearLabels() {
LabelsMapType TempMap;
Labels.swap(TempMap);
return *this;
}
/// Release memory taken by local branch info.
BinaryFunction &clearLocalBranches() {
LocalBranchesListType TempList;
LocalBranches.swap(TempList);
return *this;
}
BinaryFunction &clearFTBranches() {
LocalBranchesListType TempList;
FTBranches.swap(TempList);
return *this;
}
/// Release memory taken by landing pad info.
BinaryFunction &clearLPToBBIndex() {
LandingPadsMapType TempMap;
LPToBBIndex.swap(TempMap);
/// Release memory taken by the list.
template<typename T> BinaryFunction &clearList(T& List) {
T TempList;
TempList.swap(List);
return *this;
}
@@ -223,13 +193,14 @@ private:
return *this;
}
/// Return basic block that originally was laid out immediately following
/// the given /p BB basic block.
const BinaryBasicBlock *
getOriginalLayoutSuccessor(const BinaryBasicBlock *BB) const;
/// Storage for all local branches in the function (non-fall-throughs).
using LocalBranchesListType = std::vector<std::pair<uint32_t, uint32_t>>;
LocalBranchesListType LocalBranches;
LocalBranchesListType FTBranches;
using BranchListType = std::vector<std::pair<uint32_t, uint32_t>>;
BranchListType TakenBranches; /// All local taken branches.
BranchListType FTBranches; /// All fall-through branches.
/// Storage for all landing pads and their corresponding invokes.
using LandingPadsMapType = std::map<const MCSymbol *, std::vector<unsigned> >;
@@ -574,6 +545,21 @@ public:
Instructions.emplace(Offset, std::forward<MCInst>(Instruction));
}
/// Return instruction at a given offset in the function. Valid before
/// CFG is constructed.
MCInst *getInstructionAtOffset(uint64_t Offset) {
assert(CurrentState == State::Disassembled &&
"can only call function in Disassembled state");
auto II = Instructions.find(Offset);
return (II == Instructions.end()) ? nullptr : &II->second;
}
/// Return true if function profile is present and accurate.
bool hasValidProfile() {
return ExecutionCount != COUNT_NO_PROFILE &&
ProfileMatchRatio == 1.0f;
}
void addCFIInstruction(uint64_t Offset, MCCFIInstruction &&Inst) {
assert(!Instructions.empty());
@@ -747,7 +733,7 @@ public:
/// If successful, this function will populate the list of instructions
/// for this function together with offsets from the function start
/// in the input. It will also populate Labels with destinations for
/// local branches, and LocalBranches with [from, to] info.
/// local branches, and TakenBranches with [from, to] info.
///
/// \p FunctionData is the set bytes representing the function body.
///
@@ -768,6 +754,10 @@ public:
/// State::CFG. Returns false if CFG cannot be built.
bool buildCFG();
/// Check how closely the profile data matches the function and set
/// ProfileMatchRatio to reflect the accuracy.
void evaluateProfileData(const FuncBranchData &BranchData);
/// Walks the list of basic blocks filling in missing information about
/// edge frequency for fall-throughs.
///

View File

@@ -925,22 +925,35 @@ void RewriteInstance::disassembleFunctions() {
}
uint64_t NumSimpleFunctions{0};
uint64_t NumStaleProfileFunctions{0};
std::vector<BinaryFunction *> ProfiledFunctions;
for (auto &BFI : BinaryFunctions) {
if (!BFI.second.isSimple())
auto &Function = BFI.second;
if (!Function.isSimple())
continue;
++NumSimpleFunctions;
if (BFI.second.getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
ProfiledFunctions.push_back(&BFI.second);
if (Function.getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
continue;
if (Function.hasValidProfile())
ProfiledFunctions.push_back(&Function);
else
++NumStaleProfileFunctions;
}
errs() << "BOLT-INFO: " << ProfiledFunctions.size() << " functions out of "
<< NumSimpleFunctions
<< " simple functions ("
errs() << "BOLT-INFO: "
<< ProfiledFunctions.size() + NumStaleProfileFunctions
<< " functions out of " << NumSimpleFunctions << " simple functions ("
<< format("%.1f",
ProfiledFunctions.size() /
(float) NumSimpleFunctions * 100.0)
(ProfiledFunctions.size() + NumStaleProfileFunctions) /
(float) NumSimpleFunctions * 100.0f)
<< "%) have non-empty execution profile.\n";
if (NumStaleProfileFunctions) {
errs() << "BOLT-INFO: " << NumStaleProfileFunctions
<< format(" (%.1f%) ", NumStaleProfileFunctions /
(float) NumSimpleFunctions * 100.0f)
<< " function" << (NumStaleProfileFunctions == 1 ? "" : "s")
<< " have invalid (possibly stale) profile.\n";
}
if (ProfiledFunctions.size() > 10) {
errs() << "BOLT-INFO: top called functions are:\n";