mirror of
https://github.com/intel/llvm.git
synced 2026-01-14 03:50:17 +08:00
[BOLT] Fixes for new profile
Summary: Do a better job of recording fall-through branches in new profile mode (-prof-compat-mode=0). For this we need to record offsets for all instructions that are last in the containing basic block. Change the way we convert conditional tail calls. Now we never reverse the condition. This is required for better profile matching. The original approach of preserving the direction was controversial to start with. Add "-infer-fall-throughs" option (on by default) to allow disabling inference of fall-through edge counts. (cherry picked from FBD6994293)
This commit is contained in:
@@ -411,18 +411,18 @@ public:
|
||||
|
||||
/// Add instruction at the end of this basic block.
|
||||
/// Returns an iterator pointing to the newly added instruction in the BB.
|
||||
uint32_t addInstruction(MCInst &&Inst) {
|
||||
iterator addInstruction(MCInst &&Inst) {
|
||||
adjustNumPseudos(Inst, 1);
|
||||
Instructions.emplace_back(Inst);
|
||||
return Instructions.size() - 1;
|
||||
return std::prev(Instructions.end());
|
||||
}
|
||||
|
||||
/// Add instruction at the end of this basic block.
|
||||
/// Returns an iterator pointing to the newly added instruction in the BB.
|
||||
uint32_t addInstruction(const MCInst &Inst) {
|
||||
iterator addInstruction(const MCInst &Inst) {
|
||||
adjustNumPseudos(Inst, 1);
|
||||
Instructions.push_back(Inst);
|
||||
return Instructions.size() - 1;
|
||||
return std::prev(Instructions.end());
|
||||
}
|
||||
|
||||
/// Add a range of instructions to the end of this basic block.
|
||||
|
||||
@@ -470,7 +470,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
|
||||
|
||||
uint64_t BBExecCount = BB->getExecutionCount();
|
||||
if (hasValidProfile()) {
|
||||
OS << " Exec Count : " << BBExecCount << '\n';
|
||||
OS << " Exec Count : ";
|
||||
if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
|
||||
OS << BBExecCount << '\n';
|
||||
else
|
||||
OS << "<unknown>\n";
|
||||
}
|
||||
if (BB->getCFIState() >= 0) {
|
||||
OS << " CFI State : " << BB->getCFIState() << '\n';
|
||||
@@ -1492,7 +1496,7 @@ bool BinaryFunction::buildCFG() {
|
||||
BinaryBasicBlock *InsertBB{nullptr};
|
||||
BinaryBasicBlock *PrevBB{nullptr};
|
||||
bool IsLastInstrNop{false};
|
||||
const MCInst *PrevInstr{nullptr};
|
||||
uint64_t LastInstrOffset{0};
|
||||
|
||||
auto addCFIPlaceholders =
|
||||
[this](uint64_t CFIOffset, BinaryBasicBlock *InsertBB) {
|
||||
@@ -1503,6 +1507,16 @@ bool BinaryFunction::buildCFG() {
|
||||
}
|
||||
};
|
||||
|
||||
// For profiling purposes we need to save the offset of the last instruction
|
||||
// in the basic block. But in certain cases we don't know if the instruction was
|
||||
// the last one, and we have to go back and update its offset.
|
||||
auto updateOffset = [&](uint64_t Offset) {
|
||||
assert(PrevBB && PrevBB != InsertBB && "invalid previous block");
|
||||
auto *PrevInstr = PrevBB->getLastNonPseudoInstr();
|
||||
if (PrevInstr && !MIA->hasAnnotation(*PrevInstr, "Offset"))
|
||||
MIA->addAnnotation(BC.Ctx.get(), *PrevInstr, "Offset", Offset);
|
||||
};
|
||||
|
||||
for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
|
||||
const auto Offset = I->first;
|
||||
auto &Instr = I->second;
|
||||
@@ -1515,6 +1529,8 @@ bool BinaryFunction::buildCFG() {
|
||||
/* DeriveAlignment = */ IsLastInstrNop);
|
||||
if (hasEntryPointAtOffset(Offset))
|
||||
InsertBB->setEntryPoint();
|
||||
if (PrevBB)
|
||||
updateOffset(LastInstrOffset);
|
||||
}
|
||||
// Ignore nops. We use nops to derive alignment of the next basic block.
|
||||
// It will not always work, as some blocks are naturally aligned, but
|
||||
@@ -1528,6 +1544,7 @@ bool BinaryFunction::buildCFG() {
|
||||
// we see an unconditional branch following a conditional one. The latter
|
||||
// should not be a conditional tail call.
|
||||
assert(PrevBB && "no previous basic block for a fall through");
|
||||
auto *PrevInstr = PrevBB->getLastNonPseudoInstr();
|
||||
assert(PrevInstr && "no previous instruction for a fall through");
|
||||
if (MIA->isUnconditionalBranch(Instr) &&
|
||||
!MIA->isUnconditionalBranch(*PrevInstr) &&
|
||||
@@ -1538,6 +1555,7 @@ bool BinaryFunction::buildCFG() {
|
||||
InsertBB = addBasicBlock(Offset,
|
||||
BC.Ctx->createTempSymbol("FT", true),
|
||||
/* DeriveAlignment = */ IsLastInstrNop);
|
||||
updateOffset(LastInstrOffset);
|
||||
}
|
||||
}
|
||||
if (Offset == 0) {
|
||||
@@ -1545,9 +1563,10 @@ bool BinaryFunction::buildCFG() {
|
||||
addCFIPlaceholders(0, InsertBB);
|
||||
}
|
||||
|
||||
IsLastInstrNop = false;
|
||||
InsertBB->addInstruction(Instr);
|
||||
PrevInstr = &Instr;
|
||||
const auto IsBlockEnd = MIA->isTerminator(Instr);
|
||||
IsLastInstrNop = MIA->isNoop(Instr);
|
||||
LastInstrOffset = Offset;
|
||||
InsertBB->addInstruction(std::move(Instr));
|
||||
|
||||
// Add associated CFI instrs. We always add the CFI instruction that is
|
||||
// located immediately after this instruction, since the next CFI
|
||||
@@ -1558,9 +1577,11 @@ bool BinaryFunction::buildCFG() {
|
||||
CFIOffset = NextInstr->first;
|
||||
else
|
||||
CFIOffset = getSize();
|
||||
|
||||
// Note: this potentially invalidates instruction pointers/iterators.
|
||||
addCFIPlaceholders(CFIOffset, InsertBB);
|
||||
|
||||
if (MIA->isTerminator(Instr)) {
|
||||
if (IsBlockEnd) {
|
||||
PrevBB = InsertBB;
|
||||
InsertBB = nullptr;
|
||||
}
|
||||
@@ -1769,10 +1790,6 @@ void BinaryFunction::addEntryPoint(uint64_t Address) {
|
||||
}
|
||||
|
||||
void BinaryFunction::removeConditionalTailCalls() {
|
||||
// Don't touch code if non-simple ARM
|
||||
if (BC.TheTriple->getArch() == llvm::Triple::aarch64 && !isSimple())
|
||||
return;
|
||||
|
||||
// Blocks to be appended at the end.
|
||||
std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;
|
||||
|
||||
@@ -1824,29 +1841,14 @@ void BinaryFunction::removeConditionalTailCalls() {
|
||||
|
||||
BC.MIA->convertTailCallToJmp(*CTCInstr);
|
||||
|
||||
// In attempt to preserve the direction of the original conditional jump,
|
||||
// we will either create an unconditional jump in a separate basic block
|
||||
// at the end of the function, or reverse a condition of the jump
|
||||
// and create a fall-through block right after the original tail call.
|
||||
if (getAddress() >= *TargetAddressOrNone) {
|
||||
// Insert the basic block right after the current one.
|
||||
std::vector<std::unique_ptr<BinaryBasicBlock>> TCBB;
|
||||
TCBB.emplace_back(std::move(TailCallBB));
|
||||
BBI = insertBasicBlocks(BBI,
|
||||
std::move(TCBB),
|
||||
/* UpdateLayout */ true,
|
||||
/* UpdateCFIState */ false);
|
||||
BC.MIA->reverseBranchCondition(
|
||||
*CTCInstr, (*std::next(BBI)).getLabel(), BC.Ctx.get());
|
||||
BC.MIA->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
|
||||
BC.Ctx.get());
|
||||
|
||||
} else {
|
||||
BC.MIA->replaceBranchTarget(*CTCInstr, TailCallBB->getLabel(),
|
||||
BC.Ctx.get());
|
||||
// Add basic block to the list that will be added to the end.
|
||||
NewBlocks.emplace_back(std::move(TailCallBB));
|
||||
// Swap edges as the TailCallBB corresponds to the taken branch.
|
||||
BB.swapConditionalSuccessors();
|
||||
}
|
||||
// Add basic block to the list that will be added to the end.
|
||||
NewBlocks.emplace_back(std::move(TailCallBB));
|
||||
|
||||
// Swap edges as the TailCallBB corresponds to the taken branch.
|
||||
BB.swapConditionalSuccessors();
|
||||
|
||||
// This branch is no longer a conditional tail call.
|
||||
BC.MIA->unsetConditionalTailCall(*CTCInstr);
|
||||
|
||||
@@ -189,12 +189,22 @@ using IndirectCallSiteProfile = SmallVector<IndirectCallProfile, 4>;
|
||||
|
||||
inline raw_ostream &operator<<(raw_ostream &OS,
|
||||
const bolt::IndirectCallSiteProfile &ICSP) {
|
||||
const char *Sep = "";
|
||||
std::string TempString;
|
||||
raw_string_ostream SS(TempString);
|
||||
|
||||
const char *Sep = "\n ";
|
||||
uint64_t TotalCount = 0;
|
||||
uint64_t TotalMispreds = 0;
|
||||
for (auto &CSP : ICSP) {
|
||||
OS << Sep << "{ " << (CSP.IsFunction ? CSP.Name : "<unknown>") << ": "
|
||||
SS << Sep << "{ " << (CSP.IsFunction ? CSP.Name : "<unknown>") << ": "
|
||||
<< CSP.Count << " (" << CSP.Mispreds << " misses) }";
|
||||
Sep = ", ";
|
||||
Sep = ",\n ";
|
||||
TotalCount += CSP.Count;
|
||||
TotalMispreds += CSP.Mispreds;
|
||||
}
|
||||
SS.flush();
|
||||
|
||||
OS << TotalCount << " (" << TotalMispreds << " misses) :" << TempString;
|
||||
return OS;
|
||||
}
|
||||
|
||||
|
||||
@@ -69,14 +69,22 @@ FixFuncCounts("fix-func-counts",
|
||||
cl::Hidden,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
InferFallThroughs("infer-fall-throughs",
|
||||
cl::desc("infer execution count for fall-through blocks"),
|
||||
cl::init(true),
|
||||
cl::ZeroOrMore,
|
||||
cl::Hidden,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
} // namespace opts
|
||||
|
||||
namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
bool BinaryFunction::recordTrace(
|
||||
const LBREntry &First,
|
||||
const LBREntry &Second,
|
||||
const LBREntry &FirstLBR,
|
||||
const LBREntry &SecondLBR,
|
||||
uint64_t Count,
|
||||
SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) {
|
||||
if (!isSimple())
|
||||
@@ -85,8 +93,8 @@ bool BinaryFunction::recordTrace(
|
||||
assert(CurrentState == State::CFG && "can only record traces in CFG state");
|
||||
|
||||
// Offsets of the trace within this function.
|
||||
const auto From = First.To - getAddress();
|
||||
const auto To = Second.From - getAddress();
|
||||
const auto From = FirstLBR.To - getAddress();
|
||||
const auto To = SecondLBR.From - getAddress();
|
||||
|
||||
if (From > To)
|
||||
return false;
|
||||
@@ -97,47 +105,27 @@ bool BinaryFunction::recordTrace(
|
||||
if (!FromBB || !ToBB)
|
||||
return false;
|
||||
|
||||
// Adjust FromBB if the first LBR is a return from the last instruction in
|
||||
// the previous block (that instruction should be a call).
|
||||
if (From == FromBB->getOffset() && !containsAddress(FirstLBR.From) &&
|
||||
!FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
|
||||
auto *PrevBB = BasicBlocksLayout[FromBB->getIndex() - 1];
|
||||
if (PrevBB->getSuccessor(FromBB->getLabel())) {
|
||||
const auto *Instr = PrevBB->getLastNonPseudoInstr();
|
||||
if (Instr && BC.MIA->isCall(*Instr)) {
|
||||
FromBB = PrevBB;
|
||||
} else {
|
||||
DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR << '\n');
|
||||
}
|
||||
} else {
|
||||
DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
|
||||
}
|
||||
}
|
||||
|
||||
// Fill out information for fall-through edges. The From and To could be
|
||||
// within the same basic block, e.g. when two call instructions are in the
|
||||
// same block. In this case we skip the processing.
|
||||
if (FromBB == ToBB) {
|
||||
if (opts::CompatMode)
|
||||
return true;
|
||||
|
||||
// If the previous block ended with a call, the destination of a return
|
||||
// would be in ToBB basic block. And if the ToBB starts with a control
|
||||
// transfer instruction, we will have a 0-length trace that we have to
|
||||
// account for as a fall-through edge.
|
||||
if (To == ToBB->getOffset()) {
|
||||
// External entry point.
|
||||
if (ToBB->isEntryPoint() || ToBB->isLandingPad())
|
||||
return true;
|
||||
|
||||
// Check that the origin LBR of a trace starts in another function.
|
||||
// Otherwise it's an internal branch that was accounted for.
|
||||
if (containsAddress(First.From))
|
||||
return true;
|
||||
|
||||
auto *PrevBB = BasicBlocksLayout[ToBB->getIndex() - 1];
|
||||
|
||||
// This could be a bad trace.
|
||||
if (!PrevBB->getSuccessor(ToBB->getLabel())) {
|
||||
DEBUG(dbgs() << "invalid LBR sequence:\n"
|
||||
<< " " << First << '\n'
|
||||
<< " " << Second << '\n');
|
||||
return false;
|
||||
}
|
||||
|
||||
auto &BI = PrevBB->getBranchInfo(*ToBB);
|
||||
BI.Count += Count;
|
||||
if (Branches) {
|
||||
const auto *Instr = PrevBB->getLastNonPseudoInstr();
|
||||
const auto Offset =
|
||||
BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
|
||||
Branches->push_back(std::make_pair(Offset, ToBB->getOffset()));
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -151,8 +139,8 @@ bool BinaryFunction::recordTrace(
|
||||
// Check for bad LBRs.
|
||||
if (!BB->getSuccessor(NextBB->getLabel())) {
|
||||
DEBUG(dbgs() << "no fall-through for the trace:\n"
|
||||
<< " " << First << '\n'
|
||||
<< " " << Second << '\n');
|
||||
<< " " << FirstLBR << '\n'
|
||||
<< " " << SecondLBR << '\n');
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -166,12 +154,13 @@ bool BinaryFunction::recordTrace(
|
||||
|
||||
if (Branches) {
|
||||
const auto *Instr = BB->getLastNonPseudoInstr();
|
||||
// Note: real offset for conditional jump instruction shouldn't be 0.
|
||||
const auto Offset =
|
||||
BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
|
||||
if (Offset) {
|
||||
Branches->push_back(std::make_pair(Offset, NextBB->getOffset()));
|
||||
uint64_t Offset{0};
|
||||
if (Instr) {
|
||||
Offset = BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
|
||||
} else {
|
||||
Offset = BB->getOffset();
|
||||
}
|
||||
Branches->emplace_back(std::make_pair(Offset, NextBB->getOffset()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -374,7 +363,8 @@ void BinaryFunction::postProcessProfile() {
|
||||
}
|
||||
}
|
||||
|
||||
inferFallThroughCounts();
|
||||
if (opts::InferFallThroughs)
|
||||
inferFallThroughCounts();
|
||||
|
||||
// Update profile information for jump tables based on CFG branch data.
|
||||
for (auto *BB : BasicBlocks) {
|
||||
@@ -421,11 +411,11 @@ void BinaryFunction::postProcessProfile() {
|
||||
}
|
||||
|
||||
Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
|
||||
BinaryFunction::getFallthroughsInTrace(const LBREntry &First,
|
||||
const LBREntry &Second) {
|
||||
BinaryFunction::getFallthroughsInTrace(const LBREntry &FirstLBR,
|
||||
const LBREntry &SecondLBR) {
|
||||
SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
|
||||
|
||||
if (!recordTrace(First, Second, 1, &Res))
|
||||
if (!recordTrace(FirstLBR, SecondLBR, 1, &Res))
|
||||
return NoneType();
|
||||
|
||||
return Res;
|
||||
|
||||
@@ -161,7 +161,9 @@ ProfileReader::parseFunctionProfile(BinaryFunction &BF,
|
||||
continue;
|
||||
}
|
||||
|
||||
BB.setSuccessorBranchInfo(SuccessorBB, YamlSI.Count, YamlSI.Mispreds);
|
||||
auto &BI = BB.getBranchInfo(SuccessorBB);
|
||||
BI.Count += YamlSI.Count;
|
||||
BI.MispredictedCount += YamlSI.Mispreds;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user