diff --git a/bolt/BinaryBasicBlock.cpp b/bolt/BinaryBasicBlock.cpp index 6432d103cc99..7b56741acc10 100644 --- a/bolt/BinaryBasicBlock.cpp +++ b/bolt/BinaryBasicBlock.cpp @@ -184,6 +184,32 @@ uint32_t BinaryBasicBlock::getNumPseudos() const { return NumPseudos; } +ErrorOr> +BinaryBasicBlock::getBranchStats(const BinaryBasicBlock *Succ) const { + if (Function->hasValidProfile()) { + uint64_t TotalCount = 0; + uint64_t TotalMispreds = 0; + for (const auto &BI : BranchInfo) { + if (BI.Count != COUNT_FALLTHROUGH_EDGE) { + TotalCount += BI.Count; + TotalMispreds += BI.MispredictedCount; + } + } + + if (TotalCount > 0) { + auto Itr = std::find(Successors.begin(), Successors.end(), Succ); + assert(Itr != Successors.end()); + const auto &BI = BranchInfo[Itr - Successors.begin()]; + if (BI.Count && BI.Count != COUNT_FALLTHROUGH_EDGE) { + if (TotalMispreds == 0) TotalMispreds = 1; + return std::make_pair(double(BI.Count) / TotalCount, + double(BI.MispredictedCount) / TotalMispreds); + } + } + } + return make_error_code(llvm::errc::result_out_of_range); +} + void BinaryBasicBlock::dump() const { auto &BC = Function->getBinaryContext(); if (Label) outs() << Label->getName() << ":\n"; diff --git a/bolt/BinaryBasicBlock.h b/bolt/BinaryBasicBlock.h index fe7448252b51..1b18263ea675 100644 --- a/bolt/BinaryBasicBlock.h +++ b/bolt/BinaryBasicBlock.h @@ -329,6 +329,11 @@ public: return BranchInfo[Condition == true ? 0 : 1]; }; + /// Try to compute the taken and misprediction frequencies for the given + /// successor. The result is an error if no information can be found. + ErrorOr> + getBranchStats(const BinaryBasicBlock *Succ) const; + /// If the basic block ends with a conditional branch (possibly followed by /// an unconditional branch) and thus has 2 successor, reverse the order of /// its successors in CFG, update branch info, and return true. If the basic diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp index 0d01cf299989..c344f589c0f7 100644 --- a/bolt/BinaryFunction.cpp +++ b/bolt/BinaryFunction.cpp @@ -258,6 +258,19 @@ std::pair BinaryFunction::eraseInvalidBBs() { return std::make_pair(Count, Bytes); } +bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const { + // TODO: Once we start reordering functions this has to change. #15031238 + const auto *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol); + if (CalleeBF) { + return CalleeBF->getAddress() > getAddress(); + } else { + // Absolute symbol. + auto const CalleeSI = BC.GlobalSymbols.find(CalleeSymbol->getName()); + assert(CalleeSI != BC.GlobalSymbols.end() && "unregistered symbol found"); + return CalleeSI->second > getAddress(); + } +} + void BinaryFunction::dump(std::string Annotation, bool PrintInstructions) const { print(dbgs(), Annotation, PrintInstructions); @@ -3338,17 +3351,33 @@ DynoStats BinaryFunction::getDynoStats() const { } // Conditional branch that could be followed by an unconditional branch. - uint64_t TakenCount = BB->getBranchInfo(true).Count; + uint64_t TakenCount; + uint64_t NonTakenCount; + bool IsForwardBranch; + if (BB->succ_size() == 2) { + TakenCount = BB->getBranchInfo(true).Count; + NonTakenCount = BB->getBranchInfo(false).Count; + IsForwardBranch = isForwardBranch(BB, BB->getConditionalSuccessor(true)); + } else { + // SCTC breaks the CFG invariant so we have to make some affordances + // here if we want dyno stats after running it. + TakenCount = BB->branch_info_begin()->Count; + if (TakenCount != COUNT_NO_PROFILE) + NonTakenCount = BBExecutionCount - TakenCount; + else + NonTakenCount = 0; + IsForwardBranch = isForwardBranch(BB, BB->getFallthrough()); + } + if (TakenCount == COUNT_NO_PROFILE) TakenCount = 0; - uint64_t NonTakenCount = BB->getBranchInfo(false).Count; if (NonTakenCount == COUNT_NO_PROFILE) NonTakenCount = 0; assert(TakenCount + NonTakenCount == BBExecutionCount && "internal calculation error"); - if (isForwardBranch(BB, BB->getConditionalSuccessor(true))) { + if (IsForwardBranch) { Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount; Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount; } else { diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h index f4f71039bd2c..854847f0d949 100644 --- a/bolt/BinaryFunction.h +++ b/bolt/BinaryFunction.h @@ -1041,12 +1041,17 @@ public: /// Callee is responsible of updating basic block indices prior to using /// this function (e.g. by calling BinaryFunction::updateLayoutIndices()). static bool isForwardBranch(const BinaryBasicBlock *From, - const BinaryBasicBlock *To) { + const BinaryBasicBlock *To) { assert(From->getFunction() == To->getFunction() && "basic blocks should be in the same function"); return To->getLayoutIndex() > From->getLayoutIndex(); } + /// Determine direction of the call to callee symbol relative to the start + /// of this function. + /// Note: this doesn't take function splitting into account. + bool isForwardCall(const MCSymbol *CalleeSymbol) const; + /// Dump function information to debug output. If \p PrintInstructions /// is true - include instruction disassembly. void dump(std::string Annotation = "", bool PrintInstructions = true) const; diff --git a/bolt/BinaryPassManager.cpp b/bolt/BinaryPassManager.cpp index 000ee091dc8e..04b170345d7b 100644 --- a/bolt/BinaryPassManager.cpp +++ b/bolt/BinaryPassManager.cpp @@ -236,6 +236,13 @@ void BinaryFunctionPassManager::runAllPasses( llvm::make_unique(PrintSCTC), opts::SimplifyConditionalTailCalls); + Manager.registerPass(llvm::make_unique(PrintPeepholes), + opts::Peepholes); + + Manager.registerPass( + llvm::make_unique(PrintUCE), + opts::EliminateUnreachable); + Manager.registerPass(llvm::make_unique(PrintAfterFixup)); Manager.runPasses(); diff --git a/bolt/BinaryPasses.cpp b/bolt/BinaryPasses.cpp index ca91be325820..bff0fa2bee74 100644 --- a/bolt/BinaryPasses.cpp +++ b/bolt/BinaryPasses.cpp @@ -130,6 +130,27 @@ DynoStatsSortOrderOpt( cl::ZeroOrMore, cl::init(DynoStatsSortOrder::Descending)); +enum SctcModes : char { + SctcAlways, + SctcPreserveDirection, + SctcHeuristic +}; + +static cl::opt +SctcMode( + "sctc-mode", + cl::desc("mode for simplify conditional tail calls"), + cl::init(SctcHeuristic), + cl::values(clEnumValN(SctcAlways, "always", "always perform sctc"), + clEnumValN(SctcPreserveDirection, + "preserve", + "only perform sctc when branch direction is preserved"), + clEnumValN(SctcHeuristic, + "heuristic", + "use branch prediction data to control sctc"), + clEnumValEnd), + cl::ZeroOrMore); + } // namespace opts namespace llvm { @@ -799,6 +820,15 @@ void EliminateUnreachableBlocks::runOnFunction(BinaryFunction& Function) { unsigned Count; uint64_t Bytes; Function.markUnreachable(); + DEBUG({ + for (auto *BB : Function.layout()) { + if (!BB->isValid()) { + dbgs() << "BOLT-INFO: UCE found unreachable block " << BB->getName() + << " in function " << Function << "\n"; + BB->dump(); + } + } + }); std::tie(Count, Bytes) = Function.eraseInvalidBBs(); DeletedBlocks += Count; DeletedBytes += Bytes; @@ -893,13 +923,47 @@ void FixupFunctions::runOnFunctions( } } -bool SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC, - BinaryFunction &BF) { - if (BF.layout_size() < 2) - return false; +bool +SimplifyConditionalTailCalls::shouldRewriteBranch(const BinaryBasicBlock *PredBB, + const MCInst &CondBranch, + const BinaryBasicBlock *BB, + const bool DirectionFlag) { + const bool IsForward = BinaryFunction::isForwardBranch(PredBB, BB); + if (IsForward) + ++NumOrigForwardBranches; + else + ++NumOrigBackwardBranches; + + if (opts::SctcMode == opts::SctcAlways) + return true; + + if (opts::SctcMode == opts::SctcPreserveDirection) + return IsForward == DirectionFlag; + + const auto Frequency = PredBB->getBranchStats(BB); + + // It's ok to rewrite the conditional branch if the new target will be + // a backward branch. + + // If no data available for these branches, then it should be ok to + // do the optimization since it will reduce code size. + if (Frequency.getError()) + return true; + + // TODO: should this use misprediction frequency instead? + const bool Result = + (IsForward && Frequency.get().first >= 0.5) || + (!IsForward && Frequency.get().first <= 0.5); + + return Result == DirectionFlag; +} + +uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC, + BinaryFunction &BF) { // Need updated indices to correctly detect branch' direction. BF.updateLayoutIndices(); + BF.markUnreachable(); auto &MIA = BC.MIA; uint64_t NumLocalCTCCandidates = 0; @@ -913,36 +977,22 @@ bool SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC, auto *Instr = BB->getFirstNonPseudo(); if (!MIA->isTailCall(*Instr)) continue; + auto *CalleeSymbol = MIA->getTargetSymbol(*Instr); if (!CalleeSymbol) continue; // Detect direction of the possible conditional tail call. - // XXX: Once we start reordering functions this has to change. - bool IsForwardCTC; - const auto *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol); - if (CalleeBF) { - IsForwardCTC = CalleeBF->getAddress() > BF.getAddress(); - } else { - // Absolute symbol. - auto const CalleeSI = BC.GlobalSymbols.find(CalleeSymbol->getName()); - assert(CalleeSI != BC.GlobalSymbols.end() && "unregistered symbol found"); - IsForwardCTC = CalleeSI->second > BF.getAddress(); - } + const bool IsForwardCTC = BF.isForwardCall(CalleeSymbol); // Iterate through all predecessors. for (auto *PredBB : BB->predecessors()) { - if (PredBB->getConditionalSuccessor(true) != BB) + auto *CondSucc = PredBB->getConditionalSuccessor(true); + if (!CondSucc) continue; ++NumLocalCTCCandidates; - // We don't want to reverse direction of the branch in new order - // without further profile analysis. - if (BF.isForwardBranch(PredBB, BB) != IsForwardCTC) - continue; - - // Change destination of the unconditional branch. const MCSymbol *TBB = nullptr; const MCSymbol *FBB = nullptr; MCInst *CondBranch = nullptr; @@ -951,24 +1001,63 @@ bool SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC, assert(Result && "internal error analyzing conditional branch"); assert(CondBranch && "conditional branch expected"); - MIA->replaceBranchTarget(*CondBranch, CalleeSymbol, BC.Ctx.get()); + // We don't want to reverse direction of the branch in new order + // without further profile analysis. + const bool DirectionFlag = CondSucc == BB ? IsForwardCTC : !IsForwardCTC; + if (!shouldRewriteBranch(PredBB, *CondBranch, BB, DirectionFlag)) + continue; + + if (CondSucc != BB) { + // Patch the new target address into the conditional branch. + MIA->reverseBranchCondition(*CondBranch, CalleeSymbol, BC.Ctx.get()); + // Since we reversed the condition on the branch we need to change + // the target for the unconditional branch or add a unconditional + // branch to the old target. This has to be done manually since + // fixupBranches is not called after SCTC. + if (UncondBranch) { + MIA->replaceBranchTarget(*UncondBranch, + CondSucc->getLabel(), + BC.Ctx.get()); + } else { + MCInst Branch; + auto Result = MIA->createUncondBranch(Branch, + CondSucc->getLabel(), + BC.Ctx.get()); + assert(Result); + PredBB->addInstruction(Branch); + } + // Swap branch statistics after swapping the branch targets. + auto BI = PredBB->branch_info_begin(); + std::swap(*BI, *(BI + 1)); + } else { + // Change destination of the unconditional branch. + MIA->replaceBranchTarget(*CondBranch, CalleeSymbol, BC.Ctx.get()); + } + + // Remove the unused successor which may be eliminated later + // if there are no other users. PredBB->removeSuccessor(BB); + ++NumLocalCTCs; } // Remove the block from CFG if all predecessors were removed. - BB->markValid(BB->pred_size() != 0 || BB->isLandingPad()); + BB->markValid(BB->pred_size() != 0 || + BB->isLandingPad() || + BB->isEntryPoint()); } - // Clean-up unreachable tail-call blocks. - BF.eraseInvalidBBs(); + if (NumLocalCTCs > 0) { + // Clean-up unreachable tail-call blocks. + BF.eraseInvalidBBs(); + } DEBUG(dbgs() << "BOLT: created " << NumLocalCTCs << " conditional tail calls from a total of " << NumLocalCTCCandidates << " candidates in function " << BF << "\n";); NumTailCallsPatched += NumLocalCTCs; - NumTailCallCandidates += NumLocalCTCCandidates; + NumCandidateTailCalls += NumLocalCTCCandidates; return NumLocalCTCs > 0; } @@ -990,9 +1079,10 @@ void SimplifyConditionalTailCalls::runOnFunctions( } } - outs() << "BOLT-INFO: patched " << NumTailCallsPatched + outs() << "BOLT-INFO: SCTC: patched " << NumTailCallsPatched << " tail calls (" << NumOrigForwardBranches << " forward)" - << " from a total of " << NumTailCallCandidates << "\n"; + << " tail calls (" << NumOrigBackwardBranches << " backward)" + << " from a total of " << NumCandidateTailCalls << "\n"; } void Peepholes::shortenInstructions(BinaryContext &BC, diff --git a/bolt/BinaryPasses.h b/bolt/BinaryPasses.h index ae827ebda245..33e8a7ccb14e 100644 --- a/bolt/BinaryPasses.h +++ b/bolt/BinaryPasses.h @@ -230,24 +230,56 @@ class FixupFunctions : public BinaryFunctionPass { /// An optimization to simplify conditional tail calls by removing /// unnecessary branches. /// -/// Convert the sequence: +/// This optimization considers both of the following cases: /// -/// j L1 -/// ... -/// L1: jmp foo # tail call +/// foo: ... +/// jcc L1 original +/// ... +/// L1: jmp bar # TAILJMP /// -/// into: -/// j foo +/// -> /// -/// but only if 'j foo' turns out to be a forward branch. +/// foo: ... +/// jcc bar iff jcc L1 is expected +/// ... /// +/// L1 is unreachable +/// +/// OR +/// +/// foo: ... +/// jcc L2 +/// L1: jmp dest # TAILJMP +/// L2: ... +/// +/// -> +/// +/// foo: jncc dest # TAILJMP +/// L2: ... +/// +/// L1 is unreachable +/// +/// For this particular case, the first basic block ends with +/// a conditional branch and has two successors, one fall-through +/// and one for when the condition is true. +/// The target of the conditional is a basic block with a single +/// unconditional branch (i.e. tail call) to another function. +/// We don't care about the contents of the fall-through block. +/// We assume that the target of the conditional branch is the +/// first successor. class SimplifyConditionalTailCalls : public BinaryFunctionPass { - uint64_t NumTailCallCandidates{0}; + uint64_t NumCandidateTailCalls{0}; uint64_t NumTailCallsPatched{0}; uint64_t NumOrigForwardBranches{0}; + uint64_t NumOrigBackwardBranches{0}; std::unordered_set Modified; - bool fixTailCalls(BinaryContext &BC, BinaryFunction &BF); + bool shouldRewriteBranch(const BinaryBasicBlock *PredBB, + const MCInst &CondBranch, + const BinaryBasicBlock *BB, + const bool DirectionFlag); + + uint64_t fixTailCalls(BinaryContext &BC, BinaryFunction &BF); public: explicit SimplifyConditionalTailCalls(const cl::opt &PrintPass) : BinaryFunctionPass(PrintPass) { }