diff --git a/bolt/BinaryBasicBlock.cpp b/bolt/BinaryBasicBlock.cpp index 2b775dbe22b8..c31c4b886e51 100644 --- a/bolt/BinaryBasicBlock.cpp +++ b/bolt/BinaryBasicBlock.cpp @@ -29,6 +29,12 @@ bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) { return LHS.Index < RHS.Index; } +void BinaryBasicBlock::adjustNumPseudos(const MCInst &Inst, int Sign) { + auto &BC = Function->getBinaryContext(); + if (BC.MII->get(Inst.getOpcode()).isPseudo()) + NumPseudos += Sign; +} + MCInst *BinaryBasicBlock::getFirstNonPseudo() { auto &BC = Function->getBinaryContext(); for (auto &Inst : Instructions) { @@ -47,6 +53,34 @@ MCInst *BinaryBasicBlock::getLastNonPseudo() { return nullptr; } +bool BinaryBasicBlock::validateSuccessorInvariants() { + const MCSymbol *TBB = nullptr; + const MCSymbol *FBB = nullptr; + MCInst *CondBranch = nullptr; + MCInst *UncondBranch = nullptr; + + assert(getNumPseudos() == getNumPseudos()); + + if (analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) { + switch (Successors.size()) { + case 0: + return !CondBranch && !UncondBranch; + case 1: + return !CondBranch; + case 2: + if (CondBranch) { + return (TBB == getConditionalSuccessor(true)->getLabel() && + ((!UncondBranch && !FBB) || + (UncondBranch && FBB == getConditionalSuccessor(false)->getLabel()))); + } + return true; + default: + return true; + } + } + return true; +} + BinaryBasicBlock *BinaryBasicBlock::getSuccessor(const MCSymbol *Label) const { if (!Label && succ_size() == 1) return *succ_begin(); @@ -121,14 +155,16 @@ void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred) { } void BinaryBasicBlock::addLandingPad(BinaryBasicBlock *LPBlock) { - LandingPads.push_back(LPBlock); + if (std::find(LandingPads.begin(), LandingPads.end(), LPBlock) == LandingPads.end()) { + LandingPads.push_back(LPBlock); + } LPBlock->Throwers.insert(this); } void BinaryBasicBlock::clearLandingPads() { for (auto *LPBlock : LandingPads) { auto count = LPBlock->Throwers.erase(this); - assert(count == 
1); + assert(count == 1 && "Possible duplicate entry in LandingPads"); } LandingPads.clear(); } diff --git a/bolt/BinaryBasicBlock.h b/bolt/BinaryBasicBlock.h index ee9a066eca38..df230a081efc 100644 --- a/bolt/BinaryBasicBlock.h +++ b/bolt/BinaryBasicBlock.h @@ -376,6 +376,7 @@ public: /// Add instruction at the end of this basic block. /// Returns the index of the instruction in the Instructions vector of the BB. uint32_t addInstruction(MCInst &&Inst) { + adjustNumPseudos(Inst, 1); Instructions.emplace_back(Inst); return Instructions.size() - 1; } @@ -383,6 +384,7 @@ public: /// Add instruction at the end of this basic block. /// Returns the index of the instruction in the Instructions vector of the BB. uint32_t addInstruction(const MCInst &Inst) { + adjustNumPseudos(Inst, 1); Instructions.push_back(Inst); return Instructions.size() - 1; } @@ -435,6 +437,10 @@ public: uint64_t Count = 0, uint64_t MispredictedCount = 0); + void addSuccessor(BinaryBasicBlock *Succ, const BinaryBranchInfo &BI) { + addSuccessor(Succ, BI.Count, BI.MispredictedCount); + } + /// Add a range of successors. template void addSuccessors(Itr Begin, Itr End) { @@ -448,8 +454,7 @@ public: void addSuccessors(Itr Begin, Itr End, BrItr BrBegin, BrItr BrEnd) { assert(std::distance(Begin, End) == std::distance(BrBegin, BrEnd)); while (Begin != End) { - const auto BrInfo = *BrBegin++; - addSuccessor(*Begin++, BrInfo.Count, BrInfo.MispredictedCount); + addSuccessor(*Begin++, *BrBegin++); } } @@ -551,20 +556,22 @@ public: /// Replace an instruction with a sequence of instructions. Returns true /// if the instruction to be replaced was found and replaced. 
template - bool replaceInstruction(MCInst *Inst, Itr Begin, Itr End) { + bool replaceInstruction(const MCInst *Inst, Itr Begin, Itr End) { auto I = Instructions.end(); auto B = Instructions.begin(); while (I > B) { --I; if (&*I == Inst) { + adjustNumPseudos(*Inst, -1); Instructions.insert(Instructions.erase(I), Begin, End); + adjustNumPseudos(Begin, End, 1); return true; } } return false; } - bool replaceInstruction(MCInst *Inst, + bool replaceInstruction(const MCInst *Inst, const std::vector &Replacement) { return replaceInstruction(Inst, Replacement.begin(), Replacement.end()); } @@ -580,7 +587,8 @@ public: Instructions.pop_back(); } std::reverse(SplitInst.begin(), SplitInst.end()); - + NumPseudos = 0; + adjustNumPseudos(Instructions.begin(), Instructions.end(), 1); return SplitInst; } @@ -626,7 +634,18 @@ public: /// A simple dump function for debugging. void dump() const; + /// Validate successor invariants for this BB. + bool validateSuccessorInvariants(); + private: + void adjustNumPseudos(const MCInst &Inst, int Sign); + + template + void adjustNumPseudos(Itr Begin, Itr End, int Sign) { + while (Begin != End) { + adjustNumPseudos(*Begin++, Sign); + } + } /// Adds predecessor to the BB. Most likely you don't need to call this. void addPredecessor(BinaryBasicBlock *Pred); diff --git a/bolt/BinaryContext.cpp b/bolt/BinaryContext.cpp index 3430807502bd..15850dd5e929 100644 --- a/bolt/BinaryContext.cpp +++ b/bolt/BinaryContext.cpp @@ -370,6 +370,14 @@ void BinaryContext::printInstruction(raw_ostream &OS, } } + MIA->forEachAnnotation( + Instruction, + [&OS](const MCAnnotation *Annotation) { + OS << " # " << Annotation->getName() << ": "; + Annotation->print(OS); + } + ); + const DWARFDebugLine::LineTable *LineTable = Function && opts::PrintDebugInfo ? 
Function->getDWARFUnitLineTable().second : nullptr; diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp index d6420962cc6a..e4d9c8d0a193 100644 --- a/bolt/BinaryFunction.cpp +++ b/bolt/BinaryFunction.cpp @@ -910,6 +910,7 @@ void BinaryFunction::disassemble(ArrayRef FunctionData) { auto &Ctx = BC.Ctx; auto &MIA = BC.MIA; + auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames()); DWARFUnitLineTable ULT = getDWARFUnitLineTable(); @@ -1122,12 +1123,6 @@ void BinaryFunction::disassemble(ArrayRef FunctionData) { } } - Instruction.clear(); - Instruction.addOperand( - MCOperand::createExpr( - MCSymbolRefExpr::create(TargetSymbol, - MCSymbolRefExpr::VK_None, - *Ctx))); if (!IsCall) { // Add taken branch info. TakenBranches.emplace_back(Offset, TargetAddress - getAddress()); @@ -1136,6 +1131,21 @@ void BinaryFunction::disassemble(ArrayRef FunctionData) { // Add fallthrough branch info. FTBranches.emplace_back(Offset, Offset + Size); } + + const bool isIndirect = + ((IsCall || !IsCondBranch) && MIA->isIndirectBranch(Instruction)); + + Instruction.clear(); + Instruction.addOperand( + MCOperand::createExpr( + MCSymbolRefExpr::create(TargetSymbol, + MCSymbolRefExpr::VK_None, + *Ctx))); + + if (isIndirect && BranchDataOrErr) { + MIA->addAnnotation(Ctx.get(), Instruction, "IndirectBranchData", + Offset); + } } else { // Could not evaluate branch. Should be an indirect call or an // indirect branch. Bail out on the latter case. 
@@ -1145,7 +1155,14 @@ void BinaryFunction::disassemble(ArrayRef FunctionData) { default: llvm_unreachable("unexpected result"); case IndirectBranchType::POSSIBLE_TAIL_CALL: - MIA->convertJmpToTailCall(Instruction); + { + auto Result = MIA->convertJmpToTailCall(Instruction); + assert(Result); + if (BranchDataOrErr) { + MIA->addAnnotation(Ctx.get(), Instruction, "IndirectBranchData", + Offset); + } + } break; case IndirectBranchType::POSSIBLE_JUMP_TABLE: case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE: @@ -1155,8 +1172,19 @@ void BinaryFunction::disassemble(ArrayRef FunctionData) { case IndirectBranchType::UNKNOWN: // Keep processing. We'll do more checks and fixes in // postProcessIndirectBranches(). + if (BranchDataOrErr) { + MIA->addAnnotation(Ctx.get(), + Instruction, + "MaybeIndirectBranchData", + Offset); + } break; }; + } else if (MIA->isCall(Instruction)) { + if (BranchDataOrErr) { + MIA->addAnnotation(Ctx.get(), Instruction, "IndirectBranchData", + Offset); + } } // Indirect call. We only need to fix it if the operand is RIP-relative if (IsSimple && MIA->hasRIPOperand(Instruction)) { @@ -1248,6 +1276,8 @@ void BinaryFunction::postProcessJumpTables() { } bool BinaryFunction::postProcessIndirectBranches() { + auto BranchDataOrErr = BC.DR.getFuncBranchData(getNames()); + for (auto *BB : layout()) { for (auto &Instr : *BB) { if (!BC.MIA->isIndirectBranch(Instr)) @@ -1257,6 +1287,15 @@ bool BinaryFunction::postProcessIndirectBranches() { // it must be a tail call. 
if (layout_size() == 1) { BC.MIA->convertJmpToTailCall(Instr); + + if (BC.MIA->hasAnnotation(Instr, "MaybeIndirectBranchData")) { + auto Offset = + BC.MIA->getAnnotationAs(Instr, "MaybeIndirectBranchData"); + BC.MIA->addAnnotation(BC.Ctx.get(), + Instr, + "IndirectBranchData", + Offset); + } return true; } @@ -1336,6 +1375,15 @@ bool BinaryFunction::postProcessIndirectBranches() { return false; } BC.MIA->convertJmpToTailCall(Instr); + + if (BranchDataOrErr) { + auto Offset = + BC.MIA->getAnnotationAs(Instr, "MaybeIndirectBranchData"); + BC.MIA->addAnnotation(BC.Ctx.get(), + Instr, + "IndirectBranchData", + Offset); + } } } return true; @@ -2097,8 +2145,8 @@ void BinaryFunction::removeConditionalTailCalls() { // We have to add 1 byte as there's potentially an existing branch past // the end of the code as a result of __builtin_unreachable(). const BinaryBasicBlock *LastBB = BasicBlocks.back(); - uint64_t NewBlockOffset = LastBB->getOffset() + - BC.computeCodeSize(LastBB->begin(), LastBB->end()) + 1; + uint64_t NewBlockOffset = + LastBB->getOffset() + BC.computeCodeSize(LastBB->begin(), LastBB->end()) + 1; TailCallBB = addBasicBlock(NewBlockOffset, TCLabel); TailCallBB->addInstruction(TailCallInst); @@ -2184,6 +2232,7 @@ BinaryFunction::annotateCFIState(const MCInst *Stop) { } else if (CFI->getOperation() != MCCFIInstruction::OpGnuArgsSize) { State = HighestState; } + assert(State <= FrameInstructions.size()); ++Idx; if (&Instr == Stop) { CFIState.emplace_back(State); @@ -2315,9 +2364,9 @@ bool BinaryFunction::fixCFIState() { if (StackOffset != 0) { if (opts::Verbosity >= 1) { - errs() << " BOLT-WARNING: not possible to remember/recover state" + errs() << "BOLT-WARNING: not possible to remember/recover state" << " without corrupting CFI state stack in function " - << *this << "\n"; + << *this << " @ " << BB->getName() << "\n"; } return false; } @@ -2616,12 +2665,18 @@ void BinaryFunction::dumpGraph(raw_ostream& OS) const { BasicBlocksLayout.end(), BB); unsigned 
Layout = LayoutPos - BasicBlocksLayout.begin(); - OS << format("\"%s\" [label=\"%s\\n(O:%lu,I:%u,L%u)\"]\n", + const char* ColdStr = BB->isCold() ? " (cold)" : ""; + OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u:CFI:%u)\"]\n", BB->getName().data(), BB->getName().data(), + ColdStr, + (BB->ExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE + ? BB->ExecutionCount + : 0), BB->getOffset(), getIndex(BB), - Layout); + Layout, + BBCFIState[getIndex(BB)]); OS << format("\"%s\" [shape=box]\n", BB->getName().data()); if (opts::DotToolTipCode) { std::string Str; @@ -2673,7 +2728,7 @@ void BinaryFunction::dumpGraph(raw_ostream& OS) const { if (BB->getExecutionCount() != COUNT_NO_PROFILE && BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { - OS << "\\n(M:" << BI->MispredictedCount << ",C:" << BI->Count << ")"; + OS << "\\n(C:" << BI->Count << ",M:" << BI->MispredictedCount << ")"; } else if (ExecutionCount != COUNT_NO_PROFILE && BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) { OS << "\\n(IC:" << BI->Count << ")"; @@ -2727,6 +2782,41 @@ void BinaryFunction::dumpGraphToFile(std::string Filename) const { dumpGraph(of); } +bool BinaryFunction::validateCFG() { + bool Valid = true; + for (auto *BB : BasicBlocks) { + Valid &= BB->validateSuccessorInvariants(); + if (!Valid) { + errs() << "BOLT-WARNING: CFG invalid @ " << BB->getName() << "\n"; + } + } + + if (!Valid) + return Valid; + + for (auto *BB : BasicBlocks) { + std::set Seen; + for (auto *LPBlock : BB->LandingPads) { + Valid &= Seen.count(LPBlock) == 0; + if (!Valid) { + errs() << "Duplicate LP seen " << LPBlock->getName() << "\n"; + break; + } + Seen.insert(LPBlock); + auto count = LPBlock->Throwers.count(BB); + Valid &= (count == 1); + if (!Valid) { + errs() << "Inconsistent landing pad detected " << LPBlock->getName() + << " is in LandingPads but not in " << BB->getName() + << "->Throwers\n"; + break; + } + } + } + + return Valid; +} + void BinaryFunction::fixBranches() { auto &MIA = BC.MIA; auto 
*Ctx = BC.Ctx.get(); @@ -2778,6 +2868,7 @@ void BinaryFunction::fixBranches() { // terminator) or more than 2 (switch table) don't require branch // instruction adjustments. } + assert(validateCFG()); } void BinaryFunction::splitFunction() { @@ -3257,6 +3348,7 @@ void BinaryFunction::updateLayout(BinaryBasicBlock* Start, auto Begin = &BasicBlocks[getIndex(Start) + 1]; auto End = &BasicBlocks[getIndex(Start) + NumNewBlocks + 1]; BasicBlocksLayout.insert(Pos + 1, Begin, End); + updateLayoutIndices(); } void BinaryFunction::updateLayout(LayoutType Type, @@ -3265,6 +3357,7 @@ void BinaryFunction::updateLayout(LayoutType Type, // Recompute layout with original parameters. BasicBlocksLayout = BasicBlocks; modifyLayout(Type, MinBranchClusters, Split); + updateLayoutIndices(); } bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol, diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h index b7c0e7615f64..2d74ec3ca7e4 100644 --- a/bolt/BinaryFunction.h +++ b/bolt/BinaryFunction.h @@ -80,7 +80,7 @@ class DynoStats { Fadd(FORWARD_COND_BRANCHES_TAKEN, BACKWARD_COND_BRANCHES_TAKEN))\ D(ALL_CONDITIONAL, "all conditional branches",\ Fadd(FORWARD_COND_BRANCHES, BACKWARD_COND_BRANCHES))\ - D(LAST_DYNO_STAT, "", Fn) + D(LAST_DYNO_STAT, "", 0) public: #define D(name, ...) name, @@ -839,6 +839,9 @@ public: return BC; } + /// Attempt to validate CFG invariants. + bool validateCFG(); + /// Return dynostats for the function. /// /// The function relies on branch instructions being in-sync with CFG for @@ -1161,9 +1164,8 @@ public: /// Insert the BBs contained in NewBBs into the basic blocks for this /// function. Update the associated state of all blocks as needed, i.e. - /// BB offsets, BB indices, and optionally CFI state. The new BBs are - /// inserted after Start. This operation could affect fallthrough branches - /// for Start. + /// BB offsets and BB indices. The new BBs are inserted after Start. + /// This operation could affect fallthrough branches for Start. 
/// void insertBasicBlocks( BinaryBasicBlock *Start, diff --git a/bolt/BinaryPassManager.cpp b/bolt/BinaryPassManager.cpp index 8bc640f12f12..25464cc00364 100644 --- a/bolt/BinaryPassManager.cpp +++ b/bolt/BinaryPassManager.cpp @@ -17,9 +17,9 @@ using namespace llvm; namespace opts { -extern llvm::cl::opt PrintAll; -extern llvm::cl::opt DumpDotAll; -extern llvm::cl::opt DynoStatsAll; +extern cl::opt PrintAll; +extern cl::opt DumpDotAll; +extern cl::opt DynoStatsAll; llvm::cl::opt TimeOpts("time-opts", cl::desc("print time spent in each optimization"), @@ -37,6 +37,11 @@ OptimizeBodylessFunctions( cl::desc("optimize functions that just do a tail call"), cl::ZeroOrMore); +static cl::opt +IndirectCallPromotion("indirect-call-promotion", + cl::desc("indirect call promotion"), + cl::ZeroOrMore); + static cl::opt InlineSmallFunctions( "inline-small-functions", @@ -118,6 +123,12 @@ PrintICF("print-icf", cl::ZeroOrMore, cl::Hidden); +static cl::opt +PrintICP("print-icp", + cl::desc("print functions after indirect call promotion"), + cl::ZeroOrMore, + cl::Hidden); + static cl::opt PrintInline("print-inline", cl::desc("print functions after inlining optimization"), @@ -197,7 +208,7 @@ void BinaryFunctionPassManager::runAllPasses( ) { BinaryFunctionPassManager Manager(BC, Functions, LargeFunctions); - // Here we manage dependencies/order manually, since passes are ran in the + // Here we manage dependencies/order manually, since passes are run in the // order they're registered. // Run this pass first to use stats for the original functions. 
@@ -205,6 +216,12 @@ void BinaryFunctionPassManager::runAllPasses( Manager.registerPass(llvm::make_unique(PrintICF)); + Manager.registerPass(llvm::make_unique(PrintICP), + opts::IndirectCallPromotion); + + Manager.registerPass(llvm::make_unique(PrintPeepholes), + opts::Peepholes); + Manager.registerPass(llvm::make_unique(PrintInline), opts::InlineSmallFunctions); diff --git a/bolt/BinaryPassManager.h b/bolt/BinaryPassManager.h index 747ceef59e0b..037b32de751c 100644 --- a/bolt/BinaryPassManager.h +++ b/bolt/BinaryPassManager.h @@ -60,7 +60,7 @@ private: /// Runs all enabled implemented passes on all functions. static void runAllPasses(BinaryContext &BC, std::map &Functions, - std::set &largeFunctions); + std::set &LargeFunctions); }; diff --git a/bolt/BinaryPasses.cpp b/bolt/BinaryPasses.cpp index 2b5ecb7f9e88..d63853c1e85a 100644 --- a/bolt/BinaryPasses.cpp +++ b/bolt/BinaryPasses.cpp @@ -65,6 +65,53 @@ AggressiveInlining("aggressive-inlining", cl::ZeroOrMore, cl::Hidden); +static cl::opt +IndirectCallPromotionThreshold( + "indirect-call-promotion-threshold", + cl::desc("threshold for optimizing a frequently taken indirect call"), + cl::init(90), + cl::ZeroOrMore); + +static cl::opt +IndirectCallPromotionMispredictThreshold( + "indirect-call-promotion-mispredict-threshold", + cl::desc("misprediction threshold for skipping ICP on an " + "indirect call"), + cl::init(2), + cl::ZeroOrMore); + +static cl::opt +IndirectCallPromotionUseMispredicts( + "indirect-call-promotion-use-mispredicts", + cl::desc("use misprediction frequency for determining whether or not ICP " + "should be applied at a callsite. 
The " + "-indirect-call-promotion-mispredict-threshold value will be used " + "by this heuristic"), + cl::ZeroOrMore); + +static cl::opt +IndirectCallPromotionTopN( + "indirect-call-promotion-topn", + cl::desc("number of targets to consider when doing indirect " + "call promotion"), + cl::init(1), + cl::ZeroOrMore); + +static cl::list +ICPFuncsList("icp-funcs", + cl::CommaSeparated, + cl::desc("list of functions to enable ICP for"), + cl::value_desc("func1,func2,func3,..."), + cl::Hidden); + +static cl::opt +ICPOldCodeSequence( + "icp-old-code-sequence", + cl::desc("use old code sequence for promoted calls"), + cl::init(false), + cl::ZeroOrMore, + cl::Hidden); + static cl::opt ReorderBlocks( "reorder-blocks", @@ -274,9 +321,11 @@ void InlineSmallFunctions::findInliningCandidates( continue; auto &BB = *Function.begin(); const auto &LastInstruction = *BB.rbegin(); - // Check if the function is small enough and doesn't do a tail call. + // Check if the function is small enough, doesn't do a tail call + // and doesn't throw exceptions. if (BB.size() > 0 && BB.getNumNonPseudos() <= kMaxInstructions && + BB.lp_empty() && BC.MIA->isReturn(LastInstruction) && !BC.MIA->isTailCall(LastInstruction)) { InliningCandidates.insert(&Function); @@ -634,10 +683,7 @@ InlineSmallFunctions::inlineCall( CallerBB->getExecutionCount()); } } - unsigned NumBlocksToAdd = InlinedInstance.size(); CallerFunction.insertBasicBlocks(CallerBB, std::move(InlinedInstance)); - CallerFunction.updateLayout(CallerBB, NumBlocksToAdd); - CallerFunction.fixBranches(); return std::make_pair(AfterInlinedBB, AfterInlinedIstrIndex); } @@ -1020,6 +1066,14 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC, MCInst *CondBranch = nullptr; MCInst *UncondBranch = nullptr; auto Result = PredBB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch); + + // analyzeBranch can fail due to unusual branch instructions, e.g. 
jrcxz + if (!Result) { + DEBUG(dbgs() << "analyzeBranch failed in SCTC in block:\n"; + PredBB->dump()); + continue; + } + assert(Result && "internal error analyzing conditional branch"); assert(CondBranch && "conditional branch expected"); @@ -1116,6 +1170,10 @@ void Peepholes::shortenInstructions(BinaryContext &BC, } } +void debugDump(BinaryFunction *BF) { + BF->dump(); +} + // This peephole fixes jump instructions that jump to another basic // block with a single jump instruction, e.g. // @@ -1140,7 +1198,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC, return; if (Succ) { - Pred->replaceSuccessor(&BB, Succ, BinaryBasicBlock::COUNT_NO_PROFILE); + Pred->replaceSuccessor(&BB, Succ); } else { // Succ will be null in the tail call case. In this case we // need to explicitly add a tail call instruction. @@ -1216,8 +1274,8 @@ void Peepholes::runOnFunctions(BinaryContext &BC, addTailcallTraps(BC, Function); } } - outs() << "BOLT-INFO: " << NumDoubleJumps << " double jumps patched.\n"; - outs() << "BOLT-INFO: " << TailCallTraps << " tail call traps inserted.\n"; + outs() << "BOLT-INFO: Peephole: " << NumDoubleJumps << " double jumps patched.\n"; + outs() << "BOLT-INFO: Peephole: " << TailCallTraps << " tail call traps inserted.\n"; } bool SimplifyRODataLoads::simplifyRODataLoads( @@ -1550,5 +1608,541 @@ void PrintSortedBy::runOnFunctions( } } +// Get list of targets for a given call sorted by most frequently +// called first. +std::vector IndirectCallPromotion::getCallTargets( + BinaryContext &BC, + const FuncBranchData &BranchData, + const MCInst &Inst +) const { + auto Offset = BC.MIA->getAnnotationAs(Inst, "IndirectBranchData"); + auto Branches = BranchData.getBranchRange(Offset); + std::vector Targets(Branches.begin(), Branches.end()); + + // Sort by most commonly called targets. 
+ std::sort(Targets.begin(), Targets.end(), + [](const BranchInfo &A, const BranchInfo &B) { + return A.Branches > B.Branches; + }); + + return Targets; +} + +std::vector> +IndirectCallPromotion::findCallTargetSymbols( + BinaryContext &BC, + const std::vector &Targets, + const size_t N +) const { + std::vector> SymTargets; + + for (size_t I = 0; I < N; ++I) { + MCSymbol* Symbol = nullptr; + uint64_t Addr = 0; + if (Targets[I].To.IsSymbol) { + auto itr = BC.GlobalSymbols.find(Targets[I].To.Name); + if (itr == BC.GlobalSymbols.end()) { + // punt if we can't find a symbol. + break; + } + Symbol = BC.getOrCreateGlobalSymbol(itr->second, "FUNCat"); + assert(Symbol); + } else { + Addr = Targets[I].To.Offset; + } + SymTargets.push_back(std::make_pair(Symbol, Addr)); + } + + return SymTargets; +} + +std::vector> +IndirectCallPromotion::rewriteCall(BinaryContext &BC, + BinaryFunction &Function, + BinaryBasicBlock *IndCallBlock, + const MCInst &CallInst, + MCInstrAnalysis::ICPdata &&ICPcode) const { + // Create new basic blocks with correct code in each one first. + std::vector> NewBBs; + const bool IsTailCall = BC.MIA->isTailCall(CallInst); + + // Move instructions from the tail of the original call block + // to the merge block. + + // Remember any pseudo instructions following a tail call. These + // must be preserved and moved to the original block. 
+ std::vector TailInsts; + const auto *TailInst= &CallInst; + if (IsTailCall) { + while (TailInst + 1 < &(*IndCallBlock->end()) && + BC.MII->get((TailInst + 1)->getOpcode()).isPseudo()) { + TailInsts.push_back(*++TailInst); + } + } + + auto MovedInst = IndCallBlock->splitInstructions(&CallInst); + + IndCallBlock->replaceInstruction(&CallInst, ICPcode.front().second); + IndCallBlock->addInstructions(TailInsts.begin(), TailInsts.end()); + + for (auto Itr = ICPcode.begin() + 1; Itr != ICPcode.end(); ++Itr) { + auto &Sym = Itr->first; + auto &Insts = Itr->second; + assert(Sym); + auto TBB = Function.createBasicBlock(0, Sym); + for (auto &Inst : Insts) { // sanitize new instructions. + if (BC.MIA->isCall(Inst)) + BC.MIA->removeAnnotation(Inst, "IndirectBranchData"); + } + TBB->addInstructions(Insts.begin(), Insts.end()); + NewBBs.emplace_back(std::move(TBB)); + } + + // Move tail of instructions from after the original call to + // the merge block. + if (!IsTailCall) { + NewBBs.back()->addInstructions(MovedInst.begin(), MovedInst.end()); + } + + return NewBBs; +} + +BinaryBasicBlock *IndirectCallPromotion::fixCFG( + BinaryContext &BC, + BinaryFunction &Function, + BinaryBasicBlock *IndCallBlock, + const bool IsTailCall, + IndirectCallPromotion::BasicBlocksVector &&NewBBs, + const std::vector &Targets +) const { + BinaryBasicBlock *MergeBlock = !IsTailCall ? NewBBs.back().get() : nullptr; + assert(NewBBs.size() >= 2); + assert(NewBBs.size() % 2 == 1 || IndCallBlock->succ_empty()); + assert(NewBBs.size() % 2 == 1 || IsTailCall); + using BinaryBranchInfo = BinaryBasicBlock::BinaryBranchInfo; + + if (MergeBlock) { + std::vector OldSucc(IndCallBlock->successors().begin(), + IndCallBlock->successors().end()); + std::vector BranchInfo(IndCallBlock->branch_info_begin(), + IndCallBlock->branch_info_end()); + + // Remove all successors from block doing the indirect call. 
+ IndCallBlock->removeSuccessors(OldSucc.begin(), OldSucc.end()); + assert(IndCallBlock->succ_empty()); + + // Move them to the merge block. + MergeBlock->addSuccessors(OldSucc.begin(), + OldSucc.end(), + BranchInfo.begin(), + BranchInfo.end()); + + // Update the execution count on the MergeBlock. + MergeBlock->setExecutionCount(IndCallBlock->getExecutionCount()); + } + + // Scale indirect call counts to the execution count of the original + // basic block containing the indirect call. + uint64_t TotalIndirectBranches = 0; + uint64_t TotalIndirectMispreds = 0; + for (const auto &BI : Targets) { + TotalIndirectBranches += BI.Branches; + TotalIndirectMispreds += BI.Mispreds; + } + + uint64_t TotalCount = 0; + uint64_t TotalMispreds = 0; + + if (Function.hasValidProfile()) { + TotalCount = IndCallBlock->getExecutionCount(); + TotalMispreds = + TotalCount * ((double)TotalIndirectMispreds / TotalIndirectBranches); + assert(TotalCount != BinaryBasicBlock::COUNT_NO_PROFILE); + } + + // New BinaryBranchInfo scaled to the execution count of the original BB. + std::vector BBI; + for (auto Itr = Targets.begin(); Itr != Targets.end(); ++Itr) { + BBI.push_back( + BinaryBranchInfo{ + uint64_t(TotalCount * ((double)Itr->Branches / TotalIndirectBranches)), + uint64_t(TotalMispreds * ((double)Itr->Mispreds / TotalIndirectMispreds)) + } + ); + } + auto BI = BBI.begin(); + auto updateCurrentBranchInfo = [&]{ + assert(BI < BBI.end()); + TotalCount -= BI->Count; + TotalMispreds -= BI->MispredictedCount; + ++BI; + }; + + // Fix up successors and execution counts. + updateCurrentBranchInfo(); + IndCallBlock->addSuccessor(NewBBs[1].get(), TotalCount); // uncond branch + IndCallBlock->addSuccessor(NewBBs[0].get(), BBI[0]); // conditional branch + + size_t Adj = 1 + (!IsTailCall ? 
1 : 0); + for (size_t I = 0; I < NewBBs.size() - Adj; ++I) { + assert(TotalCount <= IndCallBlock->getExecutionCount() || + TotalCount <= uint64_t(TotalIndirectBranches)); + uint64_t ExecCount = BBI[(I+1)/2].Count; + NewBBs[I]->setCanOutline(IndCallBlock->canOutline()); + NewBBs[I]->setIsCold(IndCallBlock->isCold()); + if (I % 2 == 0) { + if (MergeBlock) { + NewBBs[I]->addSuccessor(MergeBlock, BBI[(I+1)/2].Count); // uncond + } + } else { + assert(I + 2 < NewBBs.size()); + updateCurrentBranchInfo(); + NewBBs[I]->addSuccessor(NewBBs[I+2].get(), TotalCount); // uncond branch + NewBBs[I]->addSuccessor(NewBBs[I+1].get(), BBI[(I+1)/2]); // cond. branch + ExecCount += TotalCount; + } + NewBBs[I]->setExecutionCount(ExecCount); + } + + // Arrange for the MergeBlock to be the fallthrough for the first + // promoted call block. + if (MergeBlock) { + MergeBlock->setCanOutline(IndCallBlock->canOutline()); + MergeBlock->setIsCold(IndCallBlock->isCold()); + std::unique_ptr MBPtr; + std::swap(MBPtr, NewBBs.back()); + NewBBs.pop_back(); + NewBBs.emplace(NewBBs.begin() + 1, std::move(MBPtr)); + // TODO: is COUNT_FALLTHROUGH_EDGE the right thing here? + NewBBs.back()->addSuccessor(MergeBlock, TotalCount); // uncond branch + } + + // cold call block + // TODO: should be able to outline/cold this block. + NewBBs.back()->setExecutionCount(TotalCount); + NewBBs.back()->setCanOutline(IndCallBlock->canOutline()); + NewBBs.back()->setIsCold(IndCallBlock->isCold()); + + // update BB and BB layout. + Function.insertBasicBlocks(IndCallBlock, std::move(NewBBs)); + assert(Function.validateCFG()); + + return MergeBlock; +} + +size_t +IndirectCallPromotion::canPromoteCallsite(const BinaryBasicBlock *BB, + const MCInst &Inst, + const std::vector &Targets, + uint64_t NumCalls) { + // If we have no targets (or no calls), skip this callsite. 
+ if (Targets.empty() || !NumCalls) { + if (opts::Verbosity >= 1) { + const auto InstIdx = &Inst - &(*BB->begin()); + outs() << "BOLT-INFO: ICP failed in " << *BB->getFunction() << " @ " + << InstIdx << " in " << BB->getName() + << ", calls = " << NumCalls + << ", targets empty or NumCalls == 0.\n"; + } + return 0; + } + + const auto TrialN = std::min(size_t(opts::IndirectCallPromotionTopN), + Targets.size()); + + if (!opts::ICPFuncsList.empty()) { + for (auto &Name : opts::ICPFuncsList) { + if (BB->getFunction()->hasName(Name)) + return TrialN; + } + return 0; + } + + // Pick the top N targets. + uint64_t TotalCallsTopN = 0; + uint64_t TotalMispredictsTopN = 0; + size_t N = 0; + + if (opts::IndirectCallPromotionUseMispredicts) { + // Count total number of mispredictions for (at most) the top N targets. + // We may choose a smaller N (TrialN vs. N) if the frequency threshold + // is exceeded by fewer targets. + double Threshold = double(opts::IndirectCallPromotionMispredictThreshold); + for (size_t I = 0; I < TrialN && Threshold > 0; ++I, ++N) { + const auto Frequency = (100.0 * Targets[I].Mispreds) / NumCalls; + TotalMispredictsTopN += Targets[I].Mispreds; + TotalNumFrequentCalls += Targets[I].Branches; + Threshold -= Frequency; + } + + // Compute the misprediction frequency of the top N call targets. If this + // frequency is greater than the threshold, we should try ICP on this callsite. + const double TopNFrequency = (100.0 * TotalMispredictsTopN) / NumCalls; + + if (TopNFrequency == 0 || + TopNFrequency < opts::IndirectCallPromotionMispredictThreshold) { + if (opts::Verbosity >= 1) { + const auto InstIdx = &Inst - &(*BB->begin()); + outs() << "BOLT-INFO: ICP failed in " << *BB->getFunction() << " @ " + << InstIdx << " in " << BB->getName() << ", calls = " + << NumCalls << ", top N mis. 
frequency " + << format("%.1f", TopNFrequency) << "% < " + << opts::IndirectCallPromotionMispredictThreshold << "%\n"; + } + return 0; + } + } else { + // Count total number of calls for (at most) the top N targets. + // We may choose a smaller N (TrialN vs. N) if the frequency threshold + // is exceeded by fewer targets. + double Threshold = double(opts::IndirectCallPromotionThreshold); + for (size_t I = 0; I < TrialN && Threshold > 0; ++I, ++N) { + const auto Frequency = (100.0 * Targets[I].Branches) / NumCalls; + TotalCallsTopN += Targets[I].Branches; + TotalMispredictsTopN += Targets[I].Mispreds; + TotalNumFrequentCalls += Targets[I].Branches; + Threshold -= Frequency; + } + + // Compute the frequency of the top N call targets. If this frequency + // is greater than the threshold, we should try ICP on this callsite. + const double TopNFrequency = (100.0 * TotalCallsTopN) / NumCalls; + + if (TopNFrequency == 0 || + TopNFrequency < opts::IndirectCallPromotionThreshold) { + if (opts::Verbosity >= 1) { + const auto InstIdx = &Inst - &(*BB->begin()); + outs() << "BOLT-INFO: ICP failed in " << *BB->getFunction() << " @ " + << InstIdx << " in " << BB->getName() << ", calls = " + << NumCalls << ", top N frequency " + << format("%.1f", TopNFrequency) << "% < " + << opts::IndirectCallPromotionThreshold << "%\n"; + } + return 0; + } + + // Compute the misprediction frequency of the top N call targets. If + // this frequency is less than the threshold, we should skip ICP at + // this callsite. 
+ const double TopNMispredictFrequency = + (100.0 * TotalMispredictsTopN) / NumCalls; + + if (TopNMispredictFrequency < + opts::IndirectCallPromotionMispredictThreshold) { + if (opts::Verbosity >= 1) { + const auto InstIdx = &Inst - &(*BB->begin()); + outs() << "BOLT-INFO: ICP failed in " << *BB->getFunction() << " @ " + << InstIdx << " in " << BB->getName() << ", calls = " + << NumCalls << ", top N mispredict frequency " + << format("%.1f", TopNMispredictFrequency) << "% < " + << opts::IndirectCallPromotionMispredictThreshold << "%\n"; + } + return 0; + } + } + + return N; +} + +void +IndirectCallPromotion::printCallsiteInfo(const BinaryBasicBlock *BB, + const MCInst &Inst, + const std::vector &Targets, + const size_t N, + uint64_t NumCalls) const { + auto &BC = BB->getFunction()->getBinaryContext(); + const auto InstIdx = &Inst - &(*BB->begin()); + bool Separator = false; + + outs() << "BOLT-INFO: ICP candidate branch info: " + << *BB->getFunction() << " @ " << InstIdx + << " in " << BB->getName() + << " -> calls = " << NumCalls + << (BC.MIA->isTailCall(Inst) ? " (tail)" : ""); + for (size_t I = 0; I < N; I++) { + const auto Frequency = 100.0 * Targets[I].Branches / NumCalls; + const auto MisFrequency = 100.0 * Targets[I].Mispreds / NumCalls; + outs() << (Separator ? " | " : ", "); + Separator = true; + outs() << Targets[I].To.Name + << ", calls = " << Targets[I].Branches + << ", mispreds = " << Targets[I].Mispreds + << ", taken freq = " << format("%.1f", Frequency) << "%" + << ", mis. 
freq = " << format("%.1f", MisFrequency) << "%"; + } + outs() << "\n"; + + DEBUG({ + dbgs() << "BOLT-INFO: ICP original call instruction:\n"; + BC.printInstruction(dbgs(), Inst, Targets[0].From.Offset, nullptr, true); + }); +} + +void IndirectCallPromotion::runOnFunctions( + BinaryContext &BC, + std::map &BFs, + std::set &LargeFunctions +) { + for (auto &BFIt : BFs) { + auto &Function = BFIt.second; + + if (!Function.isSimple() || !opts::shouldProcess(Function)) + continue; + + const auto BranchDataOrErr = BC.DR.getFuncBranchData(Function.getNames()); + if (const auto EC = BranchDataOrErr.getError()) { + DEBUG(dbgs() << "BOLT-INFO: no branch data found for \"" + << Function << "\"\n"); + continue; + } + const FuncBranchData &BranchData = BranchDataOrErr.get(); + const bool HasLayout = !Function.layout_empty(); + + // Note: this is not just counting calls. + TotalCalls += BranchData.ExecutionCount; + + // Total number of indirect calls issued from the current Function. + // (a fraction of TotalIndirectCalls) + uint64_t FuncTotalIndirectCalls = 0; + + std::vector BBs; + for (auto &BB : Function) { + // Skip indirect calls in cold blocks. + if (!HasLayout || !Function.isSplit() || !BB.isCold()) { + BBs.push_back(&BB); + } + } + + while (!BBs.empty()) { + auto *BB = BBs.back(); + BBs.pop_back(); + + for (unsigned Idx = 0; Idx < BB->size(); ++Idx) { + auto &Inst = BB->getInstructionAtIndex(Idx); + const auto InstIdx = &Inst - &(*BB->begin()); + + if (!BC.MIA->hasAnnotation(Inst, "IndirectBranchData")) + continue; + + assert(BC.MIA->isCall(Inst)); + + ++TotalIndirectCallsites; + + const auto Targets = getCallTargets(BC, BranchData, Inst); + + // Compute the total number of calls from this particular callsite. + uint64_t NumCalls = 0; + for (const auto &BInfo : Targets) { + NumCalls += BInfo.Branches; + } + FuncTotalIndirectCalls += NumCalls; + + // Should this callsite be optimized? Return the number of targets + // to use when promoting this call. 
A value of zero means to skip + // this callsite. + size_t N = canPromoteCallsite(BB, Inst, Targets, NumCalls); + + if (!N) + continue; + + if (opts::Verbosity >= 1) { + printCallsiteInfo(BB, Inst, Targets, N, NumCalls); + } + + // Find MCSymbols or absolute addresses for each call target. + const auto SymTargets = findCallTargetSymbols(BC, Targets, N); + + // If we can't resolve any of the target symbols, punt on this callsite. + if (SymTargets.size() < N) { + const auto LastTarget = SymTargets.size(); + if (opts::Verbosity >= 1) { + outs() << "BOLT-INFO: ICP failed to find target symbol for " + << Targets[LastTarget].To.Name << " in " + << Function << " @ " << InstIdx << " in " + << BB->getName() << ", calls = " << NumCalls << "\n"; + } + continue; + } + + // Generate new promoted call code for this callsite. + auto ICPcode = + BC.MIA->indirectCallPromotion(Inst, + SymTargets, + opts::ICPOldCodeSequence, + BC.Ctx.get()); + + if (ICPcode.empty()) { + if (opts::Verbosity >= 1) { + outs() << "BOLT-INFO: ICP failed in " << Function << " @ " + << InstIdx << " in " << BB->getName() + << ", calls = " << NumCalls + << ", unable to generate promoted call code.\n"; + } + continue; + } + + DEBUG({ + auto Offset = Targets[0].From.Offset; + dbgs() << "BOLT-INFO: ICP indirect call code:\n"; + for (const auto &entry : ICPcode) { + const auto &Sym = entry.first; + const auto &Insts = entry.second; + if (Sym) dbgs() << Sym->getName() << ":\n"; + Offset = BC.printInstructions(dbgs(), + Insts.begin(), + Insts.end(), + Offset); + } + dbgs() << "---------------------------------------------------\n"; + }); + + // Rewrite the CFG with the newly generated ICP code. + const bool IsTailCall = BC.MIA->isTailCall(Inst); + auto NewBBs = rewriteCall(BC, Function, BB, Inst, std::move(ICPcode)); + + // Fix the CFG after inserting the new basic blocks. 
+ auto MergeBlock = fixCFG(BC, Function, BB, IsTailCall, + std::move(NewBBs), Targets); + + // Since the tail of the original block was split off and it may contain + // additional indirect calls, we must add the merge block to the set of + // blocks to process. + if (MergeBlock) { + BBs.push_back(MergeBlock); + } + + if (opts::Verbosity >= 1) { + outs() << "BOLT-INFO: ICP succeeded in " + << Function << " @ " << InstIdx + << " in " << BB->getName() + << " -> calls = " << NumCalls << "\n"; + } + + ++TotalOptimizedIndirectCallsites; + + Modified.insert(&Function); + } + } + TotalIndirectCalls += FuncTotalIndirectCalls; + } + + outs() << "BOLT-INFO: ICP total indirect callsites = " + << TotalIndirectCallsites + << "\n" + << "BOLT-INFO: ICP total number of calls = " + << TotalCalls + << "\n" + << "BOLT-INFO: ICP percentage of calls that are indirect = " + << format("%.1f", (100.0 * TotalIndirectCalls) / TotalCalls) + << "%\n" + << "BOLT-INFO: ICP percentage of indirect calls that can be optimized = " + << format("%.1f", (100.0 * TotalNumFrequentCalls) / TotalIndirectCalls) + << "%\n" + << "BOLT-INFO: ICP percentage of indirect calls that are optimized = " + << format("%.1f", (100.0 * TotalOptimizedIndirectCallsites) / + TotalIndirectCallsites) + << "%\n"; +} + } // namespace bolt } // namespace llvm diff --git a/bolt/BinaryPasses.h b/bolt/BinaryPasses.h index 12daa65a4678..5e8fafec7a91 100644 --- a/bolt/BinaryPasses.h +++ b/bolt/BinaryPasses.h @@ -395,6 +395,154 @@ class PrintSortedBy : public BinaryFunctionPass { std::set &LargeFunctions) override; }; +/// Optimize indirect calls. +/// The indirect call promotion pass visits each indirect call and +/// examines the BranchData for each. If the most frequent targets +/// from that callsite exceed the specified threshold (default 90%), +/// the call is promoted. Otherwise, it is ignored. By default, +/// only one target is considered at each callsite. 
+/// +/// When a candidate callsite is processed, we modify the callsite +/// to test for the most common call targets before calling through +/// the original generic call mechanism. +/// +/// The CFG and layout are modified by ICP. +/// +/// A few new command line options have been added: +/// -indirect-call-promotion +/// -indirect-call-promotion-threshold= +/// -indirect-call-promotion-mispredict-threshold= +/// -indirect-call-promotion-topn= +/// +/// The threshold is the minimum frequency of a call target needed +/// before ICP is triggered. +/// +/// The mispredict threshold is used to disable the optimization at +/// any callsite where the branch predictor does a good enough job +/// that ICP wouldn't help regardless of the frequency of the most +/// common target. +/// +/// The topn option controls the number of targets to consider for +/// each callsite, e.g. ICP is triggered if topn=2 and the total +/// frequency of the top two call targets exceeds the threshold. +/// +/// The minimize code size option controls whether or not the hot +/// calls are to registers (callq %r10) or to function addresses +/// (callq $foo). +/// +/// Example of ICP: +/// +/// C++ code: +/// +/// int B_count = 0; +/// int C_count = 0; +/// +/// struct A { virtual void foo() = 0; }; +/// struct B : public A { virtual void foo() { ++B_count; }; }; +/// struct C : public A { virtual void foo() { ++C_count; }; }; +/// +/// A* a = ... +/// a->foo(); +/// ... +/// +/// original assembly: +/// +/// B0: 49 8b 07 mov (%r15),%rax +/// 4c 89 ff mov %r15,%rdi +/// ff 10 callq *(%rax) +/// 41 83 e6 01 and $0x1,%r14d +/// 4d 89 e6 mov %r12,%r14 +/// 4c 0f 44 f5 cmove %rbp,%r14 +/// 4c 89 f7 mov %r14,%rdi +/// ... 
+/// +/// after ICP: +/// +/// B0: 49 8b 07 mov (%r15),%rax +/// 4c 89 ff mov %r15,%rdi +/// 48 81 38 e0 0b 40 00 cmpq $B::foo,(%rax) +/// 75 29 jne B3 +/// B1: e8 45 03 00 00 callq $B::foo +/// B2: 41 83 e6 01 and $0x1,%r14d +/// 4d 89 e6 mov %r12,%r14 +/// 4c 0f 44 f5 cmove %rbp,%r14 +/// 4c 89 f7 mov %r14,%rdi +/// ... +/// +/// B3: ff 10 callq *(%rax) +/// eb d6 jmp B2 +/// +class IndirectCallPromotion : public BinaryFunctionPass { + using BasicBlocksVector = std::vector>; + std::unordered_set Modified; + // Total number of calls from all callsites. + uint64_t TotalCalls{0}; + + // Total number of indirect calls from all callsites. + // (a fraction of TotalCalls) + uint64_t TotalIndirectCalls{0}; + + // Total number of callsites that use indirect calls. + // (the total number of callsites is not recorded) + uint64_t TotalIndirectCallsites{0}; + + // Total number of indirect callsites that are optimized by ICP. + // (a fraction of TotalIndirectCallsites) + uint64_t TotalOptimizedIndirectCallsites{0}; + + // Total number of indirect calls that are optimized by ICP. 
+ // (a fraction of TotalCalls) + uint64_t TotalNumFrequentCalls{0}; + + std::vector getCallTargets(BinaryContext &BC, + const FuncBranchData &BranchData, + const MCInst &Inst) const; + + size_t canPromoteCallsite(const BinaryBasicBlock *BB, + const MCInst &Inst, + const std::vector &Targets, + uint64_t NumCalls); + + void printCallsiteInfo(const BinaryBasicBlock *BB, + const MCInst &Inst, + const std::vector &Targets, + const size_t N, + uint64_t NumCalls) const; + + std::vector> + findCallTargetSymbols(BinaryContext &BC, + const std::vector &Targets, + const size_t N) const; + + std::vector> + rewriteCall(BinaryContext &BC, + BinaryFunction &Function, + BinaryBasicBlock *IndCallBlock, + const MCInst &CallInst, + MCInstrAnalysis::ICPdata &&ICPcode) const; + + BinaryBasicBlock *fixCFG(BinaryContext &BC, + BinaryFunction &Function, + BinaryBasicBlock *IndCallBlock, + const bool IsTailCall, + BasicBlocksVector &&NewBBs, + const std::vector &Targets) const; + + public: + explicit IndirectCallPromotion(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) { } + + const char *getName() const { + return "indirect-call-promotion"; + } + bool shouldPrint(const BinaryFunction &BF) const override { + return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; + } + void runOnFunctions(BinaryContext &BC, + std::map &BFs, + std::set &LargeFunctions) override; +}; + } // namespace bolt } // namespace llvm diff --git a/bolt/DataReader.cpp b/bolt/DataReader.cpp index cee08aec39c5..20d5e4e4d987 100644 --- a/bolt/DataReader.cpp +++ b/bolt/DataReader.cpp @@ -19,6 +19,21 @@ namespace llvm { namespace bolt { +iterator_range +FuncBranchData::getBranchRange(uint64_t From) const { + assert(std::is_sorted(Data.begin(), Data.end())); + struct Compare { + bool operator()(const BranchInfo &BI, const uint64_t Val) const { + return BI.From.Offset < Val; + } + bool operator()(const uint64_t Val, const BranchInfo &BI) const { + return Val < BI.From.Offset; + } + }; + auto Range 
= std::equal_range(Data.begin(), Data.end(), From, Compare()); + return iterator_range(Range.first, Range.second); +} + void BranchInfo::mergeWith(const BranchInfo &BI) { // Merge branch and misprediction counts. diff --git a/bolt/DataReader.h b/bolt/DataReader.h index aea058f557a6..1f69424f704a 100644 --- a/bolt/DataReader.h +++ b/bolt/DataReader.h @@ -122,6 +122,10 @@ struct FuncBranchData { /// returned. If the offset corresponds to an indirect call the behavior is /// undefined. ErrorOr getDirectCallBranch(uint64_t From) const; + + /// Find all the branches originating at From. + iterator_range getBranchRange( + uint64_t From) const; }; //===----------------------------------------------------------------------===// diff --git a/bolt/RewriteInstance.cpp b/bolt/RewriteInstance.cpp index 8121d274688d..1aaae0803e60 100644 --- a/bolt/RewriteInstance.cpp +++ b/bolt/RewriteInstance.cpp @@ -126,6 +126,14 @@ DynoStatsAll("dyno-stats-all", cl::desc("print dyno stats after each stage"), cl::ZeroOrMore, cl::Hidden); +static cl::opt +TopCalledLimit("top-called-limit", + cl::desc("maximum number of functions to print in top called " + "functions section"), + cl::init(100), + cl::ZeroOrMore, + cl::Hidden); + cl::opt HotText("hot-text", cl::desc("hot text symbols support"), @@ -724,11 +732,9 @@ void RewriteInstance::run() { auto FunctionIt = BinaryFunctions.find(Address); assert(FunctionIt != BinaryFunctions.end() && "Invalid large function address."); - if (opts::Verbosity >= 1) { - errs() << "BOLT-WARNING: Function " << FunctionIt->second - << " is larger than its orginal size: emitting again marking it " - << "as not simple.\n"; - } + errs() << "BOLT-WARNING: Function " << FunctionIt->second + << " is larger than its orginal size: emitting again marking it " + << "as not simple.\n"; FunctionIt->second.setSimple(false); } @@ -1694,7 +1700,8 @@ void RewriteInstance::disassembleFunctions() { } ); auto SFI = ProfiledFunctions.begin(); - for (int i = 0; i < 100 && SFI != 
ProfiledFunctions.end(); ++SFI, ++i) { + auto SFIend = ProfiledFunctions.end(); + for (auto i = 0u; i < opts::TopCalledLimit && SFI != SFIend; ++SFI, ++i) { outs() << " " << **SFI << " : " << (*SFI)->getExecutionCount() << '\n'; }