NOTE(review): this patch was re-flowed from a whitespace-collapsed copy in
which every `<...>` payload had been lost.
  * Template arguments and headers on added (+) lines were restored from their
    uses inside the patch. Three restorations are assumptions to confirm
    against the tree: the element type of the `LargeFunctions` set parameter
    (`uint64_t`), `#include <functional>` in BinaryFunction.cpp, and
    `#include <unordered_map>` in BinaryPasses.cpp.
  * Context lines and hunk-header context text are reproduced as found. Their
    lost payloads (the three `#include` lines in BinaryFunction.cpp,
    `llvm::make_unique(...)` in BinaryPassManager.cpp, the `cl::opt`
    declarations, `std::set &LargeFunctions` in BinaryPasses.h), their
    indentation and the spacing inside string literals must be taken from the
    target files.
  * The `index` blob ids predate the content fixes made in this revision.

diff --git a/bolt/BinaryBasicBlock.cpp b/bolt/BinaryBasicBlock.cpp
index 7f46b354f620..3be44979feeb 100644
--- a/bolt/BinaryBasicBlock.cpp
+++ b/bolt/BinaryBasicBlock.cpp
@@ -29,6 +29,24 @@ bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
   return LHS.Offset < RHS.Offset;
 }
 
+BinaryBasicBlock *BinaryBasicBlock::getSuccessor(const MCSymbol *Label) const {
+  for (BinaryBasicBlock *BB : successors()) {
+    if (BB->getLabel() == Label)
+      return BB;
+  }
+
+  return nullptr;
+}
+
+BinaryBasicBlock *BinaryBasicBlock::getLandingPad(const MCSymbol *Label) const {
+  for (BinaryBasicBlock *BB : landing_pads()) {
+    if (BB->getLabel() == Label)
+      return BB;
+  }
+
+  return nullptr;
+}
+
 void BinaryBasicBlock::addSuccessor(BinaryBasicBlock *Succ,
                                     uint64_t Count,
                                     uint64_t MispredictedCount) {
diff --git a/bolt/BinaryBasicBlock.h b/bolt/BinaryBasicBlock.h
index 3be0ab541717..344d8e28f49a 100644
--- a/bolt/BinaryBasicBlock.h
+++ b/bolt/BinaryBasicBlock.h
@@ -273,11 +273,28 @@ public:
         branch_info_begin(), branch_info_end());
   }
 
+  /// Get instruction at given index.
+  MCInst &getInstructionAtIndex(unsigned Index) {
+    return Instructions.at(Index);
+  }
+
+  const MCInst &getInstructionAtIndex(unsigned Index) const {
+    return Instructions.at(Index);
+  }
+
   /// Return symbol marking the start of this basic block.
   MCSymbol *getLabel() const {
     return Label;
   }
 
+  /// Get successor with given label. Returns nullptr if no such
+  /// successor is found.
+  BinaryBasicBlock *getSuccessor(const MCSymbol *Label) const;
+
+  /// Get landing pad with given label. Returns nullptr if no such
+  /// landing pad is found.
+  BinaryBasicBlock *getLandingPad(const MCSymbol *Label) const;
+
   /// Return local name for the block.
   StringRef getName() const {
     return Label->getName();
diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp
index 8c25cd4c53f6..43faae7a83bb 100644
--- a/bolt/BinaryFunction.cpp
+++ b/bolt/BinaryFunction.cpp
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include <functional>
 
 #undef DEBUG_TYPE
 #define DEBUG_TYPE "bolt"
@@ -178,6 +179,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
     OS << "\n Exec Count : " << ExecutionCount;
     OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
   }
+  if (IdenticalFunctionAddress != Address)
+    OS << "\n Id Fun Addr : 0x" << Twine::utohexstr(IdenticalFunctionAddress);
 
   OS << "\n}\n";
 
@@ -1538,6 +1541,376 @@ void BinaryFunction::propagateGnuArgsSizeInfo() {
   }
 }
 
+void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
+  if (!hasValidProfile() || !BF.hasValidProfile())
+    return;
+
+  // Update BF's execution count.
+  uint64_t MyExecutionCount = getExecutionCount();
+  if (MyExecutionCount != BinaryFunction::COUNT_NO_PROFILE) {
+    uint64_t OldExecCount = BF.getExecutionCount();
+    uint64_t NewExecCount =
+      OldExecCount == BinaryFunction::COUNT_NO_PROFILE ?
+      MyExecutionCount :
+      MyExecutionCount + OldExecCount;
+    BF.setExecutionCount(NewExecCount);
+  }
+
+  // Update BF's basic block and edge counts.
+  auto BBMergeI = BF.begin();
+  for (BinaryBasicBlock *BB : BasicBlocks) {
+    BinaryBasicBlock *BBMerge = &*BBMergeI;
+    assert(getIndex(BB) == BF.getIndex(BBMerge));
+
+    // Update BF's basic block count.
+    uint64_t MyBBExecutionCount = BB->getExecutionCount();
+    if (MyBBExecutionCount != BinaryBasicBlock::COUNT_NO_PROFILE) {
+      uint64_t OldExecCount = BBMerge->getExecutionCount();
+      uint64_t NewExecCount =
+        OldExecCount == BinaryBasicBlock::COUNT_NO_PROFILE ?
+        MyBBExecutionCount :
+        MyBBExecutionCount + OldExecCount;
+      BBMerge->ExecutionCount = NewExecCount;
+    }
+
+    // Update BF's edge count for successors of this basic block.
+    auto BBMergeSI = BBMerge->succ_begin();
+    auto BII = BB->BranchInfo.begin();
+    auto BIMergeI = BBMerge->BranchInfo.begin();
+    for (BinaryBasicBlock *BBSucc : BB->successors()) {
+      BinaryBasicBlock *BBMergeSucc = *BBMergeSI;
+      assert(getIndex(BBSucc) == BF.getIndex(BBMergeSucc));
+
+      if (BII->Count != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
+        uint64_t OldBranchCount = BIMergeI->Count;
+        uint64_t NewBranchCount =
+          OldBranchCount == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE ?
+          BII->Count :
+          BII->Count + OldBranchCount;
+        BIMergeI->Count = NewBranchCount;
+      }
+
+      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE) {
+        uint64_t OldMispredictedCount = BIMergeI->MispredictedCount;
+        uint64_t NewMispredictedCount =
+          OldMispredictedCount == BinaryBasicBlock::COUNT_FALLTHROUGH_EDGE ?
+          BII->MispredictedCount :
+          BII->MispredictedCount + OldMispredictedCount;
+        BIMergeI->MispredictedCount = NewMispredictedCount;
+      }
+
+      ++BBMergeSI;
+      ++BII;
+      ++BIMergeI;
+    }
+    assert(BBMergeSI == BBMerge->succ_end());
+
+    ++BBMergeI;
+  }
+  assert(BBMergeI == BF.end());
+}
+
+std::pair<bool, unsigned> BinaryFunction::isCalleeEquivalentWith(
+    const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
+    const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const {
+  // The callee operand in a direct call is the first operand. This
+  // operand should be a symbol corresponding to the callee function.
+  constexpr unsigned CalleeOpIndex = 0;
+
+  // Helper function.
+  auto getGlobalAddress = [this] (const MCSymbol &Symbol) -> uint64_t {
+    auto AI = BC.GlobalSymbols.find(Symbol.getName());
+    assert(AI != BC.GlobalSymbols.end());
+    return AI->second;
+  };
+
+  const MCOperand &CalleeOp = Inst.getOperand(CalleeOpIndex);
+  const MCOperand &CalleeOpOther = InstOther.getOperand(CalleeOpIndex);
+  if (!CalleeOp.isExpr() || !CalleeOpOther.isExpr()) {
+    // At least one of these is actually an indirect call.
+    return std::make_pair(false, 0);
+  }
+
+  const MCSymbol &CalleeSymbol = CalleeOp.getExpr()->getSymbol();
+  uint64_t CalleeAddress = getGlobalAddress(CalleeSymbol);
+
+  const MCSymbol &CalleeSymbolOther = CalleeOpOther.getExpr()->getSymbol();
+  uint64_t CalleeAddressOther = getGlobalAddress(CalleeSymbolOther);
+
+  bool BothRecursiveCalls =
+    CalleeAddress == getAddress() &&
+    CalleeAddressOther == BF.getAddress();
+
+  bool SameCallee = CalleeAddress == CalleeAddressOther;
+
+  return std::make_pair(BothRecursiveCalls || SameCallee, CalleeOpIndex);
+}
+
+std::pair<bool, unsigned> BinaryFunction::isTargetEquivalentWith(
+    const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
+    const BinaryBasicBlock &BBOther, const BinaryFunction &BF,
+    bool AreInvokes) const {
+  // The target operand in a (non-indirect) jump instruction is the
+  // first operand.
+  unsigned TargetOpIndex = 0;
+  if (AreInvokes) {
+    // The landing pad operand in an invoke is either the second or the
+    // sixth operand, depending on the number of operands of the invoke.
+    TargetOpIndex = 1;
+    if (Inst.getNumOperands() == 7 || Inst.getNumOperands() == 8)
+      TargetOpIndex = 5;
+  }
+
+  const MCOperand &TargetOp = Inst.getOperand(TargetOpIndex);
+  const MCOperand &TargetOpOther = InstOther.getOperand(TargetOpIndex);
+  if (!TargetOp.isExpr() || !TargetOpOther.isExpr()) {
+    assert(AreInvokes);
+    // An invoke without a landing pad operand has no catch handler. As long
+    // as both invokes have no catch target, we can consider they have the
+    // same catch target.
+    return std::make_pair(!TargetOp.isExpr() && !TargetOpOther.isExpr(),
+                          TargetOpIndex);
+  }
+
+  const MCSymbol &TargetSymbol = TargetOp.getExpr()->getSymbol();
+  BinaryBasicBlock *TargetBB =
+    AreInvokes ?
+    BB.getLandingPad(&TargetSymbol) :
+    BB.getSuccessor(&TargetSymbol);
+
+  const MCSymbol &TargetSymbolOther = TargetOpOther.getExpr()->getSymbol();
+  BinaryBasicBlock *TargetBBOther =
+    AreInvokes ?
+    BBOther.getLandingPad(&TargetSymbolOther) :
+    BBOther.getSuccessor(&TargetSymbolOther);
+
+  if (TargetBB == nullptr || TargetBBOther == nullptr) {
+    assert(!AreInvokes);
+    // This is a tail call implemented with a jump that was not
+    // converted to a call (e.g. conditional jump). Since the
+    // instructions were not identical, the functions cannot be
+    // proven identical either.
+    return std::make_pair(false, 0);
+  }
+
+  return std::make_pair(getIndex(TargetBB) == BF.getIndex(TargetBBOther),
+                        TargetOpIndex);
+}
+
+bool BinaryFunction::isInstrEquivalentWith(
+    const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
+    const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const {
+  // First check their opcodes.
+  if (Inst.getOpcode() != InstOther.getOpcode()) {
+    return false;
+  }
+
+  // Then check if they have the same number of operands.
+  unsigned NumOperands = Inst.getNumOperands();
+  unsigned NumOperandsOther = InstOther.getNumOperands();
+  if (NumOperands != NumOperandsOther) {
+    return false;
+  }
+
+  // We are interested in 3 special cases:
+  //
+  // a) both instructions are recursive calls.
+  // b) both instructions are local jumps to basic blocks with same indices.
+  // c) both instructions are invokes with landing pad blocks with same indices.
+  //
+  // In any of these cases the instructions will differ in some operands, but
+  // given identical CFG of the functions, they can still be considered
+  // equivalent.
+  bool BothCalls =
+    BC.MIA->isCall(Inst) &&
+    BC.MIA->isCall(InstOther);
+  bool BothInvokes =
+    BC.MIA->isInvoke(Inst) &&
+    BC.MIA->isInvoke(InstOther);
+  bool BothBranches =
+    BC.MIA->isBranch(Inst) &&
+    !BC.MIA->isIndirectBranch(Inst) &&
+    BC.MIA->isBranch(InstOther) &&
+    !BC.MIA->isIndirectBranch(InstOther);
+
+  if (!BothCalls && !BothInvokes && !BothBranches) {
+    return Inst.equals(InstOther);
+  }
+
+  // We figure out if both instructions are recursive calls (case a) or else
+  // if they are calls to the same function.
+  bool EquivCallees = false;
+  unsigned CalleeOpIndex = 0;
+  if (BothCalls) {
+    std::tie(EquivCallees, CalleeOpIndex) =
+      isCalleeEquivalentWith(Inst, BB, InstOther, BBOther, BF);
+  }
+
+  // We figure out if both instructions are jumps (case b) or invokes (case c)
+  // with equivalent jump targets or landing pads respectively.
+  assert(!(BothInvokes && BothBranches));
+  bool SameTarget = false;
+  unsigned TargetOpIndex = 0;
+  if (BothInvokes || BothBranches) {
+    std::tie(SameTarget, TargetOpIndex) =
+      isTargetEquivalentWith(Inst, BB, InstOther, BBOther, BF, BothInvokes);
+  }
+
+  // Compare all operands.
+  for (unsigned i = 0; i < NumOperands; ++i) {
+    if (i == CalleeOpIndex && BothCalls && EquivCallees)
+      continue;
+
+    if (i == TargetOpIndex && (BothInvokes || BothBranches) && SameTarget)
+      continue;
+
+    if (!Inst.getOperand(i).equals(InstOther.getOperand(i)))
+      return false;
+  }
+
+  // The instructions are equal although (some of) their operands
+  // may differ.
+  return true;
+}
+
+bool BinaryFunction::isIdenticalWith(const BinaryFunction &BF) const {
+
+  assert(CurrentState == State::CFG && BF.CurrentState == State::CFG);
+
+  // Compare the two functions, one basic block at a time.
+  // Currently we require two identical basic blocks to have identical
+  // instruction sequences and the same index in their corresponding
+  // functions. The latter is important for CFG equality.
+
+  // We do not consider functions with just different pseudo instruction
+  // sequences non-identical by default. However we print a warning
+  // in case two functions that are identical have different pseudo
+  // instruction sequences.
+  bool PseudosDiffer = false;
+
+  if (size() != BF.size())
+    return false;
+
+  auto BBI = BF.begin();
+  for (const BinaryBasicBlock *BB : BasicBlocks) {
+    const BinaryBasicBlock *BBOther = &*BBI;
+    if (getIndex(BB) != BF.getIndex(BBOther))
+      return false;
+
+    // Compare successor basic blocks.
+    if (BB->succ_size() != BBOther->succ_size())
+      return false;
+
+    auto SuccBBI = BBOther->succ_begin();
+    for (const BinaryBasicBlock *SuccBB : BB->successors()) {
+      const BinaryBasicBlock *SuccBBOther = *SuccBBI;
+      if (getIndex(SuccBB) != BF.getIndex(SuccBBOther))
+        return false;
+      ++SuccBBI;
+    }
+
+    // Compare landing pads.
+    if (BB->lp_size() != BBOther->lp_size())
+      return false;
+
+    auto LPI = BBOther->lp_begin();
+    for (const BinaryBasicBlock *LP : BB->landing_pads()) {
+      const BinaryBasicBlock *LPOther = *LPI;
+      if (getIndex(LP) != BF.getIndex(LPOther))
+        return false;
+      ++LPI;
+    }
+
+    // Compare instructions.
+    auto I = BB->begin(), E = BB->end();
+    auto OtherI = BBOther->begin(), OtherE = BBOther->end();
+    while (I != E && OtherI != OtherE) {
+      const MCInst &Inst = *I;
+      const MCInst &InstOther = *OtherI;
+
+      bool IsInstPseudo = BC.MII->get(Inst.getOpcode()).isPseudo();
+      bool IsInstOtherPseudo = BC.MII->get(InstOther.getOpcode()).isPseudo();
+
+      if (IsInstPseudo == IsInstOtherPseudo) {
+        // Either both are pseudos or none is.
+        bool areEqual =
+          isInstrEquivalentWith(Inst, *BB, InstOther, *BBOther, BF);
+
+        if (!areEqual && IsInstPseudo) {
+          // Different pseudo instructions.
+          PseudosDiffer = true;
+        }
+        else if (!areEqual) {
+          // Different non-pseudo instructions.
+          return false;
+        }
+
+        ++I; ++OtherI;
+      }
+      else {
+        // One instruction is a pseudo while the other is not.
+        PseudosDiffer = true;
+        IsInstPseudo ? ++I : ++OtherI;
+      }
+    }
+
+    // Check for trailing instructions or pseudos in one of the basic blocks.
+    auto TrailI = I == E ? OtherI : I;
+    auto TrailE = I == E ? OtherE : E;
+    while (TrailI != TrailE) {
+      const MCInst &InstTrail = *TrailI;
+      if (!BC.MII->get(InstTrail.getOpcode()).isPseudo()) {
+        // One of the functions has more instructions in this basic block
+        // than the other, hence not identical.
+        return false;
+      }
+
+      // There are trailing pseudos only in one of the basic blocks.
+      PseudosDiffer = true;
+      ++TrailI;
+    }
+
+    ++BBI;
+  }
+
+  if (PseudosDiffer) {
+    errs() << "BOLT-WARNING: functions " << getName() << " and ";
+    errs() << BF.getName() << " are identical, but have different";
+    errs() << " pseudo instruction sequences.\n";
+  }
+
+  return true;
+}
+
+std::size_t BinaryFunction::hash() const {
+  assert(CurrentState == State::CFG);
+
+  // The hash is computed by creating a string of all the opcodes
+  // in the function and hashing that string with std::hash.
+  std::string Opcodes;
+  for (const BinaryBasicBlock *BB : BasicBlocks) {
+    for (const MCInst &Inst : *BB) {
+      unsigned Opcode = Inst.getOpcode();
+
+      if (BC.MII->get(Opcode).isPseudo())
+        continue;
+
+      if (Opcode == 0) {
+        Opcodes.push_back(0);
+        continue;
+      }
+
+      while (Opcode) {
+        uint8_t LSB = Opcode & 0xff;
+        Opcodes.push_back(LSB);
+        Opcode = Opcode >> 8;
+      }
+    }
+  }
+
+  return std::hash<std::string>{}(Opcodes);
+}
+
 BinaryFunction::~BinaryFunction() {
   for (auto BB : BasicBlocks) {
     delete BB;
diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h
index aa812ec9bb54..527e5e5fd2eb 100644
--- a/bolt/BinaryFunction.h
+++ b/bolt/BinaryFunction.h
@@ -117,6 +117,16 @@ private:
   /// base address for position independent binaries.
   uint64_t Address;
 
+  /// Address of an identical function that can replace this one. By default
+  /// this is the same as the address of this function, and the icf pass can
+  /// potentially set it to some other function's address.
+  ///
+  /// In case multiple functions are identical to each other, one of the
+  /// functions (the representative) will point to its own address, while the
+  /// rest of the functions will point to the representative through one or
+  /// more steps.
+  uint64_t IdenticalFunctionAddress;
+
   /// Original size of the function.
   uint64_t Size;
 
@@ -196,6 +206,32 @@ private:
     return *this;
   }
 
+  /// Helper function that compares an instruction of this function to the
+  /// given instruction of the given function. The functions should have
+  /// identical CFG.
+  bool isInstrEquivalentWith(
+      const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
+      const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const;
+
+  /// Helper function that compares the callees of two call instructions.
+  /// Callees are considered equivalent if both refer to the same function
+  /// or if both calls are recursive. Instructions should have same opcodes
+  /// and same number of operands. Returns true and the callee operand index
+  /// when callees are equivalent, and false, 0 otherwise.
+  std::pair<bool, unsigned> isCalleeEquivalentWith(
+      const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
+      const BinaryBasicBlock &BBOther, const BinaryFunction &BF) const;
+
+  /// Helper function that compares the targets of two jumps or two invokes.
+  /// The target of an invoke is taken to be its landing pad basic block. The
+  /// corresponding functions should have identical CFG. Instructions should
+  /// have same opcodes and same number of operands. Returns true and the target
+  /// operand index when targets are equivalent, and false, 0 otherwise.
+  std::pair<bool, unsigned> isTargetEquivalentWith(
+      const MCInst &Inst, const BinaryBasicBlock &BB, const MCInst &InstOther,
+      const BinaryBasicBlock &BBOther, const BinaryFunction &BF,
+      bool AreInvokes) const;
+
   /// Return basic block that originally was laid out immediately following
   /// the given /p BB basic block.
   const BinaryBasicBlock *
@@ -381,8 +417,8 @@ public:
   BinaryFunction(const std::string &Name, SymbolRef Symbol, SectionRef Section,
                  uint64_t Address, uint64_t Size, BinaryContext &BC,
                  bool IsSimple = true) :
-      Names({Name}), Symbol(Symbol), Section(Section),
-      Address(Address), Size(Size), BC(BC), IsSimple(IsSimple),
+      Names({Name}), Symbol(Symbol), Section(Section), Address(Address),
+      IdenticalFunctionAddress(Address), Size(Size), BC(BC), IsSimple(IsSimple),
       CodeSectionName(".text." + Name), FunctionNumber(++Count) {}
 
 
@@ -460,6 +496,10 @@ public:
     return Names;
   }
 
+  State getCurrentState() const {
+    return CurrentState;
+  }
+
   /// Return containing file section.
   SectionRef getSection() const {
     return Section;
@@ -778,6 +818,17 @@ public:
     return LSDAAddress;
   }
 
+  /// Return the address of an identical function. If none is found this will
+  /// return this function's address.
+  uint64_t getIdenticalFunctionAddress() const {
+    return IdenticalFunctionAddress;
+  }
+
+  /// Set the address of an identical function.
+  void setIdenticalFunctionAddress(uint64_t Address) {
+    IdenticalFunctionAddress = Address;
+  }
+
   /// Return symbol pointing to function's LSDA.
   MCSymbol *getLSDASymbol() {
     if (LSDASymbol)
@@ -864,6 +915,18 @@ public:
   /// Emit exception handling ranges for the function.
   void emitLSDA(MCStreamer *Streamer);
 
+  /// Merge profile data of this function into those of the given
+  /// function. The functions should have been proven identical with
+  /// isIdenticalWith.
+  void mergeProfileDataInto(BinaryFunction &BF) const;
+
+  /// Returns true if this function has identical code and
+  /// CFG with the given function.
+  bool isIdenticalWith(const BinaryFunction &BF) const;
+
+  /// Returns a hash value for the function. To be used for ICF.
+  std::size_t hash() const;
+
   /// Sets the associated .debug_info entry.
   void addSubprogramDIE(DWARFCompileUnit *Unit,
                         const DWARFDebugInfoEntryMinimal *DIE) {
diff --git a/bolt/BinaryPassManager.cpp b/bolt/BinaryPassManager.cpp
index 080f485aa1cf..10c981a78382 100644
--- a/bolt/BinaryPassManager.cpp
+++ b/bolt/BinaryPassManager.cpp
@@ -50,6 +50,12 @@ SimplifyRODataLoads("simplify-rodata-loads",
                     "section"),
                     llvm::cl::Optional);
 
+static llvm::cl::opt<bool>
+IdenticalCodeFolding(
+    "icf",
+    llvm::cl::desc("fold functions with identical code"),
+    llvm::cl::Optional);
+
 } // namespace opts
 
 namespace llvm {
@@ -73,6 +79,9 @@ void BinaryFunctionPassManager::runAllPasses(
   // Here we manage dependencies/order manually, since passes are ran in the
   // order they're registered.
 
+  Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(),
+                       opts::IdenticalCodeFolding);
+
   Manager.registerPass(
     std::move(llvm::make_unique(Manager.NagUser)),
     opts::EliminateUnreachable);
diff --git a/bolt/BinaryPasses.cpp b/bolt/BinaryPasses.cpp
index aa4c5241c9e1..c4394c65d67b 100644
--- a/bolt/BinaryPasses.cpp
+++ b/bolt/BinaryPasses.cpp
@@ -11,6 +11,7 @@
 
 #include "BinaryPasses.h"
 #include "llvm/Support/Options.h"
+#include <unordered_map>
 
 #define DEBUG_TYPE "bolt"
 
@@ -23,6 +24,7 @@ extern llvm::cl::opt PrintEHRanges;
 extern llvm::cl::opt PrintUCE;
 extern llvm::cl::opt PrintPeepholes;
 extern llvm::cl::opt PrintSimplifyROLoads;
+extern llvm::cl::opt<bool> PrintICF;
 extern llvm::cl::opt SplitFunctions;
 extern bool shouldProcess(const llvm::bolt::BinaryFunction &Function);
 
@@ -682,5 +684,235 @@ void SimplifyRODataLoads::runOnFunctions(
   outs() << "BOLT: dynamic loads found: " << NumDynamicLoadsFound << "\n";
 }
 
+void IdenticalCodeFolding::discoverCallers(
+  BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) {
+  for (auto &I : BFs) {
+    BinaryFunction &Caller = I.second;
+
+    if (!Caller.isSimple())
+      continue;
+
+    for (BinaryBasicBlock &BB : Caller) {
+      unsigned BlockIndex = Caller.getIndex(&BB);
+      unsigned InstrIndex = 0;
+
+      for (MCInst &Inst : BB) {
+        if (!BC.MIA->isCall(Inst)) {
+          ++InstrIndex;
+          continue;
+        }
+
+        const MCOperand &TargetOp = Inst.getOperand(0);
+        if (!TargetOp.isExpr()) {
+          // This is an indirect call, we cannot record
+          // a target.
+          ++InstrIndex;
+          continue;
+        }
+
+        // Find the target function for this call.
+        const MCExpr *TargetExpr = TargetOp.getExpr();
+        assert(TargetExpr->getKind() == MCExpr::SymbolRef);
+        const MCSymbol &TargetSymbol =
+          cast<MCSymbolRefExpr>(TargetExpr)->getSymbol();
+        auto AI = BC.GlobalSymbols.find(TargetSymbol.getName());
+        assert(AI != BC.GlobalSymbols.end());
+        uint64_t TargetAddress = AI->second;
+        auto FI = BFs.find(TargetAddress);
+        if (FI == BFs.end()) {
+          // Call to a function without a BinaryFunction object.
+          ++InstrIndex;
+          continue;
+        }
+        BinaryFunction *Callee = &FI->second;
+
+        // Insert a tuple in the Callers map.
+        Callers[Callee].emplace_back(
+          CallSite(&Caller, BlockIndex, InstrIndex));
+
+        ++InstrIndex;
+      }
+    }
+  }
+}
+
+void IdenticalCodeFolding::foldFunction(
+  BinaryContext &BC,
+  std::map<uint64_t, BinaryFunction> &BFs,
+  BinaryFunction *BFToFold,
+  BinaryFunction *BFToReplaceWith,
+  std::set<BinaryFunction *> &Modified) {
+
+  // Mark BFToFold as identical with BFToReplaceWith.
+  BFToFold->setIdenticalFunctionAddress(BFToReplaceWith->getAddress());
+
+  // Add the size of BFToFold to the total size savings estimate.
+  BytesSavedEstimate += BFToFold->getSize();
+
+  // Get callers of BFToFold.
+  auto CI = Callers.find(BFToFold);
+  if (CI == Callers.end())
+    return;
+  std::vector<CallSite> &BFToFoldCallers = CI->second;
+
+  // Get callers of BFToReplaceWith.
+  std::vector<CallSite> &BFToReplaceWithCallers = Callers[BFToReplaceWith];
+
+  // Get MCSymbol for BFToReplaceWith.
+  MCSymbol *SymbolToReplaceWith =
+    BC.getOrCreateGlobalSymbol(BFToReplaceWith->getAddress(), "");
+
+  // Traverse callers of BFToFold and replace the calls with calls
+  // to BFToReplaceWith.
+  for (const CallSite &CS : BFToFoldCallers) {
+    // Get call instruction.
+    BinaryFunction *Caller = CS.Caller;
+    BinaryBasicBlock *CallBB = Caller->getBasicBlockAtIndex(CS.BlockIndex);
+    MCInst &CallInst = CallBB->getInstructionAtIndex(CS.InstrIndex);
+
+    // Replace call target with BFToReplaceWith.
+    MCOperand CallTargetOp =
+      MCOperand::createExpr(
+        MCSymbolRefExpr::create(
+          SymbolToReplaceWith, MCSymbolRefExpr::VK_None, *BC.Ctx));
+    // Keep the call outside of assert() so that the replacement still
+    // happens when assertions are compiled out (NDEBUG).
+    const bool Replaced =
+      BC.MIA->replaceCallTargetOperand(CallInst, CallTargetOp);
+    assert(Replaced && "unexpected call target prevented the replacement");
+    (void)Replaced;
+
+    // Add this call site to the callers of BFToReplaceWith.
+    BFToReplaceWithCallers.emplace_back(CS);
+
+    // Add caller to the set of modified functions.
+    Modified.insert(Caller);
+
+    // Update dynamic calls folded stat.
+    if (Caller->hasValidProfile() &&
+        CallBB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE)
+      NumDynamicCallsFolded += CallBB->getExecutionCount();
+  }
+
+  // Remove all callers of BFToFold.
+  BFToFoldCallers.clear();
+
+  ++NumFunctionsFolded;
+
+  // Merge execution counts of BFToFold into those of BFToReplaceWith.
+  BFToFold->mergeProfileDataInto(*BFToReplaceWith);
+}
+
+void IdenticalCodeFolding::runOnFunctions(
+  BinaryContext &BC,
+  std::map<uint64_t, BinaryFunction> &BFs,
+  std::set<uint64_t> &
+) {
+
+  discoverCallers(BC, BFs);
+
+  // This hash table is used to identify identical functions. It maps
+  // a function to a bucket of functions identical to it.
+  struct KeyHash {
+    std::size_t operator()(const BinaryFunction *F) const { return F->hash(); }
+  };
+  struct KeyEqual {
+    bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
+      return A->isIdenticalWith(*B);
+    }
+  };
+  std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
+                     KeyHash, KeyEqual> Buckets;
+
+  // Set that holds the functions that were modified by the last pass.
+  std::set<BinaryFunction *> Mod;
+
+  // Vector of all the candidate functions to be tested for being identical
+  // to each other. Initialized with all simple functions.
+  std::vector<BinaryFunction *> Cands;
+  for (auto &I : BFs) {
+    BinaryFunction *BF = &I.second;
+    if (BF->isSimple())
+      Cands.emplace_back(BF);
+  }
+
+  // We repeat the icf pass until no new modifications happen.
+  unsigned Iter = 1;
+  do {
+    Buckets.clear();
+    Mod.clear();
+
+    errs() << "BOLT-INFO: icf pass " << Iter << "...\n";
+
+    uint64_t NumIdenticalFunctions = 0;
+
+    // Compare candidate functions using the Buckets hash table. Identical
+    // functions are efficiently discovered and added to the same bucket.
+    for (BinaryFunction *BF : Cands) {
+      Buckets[BF].emplace_back(BF);
+    }
+
+    Cands.clear();
+
+    // Go through the functions of each bucket and fold any references to them
+    // with the references to the hottest function among them.
+    for (auto &I : Buckets) {
+      std::vector<BinaryFunction *> &IFs = I.second;
+      std::sort(IFs.begin(), IFs.end(),
+        [](const BinaryFunction *A, const BinaryFunction *B) {
+          if (!A->hasValidProfile() && !B->hasValidProfile())
+            return false;
+
+          if (!A->hasValidProfile())
+            return false;
+
+          if (!B->hasValidProfile())
+            return true;
+
+          return B->getExecutionCount() < A->getExecutionCount();
+        }
+      );
+      BinaryFunction *Hottest = IFs[0];
+
+      // For the next pass, we consider only one function from each set of
+      // identical functions.
+      Cands.emplace_back(Hottest);
+
+      if (IFs.size() <= 1)
+        continue;
+
+      NumIdenticalFunctions += IFs.size() - 1;
+      for (unsigned i = 1; i < IFs.size(); ++i) {
+        BinaryFunction *BF = IFs[i];
+        foldFunction(BC, BFs, BF, Hottest, Mod);
+      }
+    }
+
+    errs() << "BOLT-INFO: found " << NumIdenticalFunctions;
+    errs() << " identical functions.\n";
+    errs() << "BOLT-INFO: modified " << Mod.size() << " functions.\n";
+
+    NumIdenticalFunctionsFound += NumIdenticalFunctions;
+
+    ++Iter;
+  } while (!Mod.empty());
+
+  outs() << "BOLT: ICF pass found " << NumIdenticalFunctionsFound;
+  outs() << " functions identical to some other function.\n";
+  outs() << "BOLT: ICF pass folded references to " << NumFunctionsFolded;
+  outs() << " functions.\n";
+  outs() << "BOLT: ICF pass folded " << NumDynamicCallsFolded << " dynamic";
+  outs() << " function calls.\n";
+  outs() << "BOLT: Removing all identical functions could save ";
+  outs() << format("%.2lf", (double) BytesSavedEstimate / 1024);
+  outs() << " KB of code space.\n";
+
+  if (opts::PrintAll || opts::PrintICF) {
+    for (auto &I : BFs) {
+      I.second.print(errs(), "after identical code folding", true);
+    }
+  }
+}
+
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/BinaryPasses.h b/bolt/BinaryPasses.h
index 915ff285cdec..b227041649b5 100644
--- a/bolt/BinaryPasses.h
+++ b/bolt/BinaryPasses.h
@@ -182,6 +182,48 @@ public:
                       std::set &LargeFunctions) override;
 };
 
+/// An optimization that replaces references to identical functions with
+/// references to a single one of them.
+///
+class IdenticalCodeFolding : public BinaryFunctionPass {
+  uint64_t NumIdenticalFunctionsFound{0};
+  uint64_t NumFunctionsFolded{0};
+  uint64_t NumDynamicCallsFolded{0};
+  uint64_t BytesSavedEstimate{0};
+
+  /// Location of a call instruction: caller, block index, instruction index.
+  struct CallSite {
+    BinaryFunction *Caller;
+    unsigned BlockIndex;
+    unsigned InstrIndex;
+
+    CallSite(BinaryFunction *Caller, unsigned BlockIndex, unsigned InstrIndex) :
+      Caller(Caller), BlockIndex(BlockIndex), InstrIndex(InstrIndex) { }
+  };
+  /// Map from a binary function to its callers.
+  using CallerMap = std::map<BinaryFunction *, std::vector<CallSite>>;
+  CallerMap Callers;
+
+  /// Replaces all calls to BFToFold with calls to BFToReplaceWith and merges
+  /// the profile data of BFToFold with those of BFToReplaceWith. All modified
+  /// functions are added to the Modified set.
+  void foldFunction(BinaryContext &BC,
+                    std::map<uint64_t, BinaryFunction> &BFs,
+                    BinaryFunction *BFToFold,
+                    BinaryFunction *BFToReplaceWith,
+                    std::set<BinaryFunction *> &Modified);
+
+  /// Finds callers for each binary function and populates the Callers
+  /// map.
+  void discoverCallers(BinaryContext &BC,
+                       std::map<uint64_t, BinaryFunction> &BFs);
+
+ public:
+  void runOnFunctions(BinaryContext &BC,
+                      std::map<uint64_t, BinaryFunction> &BFs,
+                      std::set<uint64_t> &LargeFunctions) override;
+};
+
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/RewriteInstance.cpp b/bolt/RewriteInstance.cpp
index a193c0131209..255514449e23 100644
--- a/bolt/RewriteInstance.cpp
+++ b/bolt/RewriteInstance.cpp
@@ -186,6 +186,11 @@ PrintReordered("print-reordered",
                cl::desc("print functions after layout optimization"),
                cl::Hidden);
 
+cl::opt<bool>
+PrintICF("print-icf",
+         cl::desc("print functions after ICF optimization"),
+         cl::Hidden);
+
 static cl::opt
 KeepTmp("keep-tmp",
         cl::desc("preserve intermediate .o file"),
@@ -805,6 +810,16 @@ void RewriteInstance::discoverFileObjects() {
               "wish to proceed, use -allow-stripped option.\n";
     exit(1);
   }
+
+  // Register the final names of functions with multiple names with the
+  // BinaryContext data structures.
+  for (auto &BFI : BinaryFunctions) {
+    uint64_t Address = BFI.first;
+    const BinaryFunction &BF = BFI.second;
+    auto AI = BC->GlobalSymbols.find(BF.getName());
+    if (AI == BC->GlobalSymbols.end())
+      BC->registerNameAtAddress(BF.getName(), Address);
+  }
 }
 
 void RewriteInstance::readSpecialSections() {