From f8f52d01d012404ed76ea3dca703bd0776fd3b05 Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Thu, 9 Nov 2017 16:59:18 -0800 Subject: [PATCH] [BOLT-AArch64] Support SPEC17 programs and organize AArch64 tests Summary: Add a few new relocation types to support a wider variety of binaries, add support for constant island duplication (so we can split functions in large binaries) and make LongJmp pass really precise with respect to layout, so we don't miss stub insertions at the correct places for really large binaries. In LongJmp, introduce "freeze" annotations so fixBranches won't mess with the jumps we carefully determined needed a stub. (cherry picked from FBD6294390) --- bolt/BinaryContext.cpp | 16 +++++ bolt/BinaryContext.h | 2 +- bolt/BinaryFunction.cpp | 58 ++++++++++++++--- bolt/BinaryFunction.h | 30 ++++++++- bolt/BinaryPassManager.cpp | 2 + bolt/Passes/BinaryPasses.cpp | 23 ++++--- bolt/Passes/BinaryPasses.h | 14 +++++ bolt/Passes/LongJmp.cpp | 117 +++++++++++++++++++++++++++-------- bolt/Passes/LongJmp.h | 7 ++- bolt/RewriteInstance.cpp | 29 ++++++++- 10 files changed, 253 insertions(+), 45 deletions(-) diff --git a/bolt/BinaryContext.cpp b/bolt/BinaryContext.cpp index 02821658b973..6be9c1cca959 100644 --- a/bolt/BinaryContext.cpp +++ b/bolt/BinaryContext.cpp @@ -565,7 +565,11 @@ size_t Relocation::getSizeForType(uint64_t Type) { case ELF::R_AARCH64_LDST16_ABS_LO12_NC: case ELF::R_AARCH64_LDST8_ABS_LO12_NC: case ELF::R_AARCH64_ADR_GOT_PAGE: + case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: case ELF::R_AARCH64_LD64_GOT_LO12_NC: + case ELF::R_AARCH64_TLSDESC_LD64_LO12_NC: + case ELF::R_AARCH64_TLSDESC_ADD_LO12_NC: + case ELF::R_AARCH64_TLSDESC_CALL: case ELF::R_AARCH64_JUMP26: case ELF::R_AARCH64_PREL32: return 4; @@ -585,12 +589,14 @@ uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents, return Contents; case ELF::R_AARCH64_PREL32: return static_cast(PC) + SignExtend64<32>(Contents & 0xffffffff); + case ELF::R_AARCH64_TLSDESC_CALL: case 
ELF::R_AARCH64_JUMP26: case ELF::R_AARCH64_CALL26: // Immediate goes in bits 25:0 of B and BL. Contents &= ~0xfffffffffc000000ULL; return static_cast(PC) + SignExtend64<28>(Contents << 2); case ELF::R_AARCH64_ADR_GOT_PAGE: + case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: case ELF::R_AARCH64_ADR_PREL_PG_HI21: { // Bits 32:12 of Symbol address goes in bits 30:29 + 23:5 of ADRP // instruction @@ -602,6 +608,7 @@ uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents, Contents &= ~0xfffUll; return Contents; } + case ELF::R_AARCH64_TLSDESC_LD64_LO12_NC: case ELF::R_AARCH64_LD64_GOT_LO12_NC: case ELF::R_AARCH64_LDST64_ABS_LO12_NC: { // Immediate goes in bits 21:10 of LD/ST instruction, taken @@ -609,6 +616,7 @@ uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents, Contents &= ~0xffffffffffc003ffU; return Contents >> (10 - 3); } + case ELF::R_AARCH64_TLSDESC_ADD_LO12_NC: case ELF::R_AARCH64_ADD_ABS_LO12_NC: { // Immediate goes in bits 21:10 of ADD instruction Contents &= ~0xffffffffffc003ffU; @@ -647,6 +655,10 @@ bool Relocation::isGOT(uint64_t Type) { return false; case ELF::R_AARCH64_ADR_GOT_PAGE: case ELF::R_AARCH64_LD64_GOT_LO12_NC: + case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: + case ELF::R_AARCH64_TLSDESC_LD64_LO12_NC: + case ELF::R_AARCH64_TLSDESC_ADD_LO12_NC: + case ELF::R_AARCH64_TLSDESC_CALL: return true; } } @@ -668,6 +680,8 @@ bool Relocation::isPCRelative(uint64_t Type) { case ELF::R_AARCH64_LDST16_ABS_LO12_NC: case ELF::R_AARCH64_LDST8_ABS_LO12_NC: case ELF::R_AARCH64_LD64_GOT_LO12_NC: + case ELF::R_AARCH64_TLSDESC_LD64_LO12_NC: + case ELF::R_AARCH64_TLSDESC_ADD_LO12_NC: return false; case ELF::R_X86_64_PC8: @@ -677,9 +691,11 @@ bool Relocation::isPCRelative(uint64_t Type) { case ELF::R_X86_64_GOTTPOFF: case ELF::R_X86_64_GOTPCRELX: case ELF::R_X86_64_REX_GOTPCRELX: + case ELF::R_AARCH64_TLSDESC_CALL: case ELF::R_AARCH64_CALL26: case ELF::R_AARCH64_ADR_PREL_PG_HI21: case ELF::R_AARCH64_ADR_GOT_PAGE: + case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: 
case ELF::R_AARCH64_JUMP26: case ELF::R_AARCH64_PREL32: return true; diff --git a/bolt/BinaryContext.h b/bolt/BinaryContext.h index 69b26b1d5c89..37266ec6f6fe 100644 --- a/bolt/BinaryContext.h +++ b/bolt/BinaryContext.h @@ -340,7 +340,7 @@ public: SmallString<256> Code; SmallVector Fixups; raw_svector_ostream VecOS(Code); - if (MIA->isCFI(*Beg)) { + if (MIA->isCFI(*Beg) || MIA->isEHLabel(*Beg)) { ++Beg; continue; } diff --git a/bolt/BinaryFunction.cpp b/bolt/BinaryFunction.cpp index a6c75fea32b2..8f4df24254fd 100644 --- a/bolt/BinaryFunction.cpp +++ b/bolt/BinaryFunction.cpp @@ -913,6 +913,10 @@ void BinaryFunction::disassemble(ArrayRef FunctionData) { isInConstantIsland(TargetAddress)) { TargetSymbol = BC.getOrCreateGlobalSymbol(TargetAddress, "ISLANDat"); IslandSymbols[TargetAddress - getAddress()] = TargetSymbol; + if (!ColdIslandSymbols.count(TargetSymbol)) { + ColdIslandSymbols[TargetSymbol] = + Ctx->getOrCreateSymbol(TargetSymbol->getName() + ".cold"); + } } // Note that the address does not necessarily have to reside inside @@ -2033,6 +2037,9 @@ uint64_t BinaryFunction::getEditDistance() const { } void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart) { + if (EmitColdPart && hasConstantIsland()) + duplicateConstantIslands(); + int64_t CurrentGnuArgsSize = 0; for (auto BB : layout()) { if (EmitColdPart != BB->isCold()) @@ -2078,8 +2085,7 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart) { } } - if (!EmitColdPart) - emitConstantIslands(Streamer); + emitConstantIslands(Streamer, EmitColdPart); } void BinaryFunction::emitBodyRaw(MCStreamer *Streamer) { @@ -2140,11 +2146,15 @@ void BinaryFunction::emitBodyRaw(MCStreamer *Streamer) { } } -void BinaryFunction::emitConstantIslands(MCStreamer &Streamer) { +void BinaryFunction::emitConstantIslands(MCStreamer &Streamer, + bool EmitColdPart) { if (DataOffsets.empty()) return; - Streamer.EmitLabel(getFunctionConstantIslandLabel()); + if (!EmitColdPart) + 
Streamer.EmitLabel(getFunctionConstantIslandLabel()); + else + Streamer.EmitLabel(getFunctionColdConstantIslandLabel()); // Raw contents of the function. StringRef SectionContents; Section.getContents(SectionContents); @@ -2196,7 +2206,10 @@ void BinaryFunction::emitConstantIslands(MCStreamer &Streamer) { if (IS != IslandSymbols.end() && FunctionOffset == IS->first) { DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << IS->second->getName() << " at offset 0x" << Twine::utohexstr(IS->first) << '\n'); - Streamer.EmitLabel(IS->second); + if (!EmitColdPart) + Streamer.EmitLabel(IS->second); + else + Streamer.EmitLabel(ColdIslandSymbols[IS->second]); ++IS; } if (RI != MoveRelocations.end() && FunctionOffset == RI->first) { @@ -2218,6 +2231,33 @@ void BinaryFunction::emitConstantIslands(MCStreamer &Streamer) { assert(IS == IslandSymbols.end() && "some symbols were not emitted!"); } +void BinaryFunction::duplicateConstantIslands() { + for (auto BB : layout()) { + if (!BB->isCold()) + continue; + + for (auto &Inst : *BB) { + int OpNum = 0; + for (auto &Operand : Inst) { + if (!Operand.isExpr()) { + ++OpNum; + continue; + } + const auto *Symbol = BC.MIA->getTargetSymbol(Inst, OpNum); + auto ISym = ColdIslandSymbols.find(Symbol); + if (ISym == ColdIslandSymbols.end()) + continue; + Operand = MCOperand::createExpr(BC.MIA->getTargetExprFor( + Inst, + MCSymbolRefExpr::create(ISym->second, MCSymbolRefExpr::VK_None, + *BC.Ctx), + *BC.Ctx, 0)); + ++OpNum; + } + } + } +} + namespace { #ifndef MAX_PATH @@ -2480,7 +2520,8 @@ void BinaryFunction::fixBranches() { assert(CondBranch && "conditional branch expected"); const auto *TSuccessor = BB->getConditionalSuccessor(true); const auto *FSuccessor = BB->getConditionalSuccessor(false); - if (NextBB && NextBB == TSuccessor) { + if (NextBB && NextBB == TSuccessor && + !BC.MIA->hasAnnotation(*CondBranch, "DoNotChangeTarget")) { std::swap(TSuccessor, FSuccessor); MIA->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx); 
BB->swapConditionalSuccessors(); @@ -2490,7 +2531,10 @@ void BinaryFunction::fixBranches() { if (TSuccessor == FSuccessor) { BB->removeDuplicateConditionalSuccessor(CondBranch); } - if (!NextBB || (NextBB != TSuccessor && NextBB != FSuccessor)) { + if (!NextBB || + ((NextBB != TSuccessor || + BC.MIA->hasAnnotation(*CondBranch, "DoNotChangeTarget")) && + NextBB != FSuccessor)) { BB->addBranchInstruction(FSuccessor); } } diff --git a/bolt/BinaryFunction.h b/bolt/BinaryFunction.h index 472890a6e327..7d755eff2b07 100644 --- a/bolt/BinaryFunction.h +++ b/bolt/BinaryFunction.h @@ -431,6 +431,7 @@ private: /// The address offset where we emitted the constant island, that is, the /// chunk of data in the function code area (AArch only) int64_t OutputDataOffset; + int64_t OutputColdDataOffset; /// Map labels to corresponding basic blocks. std::unordered_map LabelToBB; @@ -639,6 +640,7 @@ private: /// Offsets in function that are data values in a constant island identified /// after disassembling std::map IslandSymbols; + std::map ColdIslandSymbols; // Blocks are kept sorted in the layout order. If we need to change the // layout (if BasicBlocksLayout stores a different order than BasicBlocks), @@ -677,6 +679,7 @@ private: mutable MCSymbol *FunctionColdEndLabel{nullptr}; mutable MCSymbol *FunctionConstantIslandLabel{nullptr}; + mutable MCSymbol *FunctionColdConstantIslandLabel{nullptr}; /// Unique number associated with the function. uint64_t FunctionNumber; @@ -1137,6 +1140,14 @@ public: return FunctionConstantIslandLabel; } + MCSymbol *getFunctionColdConstantIslandLabel() const { + if (!FunctionColdConstantIslandLabel) { + FunctionColdConstantIslandLabel = + BC.Ctx->createTempSymbol("func_cold_const_island", true); + } + return FunctionColdConstantIslandLabel; + } + /// Return true if this is a function representing a PLT entry. 
bool isPLTFunction() const { return PLTSymbol != nullptr; @@ -1168,13 +1179,16 @@ public: case ELF::R_X86_64_64: case ELF::R_AARCH64_ABS64: case ELF::R_AARCH64_LDST64_ABS_LO12_NC: + case ELF::R_AARCH64_TLSDESC_LD64_LO12_NC: case ELF::R_AARCH64_LD64_GOT_LO12_NC: + case ELF::R_AARCH64_TLSDESC_ADD_LO12_NC: case ELF::R_AARCH64_ADD_ABS_LO12_NC: case ELF::R_AARCH64_LDST16_ABS_LO12_NC: case ELF::R_AARCH64_LDST32_ABS_LO12_NC: case ELF::R_AARCH64_LDST8_ABS_LO12_NC: case ELF::R_AARCH64_LDST128_ABS_LO12_NC: case ELF::R_AARCH64_ADR_GOT_PAGE: + case ELF::R_AARCH64_TLSDESC_ADR_PAGE21: case ELF::R_AARCH64_ADR_PREL_PG_HI21: Relocations.emplace(Offset, Relocation{Offset, Symbol, RelType, Addend, Value}); @@ -1186,6 +1200,7 @@ public: case ELF::R_X86_64_REX_GOTPCRELX: case ELF::R_AARCH64_JUMP26: case ELF::R_AARCH64_CALL26: + case ELF::R_AARCH64_TLSDESC_CALL: break; // The following relocations are ignored. @@ -1714,6 +1729,14 @@ public: return OutputDataOffset; } + void setOutputColdDataAddress(uint64_t Address) { + OutputColdDataOffset = Address; + } + + uint64_t getOutputColdDataAddress() const { + return OutputColdDataOffset; + } + /// Detects whether \p Address is inside a data region in this function /// (constant islands). bool isInConstantIsland(uint64_t Address) const { @@ -1969,7 +1992,12 @@ public: void emitBodyRaw(MCStreamer *Streamer); /// Helper for emitBody to write data inside a function (used for AArch64) - void emitConstantIslands(MCStreamer &Streamer); + void emitConstantIslands(MCStreamer &Streamer, bool EmitColdPart); + + /// Traverse cold basic blocks and replace references to constants in islands + /// with a proxy symbol for the duplicated constant island that is going to be + /// emitted in the cold region. + void duplicateConstantIslands(); /// Merge profile data of this function into those of the given /// function. 
The functions should have been proven identical with diff --git a/bolt/BinaryPassManager.cpp b/bolt/BinaryPassManager.cpp index 687c10497765..48d3d63dfc7c 100644 --- a/bolt/BinaryPassManager.cpp +++ b/bolt/BinaryPassManager.cpp @@ -447,6 +447,8 @@ void BinaryFunctionPassManager::runAllPasses( Manager.registerPass( llvm::make_unique(PrintAfterLowering)); + Manager.registerPass(llvm::make_unique(NeverPrint)); + Manager.runPasses(); } diff --git a/bolt/Passes/BinaryPasses.cpp b/bolt/Passes/BinaryPasses.cpp index b90cc0f10c52..03727de0f6d2 100644 --- a/bolt/Passes/BinaryPasses.cpp +++ b/bolt/Passes/BinaryPasses.cpp @@ -591,13 +591,6 @@ void FinalizeFunctions::runOnFunctions( auto &Function = It.second; const auto ShouldOptimize = shouldOptimize(Function); - // Strip all annotations. - for (auto &BB : Function) { - for (auto &Inst : BB) { - BC.MIA->removeAllAnnotations(Inst); - } - } - // Always fix functions in relocation mode. if (!BC.HasRelocations && !ShouldOptimize) continue; @@ -620,6 +613,22 @@ void FinalizeFunctions::runOnFunctions( } } +void StripAnnotations::runOnFunctions( + BinaryContext &BC, + std::map &BFs, + std::set & +) { + for (auto &It : BFs) { + auto &Function = It.second; + + for (auto &BB : Function) { + for (auto &Inst : BB) { + BC.MIA->removeAllAnnotations(Inst); + } + } + } +} + namespace { // This peephole fixes jump instructions that jump to another basic diff --git a/bolt/Passes/BinaryPasses.h b/bolt/Passes/BinaryPasses.h index 0ef8e9027d55..955e9cfe3f50 100644 --- a/bolt/Passes/BinaryPasses.h +++ b/bolt/Passes/BinaryPasses.h @@ -227,6 +227,20 @@ class FinalizeFunctions : public BinaryFunctionPass { std::set &LargeFunctions) override; }; +/// Strip all BOLT-related annotations before LLVM code emission +class StripAnnotations : public BinaryFunctionPass { + public: + explicit StripAnnotations(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) { } + + const char *getName() const override { + return "strip-annotations"; + } + void 
runOnFunctions(BinaryContext &BC, + std::map &BFs, + std::set &LargeFunctions) override; +}; + /// An optimization to simplify conditional tail calls by removing /// unnecessary branches. /// diff --git a/bolt/Passes/LongJmp.cpp b/bolt/Passes/LongJmp.cpp index a3d004649899..d6a60bbced17 100644 --- a/bolt/Passes/LongJmp.cpp +++ b/bolt/Passes/LongJmp.cpp @@ -82,9 +82,20 @@ LongJmpPass::replaceTargetWithStub(const BinaryContext &BC, BinaryBasicBlock::BinaryBranchInfo BI{0, 0}; auto *TgtBB = BB.getSuccessor(TgtSym, BI); - // Do not issue a long jmp for blocks in the same region - if (TgtBB && TgtBB->isCold() == BB.isCold()) - return nullptr; + // Do not issue a long jmp for blocks in the same region, except if + // the region is too large to fit in this branch + if (TgtBB && TgtBB->isCold() == BB.isCold()) { + // Suppose we have half the available space to account for increase in the + // function size due to extra blocks being inserted (conservative estimate) + auto BitsAvail = BC.MIA->getPCRelEncodingSize(Inst) - 2; + uint64_t Mask = ~((1ULL << BitsAvail) - 1); + if (!(Func.getMaxSize() & Mask)) + return nullptr; + // This is a special case for fixBranches, which is usually free to swap + // targets when a block has two successors. The other successor may not + // fit in this instruction as well. + BC.MIA->addAnnotation(BC.Ctx.get(), Inst, "DoNotChangeTarget", true); + } BinaryBasicBlock *StubBB = BB.isCold() ? 
ColdStubs[&Func][TgtSym] : HotStubs[&Func][TgtSym]; @@ -155,8 +166,12 @@ void LongJmpPass::insertStubs(const BinaryContext &BC, BinaryFunction &Func) { // Insert stubs close to the patched BB if call, but far away from the // hot path if a branch, since this branch target is the cold region BinaryBasicBlock *InsertionPoint = &BB; - if (!BC.MIA->isCall(Inst) && Frontier && !BB.isCold()) - InsertionPoint = Frontier; + if (!BC.MIA->isCall(Inst) && Frontier && !BB.isCold()) { + auto BitsAvail = BC.MIA->getPCRelEncodingSize(Inst) - 2; + uint64_t Mask = ~((1ULL << BitsAvail) - 1); + if (!(Func.getMaxSize() & Mask)) + InsertionPoint = Frontier; + } // Create a stub to handle a far-away target Insertions.emplace_back(std::make_pair( InsertionPoint, replaceTargetWithStub(BC, Func, BB, Inst))); @@ -190,12 +205,49 @@ void LongJmpPass::tentativeBBLayout(const BinaryContext &BC, } } +uint64_t LongJmpPass::tentativeLayoutRelocColdPart( + const BinaryContext &BC, std::vector &SortedFunctions, + uint64_t DotAddress) { + for (auto Func : SortedFunctions) { + if (!Func->isSplit()) + continue; + DotAddress = RoundUpToAlignment(DotAddress, BinaryFunction::MinAlign); + auto Pad = OffsetToAlignment(DotAddress, opts::AlignFunctions); + if (Pad <= opts::AlignFunctionsMaxBytes) + DotAddress += Pad; + ColdAddresses[Func] = DotAddress; + DEBUG(dbgs() << Func->getPrintName() << " cold tentative: " + << Twine::utohexstr(DotAddress) << "\n"); + DotAddress += Func->estimateColdSize(); + DotAddress += Func->estimateConstantIslandSize(); + } + return DotAddress; +} + uint64_t LongJmpPass::tentativeLayoutRelocMode( const BinaryContext &BC, std::vector &SortedFunctions, uint64_t DotAddress) { + // Compute hot cold frontier + uint32_t LastHotIndex = -1u; + uint32_t CurrentIndex = 0; + for (auto *BF : SortedFunctions) { + if (!BF->hasValidIndex() && LastHotIndex == -1u) { + LastHotIndex = CurrentIndex; + } + ++CurrentIndex; + } + // Hot + CurrentIndex = 0; + bool ColdLayoutDone = false; for (auto Func 
: SortedFunctions) { + if (!ColdLayoutDone && CurrentIndex >= LastHotIndex){ + DotAddress = + tentativeLayoutRelocColdPart(BC, SortedFunctions, DotAddress); + ColdLayoutDone = true; + } + DotAddress = RoundUpToAlignment(DotAddress, BinaryFunction::MinAlign); auto Pad = OffsetToAlignment(DotAddress, opts::AlignFunctions); if (Pad <= opts::AlignFunctionsMaxBytes) @@ -203,30 +255,17 @@ uint64_t LongJmpPass::tentativeLayoutRelocMode( HotAddresses[Func] = DotAddress; DEBUG(dbgs() << Func->getPrintName() << " tentative: " << Twine::utohexstr(DotAddress) << "\n"); - if (!Func->isSimple()) { - DotAddress += Func->getMaxSize(); - } else { - if (!Func->isSplit()) { - DotAddress += Func->estimateSize(); - } else { - DotAddress += Func->estimateHotSize(); - DotAddress += Func->estimateConstantIslandSize(); - } - } + if (!Func->isSplit()) + DotAddress += Func->estimateSize(); + else + DotAddress += Func->estimateHotSize(); + DotAddress += Func->estimateConstantIslandSize(); + ++CurrentIndex; } - // Cold - for (auto Func : SortedFunctions) { - DotAddress = RoundUpToAlignment(DotAddress, BinaryFunction::MinAlign); - auto Pad = OffsetToAlignment(DotAddress, opts::AlignFunctions); - if (Pad <= opts::AlignFunctionsMaxBytes) - DotAddress += Pad; - HotAddresses[Func] = Func->getAddress(); - DotAddress = RoundUpToAlignment(DotAddress, ColdFragAlign); - ColdAddresses[Func] = DotAddress; - if (Func->isSplit()) - DotAddress += Func->estimateColdSize(); + // BBs + for (auto Func : SortedFunctions) tentativeBBLayout(BC, *Func); - } + return DotAddress; } @@ -337,6 +376,30 @@ bool LongJmpPass::removeOrShrinkStubs(const BinaryContext &BC, continue; } + // Compute DoNotChangeTarget annotation, when fixBranches cannot swap + // targets + if (BC.MIA->isConditionalBranch(Inst) && BB.succ_size() == 2) { + auto *SuccBB = BB.getConditionalSuccessor(false); + bool IsStub = false; + auto Iter = Stubs.find(&Func); + if (Iter != Stubs.end()) + IsStub = Iter->second.count(SuccBB); + auto *RealTargetSym = 
+ IsStub ? BC.MIA->getTargetSymbol(*SuccBB->begin()) : nullptr; if (IsStub) SuccBB = Func.getBasicBlockForLabel(RealTargetSym); uint64_t Offset = getSymbolAddress(BC, RealTargetSym, SuccBB); auto BitsAvail = BC.MIA->getPCRelEncodingSize(Inst) - 1; uint64_t Mask = ~((1ULL << BitsAvail) - 1); if ((Offset & Mask) && !BC.MIA->hasAnnotation(Inst, "DoNotChangeTarget")) { BC.MIA->addAnnotation(BC.Ctx.get(), Inst, "DoNotChangeTarget", true); } else if ((!(Offset & Mask)) && BC.MIA->hasAnnotation(Inst, "DoNotChangeTarget")) { BC.MIA->removeAnnotation(Inst, "DoNotChangeTarget"); } } + auto StubSym = BC.MIA->getTargetSymbol(Inst); auto *StubBB = Func.getBasicBlockForLabel(StubSym); auto *RealTargetSym = BC.MIA->getTargetSymbol(*StubBB->begin()); diff --git a/bolt/Passes/LongJmp.h b/bolt/Passes/LongJmp.h index e54cc1ccb2b7..e771b6767076 100644 --- a/bolt/Passes/LongJmp.h +++ b/bolt/Passes/LongJmp.h @@ -83,6 +83,10 @@ class LongJmpPass : public BinaryFunctionPass { tentativeLayoutRelocMode(const BinaryContext &BC, std::vector &SortedFunctions, uint64_t DotAddress); + uint64_t + tentativeLayoutRelocColdPart(const BinaryContext &BC, + std::vector &SortedFunctions, + uint64_t DotAddress); void tentativeBBLayout(const BinaryContext &BC, const BinaryFunction &Func); /// Helper to identify whether \p Inst is branching to a stub bool usesStub(const BinaryContext &BC, const BinaryFunction &Func, const MCInst &Inst) const; /// Helper to resolve a symbol address according to our tentative layout uint64_t getSymbolAddress(const BinaryContext &BC, const MCSymbol *Target, const BinaryBasicBlock *TgtBB) const; - /// Change \p Inst to not use a stub anymore, back to its original form + + /// Change \p Inst to not use a stub anymore, back to its original form void removeStubRef(const BinaryContext &BC, BinaryBasicBlock *BB, MCInst &Inst, BinaryBasicBlock *StubBB, diff --git a/bolt/RewriteInstance.cpp b/bolt/RewriteInstance.cpp index 22ffe9f273c0..518b9e9df5c7 100644 --- 
a/bolt/RewriteInstance.cpp +++ b/bolt/RewriteInstance.cpp @@ -1846,7 +1846,10 @@ void RewriteInstance::readRelocations(const SectionRef &Section) { uint64_t RefFunctionOffset = 0; MCSymbol *ReferencedSymbol = nullptr; if (ForceRelocation) { - ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0); + if (Relocation::isGOT(Rel.getType())) + ReferencedSymbol = BC->getOrCreateGlobalSymbol(0, "Zero"); + else + ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0); Addend = Address; DEBUG(dbgs() << "BOLT-DEBUG: creating relocations for huge pages against" " symbol " << SymbolName << " with addend " << Addend @@ -2633,6 +2636,11 @@ void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { const auto ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol); Function.cold().setAddress(BaseAddress + ColdStartOffset); Function.cold().setImageSize(ColdEndOffset - ColdStartOffset); + if (Function.hasConstantIsland()) { + const auto DataOffset = Layout.getSymbolOffset( + *Function.getFunctionColdConstantIslandLabel()); + Function.setOutputColdDataAddress(BaseAddress + DataOffset); + } } } else { Function.setOutputAddress(Function.getAddress()); @@ -3379,6 +3387,25 @@ void RewriteInstance::patchELFSymTabs(ELFObjectFile *File) { Write(0, reinterpret_cast(&CodeMarkSym), sizeof(CodeMarkSym)); } + if (!PatchExisting && Function->hasConstantIsland() && + Function->isSplit()) { + auto DataMark = Function->getOutputColdDataAddress(); + auto CISize = Function->estimateConstantIslandSize(); + auto CodeMark = DataMark + CISize; + auto DataMarkSym = NewSymbol; + DataMarkSym.st_name = AddToStrTab("$d"); + DataMarkSym.st_value = DataMark; + DataMarkSym.st_size = 0; + DataMarkSym.setType(ELF::STT_NOTYPE); + DataMarkSym.setBinding(ELF::STB_LOCAL); + auto CodeMarkSym = DataMarkSym; + CodeMarkSym.st_name = AddToStrTab("$x"); + CodeMarkSym.st_value = CodeMark; + Write(0, reinterpret_cast(&DataMarkSym), + sizeof(DataMarkSym)); + Write(0, reinterpret_cast(&CodeMarkSym), + 
sizeof(CodeMarkSym)); + } } else { if (NewSymbol.st_shndx < ELF::SHN_LORESERVE) { NewSymbol.st_shndx = NewSectionIndex[NewSymbol.st_shndx];