Files
llvm/bolt/lib/Core/BinaryEmitter.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1218 lines
46 KiB
C++
Raw Normal View History

//===- bolt/Core/BinaryEmitter.cpp - Emit code and data -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the collection of functions and classes used for
// emission of code and data into object/binary file.
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/BinaryEmitter.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/DebugData.h"
#include "bolt/Core/FunctionLayout.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/Utils.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/SMLoc.h"
#define DEBUG_TYPE "bolt"
using namespace llvm;
using namespace bolt;
namespace opts {
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::opt<bool> PreserveBlocksAlignment;
cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
cl::cat(BoltOptCategory));
cl::opt<MacroFusionType>
AlignMacroOpFusion("align-macro-fusion",
cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
cl::init(MFT_HOT),
cl::values(clEnumValN(MFT_NONE, "none",
"do not insert alignment no-ops for macro-fusion"),
clEnumValN(MFT_HOT, "hot",
"only insert alignment no-ops on hot execution paths (default)"),
clEnumValN(MFT_ALL, "all",
"always align instructions to allow macro-fusion")),
cl::ZeroOrMore,
cl::cat(BoltRelocCategory));
static cl::list<std::string>
BreakFunctionNames("break-funcs",
cl::CommaSeparated,
cl::desc("list of functions to core dump on (debugging)"),
cl::value_desc("func1,func2,func3,..."),
cl::Hidden,
cl::cat(BoltCategory));
static cl::list<std::string>
FunctionPadSpec("pad-funcs",
cl::CommaSeparated,
cl::desc("list of functions to pad with amount of bytes"),
cl::value_desc("func1:pad1,func2:pad2,func3:pad3,..."),
cl::Hidden,
cl::cat(BoltCategory));
static cl::opt<bool> MarkFuncs(
"mark-funcs",
cl::desc("mark function boundaries with break instruction to make "
"sure we accidentally don't cross them"),
cl::ReallyHidden, cl::cat(BoltCategory));
static cl::opt<bool> PrintJumpTables("print-jump-tables",
cl::desc("print jump tables"), cl::Hidden,
cl::cat(BoltCategory));
static cl::opt<bool>
X86AlignBranchBoundaryHotOnly("x86-align-branch-boundary-hot-only",
cl::desc("only apply branch boundary alignment in hot code"),
cl::init(true),
cl::cat(BoltOptCategory));
size_t padFunction(const BinaryFunction &Function) {
static std::map<std::string, size_t> FunctionPadding;
if (FunctionPadding.empty() && !FunctionPadSpec.empty()) {
for (std::string &Spec : FunctionPadSpec) {
size_t N = Spec.find(':');
if (N == std::string::npos)
continue;
std::string Name = Spec.substr(0, N);
size_t Padding = std::stoull(Spec.substr(N + 1));
FunctionPadding[Name] = Padding;
}
}
for (auto &FPI : FunctionPadding) {
std::string Name = FPI.first;
size_t Padding = FPI.second;
if (Function.hasNameRegex(Name))
return Padding;
}
return 0;
}
} // namespace opts
namespace {
using JumpTable = bolt::JumpTable;
class BinaryEmitter {
private:
BinaryEmitter(const BinaryEmitter &) = delete;
BinaryEmitter &operator=(const BinaryEmitter &) = delete;
MCStreamer &Streamer;
BinaryContext &BC;
public:
BinaryEmitter(MCStreamer &Streamer, BinaryContext &BC)
: Streamer(Streamer), BC(BC) {}
/// Emit all code and data.
void emitAll(StringRef OrgSecPrefix);
/// Emit function code. The caller is responsible for emitting function
/// symbol(s) and setting the section to emit the code to.
void emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
bool EmitCodeOnly = false);
private:
/// Emit function code.
void emitFunctions();
/// Emit a single function.
bool emitFunction(BinaryFunction &BF, FunctionFragment &FF);
/// Helper for emitFunctionBody to write data inside a function
/// (used for AArch64)
void emitConstantIslands(BinaryFunction &BF, bool EmitColdPart,
BinaryFunction *OnBehalfOf = nullptr);
/// Emit jump tables for the function.
void emitJumpTables(const BinaryFunction &BF);
/// Emit jump table data. Callee supplies sections for the data.
void emitJumpTable(const JumpTable &JT, MCSection *HotSection,
MCSection *ColdSection);
void emitCFIInstruction(const MCCFIInstruction &Inst) const;
/// Emit exception handling ranges for the function.
void emitLSDA(BinaryFunction &BF, const FunctionFragment &FF);
/// Emit line number information corresponding to \p NewLoc. \p PrevLoc
/// provides a context for de-duplication of line number info.
/// \p FirstInstr indicates if \p NewLoc represents the first instruction
/// in a sequence, such as a function fragment.
///
/// If \p NewLoc location matches \p PrevLoc, no new line number entry will be
/// created and the function will return \p PrevLoc while \p InstrLabel will
/// be ignored. Otherwise, the caller should use \p InstrLabel to mark the
/// corresponding instruction by emitting \p InstrLabel before it.
/// If \p InstrLabel is set by the caller, its value will be used with \p
/// \p NewLoc. If it was nullptr on entry, it will be populated with a pointer
/// to a new temp symbol used with \p NewLoc.
///
/// Return new current location which is either \p NewLoc or \p PrevLoc.
SMLoc emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, SMLoc PrevLoc,
bool FirstInstr, MCSymbol *&InstrLabel);
/// Use \p FunctionEndSymbol to mark the end of the line info sequence.
/// Note that it does not automatically result in the insertion of the EOS
/// marker in the line table program, but provides one to the DWARF generator
/// when it needs it.
void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol);
/// Emit debug line info for unprocessed functions from CUs that include
/// emitted functions.
void emitDebugLineInfoForOriginalFunctions();
/// Emit debug line for CUs that were not modified.
void emitDebugLineInfoForUnprocessedCUs();
/// Emit data sections that have code references in them.
void emitDataSections(StringRef OrgSecPrefix);
};
} // anonymous namespace
void BinaryEmitter::emitAll(StringRef OrgSecPrefix) {
Streamer.initSections(false, *BC.STI);
if (opts::UpdateDebugSections && BC.isELF()) {
// Force the emission of debug line info into allocatable section to ensure
[BOLT] Move from RuntimeDyld to JITLink RuntimeDyld has been deprecated in favor of JITLink. [1] This patch replaces all uses of RuntimeDyld in BOLT with JITLink. Care has been taken to minimize the impact on the code structure in order to ease the inspection of this (rather large) changeset. Since BOLT relied on the RuntimeDyld API in multiple places, this wasn't always possible though and I'll explain the changes in code structure first. Design note: BOLT uses a JIT linker to perform what essentially is static linking. No linked code is ever executed; the result of linking is simply written back to an executable file. For this reason, I restricted myself to the use of the core JITLink library and avoided ORC as much as possible. RuntimeDyld contains methods for loading objects (loadObject) and symbol lookup (getSymbol). Since JITLink doesn't provide a class with a similar interface, the BOLTLinker abstract class was added to implement it. It was added to Core since both the Rewrite and RuntimeLibs libraries make use of it. Wherever a RuntimeDyld object was used before, it was replaced with a BOLTLinker object. There is one major difference between the RuntimeDyld and BOLTLinker interfaces: in JITLink, section allocation and the application of fixups (relocation) happens in a single call (jitlink::link). That is, there is no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld. BOLT used to remap sections between allocating (loadObject) and linking them (finalizeWithMemoryManagerLocking). This doesn't work anymore with JITLink. Instead, BOLTLinker::loadObject accepts a callback that is called before fixups are applied which is used to remap sections. The actual implementation of the BOLTLinker interface lives in the JITLinkLinker class in the Rewrite library. It's the only part of the BOLT code that should directly interact with the JITLink API. For loading object, JITLinkLinker first creates a LinkGraph (jitlink::createLinkGraphFromObject) and then links it (jitlink::link). For the latter, it uses a custom JITLinkContext with the following properties: - Use BOLT's ExecutableFileMemoryManager. This one was updated to implement the JITLinkMemoryManager interface. Since BOLT never executes code, its finalization step is a no-op. - Pass config: don't use the default target passes since they modify DWARF sections in a way that seems incompatible with BOLT. Also run a custom pre-prune pass that makes sure sections without symbols are not pruned by JITLink. - Implement symbol lookup. This used to be implemented by BOLTSymbolResolver. - Call the section mapper callback before the final linking step. - Copy symbol values when the LinkGraph is resolved. Symbols are stored inside JITLinkLinker to ensure that later objects (i.e., instrumentation libraries) can find them. This functionality used to be provided by RuntimeDyld but I did not find a way to use JITLink directly for this. Some more minor points of interest: - BinarySection::SectionID: JITLink doesn't have something equivalent to RuntimeDyld's Section IDs. Instead, sections can only be referred to by name. Hence, SectionID was updated to a string. - There seem to be no tests for Mach-O. I've tested a small hello-world style binary but not more than that. - On Mach-O, JITLink "normalizes" section names to include the segment name. I had to parse the section name back from this manually which feels slightly hacky. [1] https://reviews.llvm.org/D145686#4222642 Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
// JITLink will process it.
//
// NB: on MachO all sections are required for execution, hence no need
// to change flags/attributes.
MCSectionELF *ELFDwarfLineSection =
static_cast<MCSectionELF *>(BC.MOFI->getDwarfLineSection());
ELFDwarfLineSection->setFlags(ELF::SHF_ALLOC);
[BOLT] Move from RuntimeDyld to JITLink RuntimeDyld has been deprecated in favor of JITLink. [1] This patch replaces all uses of RuntimeDyld in BOLT with JITLink. Care has been taken to minimize the impact on the code structure in order to ease the inspection of this (rather large) changeset. Since BOLT relied on the RuntimeDyld API in multiple places, this wasn't always possible though and I'll explain the changes in code structure first. Design note: BOLT uses a JIT linker to perform what essentially is static linking. No linked code is ever executed; the result of linking is simply written back to an executable file. For this reason, I restricted myself to the use of the core JITLink library and avoided ORC as much as possible. RuntimeDyld contains methods for loading objects (loadObject) and symbol lookup (getSymbol). Since JITLink doesn't provide a class with a similar interface, the BOLTLinker abstract class was added to implement it. It was added to Core since both the Rewrite and RuntimeLibs libraries make use of it. Wherever a RuntimeDyld object was used before, it was replaced with a BOLTLinker object. There is one major difference between the RuntimeDyld and BOLTLinker interfaces: in JITLink, section allocation and the application of fixups (relocation) happens in a single call (jitlink::link). That is, there is no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld. BOLT used to remap sections between allocating (loadObject) and linking them (finalizeWithMemoryManagerLocking). This doesn't work anymore with JITLink. Instead, BOLTLinker::loadObject accepts a callback that is called before fixups are applied which is used to remap sections. The actual implementation of the BOLTLinker interface lives in the JITLinkLinker class in the Rewrite library. It's the only part of the BOLT code that should directly interact with the JITLink API. For loading object, JITLinkLinker first creates a LinkGraph (jitlink::createLinkGraphFromObject) and then links it (jitlink::link). For the latter, it uses a custom JITLinkContext with the following properties: - Use BOLT's ExecutableFileMemoryManager. This one was updated to implement the JITLinkMemoryManager interface. Since BOLT never executes code, its finalization step is a no-op. - Pass config: don't use the default target passes since they modify DWARF sections in a way that seems incompatible with BOLT. Also run a custom pre-prune pass that makes sure sections without symbols are not pruned by JITLink. - Implement symbol lookup. This used to be implemented by BOLTSymbolResolver. - Call the section mapper callback before the final linking step. - Copy symbol values when the LinkGraph is resolved. Symbols are stored inside JITLinkLinker to ensure that later objects (i.e., instrumentation libraries) can find them. This functionality used to be provided by RuntimeDyld but I did not find a way to use JITLink directly for this. Some more minor points of interest: - BinarySection::SectionID: JITLink doesn't have something equivalent to RuntimeDyld's Section IDs. Instead, sections can only be referred to by name. Hence, SectionID was updated to a string. - There seem to be no tests for Mach-O. I've tested a small hello-world style binary but not more than that. - On Mach-O, JITLink "normalizes" section names to include the segment name. I had to parse the section name back from this manually which feels slightly hacky. [1] https://reviews.llvm.org/D145686#4222642 Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
MCSectionELF *ELFDwarfLineStrSection =
static_cast<MCSectionELF *>(BC.MOFI->getDwarfLineStrSection());
ELFDwarfLineStrSection->setFlags(ELF::SHF_ALLOC);
}
if (RuntimeLibrary *RtLibrary = BC.getRuntimeLibrary())
Refactor runtime library Summary: As we are adding more types of runtime libraries, it would be better to move the runtime library out of RewriteInstance so that it could grow separately. This also requires splitting the current implementation of Instrumentation.cpp to two separate pieces, one as normal Pass, one as the runtime library. The Instrumentation Pass would pass over the generated data to the runtime library, which will use to emit binary and perform linking. This patch does the following: 1. Turn Instrumentation class into an optimization pass. Register the pass in the pass manager instead of in RewriteInstance. 2. Split all the data that are generated by Instrumentation that's needed by runtime library into a separate data structure called InstrumentationSummary. At the creation of Instrumentation pass, we create an instance of such data structure, which will be moved over to the runtime at the end of the pass. 3. Added a runtime library member to BinaryContext. Set the member at the end of Instrumentation pass. 4. In BinaryEmitter, make BinaryContext to also emit runtime library binary. 5. Created a base class RuntimeLibrary, that defines the interface of a runtime library, along with a few common helper functions. 6. Created InstrumentationRuntimeLibrary which inherits from RuntimeLibrary, that does all the work (mostly copied over) for emit and linking. 7. Added a new directory called RuntimeLibs, and put all the runtime library related files into it. (cherry picked from FBD21694762)
2020-05-21 14:28:47 -07:00
RtLibrary->emitBinary(BC, Streamer);
BC.getTextSection()->setAlignment(Align(opts::AlignText));
emitFunctions();
if (opts::UpdateDebugSections) {
emitDebugLineInfoForOriginalFunctions();
DwarfLineTable::emit(BC, Streamer);
}
emitDataSections(OrgSecPrefix);
// TODO Enable for Mach-O once BinaryContext::getDataSection supports it.
if (BC.isELF())
AddressMap::emit(Streamer, BC);
}
void BinaryEmitter::emitFunctions() {
auto emit = [&](const std::vector<BinaryFunction *> &Functions) {
const bool HasProfile = BC.NumProfiledFuncs > 0;
const bool OriginalAllowAutoPadding = Streamer.getAllowAutoPadding();
for (BinaryFunction *Function : Functions) {
if (!BC.shouldEmit(*Function))
continue;
LLVM_DEBUG(dbgs() << "BOLT: generating code for function \"" << *Function
<< "\" : " << Function->getFunctionNumber() << '\n');
// Was any part of the function emitted.
bool Emitted = false;
// Turn off Intel JCC Erratum mitigation for cold code if requested
if (HasProfile && opts::X86AlignBranchBoundaryHotOnly &&
!Function->hasValidProfile())
Streamer.setAllowAutoPadding(false);
FunctionLayout &Layout = Function->getLayout();
Emitted |= emitFunction(*Function, Layout.getMainFragment());
if (Function->isSplit()) {
if (opts::X86AlignBranchBoundaryHotOnly)
Streamer.setAllowAutoPadding(false);
assert((Layout.fragment_size() == 1 || Function->isSimple()) &&
"Only simple functions can have fragments");
for (FunctionFragment &FF : Layout.getSplitFragments()) {
// Skip empty fragments so no symbols and sections for empty fragments
// are generated
if (FF.empty() && !Function->hasConstantIsland())
continue;
Emitted |= emitFunction(*Function, FF);
}
}
Streamer.setAllowAutoPadding(OriginalAllowAutoPadding);
if (Emitted)
Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics);
}
};
// Mark the start of hot text.
if (opts::HotText) {
Streamer.switchSection(BC.getTextSection());
Streamer.emitLabel(BC.getHotTextStartSymbol());
}
// Emit functions in sorted order.
std::vector<BinaryFunction *> SortedFunctions = BC.getSortedFunctions();
emit(SortedFunctions);
// Emit functions added by BOLT.
emit(BC.getInjectedBinaryFunctions());
// Mark the end of hot text.
if (opts::HotText) {
if (BC.HasWarmSection)
Streamer.switchSection(BC.getCodeSection(BC.getWarmCodeSectionName()));
else
Streamer.switchSection(BC.getTextSection());
Streamer.emitLabel(BC.getHotTextEndSymbol());
}
}
bool BinaryEmitter::emitFunction(BinaryFunction &Function,
FunctionFragment &FF) {
if (Function.size() == 0 && !Function.hasIslandsInfo())
return false;
if (Function.getState() == BinaryFunction::State::Empty)
return false;
// Avoid emitting function without instructions when overwriting the original
// function in-place. Otherwise, emit the empty function to define the symbol.
if (!BC.HasRelocations && !Function.hasNonPseudoInstructions())
return false;
MCSection *Section =
BC.getCodeSection(Function.getCodeSectionName(FF.getFragmentNum()));
Streamer.switchSection(Section);
Section->setHasInstructions(true);
BC.Ctx->addGenDwarfSection(Section);
if (BC.HasRelocations) {
// Set section alignment to at least maximum possible object alignment.
// We need this to support LongJmp and other passes that calculates
// tentative layout.
Section->ensureMinAlignment(Align(opts::AlignFunctions));
Streamer.emitCodeAlignment(Function.getMinAlign(), &*BC.STI);
uint16_t MaxAlignBytes = FF.isSplitFragment()
? Function.getMaxColdAlignmentBytes()
: Function.getMaxAlignmentBytes();
if (MaxAlignBytes > 0)
Streamer.emitCodeAlignment(Function.getAlign(), &*BC.STI, MaxAlignBytes);
} else {
Streamer.emitCodeAlignment(Function.getAlign(), &*BC.STI);
}
MCContext &Context = Streamer.getContext();
const MCAsmInfo *MAI = Context.getAsmInfo();
MCSymbol *const StartSymbol = Function.getSymbol(FF.getFragmentNum());
// Emit all symbols associated with the main function entry.
if (FF.isMainFragment()) {
for (MCSymbol *Symbol : Function.getSymbols()) {
Streamer.emitSymbolAttribute(Symbol, MCSA_ELF_TypeFunction);
Streamer.emitLabel(Symbol);
}
} else {
Streamer.emitSymbolAttribute(StartSymbol, MCSA_ELF_TypeFunction);
Streamer.emitLabel(StartSymbol);
}
// Emit CFI start
if (Function.hasCFI()) {
Streamer.emitCFIStartProc(/*IsSimple=*/false);
if (Function.getPersonalityFunction() != nullptr)
Streamer.emitCFIPersonality(Function.getPersonalityFunction(),
Function.getPersonalityEncoding());
MCSymbol *LSDASymbol = Function.getLSDASymbol(FF.getFragmentNum());
if (LSDASymbol)
Streamer.emitCFILsda(LSDASymbol, BC.LSDAEncoding);
else
Streamer.emitCFILsda(0, dwarf::DW_EH_PE_omit);
// Emit CFI instructions relative to the CIE
for (const MCCFIInstruction &CFIInstr : Function.cie()) {
// Only write CIE CFI insns that LLVM will not already emit
const std::vector<MCCFIInstruction> &FrameInstrs =
MAI->getInitialFrameState();
if (!llvm::is_contained(FrameInstrs, CFIInstr))
emitCFIInstruction(CFIInstr);
}
}
assert((Function.empty() || !(*Function.begin()).isCold()) &&
"first basic block should never be cold");
// Emit UD2 at the beginning if requested by user.
if (!opts::BreakFunctionNames.empty()) {
for (std::string &Name : opts::BreakFunctionNames) {
if (Function.hasNameRegex(Name)) {
Streamer.emitIntValue(0x0B0F, 2); // UD2: 0F 0B
break;
}
}
}
// Emit code.
emitFunctionBody(Function, FF, /*EmitCodeOnly=*/false);
// Emit padding if requested.
if (size_t Padding = opts::padFunction(Function)) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: padding function " << Function << " with "
<< Padding << " bytes\n");
Streamer.emitFill(Padding, MAI->getTextAlignFillValue());
}
if (opts::MarkFuncs)
Streamer.emitBytes(BC.MIB->getTrapFillValue());
// Emit CFI end
if (Function.hasCFI())
Streamer.emitCFIEndProc();
MCSymbol *EndSymbol = Function.getFunctionEndLabel(FF.getFragmentNum());
Streamer.emitLabel(EndSymbol);
if (MAI->hasDotTypeDotSizeDirective()) {
const MCExpr *SizeExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(EndSymbol, Context),
MCSymbolRefExpr::create(StartSymbol, Context), Context);
Streamer.emitELFSize(StartSymbol, SizeExpr);
}
if (opts::UpdateDebugSections && Function.getDWARFUnit())
emitLineInfoEnd(Function, EndSymbol);
// Exception handling info for the function.
emitLSDA(Function, FF);
if (FF.isMainFragment() && opts::JumpTables > JTS_NONE)
emitJumpTables(Function);
return true;
}
void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
bool EmitCodeOnly) {
if (!EmitCodeOnly && FF.isSplitFragment() && BF.hasConstantIsland()) {
assert(BF.getLayout().isHotColdSplit() &&
"Constant island support only with hot/cold split");
BF.duplicateConstantIslands();
}
if (!FF.empty() && FF.front()->isLandingPad()) {
assert(!FF.front()->isEntryPoint() &&
"Landing pad cannot be entry point of function");
// If the first block of the fragment is a landing pad, it's offset from the
// start of the area that the corresponding LSDA describes is zero. In this
// case, the call site entries in that LSDA have 0 as offset to the landing
// pad, which the runtime interprets as "no handler". To prevent this,
// insert some padding.
Streamer.emitBytes(BC.MIB->getTrapFillValue());
}
// Track the first emitted instruction with debug info.
bool FirstInstr = true;
for (BinaryBasicBlock *const BB : FF) {
if ((opts::AlignBlocks || opts::PreserveBlocksAlignment) &&
BB->getAlignment() > 1)
Streamer.emitCodeAlignment(BB->getAlign(), &*BC.STI,
BB->getAlignmentMaxBytes());
Streamer.emitLabel(BB->getLabel());
if (!EmitCodeOnly) {
if (MCSymbol *EntrySymbol = BF.getSecondaryEntryPointSymbol(*BB))
Streamer.emitLabel(EntrySymbol);
[BOLT] Change symbol handling for secondary function entries Summary: Some functions could be called at an address inside their function body. Typically, these functions are written in assembly as C/C++ does not have a multi-entry function concept. The addresses inside a function body that could be referenced from outside are called secondary entry points. In BOLT we support processing functions with secondary/multiple entry points. We used to mark basic blocks representing those entry points with a special flag. There was only one problem - each basic block has exactly one MCSymbol associated with it, and for the most efficient processing we prefer that symbol to be local/temporary. However, in certain scenarios, e.g. when running in non-relocation mode, we need the entry symbol to be global/non-temporary. We could create global symbols for secondary points ahead of time when the entry point is marked in the symbol table. But not all such entries are properly marked. This means that potentially we could discover an entry point only after disassembling the code that references it, and it could happen after a local label was already created at the same location together with all its references. Replacing the local symbol and updating the references turned out to be an error-prone process. This diff takes a different approach. All basic blocks are created with permanently local symbols. Whenever there's a need to add a secondary entry point, we create an extra global symbol or use an existing one at that location. Containing BinaryFunction maps a local symbol of a basic block to the global symbol representing a secondary entry point. This way we can tell if the basic block is a secondary entry point, and we emit both symbols for all secondary entry points. Since secondary entry points are quite rare, the overhead of this approach is minimal. Note that the same location could be referenced via local symbol from inside a function and via global entry point symbol from outside. This is true for both primary and secondary entry points. (cherry picked from FBD21150193)
2020-04-19 22:29:54 -07:00
}
// Check if special alignment for macro-fusion is needed.
bool MayNeedMacroFusionAlignment =
(opts::AlignMacroOpFusion == MFT_ALL) ||
(opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
BinaryBasicBlock::const_iterator MacroFusionPair;
if (MayNeedMacroFusionAlignment) {
MacroFusionPair = BB->getMacroOpFusionPair();
if (MacroFusionPair == BB->end())
MayNeedMacroFusionAlignment = false;
}
SMLoc LastLocSeen;
// Remember if the last instruction emitted was a prefix.
bool LastIsPrefix = false;
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
MCInst &Instr = *I;
if (EmitCodeOnly && BC.MIB->isPseudo(Instr))
continue;
// Handle pseudo instructions.
if (BC.MIB->isCFI(Instr)) {
emitCFIInstruction(*BF.getCFIFor(Instr));
continue;
}
// Handle macro-fusion alignment. If we emitted a prefix as
// the last instruction, we should've already emitted the associated
// alignment hint, so don't emit it twice.
if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
I == MacroFusionPair) {
// This assumes the second instruction in the macro-op pair will get
// assigned to its own MCRelaxableFragment. Since all JCC instructions
// are relaxable, we should be safe.
}
if (!EmitCodeOnly) {
// A symbol to be emitted before the instruction to mark its location.
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
if (opts::UpdateDebugSections && BF.getDWARFUnit()) {
LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen,
FirstInstr, InstrLabel);
FirstInstr = false;
}
// Prepare to tag this location with a label if we need to keep track of
// the location of calls/returns for BOLT address translation maps
if (BF.requiresAddressTranslation() && BC.MIB->getOffset(Instr)) {
const uint32_t Offset = *BC.MIB->getOffset(Instr);
if (!InstrLabel)
InstrLabel = BC.Ctx->createTempSymbol();
BB->getLocSyms().emplace_back(Offset, InstrLabel);
}
if (InstrLabel)
Streamer.emitLabel(InstrLabel);
}
[BOLT] Improve handling of relocations targeting specific instructions (#66395) On RISC-V, there are certain relocations that target a specific instruction instead of a more abstract location like a function or basic block. Take the following example that loads a value from symbol `foo`: ``` nop 1: auipc t0, %pcrel_hi(foo) ld t0, %pcrel_lo(1b)(t0) ``` This results in two relocation: - auipc: `R_RISCV_PCREL_HI20` referencing `foo`; - ld: `R_RISCV_PCREL_LO12_I` referencing to local label `1` which points to the auipc instruction. It is of utmost importance that the `R_RISCV_PCREL_LO12_I` keeps referring to the auipc instruction; if not, the program will fail to assemble. However, BOLT currently does not guarantee this. BOLT currently assumes that all local symbols are jump targets and always starts a new basic block at symbol locations. The example above results in a CFG the looks like this: ``` .BB0: nop .BB1: auipc t0, %pcrel_hi(foo) ld t0, %pcrel_lo(.BB1)(t0) ``` While this currently works (i.e., the `R_RISCV_PCREL_LO12_I` relocation points to the correct instruction), it has two downsides: - Too many basic blocks are created (the example above is logically only one yet two are created); - If instructions are inserted in `.BB1` (e.g., by instrumentation), things will break since the label will not point to the auipc anymore. This patch proposes to fix this issue by teaching BOLT to track labels that should always point to a specific instruction. This is implemented as follows: - Add a new annotation type (`kLabel`) that allows us to annotate instructions with an `MCSymbol *`; - Whenever we encounter a relocation type that is used to refer to a specific instruction (`Relocation::isInstructionReference`), we register it without a symbol; - During disassembly, whenever we encounter an instruction with such a relocation, create a symbol for its target and store it in an offset to symbol map (to ensure multiple relocations referencing the same instruction use the same label); - After disassembly, iterate this map to attach labels to instructions via the new annotation type; - During emission, emit these labels right before the instruction. I believe the use of annotations works quite well for this use case as it allows us to reliably track instruction labels. If we were to store them as offsets in basic blocks, it would be error prone to keep them updated whenever instructions are inserted or removed. I have chosen to add labels as first-class annotations (as opposed to a generic one) because the documentation of `MCAnnotation` suggests that generic annotations are to be used for optional metadata that can be discarded without affecting correctness. As this is not the case for labels, a first-class annotation seemed more appropriate.
2023-10-06 06:46:16 +00:00
// Emit sized NOPs via MCAsmBackend::writeNopData() interface on x86.
// This is a workaround for invalid NOPs handling by asm/disasm layer.
if (BC.MIB->isNoop(Instr) && BC.isX86()) {
if (std::optional<uint32_t> Size = BC.MIB->getSize(Instr)) {
SmallString<15> Code;
raw_svector_ostream VecOS(Code);
BC.MAB->writeNopData(VecOS, *Size, BC.STI.get());
Streamer.emitBytes(Code);
continue;
}
}
Streamer.emitInstruction(Instr, *BC.STI);
LastIsPrefix = BC.MIB->isPrefix(Instr);
}
}
if (!EmitCodeOnly)
emitConstantIslands(BF, FF.isSplitFragment());
}
void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart,
BinaryFunction *OnBehalfOf) {
if (!BF.hasIslandsInfo())
return;
BinaryFunction::IslandInfo &Islands = BF.getIslandInfo();
if (Islands.DataOffsets.empty() && Islands.Dependency.empty())
return;
// AArch64 requires CI to be aligned to 8 bytes due to access instructions
// restrictions. E.g. the ldr with imm, where imm must be aligned to 8 bytes.
const uint16_t Alignment = OnBehalfOf
? OnBehalfOf->getConstantIslandAlignment()
: BF.getConstantIslandAlignment();
Streamer.emitCodeAlignment(Align(Alignment), &*BC.STI);
if (!OnBehalfOf) {
if (!EmitColdPart)
Streamer.emitLabel(BF.getFunctionConstantIslandLabel());
else
Streamer.emitLabel(BF.getFunctionColdConstantIslandLabel());
}
assert((!OnBehalfOf || Islands.Proxies[OnBehalfOf].size() > 0) &&
"spurious OnBehalfOf constant island emission");
assert(!BF.isInjected() &&
"injected functions should not have constant islands");
// Raw contents of the function.
StringRef SectionContents = BF.getOriginSection()->getContents();
// Raw contents of the function.
StringRef FunctionContents = SectionContents.substr(
BF.getAddress() - BF.getOriginSection()->getAddress(), BF.getMaxSize());
if (opts::Verbosity && !OnBehalfOf)
[BOLT][NFC] Log through JournalingStreams (#81524) Make core BOLT functionality more friendly to being used as a library instead of in our standalone driver llvm-bolt. To accomplish this, we augment BinaryContext with journaling streams that are to be used by most BOLT code whenever something needs to be logged to the screen. Users of the library can decide if logs should be printed to a file, no file or to the screen, as before. To illustrate this, this patch adds a new option `--log-file` that allows the user to redirect BOLT logging to a file on disk or completely hide it by using `--log-file=/dev/null`. Future BOLT code should now use `BinaryContext::outs()` for printing important messages instead of `llvm::outs()`. A new test log.test enforces this by verifying that no strings are print to screen once the `--log-file` option is used. In previous patches we also added a new BOLTError class to report common and fatal errors, so code shouldn't call exit(1) now. To easily handle problems as before (by quitting with exit(1)), callers can now use `BinaryContext::logBOLTErrorsAndQuitOnFatal(Error)` whenever code needs to deal with BOLT errors. To test this, we have fatal.s that checks we are correctly quitting and printing a fatal error to the screen. Because this is a significant change by itself, not all code was yet ported. Code from Profiler libs (DataAggregator and friends) still print errors directly to screen. Co-authored-by: Rafael Auler <rafaelauler@fb.com> Test Plan: NFC
2024-02-12 14:53:53 -08:00
BC.outs() << "BOLT-INFO: emitting constant island for function " << BF
<< "\n";
// We split the island into smaller blocks and output labels between them.
auto IS = Islands.Offsets.begin();
for (auto DataIter = Islands.DataOffsets.begin();
DataIter != Islands.DataOffsets.end(); ++DataIter) {
uint64_t FunctionOffset = *DataIter;
uint64_t EndOffset = 0ULL;
// Determine size of this data chunk
auto NextData = std::next(DataIter);
auto CodeIter = Islands.CodeOffsets.lower_bound(*DataIter);
if (CodeIter == Islands.CodeOffsets.end() &&
NextData == Islands.DataOffsets.end())
EndOffset = BF.getMaxSize();
else if (CodeIter == Islands.CodeOffsets.end())
EndOffset = *NextData;
else if (NextData == Islands.DataOffsets.end())
EndOffset = *CodeIter;
else
EndOffset = (*CodeIter > *NextData) ? *NextData : *CodeIter;
if (FunctionOffset == EndOffset)
continue; // Size is zero, nothing to emit
auto emitCI = [&](uint64_t &FunctionOffset, uint64_t EndOffset) {
if (FunctionOffset >= EndOffset)
return;
for (auto It = Islands.Relocations.lower_bound(FunctionOffset);
It != Islands.Relocations.end(); ++It) {
if (It->first >= EndOffset)
break;
const Relocation &Relocation = It->second;
if (FunctionOffset < Relocation.Offset) {
Streamer.emitBytes(
FunctionContents.slice(FunctionOffset, Relocation.Offset));
FunctionOffset = Relocation.Offset;
}
LLVM_DEBUG(
dbgs() << "BOLT-DEBUG: emitting constant island relocation"
<< " for " << BF << " at offset 0x"
<< Twine::utohexstr(Relocation.Offset) << " with size "
<< Relocation::getSizeForType(Relocation.Type) << '\n');
FunctionOffset += Relocation.emit(&Streamer);
}
assert(FunctionOffset <= EndOffset && "overflow error");
if (FunctionOffset < EndOffset) {
Streamer.emitBytes(FunctionContents.slice(FunctionOffset, EndOffset));
FunctionOffset = EndOffset;
}
};
// Emit labels, relocs and data
while (IS != Islands.Offsets.end() && IS->first < EndOffset) {
auto NextLabelOffset =
IS == Islands.Offsets.end() ? EndOffset : IS->first;
auto NextStop = std::min(NextLabelOffset, EndOffset);
assert(NextStop <= EndOffset && "internal overflow error");
emitCI(FunctionOffset, NextStop);
if (IS != Islands.Offsets.end() && FunctionOffset == IS->first) {
// This is a slightly complex code to decide which label to emit. We
// have 4 cases to handle: regular symbol, cold symbol, regular or cold
// symbol being emitted on behalf of an external function.
if (!OnBehalfOf) {
if (!EmitColdPart) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label "
<< IS->second->getName() << " at offset 0x"
<< Twine::utohexstr(IS->first) << '\n');
if (IS->second->isUndefined())
Streamer.emitLabel(IS->second);
else
assert(BF.hasName(std::string(IS->second->getName())));
} else if (Islands.ColdSymbols.count(IS->second) != 0) {
LLVM_DEBUG(dbgs()
<< "BOLT-DEBUG: emitted label "
<< Islands.ColdSymbols[IS->second]->getName() << '\n');
if (Islands.ColdSymbols[IS->second]->isUndefined())
Streamer.emitLabel(Islands.ColdSymbols[IS->second]);
}
} else {
if (!EmitColdPart) {
if (MCSymbol *Sym = Islands.Proxies[OnBehalfOf][IS->second]) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label "
<< Sym->getName() << '\n');
Streamer.emitLabel(Sym);
}
} else if (MCSymbol *Sym =
Islands.ColdProxies[OnBehalfOf][IS->second]) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << Sym->getName()
<< '\n');
Streamer.emitLabel(Sym);
}
}
++IS;
}
}
assert(FunctionOffset <= EndOffset && "overflow error");
emitCI(FunctionOffset, EndOffset);
}
assert(IS == Islands.Offsets.end() && "some symbols were not emitted!");
if (OnBehalfOf)
return;
// Now emit constant islands from other functions that we may have used in
// this function.
for (BinaryFunction *ExternalFunc : Islands.Dependency)
emitConstantIslands(*ExternalFunc, EmitColdPart, &BF);
}
SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc,
SMLoc PrevLoc, bool FirstInstr,
MCSymbol *&InstrLabel) {
DWARFUnit *FunctionCU = BF.getDWARFUnit();
const DWARFDebugLine::LineTable *FunctionLineTable = BF.getDWARFLineTable();
assert(FunctionCU && "cannot emit line info for function without CU");
DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc);
// Check if no new line info needs to be emitted.
if (RowReference == DebugLineTableRowRef::NULL_ROW ||
NewLoc.getPointer() == PrevLoc.getPointer())
return PrevLoc;
unsigned CurrentFilenum = 0;
const DWARFDebugLine::LineTable *CurrentLineTable = FunctionLineTable;
// If the CU id from the current instruction location does not
// match the CU id from the current function, it means that we
// have come across some inlined code. We must look up the CU
// for the instruction's original function and get the line table
// from that.
const uint64_t FunctionUnitIndex = FunctionCU->getOffset();
const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex;
if (CurrentUnitIndex != FunctionUnitIndex) {
CurrentLineTable = BC.DwCtx->getLineTableForUnit(
BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex));
// Add filename from the inlined function to the current CU.
CurrentFilenum = BC.addDebugFilenameToUnit(
FunctionUnitIndex, CurrentUnitIndex,
CurrentLineTable->Rows[RowReference.RowIndex - 1].File);
}
const DWARFDebugLine::Row &CurrentRow =
CurrentLineTable->Rows[RowReference.RowIndex - 1];
if (!CurrentFilenum)
CurrentFilenum = CurrentRow.File;
unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) |
(DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) |
(DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) |
(DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin);
// Always emit is_stmt at the beginning of function fragment.
if (FirstInstr)
Flags |= DWARF2_FLAG_IS_STMT;
BC.Ctx->setCurrentDwarfLoc(CurrentFilenum, CurrentRow.Line, CurrentRow.Column,
Flags, CurrentRow.Isa, CurrentRow.Discriminator);
const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
BC.Ctx->clearDwarfLocSeen();
if (!InstrLabel)
InstrLabel = BC.Ctx->createTempSymbol();
BC.getDwarfLineTable(FunctionUnitIndex)
.getMCLineSections()
.addLineEntry(MCDwarfLineEntry(InstrLabel, DwarfLoc),
Streamer.getCurrentSectionOnly());
return NewLoc;
}
void BinaryEmitter::emitLineInfoEnd(const BinaryFunction &BF,
MCSymbol *FunctionEndLabel) {
DWARFUnit *FunctionCU = BF.getDWARFUnit();
assert(FunctionCU && "DWARF unit expected");
BC.Ctx->setCurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_END_SEQUENCE, 0, 0);
const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc();
BC.Ctx->clearDwarfLocSeen();
BC.getDwarfLineTable(FunctionCU->getOffset())
.getMCLineSections()
.addLineEntry(MCDwarfLineEntry(FunctionEndLabel, DwarfLoc),
Streamer.getCurrentSectionOnly());
}
void BinaryEmitter::emitJumpTables(const BinaryFunction &BF) {
MCSection *ReadOnlySection = BC.MOFI->getReadOnlySection();
MCSection *ReadOnlyColdSection = BC.MOFI->getContext().getELFSection(
".rodata.cold", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
if (!BF.hasJumpTables())
return;
if (opts::PrintJumpTables)
[BOLT][NFC] Log through JournalingStreams (#81524) Make core BOLT functionality more friendly to being used as a library instead of in our standalone driver llvm-bolt. To accomplish this, we augment BinaryContext with journaling streams that are to be used by most BOLT code whenever something needs to be logged to the screen. Users of the library can decide if logs should be printed to a file, no file or to the screen, as before. To illustrate this, this patch adds a new option `--log-file` that allows the user to redirect BOLT logging to a file on disk or completely hide it by using `--log-file=/dev/null`. Future BOLT code should now use `BinaryContext::outs()` for printing important messages instead of `llvm::outs()`. A new test log.test enforces this by verifying that no strings are print to screen once the `--log-file` option is used. In previous patches we also added a new BOLTError class to report common and fatal errors, so code shouldn't call exit(1) now. To easily handle problems as before (by quitting with exit(1)), callers can now use `BinaryContext::logBOLTErrorsAndQuitOnFatal(Error)` whenever code needs to deal with BOLT errors. To test this, we have fatal.s that checks we are correctly quitting and printing a fatal error to the screen. Because this is a significant change by itself, not all code was yet ported. Code from Profiler libs (DataAggregator and friends) still print errors directly to screen. Co-authored-by: Rafael Auler <rafaelauler@fb.com> Test Plan: NFC
2024-02-12 14:53:53 -08:00
BC.outs() << "BOLT-INFO: jump tables for function " << BF << ":\n";
for (auto &JTI : BF.jumpTables()) {
JumpTable &JT = *JTI.second;
// Only emit shared jump tables once, when processing the first parent
if (JT.Parents.size() > 1 && JT.Parents[0] != &BF)
continue;
if (opts::PrintJumpTables)
[BOLT][NFC] Log through JournalingStreams (#81524) Make core BOLT functionality more friendly to being used as a library instead of in our standalone driver llvm-bolt. To accomplish this, we augment BinaryContext with journaling streams that are to be used by most BOLT code whenever something needs to be logged to the screen. Users of the library can decide if logs should be printed to a file, no file or to the screen, as before. To illustrate this, this patch adds a new option `--log-file` that allows the user to redirect BOLT logging to a file on disk or completely hide it by using `--log-file=/dev/null`. Future BOLT code should now use `BinaryContext::outs()` for printing important messages instead of `llvm::outs()`. A new test log.test enforces this by verifying that no strings are print to screen once the `--log-file` option is used. In previous patches we also added a new BOLTError class to report common and fatal errors, so code shouldn't call exit(1) now. To easily handle problems as before (by quitting with exit(1)), callers can now use `BinaryContext::logBOLTErrorsAndQuitOnFatal(Error)` whenever code needs to deal with BOLT errors. To test this, we have fatal.s that checks we are correctly quitting and printing a fatal error to the screen. Because this is a significant change by itself, not all code was yet ported. Code from Profiler libs (DataAggregator and friends) still print errors directly to screen. Co-authored-by: Rafael Auler <rafaelauler@fb.com> Test Plan: NFC
2024-02-12 14:53:53 -08:00
JT.print(BC.outs());
if (opts::JumpTables == JTS_BASIC && BC.HasRelocations) {
JT.updateOriginal();
} else {
MCSection *HotSection, *ColdSection;
if (opts::JumpTables == JTS_BASIC) {
// In non-relocation mode we have to emit jump tables in local sections.
// This way we only overwrite them when the corresponding function is
// overwritten.
std::string Name = ".local." + JT.Labels[0]->getName().str();
std::replace(Name.begin(), Name.end(), '/', '.');
BinarySection &Section =
BC.registerOrUpdateSection(Name, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
Section.setAnonymous(true);
JT.setOutputSection(Section);
HotSection = BC.getDataSection(Name);
ColdSection = HotSection;
} else {
if (BF.isSimple()) {
HotSection = ReadOnlySection;
ColdSection = ReadOnlyColdSection;
} else {
HotSection = BF.hasProfile() ? ReadOnlySection : ReadOnlyColdSection;
ColdSection = HotSection;
}
}
emitJumpTable(JT, HotSection, ColdSection);
}
}
}
void BinaryEmitter::emitJumpTable(const JumpTable &JT, MCSection *HotSection,
MCSection *ColdSection) {
// Pre-process entries for aggressive splitting.
// Each label represents a separate switch table and gets its own count
// determining its destination.
std::map<MCSymbol *, uint64_t> LabelCounts;
if (opts::JumpTables > JTS_SPLIT && !JT.Counts.empty()) {
MCSymbol *CurrentLabel = JT.Labels.at(0);
uint64_t CurrentLabelCount = 0;
for (unsigned Index = 0; Index < JT.Entries.size(); ++Index) {
auto LI = JT.Labels.find(Index * JT.EntrySize);
if (LI != JT.Labels.end()) {
LabelCounts[CurrentLabel] = CurrentLabelCount;
CurrentLabel = LI->second;
CurrentLabelCount = 0;
}
CurrentLabelCount += JT.Counts[Index].Count;
}
LabelCounts[CurrentLabel] = CurrentLabelCount;
} else {
Streamer.switchSection(JT.Count > 0 ? HotSection : ColdSection);
Streamer.emitValueToAlignment(Align(JT.EntrySize));
}
MCSymbol *LastLabel = nullptr;
uint64_t Offset = 0;
for (MCSymbol *Entry : JT.Entries) {
auto LI = JT.Labels.find(Offset);
if (LI != JT.Labels.end()) {
[BOLT] Section-handling refactoring/overhaul Simplify the logic of handling sections in BOLT. This change brings more direct and predictable mapping of BinarySection instances to sections in the input and output files. * Only sections from the input binary will have a non-null SectionRef. When a new section is created as a copy of the input section, its SectionRef is reset to null. * RewriteInstance::getOutputSectionName() is removed as the section name in the output file is now defined by BinarySection::getOutputName(). * Querying BinaryContext for sections by name uses their original name. E.g., getUniqueSectionByName(".rodata") will return the original section even if the new .rodata section was created. * Input file sections (with relocations applied) are emitted via MC with ".bolt.org" prefix. However, their name in the output binary is unchanged unless a new section with the same name is created. * New sections are emitted internally with ".bolt.new" prefix if there's a name conflict with an input file section. Their original name is preserved in the output file. * Section header string table is properly populated with section names that are actually used. Previously we used to include discarded section names as well. * Fix the problem when dynamic relocations were propagated to a new section with a name that matched a section in the input binary. E.g., the new .rodata with jump tables had dynamic relocations from the original .rodata. Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D135494
2022-09-22 12:05:12 -07:00
LLVM_DEBUG({
dbgs() << "BOLT-DEBUG: emitting jump table " << LI->second->getName()
<< " (originally was at address 0x"
<< Twine::utohexstr(JT.getAddress() + Offset)
<< (Offset ? ") as part of larger jump table\n" : ")\n");
});
if (!LabelCounts.empty()) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump table count: "
<< LabelCounts[LI->second] << '\n');
if (LabelCounts[LI->second] > 0)
Streamer.switchSection(HotSection);
else
Streamer.switchSection(ColdSection);
Streamer.emitValueToAlignment(Align(JT.EntrySize));
}
// Emit all labels registered at the address of this jump table
// to sync with our global symbol table. We may have two labels
// registered at this address if one label was created via
// getOrCreateGlobalSymbol() (e.g. LEA instructions referencing
// this location) and another via getOrCreateJumpTable(). This
// creates a race where the symbols created by these two
// functions may or may not be the same, but they are both
// registered in our symbol table at the same address. By
// emitting them all here we make sure there is no ambiguity
// that depends on the order that these symbols were created, so
// whenever this address is referenced in the binary, it is
// certain to point to the jump table identified at this
// address.
if (BinaryData *BD = BC.getBinaryDataByName(LI->second->getName())) {
for (MCSymbol *S : BD->getSymbols())
Streamer.emitLabel(S);
} else {
Streamer.emitLabel(LI->second);
}
LastLabel = LI->second;
}
if (JT.Type == JumpTable::JTT_NORMAL) {
Streamer.emitSymbolValue(Entry, JT.OutputEntrySize);
} else { // JTT_PIC
const MCSymbolRefExpr *JTExpr =
MCSymbolRefExpr::create(LastLabel, Streamer.getContext());
const MCSymbolRefExpr *E =
MCSymbolRefExpr::create(Entry, Streamer.getContext());
const MCBinaryExpr *Value =
MCBinaryExpr::createSub(E, JTExpr, Streamer.getContext());
Streamer.emitValue(Value, JT.EntrySize);
}
Offset += JT.EntrySize;
}
}
void BinaryEmitter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
switch (Inst.getOperation()) {
default:
llvm_unreachable("Unexpected instruction");
case MCCFIInstruction::OpDefCfaOffset:
Streamer.emitCFIDefCfaOffset(Inst.getOffset());
break;
case MCCFIInstruction::OpAdjustCfaOffset:
Streamer.emitCFIAdjustCfaOffset(Inst.getOffset());
break;
case MCCFIInstruction::OpDefCfa:
Streamer.emitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
break;
case MCCFIInstruction::OpDefCfaRegister:
Streamer.emitCFIDefCfaRegister(Inst.getRegister());
break;
case MCCFIInstruction::OpOffset:
Streamer.emitCFIOffset(Inst.getRegister(), Inst.getOffset());
break;
case MCCFIInstruction::OpRegister:
Streamer.emitCFIRegister(Inst.getRegister(), Inst.getRegister2());
break;
case MCCFIInstruction::OpWindowSave:
Streamer.emitCFIWindowSave();
break;
case MCCFIInstruction::OpNegateRAState:
Streamer.emitCFINegateRAState();
break;
case MCCFIInstruction::OpSameValue:
Streamer.emitCFISameValue(Inst.getRegister());
break;
case MCCFIInstruction::OpGnuArgsSize:
Streamer.emitCFIGnuArgsSize(Inst.getOffset());
break;
case MCCFIInstruction::OpEscape:
Streamer.AddComment(Inst.getComment());
Streamer.emitCFIEscape(Inst.getValues());
break;
case MCCFIInstruction::OpRestore:
Streamer.emitCFIRestore(Inst.getRegister());
break;
case MCCFIInstruction::OpUndefined:
Streamer.emitCFIUndefined(Inst.getRegister());
break;
}
}
// The code is based on EHStreamer::emitExceptionTable().
void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
const BinaryFunction::CallSitesRange Sites =
BF.getCallSites(FF.getFragmentNum());
if (Sites.empty())
return;
// Calculate callsite table size. Size of each callsite entry is:
//
// sizeof(start) + sizeof(length) + sizeof(LP) + sizeof(uleb128(action))
//
// or
//
// sizeof(dwarf::DW_EH_PE_data4) * 3 + sizeof(uleb128(action))
uint64_t CallSiteTableLength = llvm::size(Sites) * 4 * 3;
for (const auto &FragmentCallSite : Sites)
CallSiteTableLength += getULEB128Size(FragmentCallSite.second.Action);
Streamer.switchSection(BC.MOFI->getLSDASection());
const unsigned TTypeEncoding = BF.getLSDATypeEncoding();
const unsigned TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
const uint16_t TTypeAlignment = 4;
// Type tables have to be aligned at 4 bytes.
Streamer.emitValueToAlignment(Align(TTypeAlignment));
// Emit the LSDA label.
MCSymbol *LSDASymbol = BF.getLSDASymbol(FF.getFragmentNum());
assert(LSDASymbol && "no LSDA symbol set");
Streamer.emitLabel(LSDASymbol);
// Corresponding FDE start.
const MCSymbol *StartSymbol = BF.getSymbol(FF.getFragmentNum());
// Emit the LSDA header.
// If LPStart is omitted, then the start of the FDE is used as a base for
// landing pad displacements. Then if a cold fragment starts with
// a landing pad, this means that the first landing pad offset will be 0.
// As a result, the exception handling runtime will ignore this landing pad
// because zero offset denotes the absence of a landing pad.
// For this reason, when the binary has fixed starting address we emit LPStart
// as 0 and output the absolute value of the landing pad in the table.
//
// If the base address can change, we cannot use absolute addresses for
// landing pads (at least not without runtime relocations). Hence, we fall
// back to emitting landing pads relative to the FDE start.
// As we are emitting label differences, we have to guarantee both labels are
// defined in the same section and hence cannot place the landing pad into a
// cold fragment when the corresponding call site is in the hot fragment.
// Because of this issue and the previously described issue of possible
// zero-offset landing pad we have to place landing pads in the same section
// as the corresponding invokes for shared objects.
std::function<void(const MCSymbol *)> emitLandingPad;
if (BC.HasFixedLoadAddress) {
Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
Streamer.emitIntValue(0, 4); // LPStart
emitLandingPad = [&](const MCSymbol *LPSymbol) {
if (!LPSymbol)
Streamer.emitIntValue(0, 4);
else
Streamer.emitSymbolValue(LPSymbol, 4);
};
} else {
Streamer.emitIntValue(dwarf::DW_EH_PE_omit, 1); // LPStart format
emitLandingPad = [&](const MCSymbol *LPSymbol) {
if (!LPSymbol)
Streamer.emitIntValue(0, 4);
else
Streamer.emitAbsoluteSymbolDiff(LPSymbol, StartSymbol, 4);
};
}
Streamer.emitIntValue(TTypeEncoding, 1); // TType format
// See the comment in EHStreamer::emitExceptionTable() on to use
// uleb128 encoding (which can use variable number of bytes to encode the same
// value) to ensure type info table is properly aligned at 4 bytes without
// iteratively fixing sizes of the tables.
unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength);
unsigned TTypeBaseOffset =
sizeof(int8_t) + // Call site format
CallSiteTableLengthSize + // Call site table length size
CallSiteTableLength + // Call site table length
BF.getLSDAActionTable().size() + // Actions table size
BF.getLSDATypeTable().size() * TTypeEncodingSize; // Types table size
unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset);
unsigned TotalSize = sizeof(int8_t) + // LPStart format
sizeof(int8_t) + // TType format
TTypeBaseOffsetSize + // TType base offset size
TTypeBaseOffset; // TType base offset
unsigned SizeAlign = (4 - TotalSize) & 3;
if (TTypeEncoding != dwarf::DW_EH_PE_omit)
// Account for any extra padding that will be added to the call site table
// length.
Streamer.emitULEB128IntValue(TTypeBaseOffset,
/*PadTo=*/TTypeBaseOffsetSize + SizeAlign);
// Emit the landing pad call site table. We use signed data4 since we can emit
// a landing pad in a different part of the split function that could appear
// earlier in the address space than LPStart.
Streamer.emitIntValue(dwarf::DW_EH_PE_sdata4, 1);
Streamer.emitULEB128IntValue(CallSiteTableLength);
for (const auto &FragmentCallSite : Sites) {
const BinaryFunction::CallSite &CallSite = FragmentCallSite.second;
const MCSymbol *BeginLabel = CallSite.Start;
const MCSymbol *EndLabel = CallSite.End;
assert(BeginLabel && "start EH label expected");
assert(EndLabel && "end EH label expected");
// Start of the range is emitted relative to the start of current
// function split part.
Streamer.emitAbsoluteSymbolDiff(BeginLabel, StartSymbol, 4);
Streamer.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4);
emitLandingPad(CallSite.LP);
Streamer.emitULEB128IntValue(CallSite.Action);
}
// Write out action, type, and type index tables at the end.
//
// For action and type index tables there's no need to change the original
// table format unless we are doing function splitting, in which case we can
// split and optimize the tables.
//
// For type table we (re-)encode the table using TTypeEncoding matching
// the current assembler mode.
for (uint8_t const &Byte : BF.getLSDAActionTable())
Streamer.emitIntValue(Byte, 1);
const BinaryFunction::LSDATypeTableTy &TypeTable =
(TTypeEncoding & dwarf::DW_EH_PE_indirect) ? BF.getLSDATypeAddressTable()
: BF.getLSDATypeTable();
assert(TypeTable.size() == BF.getLSDATypeTable().size() &&
"indirect type table size mismatch");
for (int Index = TypeTable.size() - 1; Index >= 0; --Index) {
const uint64_t TypeAddress = TypeTable[Index];
switch (TTypeEncoding & 0x70) {
default:
llvm_unreachable("unsupported TTypeEncoding");
case dwarf::DW_EH_PE_absptr:
Streamer.emitIntValue(TypeAddress, TTypeEncodingSize);
break;
case dwarf::DW_EH_PE_pcrel: {
if (TypeAddress) {
const MCSymbol *TypeSymbol =
BC.getOrCreateGlobalSymbol(TypeAddress, "TI", 0, TTypeAlignment);
MCSymbol *DotSymbol = BC.Ctx->createNamedTempSymbol();
Streamer.emitLabel(DotSymbol);
const MCBinaryExpr *SubDotExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(TypeSymbol, *BC.Ctx),
MCSymbolRefExpr::create(DotSymbol, *BC.Ctx), *BC.Ctx);
Streamer.emitValue(SubDotExpr, TTypeEncodingSize);
} else {
Streamer.emitIntValue(0, TTypeEncodingSize);
}
break;
}
}
}
for (uint8_t const &Byte : BF.getLSDATypeIndexTable())
Streamer.emitIntValue(Byte, 1);
}
void BinaryEmitter::emitDebugLineInfoForOriginalFunctions() {
// If a function is in a CU containing at least one processed function, we
// have to rewrite the whole line table for that CU. For unprocessed functions
// we use data from the input line table.
for (auto &It : BC.getBinaryFunctions()) {
const BinaryFunction &Function = It.second;
// If the function was emitted, its line info was emitted with it.
if (Function.isEmitted())
continue;
const DWARFDebugLine::LineTable *LineTable = Function.getDWARFLineTable();
if (!LineTable)
continue; // nothing to update for this function
const uint64_t Address = Function.getAddress();
std::vector<uint32_t> Results;
if (!LineTable->lookupAddressRange(
{Address, object::SectionedAddress::UndefSection},
Function.getSize(), Results))
continue;
if (Results.empty())
continue;
// The first row returned could be the last row matching the start address.
// Find the first row with the same address that is not the end of the
// sequence.
uint64_t FirstRow = Results.front();
while (FirstRow > 0) {
const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1];
if (PrevRow.Address.Address != Address || PrevRow.EndSequence)
break;
--FirstRow;
}
const uint64_t EndOfSequenceAddress =
Function.getAddress() + Function.getMaxSize();
BC.getDwarfLineTable(Function.getDWARFUnit()->getOffset())
.addLineTableSequence(LineTable, FirstRow, Results.back(),
EndOfSequenceAddress);
}
// For units that are completely unprocessed, use original debug line contents
// eliminating the need to regenerate line info program.
emitDebugLineInfoForUnprocessedCUs();
}
void BinaryEmitter::emitDebugLineInfoForUnprocessedCUs() {
// Sorted list of section offsets provides boundaries for section fragments,
// where each fragment is the unit's contribution to debug line section.
std::vector<uint64_t> StmtListOffsets;
StmtListOffsets.reserve(BC.DwCtx->getNumCompileUnits());
for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
DWARFDie CUDie = CU->getUnitDIE();
auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list));
if (!StmtList)
continue;
StmtListOffsets.push_back(*StmtList);
}
llvm::sort(StmtListOffsets);
// For each CU that was not processed, emit its line info as a binary blob.
for (const std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units()) {
if (BC.ProcessedCUs.count(CU.get()))
continue;
DWARFDie CUDie = CU->getUnitDIE();
auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list));
if (!StmtList)
continue;
StringRef DebugLineContents = CU->getLineSection().Data;
const uint64_t Begin = *StmtList;
// Statement list ends where the next unit contribution begins, or at the
// end of the section.
auto It = llvm::upper_bound(StmtListOffsets, Begin);
const uint64_t End =
It == StmtListOffsets.end() ? DebugLineContents.size() : *It;
BC.getDwarfLineTable(CU->getOffset())
.addRawContents(DebugLineContents.slice(Begin, End));
}
}
void BinaryEmitter::emitDataSections(StringRef OrgSecPrefix) {
for (BinarySection &Section : BC.sections()) {
[BOLT] Section-handling refactoring/overhaul Simplify the logic of handling sections in BOLT. This change brings more direct and predictable mapping of BinarySection instances to sections in the input and output files. * Only sections from the input binary will have a non-null SectionRef. When a new section is created as a copy of the input section, its SectionRef is reset to null. * RewriteInstance::getOutputSectionName() is removed as the section name in the output file is now defined by BinarySection::getOutputName(). * Querying BinaryContext for sections by name uses their original name. E.g., getUniqueSectionByName(".rodata") will return the original section even if the new .rodata section was created. * Input file sections (with relocations applied) are emitted via MC with ".bolt.org" prefix. However, their name in the output binary is unchanged unless a new section with the same name is created. * New sections are emitted internally with ".bolt.new" prefix if there's a name conflict with an input file section. Their original name is preserved in the output file. * Section header string table is properly populated with section names that are actually used. Previously we used to include discarded section names as well. * Fix the problem when dynamic relocations were propagated to a new section with a name that matched a section in the input binary. E.g., the new .rodata with jump tables had dynamic relocations from the original .rodata. Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D135494
2022-09-22 12:05:12 -07:00
if (!Section.hasRelocations())
continue;
[BOLT] Section-handling refactoring/overhaul Simplify the logic of handling sections in BOLT. This change brings more direct and predictable mapping of BinarySection instances to sections in the input and output files. * Only sections from the input binary will have a non-null SectionRef. When a new section is created as a copy of the input section, its SectionRef is reset to null. * RewriteInstance::getOutputSectionName() is removed as the section name in the output file is now defined by BinarySection::getOutputName(). * Querying BinaryContext for sections by name uses their original name. E.g., getUniqueSectionByName(".rodata") will return the original section even if the new .rodata section was created. * Input file sections (with relocations applied) are emitted via MC with ".bolt.org" prefix. However, their name in the output binary is unchanged unless a new section with the same name is created. * New sections are emitted internally with ".bolt.new" prefix if there's a name conflict with an input file section. Their original name is preserved in the output file. * Section header string table is properly populated with section names that are actually used. Previously we used to include discarded section names as well. * Fix the problem when dynamic relocations were propagated to a new section with a name that matched a section in the input binary. E.g., the new .rodata with jump tables had dynamic relocations from the original .rodata. Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D135494
2022-09-22 12:05:12 -07:00
StringRef Prefix = Section.hasSectionRef() ? OrgSecPrefix : "";
Section.emitAsData(Streamer, Prefix + Section.getName());
[BOLT] Support for lite mode with relocations Summary: Add '-lite' support for relocations for improved processing time, memory consumption, and more resilient processing of binaries with embedded assembly code. In lite relocation mode, BOLT will skip full processing of functions without a profile. It will run scanExternalRefs() on such functions to discover external references and to create internal relocations to update references to optimized functions. Note that we could have relied on the compiler/linker to provide relocations for function references. However, there's no assurance that all such references are reported. E.g., the compiler can resolve inter-procedural references internally, leaving no relocations for the linker. The scan process takes about <10 seconds per 100MB of code on modern hardware. It's a reasonable overhead to live with considering the flexibility it provides. If BOLT fails to scan or disassemble a function, .e.g., due to a data object embedded in code, or an unsupported instruction, it enables a patching mode to guarantee that the failed function will call optimized/moved versions of functions. The patching happens at original function entry points. '-skip=<func1,func2,...>' option now can be used to skip processing of arbitrary functions in the relocation mode. With '-use-old-text' or '-strict' we require all functions to be processed. As such, it is incompatible with '-lite' option, and '-skip' option will only disable optimizations of listed functions, not their disassembly and emission. (cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
Section.clearRelocations();
}
}
namespace llvm {
namespace bolt {
void emitBinaryContext(MCStreamer &Streamer, BinaryContext &BC,
StringRef OrgSecPrefix) {
BinaryEmitter(Streamer, BC).emitAll(OrgSecPrefix);
}
void emitFunctionBody(MCStreamer &Streamer, BinaryFunction &BF,
FunctionFragment &FF, bool EmitCodeOnly) {
BinaryEmitter(Streamer, BF.getBinaryContext())
.emitFunctionBody(BF, FF, EmitCodeOnly);
}
} // namespace bolt
} // namespace llvm