2021-12-21 10:21:41 -08:00
|
|
|
//===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===//
|
2015-11-23 17:54:18 -08:00
|
|
|
//
|
2021-03-15 18:04:18 -07:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-11-23 17:54:18 -08:00
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2021-10-08 11:47:10 -07:00
|
|
|
#include "bolt/Rewrite/RewriteInstance.h"
|
2023-08-21 10:10:48 +02:00
|
|
|
#include "bolt/Core/AddressMap.h"
|
2021-10-08 11:47:10 -07:00
|
|
|
#include "bolt/Core/BinaryContext.h"
|
|
|
|
|
#include "bolt/Core/BinaryEmitter.h"
|
|
|
|
|
#include "bolt/Core/BinaryFunction.h"
|
|
|
|
|
#include "bolt/Core/DebugData.h"
|
|
|
|
|
#include "bolt/Core/Exceptions.h"
|
2022-08-18 21:26:18 -07:00
|
|
|
#include "bolt/Core/FunctionLayout.h"
|
2021-10-08 11:47:10 -07:00
|
|
|
#include "bolt/Core/MCPlusBuilder.h"
|
|
|
|
|
#include "bolt/Core/ParallelUtilities.h"
|
|
|
|
|
#include "bolt/Core/Relocation.h"
|
2024-05-22 13:53:41 -07:00
|
|
|
#include "bolt/Passes/BinaryPasses.h"
|
2021-10-08 11:47:10 -07:00
|
|
|
#include "bolt/Passes/CacheMetrics.h"
|
2024-12-16 21:49:53 -08:00
|
|
|
#include "bolt/Passes/IdenticalCodeFolding.h"
|
2025-03-21 11:19:53 +03:00
|
|
|
#include "bolt/Passes/PAuthGadgetScanner.h"
|
2021-10-08 11:47:10 -07:00
|
|
|
#include "bolt/Passes/ReorderFunctions.h"
|
|
|
|
|
#include "bolt/Profile/BoltAddressTranslation.h"
|
|
|
|
|
#include "bolt/Profile/DataAggregator.h"
|
|
|
|
|
#include "bolt/Profile/DataReader.h"
|
|
|
|
|
#include "bolt/Profile/YAMLProfileReader.h"
|
|
|
|
|
#include "bolt/Profile/YAMLProfileWriter.h"
|
|
|
|
|
#include "bolt/Rewrite/BinaryPassManager.h"
|
|
|
|
|
#include "bolt/Rewrite/DWARFRewriter.h"
|
|
|
|
|
#include "bolt/Rewrite/ExecutableFileMemoryManager.h"
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
For loading object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
#include "bolt/Rewrite/JITLinkLinker.h"
|
2023-06-27 22:55:53 -07:00
|
|
|
#include "bolt/Rewrite/MetadataRewriters.h"
|
2021-10-08 11:47:10 -07:00
|
|
|
#include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h"
|
|
|
|
|
#include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
|
|
|
|
|
#include "bolt/Utils/CommandLineOpts.h"
|
|
|
|
|
#include "bolt/Utils/Utils.h"
|
2023-06-20 20:43:53 -07:00
|
|
|
#include "llvm/ADT/AddressRanges.h"
|
2022-08-19 15:57:24 -07:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
2022-02-14 16:27:04 +01:00
|
|
|
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/MC/MCAsmBackend.h"
|
|
|
|
|
#include "llvm/MC/MCAsmInfo.h"
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/MC/MCObjectStreamer.h"
|
|
|
|
|
#include "llvm/MC/MCStreamer.h"
|
|
|
|
|
#include "llvm/MC/MCSymbol.h"
|
2021-10-08 11:47:10 -07:00
|
|
|
#include "llvm/MC/TargetRegistry.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/Object/ObjectFile.h"
|
2020-12-01 16:29:39 -08:00
|
|
|
#include "llvm/Support/Alignment.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
|
#include "llvm/Support/CommandLine.h"
|
2016-09-27 19:09:38 -07:00
|
|
|
#include "llvm/Support/DataExtractor.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/Support/Errc.h"
|
2022-02-16 20:39:59 -08:00
|
|
|
#include "llvm/Support/Error.h"
|
2021-03-29 16:04:57 -07:00
|
|
|
#include "llvm/Support/FileSystem.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/Support/ManagedStatic.h"
|
2017-11-27 18:00:24 -08:00
|
|
|
#include "llvm/Support/Timer.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include "llvm/Support/ToolOutputFile.h"
|
2017-05-24 14:14:16 -07:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2015-11-23 17:54:18 -08:00
|
|
|
#include <algorithm>
|
2016-01-26 16:03:58 -08:00
|
|
|
#include <fstream>
|
2022-02-16 20:39:59 -08:00
|
|
|
#include <memory>
|
2023-01-02 18:40:21 -08:00
|
|
|
#include <optional>
|
2015-11-23 17:54:18 -08:00
|
|
|
#include <system_error>
|
|
|
|
|
|
|
|
|
|
#undef DEBUG_TYPE
|
2016-02-05 14:42:04 -08:00
|
|
|
#define DEBUG_TYPE "bolt"
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
using namespace object;
|
2016-02-05 14:42:04 -08:00
|
|
|
using namespace bolt;
|
2015-11-23 17:54:18 -08:00
|
|
|
|
[BOLT] Decoder cache friendly alignment wrt Intel JCC Erratum
Summary:
This diff ports reviews.llvm.org/D70157 to our LLVM tree, which
makes the integrated assembler able to align X86 control-flow changing
instructions in a way to reduce the performance impact of the ucode
update on Intel processors that implement the JCC erratum mitigation.
See white paper "Mitigations for Jump Conditional Code Erratum" by Intel
published November 2019.
To port this patch, I changed classifySecondInstInMacroFusion to analyze
instruction opcodes directly instead of analyzing the CondCond operand
(in more recent versions of LLVM, all conditional branches share the
same opcode, but with a different conditional operand). I also pulled to
our tree Alignment.h as a dependency, and the macroop analyzing helpers.
x86-align-branch-boundary and -x86-align-branch are the two flags that
control nop insertion to avoid disabling the decoder cache, following
the original patch. In BOLT, I added the flag
x86-align-branch-boundary-hot-only to request the alignment to only be
applied to hot code, which is turned on by default. The reason is
because such alignment is expensive to perform on large modules, but if
we limit it to hot code, the relaxation pass runtime becomes tolerable.
(cherry picked from FBD19828850)
2020-02-10 18:50:53 -08:00
|
|
|
extern cl::opt<uint32_t> X86AlignBranchBoundary;
|
|
|
|
|
extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
namespace opts {
|
|
|
|
|
|
2021-10-08 11:47:10 -07:00
|
|
|
extern cl::list<std::string> HotTextMoveSections;
|
Adding automatic huge page support
Summary:
This patch enables automated hugify for Bolt.
When running Bolt against a binary with -hugify specified, Bolt will inject a call to a runtime library function at the entry of the binary. The runtime library calls madvise to map the hot code region into a 2M huge page. We support both new kernel with THP support and old kernels. For kernels with THP support we simply make a madvise call, while for old kernels, we first copy the code out, remap the memory with huge page, and then copy the code back.
With this change, we no longer need to manually call into hugify_self and precompile it with --hot-text. Instead, we could simply combine --hugify option with existing optimizations, and at runtime it will automatically move hot code into 2M pages.
Some details around the changes made:
1. Add a command line option to support --hugify. --hugify will automatically turn on --hot-text to get the proper hot code symbols. However, running with both --hugify and --hot-text is not allowed, since --hot-text is used on binaries that have a precompiled call to hugify_self, which contradicts the purpose of --hugify.
2. Moved the common utility functions out of instr.cpp to common.h, which will also be used by hugify.cpp. Added a few new system calls definitions.
3. Added a new class that inherits RuntimeLibrary, and implemented the necessary emit and link logic for hugify.
4. Added a simple test for hugify.
(cherry picked from FBD21384529)
2020-05-02 11:14:38 -07:00
|
|
|
extern cl::opt<bool> Hugify;
|
Refactor runtime library
Summary:
As we are adding more types of runtime libraries, it would be better to move the runtime library out of RewriteInstance so that it could grow separately. This also requires splitting the current implementation of Instrumentation.cpp to two separate pieces, one as normal Pass, one as the runtime library. The Instrumentation Pass would pass over the generated data to the runtime library, which will use to emit binary and perform linking.
This patch does the following:
1. Turn Instrumentation class into an optimization pass. Register the pass in the pass manager instead of in RewriteInstance.
2. Split all the data that are generated by Instrumentation that's needed by runtime library into a separate data structure called InstrumentationSummary. At the creation of Instrumentation pass, we create an instance of such data structure, which will be moved over to the runtime at the end of the pass.
3. Added a runtime library member to BinaryContext. Set the member at the end of Instrumentation pass.
4. In BinaryEmitter, make BinaryContext to also emit runtime library binary.
5. Created a base class RuntimeLibrary, that defines the interface of a runtime library, along with a few common helper functions.
6. Created InstrumentationRuntimeLibrary which inherits from RuntimeLibrary, that does all the work (mostly copied over) for emit and linking.
7. Added a new directory called RuntimeLibs, and put all the runtime library related files into it.
(cherry picked from FBD21694762)
2020-05-21 14:28:47 -07:00
|
|
|
extern cl::opt<bool> Instrument;
|
2024-03-22 15:29:26 -07:00
|
|
|
extern cl::opt<bool> KeepNops;
|
2024-06-29 21:19:00 -07:00
|
|
|
extern cl::opt<bool> Lite;
|
2018-04-20 20:03:31 -07:00
|
|
|
extern cl::list<std::string> ReorderData;
|
2019-04-25 17:00:05 -07:00
|
|
|
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
|
2025-08-19 14:41:13 -07:00
|
|
|
extern cl::opt<bool> TerminalHLT;
|
2024-03-29 16:41:15 -07:00
|
|
|
extern cl::opt<bool> TerminalTrap;
|
2019-07-12 07:25:50 -07:00
|
|
|
extern cl::opt<bool> TimeBuild;
|
2024-05-22 11:04:12 -07:00
|
|
|
extern cl::opt<bool> TimeRewrite;
|
2024-12-16 21:49:53 -08:00
|
|
|
extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
|
|
|
|
|
llvm::bolt::DeprecatedICFNumericOptionParser>
|
|
|
|
|
ICF;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2025-03-06 14:11:05 +08:00
|
|
|
// -allow-stripped: hidden switch; per its help text it permits processing of
// stripped binaries.
static cl::opt<bool> AllowStripped(
    "allow-stripped", cl::desc("allow processing of stripped binaries"),
    cl::Hidden, cl::cat(BoltCategory));

// -force-data-relocations: hidden switch; per its help text it forces
// relocations against data sections to always be processed.
static cl::opt<bool> ForceToDataRelocations(
    "force-data-relocations",
    cl::desc("force relocations to data sections to always be processed"),
    cl::Hidden, cl::cat(BoltCategory));

// -bolt-id: free-form string used to tag this run in the output binary's bolt
// info section.
static cl::opt<std::string> BoltID(
    "bolt-id",
    cl::desc("add any string to tag this execution in the "
             "output binary via bolt info section"),
    cl::cat(BoltCategory));
|
2020-05-06 17:31:25 -07:00
|
|
|
|
2022-06-02 00:26:23 -07:00
|
|
|
// -dump-dot-all: hidden switch requesting a graphviz dump of every function's
// CFG after each stage. Consulted by shouldDumpDot() below.
//
// The two description fragments are adjacent string literals and are
// concatenated verbatim, so the first one must end with a space; otherwise
// the help text renders as "...after each stage;enable '-print-loops'...".
cl::opt<bool> DumpDotAll(
    "dump-dot-all",
    cl::desc("dump function CFGs to graphviz format after each stage; "
             "enable '-print-loops' for color-coded blocks"),
    cl::Hidden, cl::cat(BoltCategory));
|
2016-09-09 12:37:37 -07:00
|
|
|
|
2025-08-22 10:51:09 +01:00
|
|
|
// -dump-dot-func: hidden, comma-separated list of function name patterns
// whose CFGs should be dumped to graphviz format. Each entry is matched via
// BinaryFunction::hasNameRegex() in shouldDumpDot() below.
//
// The two description fragments are adjacent string literals and are
// concatenated verbatim, so the first one must end with a space; otherwise
// the help text renders as "...specified functions only;takes function...".
cl::list<std::string> DumpDotFunc(
    "dump-dot-func", cl::CommaSeparated,
    cl::desc(
        "dump function CFGs to graphviz format for specified functions only; "
        "takes function name patterns (regex supported)"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));
|
|
|
|
|
|
|
|
|
|
/// Decide whether the CFG of \p Function should be dumped in graphviz format.
///
/// Ignored functions are never dumped. Otherwise the function is dumped when
/// -dump-dot-all is set, or when its name matches at least one of the
/// patterns supplied through -dump-dot-func.
bool shouldDumpDot(const bolt::BinaryFunction &Function) {
  // Every path through the selection logic rejects ignored functions, so
  // filter them out up front.
  if (Function.isIgnored())
    return false;

  // -dump-dot-all selects every remaining function.
  if (DumpDotAll)
    return true;

  // An empty -dump-dot-func list matches nothing, so any_of yields false.
  return llvm::any_of(DumpDotFunc, [&](const std::string &Pattern) {
    return Function.hasNameRegex(Pattern);
  });
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// -funcs: hidden, comma-separated list restricting optimizations to the named
// functions.
static cl::list<std::string> ForceFunctionNames(
    "funcs", cl::CommaSeparated,
    cl::desc("limit optimizations to functions from the list"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));

// -funcs-file: hidden; path of a file listing the functions to optimize.
static cl::opt<std::string> FunctionNamesFile(
    "funcs-file", cl::desc("file with list of functions to optimize"),
    cl::Hidden, cl::cat(BoltCategory));

// -funcs-no-regex: same purpose as -funcs, for the non-regex variant named in
// the help text.
static cl::list<std::string> ForceFunctionNamesNR(
    "funcs-no-regex", cl::CommaSeparated,
    cl::desc("limit optimizations to functions from the list (non-regex)"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));

// -funcs-file-no-regex: same purpose as -funcs-file, for the non-regex
// variant named in the help text.
static cl::opt<std::string> FunctionNamesFileNR(
    "funcs-file-no-regex",
    cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden,
    cl::cat(BoltCategory));
|
|
|
|
|
|
2020-05-26 04:21:04 -07:00
|
|
|
// -keep-tmp: hidden switch; per its help text it preserves the intermediate
// .o file. Defined without `static`, so it has external linkage.
cl::opt<bool> KeepTmp(
    "keep-tmp", cl::desc("preserve intermediate .o file"), cl::Hidden,
    cl::cat(BoltCategory));

// -lite-threshold-pct: hidden percentage threshold (default 0) used to select
// which functions are processed in lite mode; see the help text for the
// exact meaning.
static cl::opt<unsigned> LiteThresholdPct(
    "lite-threshold-pct",
    cl::desc("threshold (in percent) for selecting functions to process in lite "
             "mode. Higher threshold means fewer functions to process. E.g "
             "threshold of 90 means only top 10 percent of functions with "
             "profile will be processed."),
    cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));

// -lite-threshold-count: hidden absolute-count counterpart (default 0) of
// -lite-threshold-pct.
static cl::opt<unsigned> LiteThresholdCount(
    "lite-threshold-count",
    cl::desc("similar to '-lite-threshold-pct' but specify threshold using "
             "absolute function call count. I.e. limit processing to functions "
             "executed at least the specified number of times."),
    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
|
2020-12-30 12:23:58 -08:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// -max-funcs: hidden cap on the number of functions to process. No explicit
// cl::init is given here.
static cl::opt<unsigned> MaxFunctions(
    "max-funcs", cl::desc("maximum number of functions to process"), cl::Hidden,
    cl::cat(BoltCategory));

// -max-data-relocations: hidden cap on the number of data relocations to
// process. No explicit cl::init is given here.
static cl::opt<unsigned> MaxDataRelocations(
    "max-data-relocations",
    cl::desc("maximum number of data relocations to process"), cl::Hidden,
    cl::cat(BoltCategory));
|
2017-11-14 20:05:11 -08:00
|
|
|
|
2022-06-05 13:29:49 -07:00
|
|
|
// Hidden printing switches. PrintAll, PrintCFG and PrintDisasm are defined
// without `static` (external linkage); PrintProfile and PrintGlobals are
// file-local.

// -print-all: print functions after each stage.
cl::opt<bool> PrintAll(
    "print-all", cl::desc("print functions after each stage"), cl::Hidden,
    cl::cat(BoltCategory));

// -print-profile: print functions after attaching profile.
static cl::opt<bool> PrintProfile(
    "print-profile", cl::desc("print functions after attaching profile"),
    cl::Hidden, cl::cat(BoltCategory));

// -print-cfg: print functions after CFG construction.
cl::opt<bool> PrintCFG(
    "print-cfg", cl::desc("print functions after CFG construction"),
    cl::Hidden, cl::cat(BoltCategory));

// -print-disasm: print function after disassembly.
cl::opt<bool> PrintDisasm(
    "print-disasm", cl::desc("print function after disassembly"), cl::Hidden,
    cl::cat(BoltCategory));

// -print-globals: print global symbols after disassembly.
static cl::opt<bool> PrintGlobals(
    "print-globals", cl::desc("print global symbols after disassembly"),
    cl::Hidden, cl::cat(BoltCategory));
|
2017-11-14 20:05:11 -08:00
|
|
|
|
2020-01-30 13:10:48 -08:00
|
|
|
extern cl::opt<bool> PrintSections;
|
2018-02-01 16:33:43 -08:00
|
|
|
|
2022-06-05 13:29:49 -07:00
|
|
|
// -print-loops: hidden switch; print loop related information.
static cl::opt<bool> PrintLoopInfo(
    "print-loops", cl::desc("print loop related information"), cl::Hidden,
    cl::cat(BoltCategory));

// -relocs: tri-state (cl::boolOrDefault) selector for using relocations in
// the binary. The help text states that the unset state means autodetect.
static cl::opt<cl::boolOrDefault> RelocationMode(
    "relocs", cl::desc("use relocations in the binary (default=autodetect)"),
    cl::cat(BoltCategory));
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2024-03-21 14:32:13 -07:00
|
|
|
extern cl::opt<std::string> SaveProfile;
|
2017-12-13 23:12:01 -08:00
|
|
|
|
2017-03-28 14:40:20 -07:00
|
|
|
// -skip-funcs: hidden, comma-separated list of functions to skip.
static cl::list<std::string> SkipFunctionNames(
    "skip-funcs", cl::CommaSeparated, cl::desc("list of functions to skip"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));

// -skip-funcs-file: hidden; path of a file listing the functions to skip.
static cl::opt<std::string> SkipFunctionNamesFile(
    "skip-funcs-file", cl::desc("file with list of functions to skip"),
    cl::Hidden, cl::cat(BoltCategory));

// -trap-old-code: hidden switch; per its help text it inserts traps in old
// function bodies in relocation mode.
static cl::opt<bool> TrapOldCode(
    "trap-old-code",
    cl::desc("insert traps in old function bodies (relocation mode)"),
    cl::Hidden, cl::cat(BoltCategory));
|
2017-03-28 14:40:20 -07:00
|
|
|
|
2021-06-16 09:52:03 -07:00
|
|
|
// -dwp: hidden; path and name of the DWP file (defaults to the empty string).
static cl::opt<std::string> DWPPathName(
    "dwp", cl::desc("Path and name to DWP file."), cl::Hidden, cl::init(""),
    cl::cat(BoltCategory));

// -use-gnu-stack: reuse the GNU_STACK program header for the new segment; the
// help text describes it as a workaround for strip/objcopy issues.
static cl::opt<bool> UseGnuStack(
    "use-gnu-stack",
    cl::desc("use GNU_STACK program header for new segment (workaround for "
             "issues with strip/objcopy)"),
    cl::ZeroOrMore, cl::cat(BoltCategory));

// -custom-allocation-vma: hidden; explicit address at which new code is put,
// bypassing BOLT's own placement detection (per the help text).
static cl::opt<uint64_t> CustomAllocationVMA(
    "custom-allocation-vma",
    cl::desc("use a custom address at which new code will be put, "
             "bypassing BOLT's logic to detect where to put code"),
    cl::Hidden, cl::cat(BoltCategory));

// -sequential-disassembly: perform disassembly sequentially (off by default).
static cl::opt<bool> SequentialDisassembly(
    "sequential-disassembly", cl::desc("performs disassembly sequentially"),
    cl::init(false), cl::cat(BoltOptCategory));

// -bolt-info: hidden; write the bolt info section into the output binary
// (on by default).
static cl::opt<bool> WriteBoltInfoSection(
    "bolt-info", cl::desc("write bolt info section in the output binary"),
    cl::init(true), cl::Hidden, cl::cat(BoltOutputCategory));
|
2019-07-30 17:55:27 -07:00
|
|
|
|
2025-04-03 16:40:34 +03:00
|
|
|
// -scanners: comma-separated set of gadget scanners to run, stored as a bit
// set over GadgetScannerKind. Accepted values are "pacret", "pauth" and
// "all".
cl::bits<GadgetScannerKind> GadgetScannersToRun(
    "scanners", cl::desc("which gadget scanners to run"),
    cl::values(
        clEnumValN(GS_PACRET, "pacret",
                   "pac-ret: return address protection (subset of \"pauth\")"),
        clEnumValN(GS_PAUTH, "pauth", "All Pointer Authentication scanners"),
        clEnumValN(GS_ALL, "all", "All implemented scanners")),
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(BinaryAnalysisCategory));
|
2025-02-24 08:26:28 +01:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
} // namespace opts
|
|
|
|
|
|
2023-06-27 23:37:14 -07:00
|
|
|
// FIXME: implement a better way to mark sections for replacement.
|
2017-05-16 09:27:34 -07:00
|
|
|
// Out-of-class definition of the static constexpr array member. It carries no
// initializer, so the element values come from the in-class declaration
// (not visible in this file).
// NOTE(review): from C++17 on, static constexpr data members are implicitly
// inline and this redeclaration is redundant (deprecated) — confirm the
// project's minimum language standard before removing it.
constexpr const char *RewriteInstance::SectionsToOverwrite[];
|
2021-04-01 11:43:00 -07:00
|
|
|
// Names of the debug-related sections that are marked for overwriting.
// ".debug_abbrev" used to be listed twice; the redundant second entry is
// dropped and the relative order of the remaining names is unchanged.
// NOTE(review): this assumes the list is only used for name lookups —
// confirm no caller depends on its size or on element indices.
std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = {
    ".debug_abbrev", ".debug_aranges",  ".debug_line",   ".debug_line_str",
    ".debug_loc",    ".debug_loclists", ".debug_ranges", ".debug_rnglists",
    ".gdb_index",    ".debug_addr",     ".debug_info",   ".debug_types",
    ".pseudo_probe"};
|
2016-07-22 20:52:57 -07:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
// Short identifier of the timer group associated with RewriteInstance.
// NOTE(review): presumably passed to LLVM's timer utilities together with
// TimerGroupDesc — the uses are outside this view.
const char RewriteInstance::TimerGroupName[] = "rewrite";
|
|
|
|
|
// Human-readable description of the timer group associated with
// RewriteInstance.
const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes";
|
2017-11-27 18:00:24 -08:00
|
|
|
|
2017-05-24 14:14:16 -07:00
|
|
|
namespace llvm {
|
|
|
|
|
namespace bolt {
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2020-01-30 13:10:48 -08:00
|
|
|
extern const char *BoltRevision;
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2024-03-30 20:43:23 -07:00
|
|
|
// Weird location for createMCPlusBuilder, but this is here to avoid a
|
|
|
|
|
// cyclic dependency of libCore (its natural place) and libTarget. libRewrite
|
|
|
|
|
// can depend on libTarget, but not libCore. Since libRewrite is the only
|
|
|
|
|
// user of this function, we define it here.
|
2021-10-08 11:47:10 -07:00
|
|
|
MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
|
|
|
|
|
const MCInstrAnalysis *Analysis,
|
|
|
|
|
const MCInstrInfo *Info,
|
2023-10-06 06:39:58 +00:00
|
|
|
const MCRegisterInfo *RegInfo,
|
|
|
|
|
const MCSubtargetInfo *STI) {
|
2021-10-08 11:47:10 -07:00
|
|
|
#ifdef X86_AVAILABLE
|
|
|
|
|
if (Arch == Triple::x86_64)
|
2023-10-06 06:39:58 +00:00
|
|
|
return createX86MCPlusBuilder(Analysis, Info, RegInfo, STI);
|
2021-10-08 11:47:10 -07:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef AARCH64_AVAILABLE
|
|
|
|
|
if (Arch == Triple::aarch64)
|
2023-10-06 06:39:58 +00:00
|
|
|
return createAArch64MCPlusBuilder(Analysis, Info, RegInfo, STI);
|
2021-10-08 11:47:10 -07:00
|
|
|
#endif
|
|
|
|
|
|
2023-06-16 11:49:19 +02:00
|
|
|
#ifdef RISCV_AVAILABLE
|
|
|
|
|
if (Arch == Triple::riscv64)
|
2023-10-06 06:39:58 +00:00
|
|
|
return createRISCVMCPlusBuilder(Analysis, Info, RegInfo, STI);
|
2023-06-16 11:49:19 +02:00
|
|
|
#endif
|
|
|
|
|
|
2021-10-08 11:47:10 -07:00
|
|
|
llvm_unreachable("architecture unsupported by MCPlusBuilder");
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-26 23:45:46 +03:00
|
|
|
} // namespace bolt
|
|
|
|
|
} // namespace llvm
|
|
|
|
|
|
2023-04-18 18:14:21 -04:00
|
|
|
// Shorthand for the program-header record type exposed by ELF64LEFile.
using ELF64LEPhdrTy = ELF64LEFile::Elf_Phdr;
|
|
|
|
|
|
2022-01-26 23:45:46 +03:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
/// Return true if \p Section holds a valid section whose name is one of the
/// entries of opts::ReorderData. An error-state \p Section never matches.
bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
  // Without a section there is no name to compare against.
  if (!Section)
    return false;

  return llvm::any_of(opts::ReorderData, [&](const std::string &Candidate) {
    return Section->getName() == Candidate;
  });
}
|
|
|
|
|
|
2021-10-08 11:47:10 -07:00
|
|
|
} // anonymous namespace
|
2016-08-11 14:23:54 -07:00
|
|
|
|
2022-02-16 20:39:59 -08:00
|
|
|
/// Factory for RewriteInstance.
///
/// Forwards all arguments to the RewriteInstance constructor. The constructor
/// reports failure through an Error out-parameter; when it does, that error
/// is returned instead of the instance.
///
/// \returns the newly constructed instance on success, or the constructor's
/// error otherwise.
Expected<std::unique_ptr<RewriteInstance>>
RewriteInstance::create(ELFObjectFileBase *File, const int Argc,
                        const char *const *Argv, StringRef ToolPath,
                        raw_ostream &Stdout, raw_ostream &Stderr) {
  // Starts out as success; the constructor overwrites it on failure.
  Error CtorError = Error::success();
  std::unique_ptr<RewriteInstance> Instance =
      std::make_unique<RewriteInstance>(File, Argc, Argv, ToolPath, Stdout,
                                        Stderr, CtorError);

  // Testing the error also marks it as checked on the success path.
  if (CtorError)
    return std::move(CtorError);

  return std::move(Instance);
}
|
|
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
|
2022-02-16 20:39:59 -08:00
|
|
|
const char *const *Argv, StringRef ToolPath,
|
2024-02-12 14:53:53 -08:00
|
|
|
raw_ostream &Stdout, raw_ostream &Stderr,
|
2022-02-16 20:39:59 -08:00
|
|
|
Error &Err)
|
2020-05-07 23:00:29 -07:00
|
|
|
: InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath),
|
2019-04-03 15:52:01 -07:00
|
|
|
SHStrTab(StringTableBuilder::ELF) {
|
2022-02-16 20:39:59 -08:00
|
|
|
ErrorAsOutParameter EAO(&Err);
|
2020-11-04 11:44:02 -08:00
|
|
|
auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile);
|
|
|
|
|
if (!ELF64LEFile) {
|
2022-02-16 20:39:59 -08:00
|
|
|
Err = createStringError(errc::not_supported,
|
|
|
|
|
"Only 64-bit LE ELF binaries are supported");
|
|
|
|
|
return;
|
2019-04-03 15:52:01 -07:00
|
|
|
}
|
2020-11-04 11:44:02 -08:00
|
|
|
|
|
|
|
|
bool IsPIC = false;
|
2020-12-01 16:29:39 -08:00
|
|
|
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
|
|
|
|
|
if (Obj.getHeader().e_type != ELF::ET_EXEC) {
|
2024-02-12 14:53:53 -08:00
|
|
|
Stdout << "BOLT-INFO: shared object or position-independent executable "
|
2020-11-04 11:44:02 -08:00
|
|
|
"detected\n";
|
|
|
|
|
IsPIC = true;
|
Adding automatic huge page support
Summary:
This patch enables automated hugify for Bolt.
When running Bolt against a binary with -hugify specified, Bolt will inject a call to a runtime library function at the entry of the binary. The runtime library calls madvise to map the hot code region into a 2M huge page. We support both new kernel with THP support and old kernels. For kernels with THP support we simply make a madvise call, while for old kernels, we first copy the code out, remap the memory with huge page, and then copy the code back.
With this change, we no longer need to manually call into hugify_self and precompile it with --hot-text. Instead, we could simply combine --hugify option with existing optimizations, and at runtime it will automatically move hot code into 2M pages.
Some details around the changes made:
1. Add a command line option to support --hugify. --hugify will automatically turn on --hot-text to get the proper hot code symbols. However, running with both --hugify and --hot-text is not allowed, since --hot-text is used on binaries that have a precompiled call to hugify_self, which contradicts the purpose of --hugify.
2. Moved the common utility functions out of instr.cpp to common.h, which will also be used by hugify.cpp. Added a few new system calls definitions.
3. Added a new class that inherits RuntimeLibrary, and implemented the necessary emit and link logic for hugify.
4. Added a simple test for hugify.
(cherry picked from FBD21384529)
2020-05-02 11:14:38 -07:00
|
|
|
}
|
2020-11-04 11:44:02 -08:00
|
|
|
|
2024-02-12 14:53:53 -08:00
|
|
|
// Make sure we don't miss any output on core dumps.
|
|
|
|
|
Stdout.SetUnbuffered();
|
|
|
|
|
Stderr.SetUnbuffered();
|
|
|
|
|
LLVM_DEBUG(dbgs().SetUnbuffered());
|
|
|
|
|
|
2024-03-30 20:43:23 -07:00
|
|
|
// Read RISCV subtarget features from input file
|
|
|
|
|
std::unique_ptr<SubtargetFeatures> Features;
|
|
|
|
|
Triple TheTriple = File->makeTriple();
|
|
|
|
|
if (TheTriple.getArch() == llvm::Triple::riscv64) {
|
|
|
|
|
Expected<SubtargetFeatures> FeaturesOrErr = File->getFeatures();
|
|
|
|
|
if (auto E = FeaturesOrErr.takeError()) {
|
|
|
|
|
Err = std::move(E);
|
|
|
|
|
return;
|
|
|
|
|
} else {
|
|
|
|
|
Features.reset(new SubtargetFeatures(*FeaturesOrErr));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 09:55:39 -07:00
|
|
|
Relocation::Arch = TheTriple.getArch();
|
2022-02-16 20:39:59 -08:00
|
|
|
auto BCOrErr = BinaryContext::createBinaryContext(
|
2024-12-06 10:22:09 +11:00
|
|
|
TheTriple, std::make_shared<orc::SymbolStringPool>(), File->getFileName(),
|
|
|
|
|
Features.get(), IsPIC,
|
2021-10-06 13:03:56 -07:00
|
|
|
DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore,
|
|
|
|
|
nullptr, opts::DWPPathName,
|
|
|
|
|
WithColor::defaultErrorHandler,
|
2024-02-12 14:53:53 -08:00
|
|
|
WithColor::defaultWarningHandler),
|
|
|
|
|
JournalingStreams{Stdout, Stderr});
|
2022-02-16 20:39:59 -08:00
|
|
|
if (Error E = BCOrErr.takeError()) {
|
|
|
|
|
Err = std::move(E);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
BC = std::move(BCOrErr.get());
|
2023-10-06 06:39:58 +00:00
|
|
|
BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(
|
|
|
|
|
createMCPlusBuilder(BC->TheTriple->getArch(), BC->MIA.get(),
|
|
|
|
|
BC->MII.get(), BC->MRI.get(), BC->STI.get())));
|
2021-10-08 11:47:10 -07:00
|
|
|
|
2022-07-06 15:53:27 -07:00
|
|
|
BAT = std::make_unique<BoltAddressTranslation>();
|
2020-11-04 11:44:02 -08:00
|
|
|
|
|
|
|
|
if (opts::UpdateDebugSections)
|
2021-03-18 13:06:18 -07:00
|
|
|
DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC);
|
2020-11-04 11:44:02 -08:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::Instrument)
|
2021-03-15 16:34:25 -07:00
|
|
|
BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
|
2021-12-23 12:38:33 -08:00
|
|
|
else if (opts::Hugify)
|
2020-12-01 16:29:39 -08:00
|
|
|
BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>());
|
2019-04-03 15:52:01 -07:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
/// Defined out of line; does no work beyond destroying the members.
RewriteInstance::~RewriteInstance() = default;
|
|
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
/// Attach the profile stored in \p Filename to this rewrite instance.
///
/// Fails with `no_such_file_or_directory` if the file does not exist, and
/// with a "multiple profiles specified" error if a profile reader has
/// already been created. Otherwise the reader implementation is chosen by
/// probing the file: DataAggregator when checkPerfDataMagic() accepts it,
/// YAMLProfileReader when isYAML() accepts it, and DataReader as the fallback.
Error RewriteInstance::setProfile(StringRef Filename) {
  const bool FileExists = sys::fs::exists(Filename);
  if (!FileExists)
    return errorCodeToError(make_error_code(errc::no_such_file_or_directory));

  // Only one profile may be set; report both file names on a second attempt.
  if (ProfileReader)
    return make_error<StringError>(Twine("multiple profiles specified: ") +
                                       ProfileReader->getFilename() + " and " +
                                       Filename,
                                   inconvertibleErrorCode());

  if (DataAggregator::checkPerfDataMagic(Filename)) {
    ProfileReader = std::make_unique<DataAggregator>(Filename);
    return Error::success();
  }

  if (YAMLProfileReader::isYAML(Filename)) {
    ProfileReader = std::make_unique<YAMLProfileReader>(Filename);
    return Error::success();
  }

  ProfileReader = std::make_unique<DataReader>(Filename);
  return Error::success();
}
|
|
|
|
|
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
/// Return true if the function \p BF should be disassembled.
|
|
|
|
|
static bool shouldDisassemble(const BinaryFunction &BF) {
|
|
|
|
|
if (BF.isPseudo())
|
2019-01-15 23:43:40 -08:00
|
|
|
return false;
|
|
|
|
|
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
if (opts::processAllFunctions())
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return !BF.isIgnored();
|
2019-01-15 23:43:40 -08:00
|
|
|
}
|
|
|
|
|
|
2023-06-20 20:43:53 -07:00
|
|
|
// Return if a section stored in the image falls into a segment address space.
|
|
|
|
|
// If not, Set \p Overlap to true if there's a partial overlap.
|
|
|
|
|
template <class ELFT>
|
|
|
|
|
static bool checkOffsets(const typename ELFT::Phdr &Phdr,
|
|
|
|
|
const typename ELFT::Shdr &Sec, bool &Overlap) {
|
|
|
|
|
// SHT_NOBITS sections don't need to have an offset inside the segment.
|
|
|
|
|
if (Sec.sh_type == ELF::SHT_NOBITS)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
// Only non-empty sections can be at the end of a segment.
|
2023-10-04 17:57:17 -07:00
|
|
|
uint64_t SectionSize = Sec.sh_size ? Sec.sh_size : 1ull;
|
|
|
|
|
AddressRange SectionAddressRange((uint64_t)Sec.sh_offset,
|
|
|
|
|
Sec.sh_offset + SectionSize);
|
2023-06-20 20:43:53 -07:00
|
|
|
AddressRange SegmentAddressRange(Phdr.p_offset,
|
|
|
|
|
Phdr.p_offset + Phdr.p_filesz);
|
|
|
|
|
if (SegmentAddressRange.contains(SectionAddressRange))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
Overlap = SegmentAddressRange.intersects(SectionAddressRange);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check that an allocatable section belongs to a virtual address
|
|
|
|
|
// space of a segment.
|
|
|
|
|
template <class ELFT>
|
|
|
|
|
static bool checkVMA(const typename ELFT::Phdr &Phdr,
|
|
|
|
|
const typename ELFT::Shdr &Sec, bool &Overlap) {
|
|
|
|
|
// Only non-empty sections can be at the end of a segment.
|
2023-10-04 17:57:17 -07:00
|
|
|
uint64_t SectionSize = Sec.sh_size ? Sec.sh_size : 1ull;
|
|
|
|
|
AddressRange SectionAddressRange((uint64_t)Sec.sh_addr,
|
|
|
|
|
Sec.sh_addr + SectionSize);
|
2023-06-20 20:43:53 -07:00
|
|
|
AddressRange SegmentAddressRange(Phdr.p_vaddr, Phdr.p_vaddr + Phdr.p_memsz);
|
|
|
|
|
|
|
|
|
|
if (SegmentAddressRange.contains(SectionAddressRange))
|
|
|
|
|
return true;
|
|
|
|
|
Overlap = SegmentAddressRange.intersects(SectionAddressRange);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::markGnuRelroSections() {
|
|
|
|
|
using ELFT = ELF64LE;
|
|
|
|
|
using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
|
|
|
|
|
auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
|
|
|
|
|
const ELFFile<ELFT> &Obj = ELF64LEFile->getELFFile();
|
|
|
|
|
|
|
|
|
|
auto handleSection = [&](const ELFT::Phdr &Phdr, SectionRef SecRef) {
|
|
|
|
|
BinarySection *BinarySection = BC->getSectionForSectionRef(SecRef);
|
|
|
|
|
// If the section is non-allocatable, ignore it for GNU_RELRO purposes:
|
|
|
|
|
// it can't be made read-only after runtime relocations processing.
|
|
|
|
|
if (!BinarySection || !BinarySection->isAllocatable())
|
|
|
|
|
return;
|
|
|
|
|
const ELFShdrTy *Sec = cantFail(Obj.getSection(SecRef.getIndex()));
|
|
|
|
|
bool ImageOverlap{false}, VMAOverlap{false};
|
|
|
|
|
bool ImageContains = checkOffsets<ELFT>(Phdr, *Sec, ImageOverlap);
|
|
|
|
|
bool VMAContains = checkVMA<ELFT>(Phdr, *Sec, VMAOverlap);
|
|
|
|
|
if (ImageOverlap) {
|
|
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: GNU_RELRO segment has partial file offset "
|
|
|
|
|
<< "overlap with section " << BinarySection->getName()
|
|
|
|
|
<< '\n';
|
2023-06-20 20:43:53 -07:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (VMAOverlap) {
|
|
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: GNU_RELRO segment has partial VMA overlap "
|
|
|
|
|
<< "with section " << BinarySection->getName() << '\n';
|
2023-06-20 20:43:53 -07:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (!ImageContains || !VMAContains)
|
|
|
|
|
return;
|
|
|
|
|
BinarySection->setRelro();
|
|
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: marking " << BinarySection->getName()
|
|
|
|
|
<< " as GNU_RELRO\n";
|
2023-06-20 20:43:53 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
for (const ELFT::Phdr &Phdr : cantFail(Obj.program_headers()))
|
|
|
|
|
if (Phdr.p_type == ELF::PT_GNU_RELRO)
|
|
|
|
|
for (SectionRef SecRef : InputFile->sections())
|
|
|
|
|
handleSection(Phdr, SecRef);
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-23 19:30:30 -08:00
|
|
|
Error RewriteInstance::discoverStorage() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName,
|
|
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
2017-11-14 16:51:24 -08:00
|
|
|
|
2023-04-20 18:11:54 -04:00
|
|
|
auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
|
2021-04-08 00:19:26 -07:00
|
|
|
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
BC->StartFunctionAddress = Obj.getHeader().e_entry;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2016-02-08 10:02:48 -08:00
|
|
|
NextAvailableAddress = 0;
|
2016-02-12 19:01:53 -08:00
|
|
|
uint64_t NextAvailableOffset = 0;
|
2022-02-23 19:30:30 -08:00
|
|
|
Expected<ELF64LE::PhdrRange> PHsOrErr = Obj.program_headers();
|
|
|
|
|
if (Error E = PHsOrErr.takeError())
|
|
|
|
|
return E;
|
|
|
|
|
|
|
|
|
|
ELF64LE::PhdrRange PHs = PHsOrErr.get();
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const ELF64LE::Phdr &Phdr : PHs) {
|
2021-06-19 04:08:35 +08:00
|
|
|
switch (Phdr.p_type) {
|
|
|
|
|
case ELF::PT_LOAD:
|
2018-10-02 17:16:26 -07:00
|
|
|
BC->FirstAllocAddress = std::min(BC->FirstAllocAddress,
|
|
|
|
|
static_cast<uint64_t>(Phdr.p_vaddr));
|
2016-02-08 10:02:48 -08:00
|
|
|
NextAvailableAddress = std::max(NextAvailableAddress,
|
|
|
|
|
Phdr.p_vaddr + Phdr.p_memsz);
|
2016-02-12 19:01:53 -08:00
|
|
|
NextAvailableOffset = std::max(NextAvailableOffset,
|
|
|
|
|
Phdr.p_offset + Phdr.p_filesz);
|
2017-01-17 15:49:59 -08:00
|
|
|
|
2025-07-02 11:22:12 -07:00
|
|
|
BC->SegmentMapInfo[Phdr.p_vaddr] =
|
|
|
|
|
SegmentInfo{Phdr.p_vaddr,
|
|
|
|
|
Phdr.p_memsz,
|
|
|
|
|
Phdr.p_offset,
|
|
|
|
|
Phdr.p_filesz,
|
|
|
|
|
Phdr.p_align,
|
|
|
|
|
(Phdr.p_flags & ELF::PF_X) != 0,
|
|
|
|
|
(Phdr.p_flags & ELF::PF_W) != 0};
|
2024-01-30 18:04:29 -08:00
|
|
|
if (BC->TheTriple->getArch() == llvm::Triple::x86_64 &&
|
|
|
|
|
Phdr.p_vaddr >= BinaryContext::KernelStartX86_64)
|
|
|
|
|
BC->IsLinuxKernel = true;
|
2021-06-19 04:08:35 +08:00
|
|
|
break;
|
|
|
|
|
case ELF::PT_INTERP:
|
|
|
|
|
BC->HasInterpHeader = true;
|
|
|
|
|
break;
|
2016-02-08 10:02:48 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-30 18:04:29 -08:00
|
|
|
if (BC->IsLinuxKernel)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: Linux kernel binary detected\n";
|
2024-01-30 18:04:29 -08:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const SectionRef &Section : InputFile->sections()) {
|
2022-02-23 19:30:30 -08:00
|
|
|
Expected<StringRef> SectionNameOrErr = Section.getName();
|
|
|
|
|
if (Error E = SectionNameOrErr.takeError())
|
|
|
|
|
return E;
|
|
|
|
|
StringRef SectionName = SectionNameOrErr.get();
|
2024-04-11 06:29:51 -04:00
|
|
|
if (SectionName == BC->getMainCodeSectionName()) {
|
2017-09-20 10:43:01 -07:00
|
|
|
BC->OldTextSectionAddress = Section.getAddress();
|
|
|
|
|
BC->OldTextSectionSize = Section.getSize();
|
2020-02-24 17:12:41 -08:00
|
|
|
|
2022-02-23 19:30:30 -08:00
|
|
|
Expected<StringRef> SectionContentsOrErr = Section.getContents();
|
|
|
|
|
if (Error E = SectionContentsOrErr.takeError())
|
|
|
|
|
return E;
|
|
|
|
|
StringRef SectionContents = SectionContentsOrErr.get();
|
2017-09-20 10:43:01 -07:00
|
|
|
BC->OldTextSectionOffset =
|
2021-12-14 16:52:51 -08:00
|
|
|
SectionContents.data() - InputFile->getData().data();
|
2017-02-07 15:31:14 -08:00
|
|
|
}
|
|
|
|
|
|
2020-07-16 17:35:55 -07:00
|
|
|
if (!opts::HeatmapMode &&
|
2019-04-12 17:33:46 -07:00
|
|
|
!(opts::AggregateOnly && BAT->enabledFor(InputFile)) &&
|
2023-12-13 23:34:49 -08:00
|
|
|
(SectionName.starts_with(getOrgSecPrefix()) ||
|
2022-02-23 19:30:30 -08:00
|
|
|
SectionName == getBOLTTextSectionName()))
|
|
|
|
|
return createStringError(
|
|
|
|
|
errc::function_not_supported,
|
|
|
|
|
"BOLT-ERROR: input file was processed by BOLT. Cannot re-optimize");
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
2022-02-23 19:30:30 -08:00
|
|
|
if (!NextAvailableAddress || !NextAvailableOffset)
|
|
|
|
|
return createStringError(errc::executable_format_error,
|
|
|
|
|
"no PT_LOAD pheader seen");
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: first alloc address is 0x"
|
|
|
|
|
<< Twine::utohexstr(BC->FirstAllocAddress) << '\n';
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
FirstNonAllocatableOffset = NextAvailableOffset;
|
|
|
|
|
|
2025-04-18 21:02:09 -07:00
|
|
|
if (opts::CustomAllocationVMA) {
|
|
|
|
|
// If user specified a custom address where we should start writing new
|
|
|
|
|
// data, honor that.
|
|
|
|
|
NextAvailableAddress = opts::CustomAllocationVMA;
|
|
|
|
|
// Sanity check the user-supplied address and emit warnings if something
|
|
|
|
|
// seems off.
|
|
|
|
|
for (const ELF64LE::Phdr &Phdr : PHs) {
|
|
|
|
|
switch (Phdr.p_type) {
|
|
|
|
|
case ELF::PT_LOAD:
|
|
|
|
|
if (NextAvailableAddress >= Phdr.p_vaddr &&
|
|
|
|
|
NextAvailableAddress < Phdr.p_vaddr + Phdr.p_memsz) {
|
|
|
|
|
BC->errs() << "BOLT-WARNING: user-supplied allocation vma 0x"
|
|
|
|
|
<< Twine::utohexstr(NextAvailableAddress)
|
|
|
|
|
<< " conflicts with ELF segment at 0x"
|
|
|
|
|
<< Twine::utohexstr(Phdr.p_vaddr) << "\n";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-09-24 20:58:31 -07:00
|
|
|
NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign);
|
|
|
|
|
NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign);
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2022-06-27 12:37:53 +00:00
|
|
|
// Hugify: Additional huge page from left side due to
|
|
|
|
|
// weird ASLR mapping addresses (4KB aligned)
|
2025-04-15 12:59:05 +01:00
|
|
|
if (opts::Hugify && !BC->HasFixedLoadAddress) {
|
2022-06-27 12:37:53 +00:00
|
|
|
NextAvailableAddress += BC->PageAlign;
|
2025-04-15 12:59:05 +01:00
|
|
|
}
|
2022-06-27 12:37:53 +00:00
|
|
|
|
2025-06-26 12:09:11 -07:00
|
|
|
NewTextSegmentAddress = NextAvailableAddress;
|
|
|
|
|
NewTextSegmentOffset = NextAvailableOffset;
|
|
|
|
|
|
2024-01-30 18:04:29 -08:00
|
|
|
if (!opts::UseGnuStack && !BC->IsLinuxKernel) {
|
2016-02-12 19:01:53 -08:00
|
|
|
// This is where the black magic happens. Creating PHDR table in a segment
|
|
|
|
|
// other than that containing ELF header is tricky. Some loaders and/or
|
|
|
|
|
// parts of loaders will apply e_phoff from ELF header assuming both are in
|
|
|
|
|
// the same segment, while others will do the proper calculation.
|
|
|
|
|
// We create the new PHDR table in such a way that both of the methods
|
|
|
|
|
// of loading and locating the table work. There's a slight file size
|
|
|
|
|
// overhead because of that.
|
2016-03-03 10:13:11 -08:00
|
|
|
//
|
|
|
|
|
// NB: bfd's strip command cannot do the above and will corrupt the
|
|
|
|
|
// binary during the process of stripping non-allocatable sections.
|
2021-12-23 12:38:33 -08:00
|
|
|
if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress)
|
2018-10-02 17:16:26 -07:00
|
|
|
NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress;
|
2021-12-23 12:38:33 -08:00
|
|
|
else
|
2018-10-02 17:16:26 -07:00
|
|
|
NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress;
|
2021-12-23 12:38:33 -08:00
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
assert(NextAvailableOffset ==
|
|
|
|
|
NextAvailableAddress - BC->FirstAllocAddress &&
|
|
|
|
|
"PHDR table address calculation error");
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: creating new program header table at address 0x"
|
|
|
|
|
<< Twine::utohexstr(NextAvailableAddress) << ", offset 0x"
|
|
|
|
|
<< Twine::utohexstr(NextAvailableOffset) << '\n';
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
PHDRTableAddress = NextAvailableAddress;
|
|
|
|
|
PHDRTableOffset = NextAvailableOffset;
|
2025-06-26 12:09:11 -07:00
|
|
|
NewTextSegmentAddress = NextAvailableAddress;
|
|
|
|
|
NewTextSegmentOffset = NextAvailableOffset;
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
// Reserve space for 3 extra pheaders.
|
2020-12-01 16:29:39 -08:00
|
|
|
unsigned Phnum = Obj.getHeader().e_phnum;
|
2016-02-12 19:01:53 -08:00
|
|
|
Phnum += 3;
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2025-02-27 16:13:57 -08:00
|
|
|
// Reserve two more pheaders to avoid having writeable and executable
|
|
|
|
|
// segment in instrumented binary.
|
|
|
|
|
if (opts::Instrument)
|
|
|
|
|
Phnum += 2;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy);
|
2021-12-14 16:52:51 -08:00
|
|
|
NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy);
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2025-06-26 12:09:11 -07:00
|
|
|
// Align at cache line.
|
|
|
|
|
NextAvailableAddress = alignTo(NextAvailableAddress, 64);
|
|
|
|
|
NextAvailableOffset = alignTo(NextAvailableOffset, 64);
|
|
|
|
|
}
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2017-08-31 11:45:37 -07:00
|
|
|
BC->LayoutStartAddress = NextAvailableAddress;
|
2020-06-26 16:52:07 -07:00
|
|
|
|
|
|
|
|
// Tools such as objcopy can strip section contents but leave header
|
|
|
|
|
// entries. Check that at least .text is mapped in the file.
|
2022-02-23 19:30:30 -08:00
|
|
|
if (!getFileOffsetForAddress(BC->OldTextSectionAddress))
|
|
|
|
|
return createStringError(errc::executable_format_error,
|
|
|
|
|
"BOLT-ERROR: input binary is not a valid ELF "
|
|
|
|
|
"executable as its text section is not "
|
|
|
|
|
"mapped to a valid segment");
|
|
|
|
|
return Error::success();
|
2016-02-08 10:02:48 -08:00
|
|
|
}
|
|
|
|
|
|
2022-02-23 19:30:30 -08:00
|
|
|
Error RewriteInstance::run() {
|
|
|
|
|
assert(BC && "failed to create a binary context");
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: Target architecture: "
|
|
|
|
|
<< Triple::getArchTypeName(
|
|
|
|
|
(llvm::Triple::ArchType)InputFile->getArch())
|
|
|
|
|
<< "\n";
|
|
|
|
|
BC->outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n";
|
2019-06-11 13:24:10 -07:00
|
|
|
|
2022-02-23 19:30:30 -08:00
|
|
|
if (Error E = discoverStorage())
|
|
|
|
|
return E;
|
2022-03-08 09:12:19 -08:00
|
|
|
if (Error E = readSpecialSections())
|
|
|
|
|
return E;
|
[BOLT][non-reloc] Change function splitting in non-relocation mode
Summary:
This diff applies to non-relocation mode mostly. In this mode, we are
limited by original function boundaries, i.e. if a function becomes
larger after optimizations (e.g. because of the newly introduced
branches) then we might not be able to write the optimized version,
unless we split the function. At the same time, we do not benefit from
function splitting as we do in the relocation mode since we are not
moving functions/fragments, and the hot code does not become more
compact.
For the reasons described above, we used to execute multiple re-write
attempts to optimize the binary and we would only split functions that
were too large to fit into their original space.
After the first attempt, we would know functions that did not fit
into their original space. Then we would re-run all our passes again
feeding back the function information and forcefully splitting
such functions. Some functions still wouldn't fit even after the
splitting (mostly because of the branch relaxation for conditional tail
calls that does not happen in non-relocation mode). Yet we have emitted
debug info as if they were successfully overwritten. That's why we had
one more stage to write the functions again, marking failed-to-emit
functions non-simple. Sadly, there was a bug in the way 2nd and 3rd
attempts interacted, and we were not splitting the functions correctly
and as a result we were emitting less optimized code.
One of the reasons we had the multi-pass rewrite scheme in place, was
that we did not have an ability to precisely estimate the code size
before the actual code emission. Recently, BinaryContext obtained such
functionality, and now we can use it instead of relying on the
multi-pass rewrite. This eliminates redundant work of re-running
the same function passes multiple times.
Because function splitting runs before a number of optimization passes
that run on post-CFG state (those rely on the splitting pass), we
cannot estimate the non-split code size with 100% accuracy. However,
it is good enough for over 99% of the cases to extract most of the
performance gains for the binary.
As a result of eliminating the multi-pass rewrite, the processing time
in non-relocation mode with `-split-functions=2` is greatly reduced.
With debug info update, it is less than half of what it used to be.
New semantics for `-split-functions=<n>`:
-split-functions - split functions into hot and cold regions
=0 - do not split any function
=1 - in non-relocation mode only split functions too large to fit
into original code space
=2 - same as 1 (backwards compatibility)
=3 - split all functions
(cherry picked from FBD17362607)
2019-09-11 15:42:22 -07:00
|
|
|
adjustCommandLineOptions();
|
|
|
|
|
discoverFileObjects();
|
2019-08-19 17:11:42 -07:00
|
|
|
|
2023-11-08 11:01:10 +00:00
|
|
|
if (opts::Instrument && !BC->IsStaticExecutable)
|
|
|
|
|
if (Error E = discoverRtFiniAddress())
|
|
|
|
|
return E;
|
|
|
|
|
|
2020-12-30 12:23:58 -08:00
|
|
|
preprocessProfileData();
|
|
|
|
|
|
2020-05-03 13:54:45 -07:00
|
|
|
selectFunctionsToProcess();
|
|
|
|
|
|
[BOLT][non-reloc] Change function splitting in non-relocation mode
Summary:
This diff applies to non-relocation mode mostly. In this mode, we are
limited by original function boundaries, i.e. if a function becomes
larger after optimizations (e.g. because of the newly introduced
branches) then we might not be able to write the optimized version,
unless we split the function. At the same time, we do not benefit from
function splitting as we do in the relocation mode since we are not
moving functions/fragments, and the hot code does not become more
compact.
For the reasons described above, we used to execute multiple re-write
attempts to optimize the binary and we would only split functions that
were too large to fit into their original space.
After the first attempt, we would know functions that did not fit
into their original space. Then we would re-run all our passes again
feeding back the function information and forcefully splitting
such functions. Some functions still wouldn't fit even after the
splitting (mostly because of the branch relaxation for conditional tail
calls that does not happen in non-relocation mode). Yet we have emitted
debug info as if they were successfully overwritten. That's why we had
one more stage to write the functions again, marking failed-to-emit
functions non-simple. Sadly, there was a bug in the way 2nd and 3rd
attempts interacted, and we were not splitting the functions correctly
and as a result we were emitting less optimized code.
One of the reasons we had the multi-pass rewrite scheme in place, was
that we did not have an ability to precisely estimate the code size
before the actual code emission. Recently, BinaryContext obtained such
functionality, and now we can use it instead of relying on the
multi-pass rewrite. This eliminates redundant work of re-running
the same function passes multiple times.
Because function splitting runs before a number of optimization passes
that run on post-CFG state (those rely on the splitting pass), we
cannot estimate the non-split code size with 100% accuracy. However,
it is good enough for over 99% of the cases to extract most of the
performance gains for the binary.
As a result of eliminating the multi-pass rewrite, the processing time
in non-relocation mode with `-split-functions=2` is greatly reduced.
With debug info update, it is less than half of what it used to be.
New semantics for `-split-functions=<n>`:
-split-functions - split functions into hot and cold regions
=0 - do not split any function
=1 - in non-relocation mode only split functions too large to fit
into original code space
=2 - same as 1 (backwards compatibility)
=3 - split all functions
(cherry picked from FBD17362607)
2019-09-11 15:42:22 -07:00
|
|
|
readDebugInfo();
|
2019-06-11 13:24:10 -07:00
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
disassembleFunctions();
|
2019-06-11 13:24:10 -07:00
|
|
|
|
2023-06-27 22:55:53 -07:00
|
|
|
processMetadataPreCFG();
|
2019-06-11 13:24:10 -07:00
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
buildFunctionsCFG();
|
2019-06-11 13:24:10 -07:00
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
processProfileData();
|
2016-04-11 17:46:18 -07:00
|
|
|
|
2024-02-15 12:49:43 -08:00
|
|
|
// Save input binary metadata if BAT section needs to be emitted
|
|
|
|
|
if (opts::EnableBAT)
|
|
|
|
|
BAT->saveMetadata(*BC);
|
|
|
|
|
|
[BOLT][non-reloc] Change function splitting in non-relocation mode
Summary:
This diff applies to non-relocation mode mostly. In this mode, we are
limited by original function boundaries, i.e. if a function becomes
larger after optimizations (e.g. because of the newly introduced
branches) then we might not be able to write the optimized version,
unless we split the function. At the same time, we do not benefit from
function splitting as we do in the relocation mode since we are not
moving functions/fragments, and the hot code does not become more
compact.
For the reasons described above, we used to execute multiple re-write
attempts to optimize the binary and we would only split functions that
were too large to fit into their original space.
After the first attempt, we would know functions that did not fit
into their original space. Then we would re-run all our passes again
feeding back the function information and forcefully splitting
such functions. Some functions still wouldn't fit even after the
splitting (mostly because of the branch relaxation for conditional tail
calls that does not happen in non-relocation mode). Yet we have emitted
debug info as if they were successfully overwritten. That's why we had
one more stage to write the functions again, marking failed-to-emit
functions non-simple. Sadly, there was a bug in the way 2nd and 3rd
attempts interacted, and we were not splitting the functions correctly
and as a result we were emitting less optimized code.
One of the reasons we had the multi-pass rewrite scheme in place was
that we did not have the ability to precisely estimate the code size
before the actual code emission. Recently, BinaryContext obtained such
functionality, and now we can use it instead of relying on the
multi-pass rewrite. This eliminates redundant work of re-running
the same function passes multiple times.
Because function splitting runs before a number of optimization passes
that run on post-CFG state (those rely on the splitting pass), we
cannot estimate the non-split code size with 100% accuracy. However,
it is good enough for over 99% of the cases to extract most of the
performance gains for the binary.
As a result of eliminating the multi-pass rewrite, the processing time
in non-relocation mode with `-split-functions=2` is greatly reduced.
With debug info update, it is less than half of what it used to be.
New semantics for `-split-functions=<n>`:
-split-functions - split functions into hot and cold regions
=0 - do not split any function
=1 - in non-relocation mode only split functions too large to fit
into original code space
=2 - same as 1 (backwards compatibility)
=3 - split all functions
(cherry picked from FBD17362607)
2019-09-11 15:42:22 -07:00
|
|
|
postProcessFunctions();
|
2017-07-25 09:11:42 -07:00
|
|
|
|
2023-07-12 21:36:29 -07:00
|
|
|
processMetadataPostCFG();
|
|
|
|
|
|
[BOLT][non-reloc] Change function splitting in non-relocation mode
Summary:
This diff applies to non-relocation mode mostly. In this mode, we are
limited by original function boundaries, i.e. if a function becomes
larger after optimizations (e.g. because of the newly introduced
branches) then we might not be able to write the optimized version,
unless we split the function. At the same time, we do not benefit from
function splitting as we do in the relocation mode since we are not
moving functions/fragments, and the hot code does not become more
compact.
For the reasons described above, we used to execute multiple re-write
attempts to optimize the binary and we would only split functions that
were too large to fit into their original space.
After the first attempt, we would know functions that did not fit
into their original space. Then we would re-run all our passes again
feeding back the function information and forcefully splitting
such functions. Some functions still wouldn't fit even after the
splitting (mostly because of the branch relaxation for conditional tail
calls that does not happen in non-relocation mode). Yet we have emitted
debug info as if they were successfully overwritten. That's why we had
one more stage to write the functions again, marking failed-to-emit
functions non-simple. Sadly, there was a bug in the way 2nd and 3rd
attempts interacted, and we were not splitting the functions correctly
and as a result we were emitting less optimized code.
One of the reasons we had the multi-pass rewrite scheme in place was
that we did not have the ability to precisely estimate the code size
before the actual code emission. Recently, BinaryContext obtained such
functionality, and now we can use it instead of relying on the
multi-pass rewrite. This eliminates redundant work of re-running
the same function passes multiple times.
Because function splitting runs before a number of optimization passes
that run on post-CFG state (those rely on the splitting pass), we
cannot estimate the non-split code size with 100% accuracy. However,
it is good enough for over 99% of the cases to extract most of the
performance gains for the binary.
As a result of eliminating the multi-pass rewrite, the processing time
in non-relocation mode with `-split-functions=2` is greatly reduced.
With debug info update, it is less than half of what it used to be.
New semantics for `-split-functions=<n>`:
-split-functions - split functions into hot and cold regions
=0 - do not split any function
=1 - in non-relocation mode only split functions too large to fit
into original code space
=2 - same as 1 (backwards compatibility)
=3 - split all functions
(cherry picked from FBD17362607)
2019-09-11 15:42:22 -07:00
|
|
|
if (opts::DiffOnly)
|
2022-02-23 19:30:30 -08:00
|
|
|
return Error::success();
|
2016-03-31 16:38:49 -07:00
|
|
|
|
2024-12-12 11:06:27 +01:00
|
|
|
if (opts::BinaryAnalysisMode) {
|
|
|
|
|
runBinaryAnalyses();
|
|
|
|
|
return Error::success();
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
preregisterSections();
|
|
|
|
|
|
[BOLT][non-reloc] Change function splitting in non-relocation mode
Summary:
This diff applies to non-relocation mode mostly. In this mode, we are
limited by original function boundaries, i.e. if a function becomes
larger after optimizations (e.g. because of the newly introduced
branches) then we might not be able to write the optimized version,
unless we split the function. At the same time, we do not benefit from
function splitting as we do in the relocation mode since we are not
moving functions/fragments, and the hot code does not become more
compact.
For the reasons described above, we used to execute multiple re-write
attempts to optimize the binary and we would only split functions that
were too large to fit into their original space.
After the first attempt, we would know functions that did not fit
into their original space. Then we would re-run all our passes again
feeding back the function information and forcefully splitting
such functions. Some functions still wouldn't fit even after the
splitting (mostly because of the branch relaxation for conditional tail
calls that does not happen in non-relocation mode). Yet we have emitted
debug info as if they were successfully overwritten. That's why we had
one more stage to write the functions again, marking failed-to-emit
functions non-simple. Sadly, there was a bug in the way 2nd and 3rd
attempts interacted, and we were not splitting the functions correctly
and as a result we were emitting less optimized code.
One of the reasons we had the multi-pass rewrite scheme in place was
that we did not have the ability to precisely estimate the code size
before the actual code emission. Recently, BinaryContext obtained such
functionality, and now we can use it instead of relying on the
multi-pass rewrite. This eliminates redundant work of re-running
the same function passes multiple times.
Because function splitting runs before a number of optimization passes
that run on post-CFG state (those rely on the splitting pass), we
cannot estimate the non-split code size with 100% accuracy. However,
it is good enough for over 99% of the cases to extract most of the
performance gains for the binary.
As a result of eliminating the multi-pass rewrite, the processing time
in non-relocation mode with `-split-functions=2` is greatly reduced.
With debug info update, it is less than half of what it used to be.
New semantics for `-split-functions=<n>`:
-split-functions - split functions into hot and cold regions
=0 - do not split any function
=1 - in non-relocation mode only split functions too large to fit
into original code space
=2 - same as 1 (backwards compatibility)
=3 - split all functions
(cherry picked from FBD17362607)
2019-09-11 15:42:22 -07:00
|
|
|
runOptimizationPasses();
|
2016-03-31 16:38:49 -07:00
|
|
|
|
2024-01-29 17:27:33 -08:00
|
|
|
finalizeMetadataPreEmit();
|
|
|
|
|
|
[BOLT][non-reloc] Change function splitting in non-relocation mode
Summary:
This diff applies to non-relocation mode mostly. In this mode, we are
limited by original function boundaries, i.e. if a function becomes
larger after optimizations (e.g. because of the newly introduced
branches) then we might not be able to write the optimized version,
unless we split the function. At the same time, we do not benefit from
function splitting as we do in the relocation mode since we are not
moving functions/fragments, and the hot code does not become more
compact.
For the reasons described above, we used to execute multiple re-write
attempts to optimize the binary and we would only split functions that
were too large to fit into their original space.
After the first attempt, we would know functions that did not fit
into their original space. Then we would re-run all our passes again
feeding back the function information and forcefully splitting
such functions. Some functions still wouldn't fit even after the
splitting (mostly because of the branch relaxation for conditional tail
calls that does not happen in non-relocation mode). Yet we have emitted
debug info as if they were successfully overwritten. That's why we had
one more stage to write the functions again, marking failed-to-emit
functions non-simple. Sadly, there was a bug in the way 2nd and 3rd
attempts interacted, and we were not splitting the functions correctly
and as a result we were emitting less optimized code.
One of the reasons we had the multi-pass rewrite scheme in place was
that we did not have the ability to precisely estimate the code size
before the actual code emission. Recently, BinaryContext obtained such
functionality, and now we can use it instead of relying on the
multi-pass rewrite. This eliminates redundant work of re-running
the same function passes multiple times.
Because function splitting runs before a number of optimization passes
that run on post-CFG state (those rely on the splitting pass), we
cannot estimate the non-split code size with 100% accuracy. However,
it is good enough for over 99% of the cases to extract most of the
performance gains for the binary.
As a result of eliminating the multi-pass rewrite, the processing time
in non-relocation mode with `-split-functions=2` is greatly reduced.
With debug info update, it is less than half of what it used to be.
New semantics for `-split-functions=<n>`:
-split-functions - split functions into hot and cold regions
=0 - do not split any function
=1 - in non-relocation mode only split functions too large to fit
into original code space
=2 - same as 1 (backwards compatibility)
=3 - split all functions
(cherry picked from FBD17362607)
2019-09-11 15:42:22 -07:00
|
|
|
emitAndLink();
|
2016-04-11 17:46:18 -07:00
|
|
|
|
2019-11-03 21:57:15 -08:00
|
|
|
updateMetadata();
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2023-11-08 11:01:10 +00:00
|
|
|
if (opts::Instrument && !BC->IsStaticExecutable)
|
|
|
|
|
updateRtFiniReloc();
|
|
|
|
|
|
2024-02-01 12:11:26 -08:00
|
|
|
if (opts::OutputFilename == "/dev/null") {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n";
|
2022-02-23 19:30:30 -08:00
|
|
|
return Error::success();
|
2024-02-01 12:11:26 -08:00
|
|
|
} else if (BC->IsLinuxKernel) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: Linux kernel support is experimental\n";
|
2020-09-15 11:42:03 -07:00
|
|
|
}
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Rewrite allocatable contents and copy non-allocatable parts with mods.
|
2015-11-23 17:54:18 -08:00
|
|
|
rewriteFile();
|
2022-02-23 19:30:30 -08:00
|
|
|
return Error::success();
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2016-03-11 11:09:34 -08:00
|
|
|
void RewriteInstance::discoverFileObjects() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT work again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize test changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("discoverFileObjects", "discover file objects",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2016-09-29 11:19:06 -07:00
|
|
|
// For local symbols we want to keep track of associated FILE symbol name for
|
|
|
|
|
// disambiguation by combined name.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
|
|
|
|
|
Expected<StringRef> NameOrError = Symbol.getName();
|
2023-12-13 23:34:49 -08:00
|
|
|
if (NameOrError && NameOrError->starts_with("__asan_init")) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: input file was compiled or linked with sanitizer "
|
|
|
|
|
"support. Cannot optimize.\n";
|
2017-02-07 15:56:00 -08:00
|
|
|
exit(1);
|
|
|
|
|
}
|
2023-12-13 23:34:49 -08:00
|
|
|
if (NameOrError && NameOrError->starts_with("__llvm_coverage_mapping")) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: input file was compiled or linked with coverage "
|
|
|
|
|
"support. Cannot optimize.\n";
|
2017-03-31 07:51:30 -07:00
|
|
|
exit(1);
|
|
|
|
|
}
|
2017-02-07 15:56:00 -08:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
|
2015-11-23 17:54:18 -08:00
|
|
|
continue;
|
|
|
|
|
|
2025-06-20 14:29:32 -07:00
|
|
|
if (cantFail(Symbol.getType()) == SymbolRef::ST_File)
|
2024-04-29 20:14:31 +02:00
|
|
|
FileSymbols.emplace_back(Symbol);
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
|
|
|
|
|
2018-09-05 14:36:52 -07:00
|
|
|
// Sort symbols in the file by value. Ignore symbols from non-allocatable
|
2023-09-17 12:58:33 -07:00
|
|
|
// sections. We memoize getAddress(), as it has rather high overhead.
|
|
|
|
|
struct SymbolInfo {
|
|
|
|
|
uint64_t Address;
|
|
|
|
|
SymbolRef Symbol;
|
|
|
|
|
};
|
|
|
|
|
std::vector<SymbolInfo> SortedSymbols;
|
2018-09-05 14:36:52 -07:00
|
|
|
auto isSymbolInMemory = [this](const SymbolRef &Sym) {
|
|
|
|
|
if (cantFail(Sym.getType()) == SymbolRef::ST_File)
|
|
|
|
|
return false;
|
2020-12-01 16:29:39 -08:00
|
|
|
if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute)
|
2018-09-05 14:36:52 -07:00
|
|
|
return true;
|
2020-12-01 16:29:39 -08:00
|
|
|
if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined)
|
2018-09-05 14:36:52 -07:00
|
|
|
return false;
|
|
|
|
|
BinarySection Section(*BC, *cantFail(Sym.getSection()));
|
|
|
|
|
return Section.isAllocatable();
|
|
|
|
|
};
|
2024-11-07 12:16:14 +00:00
|
|
|
auto checkSymbolInSection = [this](const SymbolInfo &S) {
|
|
|
|
|
// Sometimes, we encounter symbols with addresses outside their section. If
|
|
|
|
|
// such symbols happen to fall into another section, they can interfere with
|
|
|
|
|
// disassembly. Notably, this occurs with AArch64 marker symbols ($d and $t)
|
|
|
|
|
// that belong to .eh_frame, but end up pointing into .text.
|
|
|
|
|
// As a workaround, we ignore all symbols that lie outside their sections.
|
|
|
|
|
auto Section = cantFail(S.Symbol.getSection());
|
|
|
|
|
|
|
|
|
|
// Accept all absolute symbols.
|
|
|
|
|
if (Section == InputFile->section_end())
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
uint64_t SecStart = Section->getAddress();
|
|
|
|
|
uint64_t SecEnd = SecStart + Section->getSize();
|
|
|
|
|
uint64_t SymEnd = S.Address + ELFSymbolRef(S.Symbol).getSize();
|
|
|
|
|
if (S.Address >= SecStart && SymEnd <= SecEnd)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
auto SymType = cantFail(S.Symbol.getType());
|
|
|
|
|
// Skip warnings for common benign cases.
|
|
|
|
|
if (opts::Verbosity < 1 && SymType == SymbolRef::ST_Other)
|
|
|
|
|
return false; // E.g. ELF::STT_TLS.
|
|
|
|
|
|
|
|
|
|
auto SymName = S.Symbol.getName();
|
|
|
|
|
auto SecName = cantFail(S.Symbol.getSection())->getName();
|
|
|
|
|
BC->errs() << "BOLT-WARNING: ignoring symbol "
|
|
|
|
|
<< (SymName ? *SymName : "[unnamed]") << " at 0x"
|
|
|
|
|
<< Twine::utohexstr(S.Address) << ", which lies outside "
|
|
|
|
|
<< (SecName ? *SecName : "[unnamed]") << "\n";
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
};
|
2023-09-17 12:58:33 -07:00
|
|
|
for (const SymbolRef &Symbol : InputFile->symbols())
|
2024-11-07 12:16:14 +00:00
|
|
|
if (isSymbolInMemory(Symbol)) {
|
|
|
|
|
SymbolInfo SymInfo{cantFail(Symbol.getAddress()), Symbol};
|
|
|
|
|
if (checkSymbolInSection(SymInfo))
|
|
|
|
|
SortedSymbols.push_back(SymInfo);
|
|
|
|
|
}
|
2023-09-17 12:58:33 -07:00
|
|
|
|
|
|
|
|
auto CompareSymbols = [this](const SymbolInfo &A, const SymbolInfo &B) {
|
|
|
|
|
if (A.Address != B.Address)
|
|
|
|
|
return A.Address < B.Address;
|
|
|
|
|
|
|
|
|
|
const bool AMarker = BC->isMarker(A.Symbol);
|
|
|
|
|
const bool BMarker = BC->isMarker(B.Symbol);
|
2022-05-31 11:50:59 -07:00
|
|
|
if (AMarker || BMarker) {
|
|
|
|
|
return AMarker && !BMarker;
|
|
|
|
|
}
|
2021-12-14 16:52:51 -08:00
|
|
|
|
2023-09-17 12:58:33 -07:00
|
|
|
const auto AType = cantFail(A.Symbol.getType());
|
|
|
|
|
const auto BType = cantFail(B.Symbol.getType());
|
2022-05-31 11:50:59 -07:00
|
|
|
if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function)
|
|
|
|
|
return true;
|
|
|
|
|
if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
};
|
2023-09-17 12:58:33 -07:00
|
|
|
llvm::stable_sort(SortedSymbols, CompareSymbols);
|
2022-05-31 11:50:59 -07:00
|
|
|
|
2023-09-17 12:58:33 -07:00
|
|
|
auto LastSymbol = SortedSymbols.end();
|
|
|
|
|
if (!SortedSymbols.empty())
|
2022-07-26 00:07:34 -07:00
|
|
|
--LastSymbol;
|
2016-09-29 11:19:06 -07:00
|
|
|
|
2017-11-22 16:17:36 -08:00
|
|
|
// For aarch64, the ABI defines mapping symbols so we identify data in the
|
|
|
|
|
// code section (see IHI0056B). $d identifies data contents.
|
2022-05-31 11:50:59 -07:00
|
|
|
// Compilers usually merge multiple data objects in a single $d-$x interval,
|
|
|
|
|
// but we need every data object to be marked with $d. Because of that we
|
2025-08-20 14:18:56 -07:00
|
|
|
// keep track of marker symbols with all locations of data objects.
|
2022-05-31 11:50:59 -07:00
|
|
|
|
2025-08-20 14:18:56 -07:00
|
|
|
DenseMap<uint64_t, MarkerSymType> MarkerSymbols;
|
2023-09-17 12:58:33 -07:00
|
|
|
auto addExtraDataMarkerPerSymbol = [&]() {
|
|
|
|
|
bool IsData = false;
|
|
|
|
|
uint64_t LastAddr = 0;
|
|
|
|
|
for (const auto &SymInfo : SortedSymbols) {
|
|
|
|
|
if (LastAddr == SymInfo.Address) // don't repeat markers
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
MarkerSymType MarkerType = BC->getMarkerType(SymInfo.Symbol);
|
2025-07-29 12:01:06 -07:00
|
|
|
|
|
|
|
|
// Treat ST_Function as code.
|
|
|
|
|
Expected<object::SymbolRef::Type> TypeOrError = SymInfo.Symbol.getType();
|
|
|
|
|
consumeError(TypeOrError.takeError());
|
|
|
|
|
if (TypeOrError && *TypeOrError == SymbolRef::ST_Function) {
|
|
|
|
|
if (IsData) {
|
|
|
|
|
Expected<StringRef> NameOrError = SymInfo.Symbol.getName();
|
|
|
|
|
consumeError(NameOrError.takeError());
|
|
|
|
|
BC->errs() << "BOLT-WARNING: function symbol " << *NameOrError
|
|
|
|
|
<< " lacks code marker\n";
|
|
|
|
|
}
|
|
|
|
|
MarkerType = MarkerSymType::CODE;
|
|
|
|
|
}
|
|
|
|
|
|
2023-09-17 12:58:33 -07:00
|
|
|
if (MarkerType != MarkerSymType::NONE) {
|
2025-08-20 14:18:56 -07:00
|
|
|
MarkerSymbols[SymInfo.Address] = MarkerType;
|
2023-09-17 12:58:33 -07:00
|
|
|
LastAddr = SymInfo.Address;
|
|
|
|
|
IsData = MarkerType == MarkerSymType::DATA;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (IsData) {
|
2025-08-20 14:18:56 -07:00
|
|
|
MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA;
|
2023-09-17 12:58:33 -07:00
|
|
|
LastAddr = SymInfo.Address;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
2022-05-31 11:50:59 -07:00
|
|
|
|
2023-07-29 09:14:44 +02:00
|
|
|
if (BC->isAArch64() || BC->isRISCV()) {
|
2023-09-17 12:58:33 -07:00
|
|
|
addExtraDataMarkerPerSymbol();
|
2019-10-08 11:03:33 -07:00
|
|
|
LastSymbol = std::stable_partition(
|
2023-09-17 12:58:33 -07:00
|
|
|
SortedSymbols.begin(), SortedSymbols.end(),
|
|
|
|
|
[this](const SymbolInfo &S) { return !BC->isMarker(S.Symbol); });
|
|
|
|
|
if (!SortedSymbols.empty())
|
2022-07-26 00:07:34 -07:00
|
|
|
--LastSymbol;
|
2017-11-22 16:17:36 -08:00
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
BinaryFunction *PreviousFunction = nullptr;
|
2017-11-14 20:05:11 -08:00
|
|
|
unsigned AnonymousId = 0;
|
|
|
|
|
|
2023-09-17 12:58:33 -07:00
|
|
|
const auto SortedSymbolsEnd =
|
|
|
|
|
LastSymbol == SortedSymbols.end() ? LastSymbol : std::next(LastSymbol);
|
|
|
|
|
for (auto Iter = SortedSymbols.begin(); Iter != SortedSymbolsEnd; ++Iter) {
|
|
|
|
|
const SymbolRef &Symbol = Iter->Symbol;
|
2023-09-17 13:13:09 -07:00
|
|
|
const uint64_t SymbolAddress = Iter->Address;
|
|
|
|
|
const auto SymbolFlags = cantFail(Symbol.getFlags());
|
2021-04-08 00:19:26 -07:00
|
|
|
const SymbolRef::Type SymbolType = cantFail(Symbol.getType());
|
2019-10-08 11:03:33 -07:00
|
|
|
|
|
|
|
|
if (SymbolType == SymbolRef::ST_File)
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT work again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize test changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name");
|
2023-09-17 13:13:09 -07:00
|
|
|
if (SymbolAddress == 0) {
|
2019-10-08 11:03:33 -07:00
|
|
|
if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: function with 0 address seen\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-14 09:47:14 -07:00
|
|
|
// Ignore input hot markers unless in heatmap mode
|
|
|
|
|
if ((SymName == "__hot_start" || SymName == "__hot_end") &&
|
|
|
|
|
!opts::HeatmapMode)
|
2020-10-17 00:50:27 -07:00
|
|
|
continue;
|
|
|
|
|
|
2024-07-16 22:14:43 -07:00
|
|
|
FileSymRefs.emplace(SymbolAddress, Symbol);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2021-06-30 14:41:41 -07:00
|
|
|
// Skip section symbols that will be registered by disassemblePLT().
|
2023-09-17 13:13:09 -07:00
|
|
|
if (SymbolType == SymbolRef::ST_Debug) {
|
|
|
|
|
ErrorOr<BinarySection &> BSection =
|
|
|
|
|
BC->getSectionForAddress(SymbolAddress);
|
2021-06-30 14:41:41 -07:00
|
|
|
if (BSection && getPLTSectionInfo(BSection->getName()))
|
|
|
|
|
continue;
|
2020-12-01 16:29:39 -08:00
|
|
|
}
|
|
|
|
|
|
2016-07-11 18:51:13 -07:00
|
|
|
/// It is possible we are seeing a globalized local. LLVM might treat it as
|
|
|
|
|
/// a local if it has a "private global" prefix, e.g. ".L". Thus we have to
|
|
|
|
|
/// change the prefix to enforce global scope of the symbol.
|
2023-12-13 23:34:49 -08:00
|
|
|
std::string Name =
|
|
|
|
|
SymName.starts_with(BC->AsmInfo->getPrivateGlobalPrefix())
|
|
|
|
|
? "PG" + std::string(SymName)
|
|
|
|
|
: std::string(SymName);
|
2016-07-11 18:51:13 -07:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// Disambiguate all local symbols before adding to symbol table.
|
2016-07-11 18:51:13 -07:00
|
|
|
// Since we don't know if we will see a global with the same name,
|
2015-11-23 17:54:18 -08:00
|
|
|
// always modify the local name.
|
2016-07-11 18:51:13 -07:00
|
|
|
//
|
|
|
|
|
// NOTE: the naming convention for local symbols should match
|
|
|
|
|
// the one we use for profile data.
|
2015-11-23 17:54:18 -08:00
|
|
|
std::string UniqueName;
|
2016-07-11 18:51:13 -07:00
|
|
|
std::string AlternativeName;
|
2017-11-14 20:05:11 -08:00
|
|
|
if (Name.empty()) {
|
|
|
|
|
UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++);
|
2023-09-17 13:13:09 -07:00
|
|
|
} else if (SymbolFlags & SymbolRef::SF_Global) {
|
2022-10-17 19:01:46 -07:00
|
|
|
if (const BinaryData *BD = BC->getBinaryDataByName(Name)) {
|
|
|
|
|
if (BD->getSize() == ELFSymbolRef(Symbol).getSize() &&
|
2023-09-17 13:13:09 -07:00
|
|
|
BD->getAddress() == SymbolAddress) {
|
2022-10-17 19:01:46 -07:00
|
|
|
if (opts::Verbosity > 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: ignoring duplicate global symbol "
|
|
|
|
|
<< Name << "\n";
|
2022-10-17 19:01:46 -07:00
|
|
|
// Ignore duplicate entry - possibly a bug in the linker
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: bad input binary, global symbol \"" << Name
|
|
|
|
|
<< "\" is not unique\n";
|
2022-10-17 19:01:46 -07:00
|
|
|
exit(1);
|
|
|
|
|
}
|
2016-07-11 18:51:13 -07:00
|
|
|
UniqueName = Name;
|
2015-11-23 17:54:18 -08:00
|
|
|
} else {
|
2016-07-11 18:51:13 -07:00
|
|
|
// If we have a local file name, we should create 2 variants for the
|
|
|
|
|
// function name. The reason is that perf profile might have been
|
|
|
|
|
// collected on a binary that did not have the local file name (e.g. as
|
|
|
|
|
// a side effect of stripping debug info from the binary):
|
|
|
|
|
//
|
|
|
|
|
// primary: <function>/<id>
|
|
|
|
|
// alternative: <function>/<file>/<id2>
|
|
|
|
|
//
|
|
|
|
|
// The <id> field is used for disambiguation of local symbols since there
|
|
|
|
|
// could be identical function names coming from identical file names
|
|
|
|
|
// (e.g. from different directories).
|
2025-06-20 14:29:32 -07:00
|
|
|
auto SFI = llvm::upper_bound(FileSymbols, ELFSymbolRef(Symbol));
|
|
|
|
|
if (SymbolType == SymbolRef::ST_Function && SFI != FileSymbols.begin()) {
|
|
|
|
|
StringRef FileSymbolName = cantFail(SFI[-1].getName());
|
|
|
|
|
if (!FileSymbolName.empty())
|
|
|
|
|
AlternativeName = NR.uniquify(Name + "/" + FileSymbolName.str());
|
|
|
|
|
}
|
2016-07-11 18:51:13 -07:00
|
|
|
|
2020-02-17 14:37:46 -08:00
|
|
|
UniqueName = NR.uniquify(Name);
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
|
|
|
|
|
uint64_t SymbolAlignment = Symbol.getAlignment();
|
|
|
|
|
|
2024-08-07 20:52:19 -07:00
|
|
|
auto registerName = [&](uint64_t FinalSize) {
|
2017-11-14 20:05:11 -08:00
|
|
|
// Register names even if it's not a function, e.g. for an entry point.
|
2023-09-17 13:13:09 -07:00
|
|
|
BC->registerNameAtAddress(UniqueName, SymbolAddress, FinalSize,
|
2024-08-07 20:52:19 -07:00
|
|
|
SymbolAlignment, SymbolFlags);
|
2017-11-14 20:05:11 -08:00
|
|
|
if (!AlternativeName.empty())
|
2023-09-17 13:13:09 -07:00
|
|
|
BC->registerNameAtAddress(AlternativeName, SymbolAddress, FinalSize,
|
2024-08-07 20:52:19 -07:00
|
|
|
SymbolAlignment, SymbolFlags);
|
2017-11-14 20:05:11 -08:00
|
|
|
};
|
2015-11-23 17:54:18 -08:00
|
|
|
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT work again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize test changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
section_iterator Section =
|
|
|
|
|
cantFail(Symbol.getSection(), "cannot get symbol section");
|
2016-03-03 10:13:11 -08:00
|
|
|
if (Section == InputFile->section_end()) {
|
2023-06-16 11:49:19 +02:00
|
|
|
// Could be an absolute symbol. Used on RISC-V for __global_pointer$ so we
|
|
|
|
|
// need to record it to handle relocations against it. For other instances
|
|
|
|
|
// of absolute symbols, we record for pretty printing.
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(if (opts::Verbosity > 1) {
|
|
|
|
|
dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n";
|
|
|
|
|
});
|
2021-03-15 12:06:56 -07:00
|
|
|
registerName(SymbolSize);
|
2015-11-23 17:54:18 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-29 14:44:04 -07:00
|
|
|
if (SymName == getBOLTReservedStart() || SymName == getBOLTReservedEnd()) {
|
|
|
|
|
registerName(SymbolSize);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName
|
|
|
|
|
<< " for function\n");
|
2016-09-29 11:19:06 -07:00
|
|
|
|
2023-09-17 13:13:09 -07:00
|
|
|
if (SymbolAddress == Section->getAddress() + Section->getSize()) {
|
2023-03-21 13:58:36 +04:00
|
|
|
assert(SymbolSize == 0 &&
|
|
|
|
|
"unexpect non-zero sized symbol at end of section");
|
2024-08-07 20:52:19 -07:00
|
|
|
LLVM_DEBUG(
|
|
|
|
|
dbgs()
|
|
|
|
|
<< "BOLT-DEBUG: rejecting as symbol points to end of its section\n");
|
|
|
|
|
registerName(SymbolSize);
|
2023-03-21 13:58:36 +04:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2025-04-18 17:29:24 -07:00
|
|
|
if (!Section->isText() || Section->isVirtual()) {
|
2019-10-08 11:03:33 -07:00
|
|
|
assert(SymbolType != SymbolRef::ST_Function &&
|
2016-09-29 11:19:06 -07:00
|
|
|
"unexpected function inside non-code section");
|
2025-04-18 17:29:24 -07:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code or "
|
|
|
|
|
"is in nobits section\n");
|
2021-03-15 12:06:56 -07:00
|
|
|
registerName(SymbolSize);
|
2016-09-29 11:19:06 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Assembly functions could be ST_NONE with 0 size. Check that the
|
|
|
|
|
// corresponding section is a code section and they are not inside any
|
|
|
|
|
// other known function to consider them.
|
|
|
|
|
//
|
|
|
|
|
// Sometimes assembly functions are not marked as functions and neither are
|
|
|
|
|
// their local labels. The only way to tell them apart is to look at
|
|
|
|
|
// symbol scope - global vs local.
|
2019-10-08 11:03:33 -07:00
|
|
|
if (PreviousFunction && SymbolType != SymbolRef::ST_Function) {
|
2023-09-17 13:13:09 -07:00
|
|
|
if (PreviousFunction->containsAddress(SymbolAddress)) {
|
2019-10-08 11:03:33 -07:00
|
|
|
if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs()
|
|
|
|
|
<< "BOLT-DEBUG: symbol is a function local symbol\n");
|
2023-09-17 13:13:09 -07:00
|
|
|
} else if (SymbolAddress == PreviousFunction->getAddress() &&
|
|
|
|
|
!SymbolSize) {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n");
|
2019-10-08 11:03:33 -07:00
|
|
|
} else if (opts::Verbosity > 1) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: symbol " << UniqueName
|
|
|
|
|
<< " seen in the middle of function " << *PreviousFunction
|
|
|
|
|
<< ". Could be a new entry.\n";
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
2019-10-08 11:03:33 -07:00
|
|
|
registerName(SymbolSize);
|
|
|
|
|
continue;
|
|
|
|
|
} else if (PreviousFunction->getSize() == 0 &&
|
|
|
|
|
PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n");
|
2019-10-08 11:03:33 -07:00
|
|
|
registerName(SymbolSize);
|
|
|
|
|
continue;
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-09-17 13:13:09 -07:00
|
|
|
if (PreviousFunction && PreviousFunction->containsAddress(SymbolAddress) &&
|
|
|
|
|
PreviousFunction->getAddress() != SymbolAddress) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) {
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs()
|
|
|
|
|
<< "BOLT-INFO: skipping possibly another entry for function "
|
|
|
|
|
<< *PreviousFunction << " : " << UniqueName << '\n';
|
2022-10-17 14:15:52 -07:00
|
|
|
registerName(SymbolSize);
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: using " << UniqueName
|
|
|
|
|
<< " as another entry to "
|
|
|
|
|
<< "function " << *PreviousFunction << '\n';
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2020-06-22 16:16:08 -07:00
|
|
|
registerName(0);
|
|
|
|
|
|
2023-09-17 13:13:09 -07:00
|
|
|
PreviousFunction->addEntryPointAtOffset(SymbolAddress -
|
2021-12-14 16:52:51 -08:00
|
|
|
PreviousFunction->getAddress());
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
// Remove the symbol from FileSymRefs so that we can skip it from
|
|
|
|
|
// in the future.
|
2024-07-16 22:14:43 -07:00
|
|
|
auto SI = llvm::find_if(
|
|
|
|
|
llvm::make_range(FileSymRefs.equal_range(SymbolAddress)),
|
|
|
|
|
[&](auto SymIt) { return SymIt.second == Symbol; });
|
2016-09-27 19:09:38 -07:00
|
|
|
assert(SI != FileSymRefs.end() && "symbol expected to be present");
|
|
|
|
|
assert(SI->second == Symbol && "wrong symbol found");
|
|
|
|
|
FileSymRefs.erase(SI);
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-11 11:09:34 -08:00
|
|
|
// Checkout for conflicts with function data from FDEs.
|
|
|
|
|
bool IsSimple = true;
|
2023-09-17 13:13:09 -07:00
|
|
|
auto FDEI = CFIRdWrt->getFDEs().lower_bound(SymbolAddress);
|
2016-03-11 11:09:34 -08:00
|
|
|
if (FDEI != CFIRdWrt->getFDEs().end()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const dwarf::FDE &FDE = *FDEI->second;
|
2023-09-17 13:13:09 -07:00
|
|
|
if (FDEI->first != SymbolAddress) {
|
2016-03-11 11:09:34 -08:00
|
|
|
// There's no matching starting address in FDE. Make sure the previous
|
|
|
|
|
// FDE does not contain this address.
|
|
|
|
|
if (FDEI != CFIRdWrt->getFDEs().begin()) {
|
|
|
|
|
--FDEI;
|
2021-04-08 00:19:26 -07:00
|
|
|
const dwarf::FDE &PrevFDE = *FDEI->second;
|
|
|
|
|
uint64_t PrevStart = PrevFDE.getInitialLocation();
|
|
|
|
|
uint64_t PrevLength = PrevFDE.getAddressRange();
|
2023-09-17 13:13:09 -07:00
|
|
|
if (SymbolAddress > PrevStart &&
|
|
|
|
|
SymbolAddress < PrevStart + PrevLength) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: function " << UniqueName
|
|
|
|
|
<< " is in conflict with FDE ["
|
|
|
|
|
<< Twine::utohexstr(PrevStart) << ", "
|
|
|
|
|
<< Twine::utohexstr(PrevStart + PrevLength)
|
|
|
|
|
<< "). Skipping.\n";
|
2016-03-11 11:09:34 -08:00
|
|
|
IsSimple = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else if (FDE.getAddressRange() != SymbolSize) {
|
2016-09-15 15:47:10 -07:00
|
|
|
if (SymbolSize) {
|
|
|
|
|
// Function addresses match but sizes differ.
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: sizes differ for function " << UniqueName
|
|
|
|
|
<< ". FDE : " << FDE.getAddressRange()
|
|
|
|
|
<< "; symbol table : " << SymbolSize
|
|
|
|
|
<< ". Using max size.\n";
|
2016-09-02 14:15:29 -07:00
|
|
|
}
|
2016-03-11 11:09:34 -08:00
|
|
|
SymbolSize = std::max(SymbolSize, FDE.getAddressRange());
|
2023-09-17 13:13:09 -07:00
|
|
|
if (BC->getBinaryDataAtAddress(SymbolAddress)) {
|
|
|
|
|
BC->setBinaryDataSize(SymbolAddress, SymbolSize);
|
2017-11-14 20:05:11 -08:00
|
|
|
} else {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x"
|
2023-09-17 13:13:09 -07:00
|
|
|
<< Twine::utohexstr(SymbolAddress) << "\n");
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
2016-03-11 11:09:34 -08:00
|
|
|
}
|
|
|
|
|
}
|
2019-04-16 14:35:29 -07:00
|
|
|
|
2021-05-13 10:50:47 -07:00
|
|
|
BinaryFunction *BF = nullptr;
|
2019-04-03 15:52:01 -07:00
|
|
|
// Since function may not have yet obtained its real size, do a search
|
|
|
|
|
// using the list of registered functions instead of calling
|
|
|
|
|
// getBinaryFunctionAtAddress().
|
2023-09-17 13:13:09 -07:00
|
|
|
auto BFI = BC->getBinaryFunctions().find(SymbolAddress);
|
2019-04-03 15:52:01 -07:00
|
|
|
if (BFI != BC->getBinaryFunctions().end()) {
|
2016-08-11 14:23:54 -07:00
|
|
|
BF = &BFI->second;
|
2019-04-03 15:52:01 -07:00
|
|
|
// Duplicate the function name. Make sure everything matches before we add
|
2016-06-10 17:13:05 -07:00
|
|
|
// an alternative name.
|
2016-09-15 15:47:10 -07:00
|
|
|
if (SymbolSize != BF->getSize()) {
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
2021-12-23 12:38:33 -08:00
|
|
|
if (SymbolSize && BF->getSize())
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: size mismatch for duplicate entries "
|
|
|
|
|
<< *BF << " and " << UniqueName << '\n';
|
|
|
|
|
BC->outs() << "BOLT-INFO: adjusting size of function " << *BF
|
|
|
|
|
<< " old " << BF->getSize() << " new " << SymbolSize
|
|
|
|
|
<< "\n";
|
2016-09-15 15:47:10 -07:00
|
|
|
}
|
|
|
|
|
BF->setSize(std::max(SymbolSize, BF->getSize()));
|
2023-09-17 13:13:09 -07:00
|
|
|
BC->setBinaryDataSize(SymbolAddress, BF->getSize());
|
2016-06-10 17:13:05 -07:00
|
|
|
}
|
2016-08-11 14:23:54 -07:00
|
|
|
BF->addAlternativeName(UniqueName);
|
2016-06-10 17:13:05 -07:00
|
|
|
} else {
|
2023-09-17 13:13:09 -07:00
|
|
|
ErrorOr<BinarySection &> Section =
|
|
|
|
|
BC->getSectionForAddress(SymbolAddress);
|
Generate heatmap for linux kernel
Summary:
This diff handles several challenges related to heatmap generation for Linux kernel (vmlinux elf file):
- If the input binary elf file contains the section `__ksymtab`, this diff assumes that this is the linux kernel `vmlinux` file and enables an extra flag `LinuxKernelMode`
- In `LinuxKernelMode`, we only support heat map generation right now, therefore it ensures that current BOLT mode is heat map generation. Otherwise, it exits with error.
- For some Linux symbol and section combinations, BOLT may not be able to find the section for a symbol (especially symbols that specify the end of some section). For such cases, we show a warning message instead of exiting, which was the previous behavior.
- Linux kernel elf file does not contain dynamic section, therefore, we don't exit when no dynamic section is found for linux kernel binary.
- Current `ParseMMap` logic does not work with linux kernel. MMap entries for linux kernel uses `PERF_RECORD_MMAP` format instead of typical `PERF_RECORD_MMAP2` format. Since linux kernel address mapping is absolute (same as specified in the ELF file), we avoid calling `ParseMMap` in linux kernel mode.
- Linux kernel entries are registered with PID -1, therefore `BinaryMMapInfo` lookup is not required for linux kernel entries. Similarly, `adjustLBR` is also not required.
- Default max address in linux kernel mode is highest unsigned 64-bit integer instead of current 4GBs.
- Added another new parameter for heatmap, `MinAddress`, in case of Linux kernel mode which is `KernelBaseAddress`, otherwise, it is 0. While registering Heatmap sample counts from LBR entries, any address lower than this `MinAddress` is ignored.
- `IgnoreInterruptLBR` is disabled in linux kernel mode to ensure that kernel entries are processed
Currently, linux kernel heat map also include heat map for Linux kernel modules that are not part of vmlinux elf file. This is intentional to identify other potential optimization opportunities. If reviewers think, those modules should be omitted, I will disable those modules based on highest end address of a vmlinux elf section.
(cherry picked from FBD21992765)
2020-06-10 23:00:39 -07:00
|
|
|
// Skip symbols from invalid sections
|
|
|
|
|
if (!Section) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: " << UniqueName << " (0x"
|
|
|
|
|
<< Twine::utohexstr(SymbolAddress)
|
|
|
|
|
<< ") does not have any section\n";
|
Generate heatmap for linux kernel
Summary:
This diff handles several challenges related to heatmap generation for Linux kernel (vmlinux elf file):
- If the input binary elf file contains the section `__ksymtab`, this diff assumes that this is the linux kernel `vmlinux` file and enables an extra flag `LinuxKernelMode`
- In `LinuxKernelMode`, we only support heat map generation right now, therefore it ensures that current BOLT mode is heat map generation. Otherwise, it exits with error.
- For some Linux symbol and section combinations, BOLT may not be able to find the section for a symbol (especially symbols that specify the end of some section). For such cases, we show a warning message instead of exiting, which was the previous behavior.
- Linux kernel elf file does not contain dynamic section, therefore, we don't exit when no dynamic section is found for linux kernel binary.
- Current `ParseMMap` logic does not work with linux kernel. MMap entries for linux kernel uses `PERF_RECORD_MMAP` format instead of typical `PERF_RECORD_MMAP2` format. Since linux kernel address mapping is absolute (same as specified in the ELF file), we avoid calling `ParseMMap` in linux kernel mode.
- Linux kernel entries are registered with PID -1, therefore `BinaryMMapInfo` lookup is not required for linux kernel entries. Similarly, `adjustLBR` is also not required.
- Default max address in linux kernel mode is highest unsigned 64-bit integer instead of current 4GBs.
- Added another new parameter for heatmap, `MinAddress`, in case of Linux kernel mode which is `KernelBaseAddress`, otherwise, it is 0. While registering Heatmap sample counts from LBR entries, any address lower than this `MinAddress` is ignored.
- `IgnoreInterruptLBR` is disabled in linux kernel mode to ensure that kernel entries are processed
Currently, linux kernel heat map also include heat map for Linux kernel modules that are not part of vmlinux elf file. This is intentional to identify other potential optimization opportunities. If reviewers think, those modules should be omitted, I will disable those modules based on highest end address of a vmlinux elf section.
(cherry picked from FBD21992765)
2020-06-10 23:00:39 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
2019-06-26 11:06:46 -07:00
|
|
|
|
2019-06-27 03:20:17 -07:00
|
|
|
// Skip symbols from zero-sized sections.
|
|
|
|
|
if (!Section->getSize())
|
2019-06-26 11:06:46 -07:00
|
|
|
continue;
|
2019-07-12 07:25:50 -07:00
|
|
|
|
2023-09-17 13:13:09 -07:00
|
|
|
BF = BC->createBinaryFunction(UniqueName, *Section, SymbolAddress,
|
|
|
|
|
SymbolSize);
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
if (!IsSimple)
|
|
|
|
|
BF->setSimple(false);
|
2016-06-10 17:13:05 -07:00
|
|
|
}
|
2023-02-08 19:11:13 -08:00
|
|
|
|
|
|
|
|
// Check if it's a cold function fragment.
|
2024-05-30 17:32:20 -07:00
|
|
|
if (FunctionFragmentTemplate.match(SymName)) {
|
2023-02-08 19:11:13 -08:00
|
|
|
static bool PrintedWarning = false;
|
|
|
|
|
if (!PrintedWarning) {
|
|
|
|
|
PrintedWarning = true;
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: split function detected on input : "
|
|
|
|
|
<< SymName;
|
2023-02-08 19:11:13 -08:00
|
|
|
if (BC->HasRelocations)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << ". The support is limited in relocation mode\n";
|
2023-07-10 14:28:49 -07:00
|
|
|
else
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << '\n';
|
2023-02-08 19:11:13 -08:00
|
|
|
}
|
|
|
|
|
BC->HasSplitFunctions = true;
|
|
|
|
|
BF->IsFragment = true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-11 18:51:13 -07:00
|
|
|
if (!AlternativeName.empty())
|
2016-08-11 14:23:54 -07:00
|
|
|
BF->addAlternativeName(AlternativeName);
|
2016-09-29 11:19:06 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
registerName(SymbolSize);
|
2016-09-29 11:19:06 -07:00
|
|
|
PreviousFunction = BF;
|
2016-07-11 18:51:13 -07:00
|
|
|
}
|
|
|
|
|
|
2021-06-22 13:46:06 -07:00
|
|
|
// Read dynamic relocation first as their presence affects the way we process
|
|
|
|
|
// static relocations. E.g. we will ignore a static relocation at an address
|
|
|
|
|
// that is a subject to dynamic relocation processing.
|
|
|
|
|
processDynamicRelocations();
|
|
|
|
|
|
2017-08-04 11:21:05 -07:00
|
|
|
// Process PLT section.
|
2022-01-25 03:22:47 +03:00
|
|
|
disassemblePLT();
|
2017-08-04 11:21:05 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// See if we missed any functions marked by FDE.
|
|
|
|
|
for (const auto &FDEI : CFIRdWrt->getFDEs()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint64_t Address = FDEI.first;
|
|
|
|
|
const dwarf::FDE *FDE = FDEI.second;
|
|
|
|
|
const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
|
2019-04-03 15:52:01 -07:00
|
|
|
if (BF)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
BF = BC->getBinaryFunctionContainingAddress(Address);
|
|
|
|
|
if (BF) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address)
|
|
|
|
|
<< ", 0x" << Twine::utohexstr(Address + FDE->getAddressRange())
|
|
|
|
|
<< ") conflicts with function " << *BF << '\n';
|
2019-04-03 15:52:01 -07:00
|
|
|
continue;
|
2017-06-02 18:41:31 -07:00
|
|
|
}
|
2019-04-03 15:52:01 -07:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address)
|
|
|
|
|
<< ", 0x" << Twine::utohexstr(Address + FDE->getAddressRange())
|
|
|
|
|
<< ") has no corresponding symbol table entry\n";
|
2021-12-23 12:38:33 -08:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
|
2019-04-03 15:52:01 -07:00
|
|
|
assert(Section && "cannot get section for address from FDE");
|
|
|
|
|
std::string FunctionName =
|
2021-12-14 16:52:51 -08:00
|
|
|
"__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str();
|
2019-04-03 15:52:01 -07:00
|
|
|
BC->createBinaryFunction(FunctionName, *Section, Address,
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
FDE->getAddressRange());
|
2017-06-02 18:41:31 -07:00
|
|
|
}
|
|
|
|
|
|
2025-06-20 14:29:32 -07:00
|
|
|
BC->setHasSymbolsWithFileName(FileSymbols.size());
|
2016-09-29 11:19:06 -07:00
|
|
|
|
|
|
|
|
// Now that all the functions were created - adjust their boundaries.
|
2025-08-20 14:18:56 -07:00
|
|
|
adjustFunctionBoundaries(MarkerSymbols);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2017-11-22 16:17:36 -08:00
|
|
|
// Annotate functions with code/data markers in AArch64
|
2025-08-20 14:18:56 -07:00
|
|
|
for (auto &[Address, Type] : MarkerSymbols) {
|
|
|
|
|
auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true);
|
2022-05-31 11:50:59 -07:00
|
|
|
|
2017-11-22 16:17:36 -08:00
|
|
|
if (!BF) {
|
|
|
|
|
// Stray marker
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2025-08-20 14:18:56 -07:00
|
|
|
const auto EntryOffset = Address - BF->getAddress();
|
|
|
|
|
if (Type == MarkerSymType::CODE) {
|
2017-11-22 16:17:36 -08:00
|
|
|
BF->markCodeAtOffset(EntryOffset);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2025-08-20 14:18:56 -07:00
|
|
|
if (Type == MarkerSymType::DATA) {
|
2017-11-22 16:17:36 -08:00
|
|
|
BF->markDataAtOffset(EntryOffset);
|
2025-08-20 14:18:56 -07:00
|
|
|
BC->AddressToConstantIslandMap[Address] = BF;
|
2017-11-22 16:17:36 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
llvm_unreachable("Unknown marker");
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-10 17:09:03 +04:00
|
|
|
if (BC->isAArch64()) {
|
|
|
|
|
// Check for dynamic relocations that might be contained in
|
|
|
|
|
// constant islands.
|
|
|
|
|
for (const BinarySection &Section : BC->allocatableSections()) {
|
|
|
|
|
const uint64_t SectionAddress = Section.getAddress();
|
|
|
|
|
for (const Relocation &Rel : Section.dynamicRelocations()) {
|
|
|
|
|
const uint64_t RelAddress = SectionAddress + Rel.Offset;
|
|
|
|
|
BinaryFunction *BF =
|
|
|
|
|
BC->getBinaryFunctionContainingAddress(RelAddress,
|
|
|
|
|
/*CheckPastEnd*/ false,
|
|
|
|
|
/*UseMaxSize*/ true);
|
|
|
|
|
if (BF) {
|
|
|
|
|
assert(Rel.isRelative() && "Expected relative relocation for island");
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->logBOLTErrorsAndQuitOnFatal(
|
|
|
|
|
BF->markIslandDynamicRelocationAtAddress(RelAddress));
|
2023-02-10 17:09:03 +04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-05-29 19:24:34 -07:00
|
|
|
|
|
|
|
|
// The linker may omit data markers for absolute long veneers. Introduce
|
|
|
|
|
// those markers artificially to assist the disassembler.
|
|
|
|
|
for (BinaryFunction &BF :
|
|
|
|
|
llvm::make_second_range(BC->getBinaryFunctions())) {
|
|
|
|
|
if (BF.getOneName().starts_with("__AArch64AbsLongThunk_") &&
|
|
|
|
|
BF.getSize() == 16 && !BF.getSizeOfDataInCodeAt(8)) {
|
|
|
|
|
BC->errs() << "BOLT-WARNING: missing data marker detected in veneer "
|
|
|
|
|
<< BF << '\n';
|
|
|
|
|
BF.markDataAtOffset(8);
|
|
|
|
|
BC->AddressToConstantIslandMap[BF.getAddress() + 8] = &BF;
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-02-10 17:09:03 +04:00
|
|
|
}
|
|
|
|
|
|
2024-01-30 18:04:29 -08:00
|
|
|
if (!BC->IsLinuxKernel) {
|
2020-09-15 11:42:03 -07:00
|
|
|
// Read all relocations now that we have binary functions mapped.
|
|
|
|
|
processRelocations();
|
|
|
|
|
}
|
2023-06-28 14:35:05 -07:00
|
|
|
|
2023-02-08 19:11:13 -08:00
|
|
|
registerFragments();
|
2024-04-29 20:14:31 +02:00
|
|
|
FileSymbols.clear();
|
2024-07-16 22:14:43 -07:00
|
|
|
FileSymRefs.clear();
|
2024-05-02 13:17:29 -07:00
|
|
|
|
|
|
|
|
discoverBOLTReserved();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::discoverBOLTReserved() {
|
|
|
|
|
BinaryData *StartBD = BC->getBinaryDataByName(getBOLTReservedStart());
|
|
|
|
|
BinaryData *EndBD = BC->getBinaryDataByName(getBOLTReservedEnd());
|
|
|
|
|
if (!StartBD != !EndBD) {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: one of the symbols is missing from the binary: "
|
|
|
|
|
<< getBOLTReservedStart() << ", " << getBOLTReservedEnd()
|
|
|
|
|
<< '\n';
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!StartBD)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (StartBD->getAddress() >= EndBD->getAddress()) {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: invalid reserved space boundaries\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
BC->BOLTReserved = AddressRange(StartBD->getAddress(), EndBD->getAddress());
|
|
|
|
|
BC->outs() << "BOLT-INFO: using reserved space for allocating new sections\n";
|
|
|
|
|
|
|
|
|
|
PHDRTableOffset = 0;
|
|
|
|
|
PHDRTableAddress = 0;
|
|
|
|
|
NewTextSegmentAddress = 0;
|
|
|
|
|
NewTextSegmentOffset = 0;
|
|
|
|
|
NextAvailableAddress = BC->BOLTReserved.start();
|
2023-02-08 19:11:13 -08:00
|
|
|
}
|
|
|
|
|
|
2023-11-08 11:01:10 +00:00
|
|
|
/// Determine the address of the binary's finalization function and store it
/// in BC->FiniFunctionAddress.
///
/// BC->FiniAddress (DT_FINI) is used when set. Otherwise the value is taken
/// from the relocation at offset 0 of the section containing
/// BC->FiniArrayAddress: the addend of a dynamic relocation if there is one,
/// else the value of a static relocation.
///
/// \returns Error::success() once an address has been recorded; a
/// not_supported string error when neither DT_FINI nor a usable
/// DT_FINI_ARRAY is available or when the first slot has no relocation; or
/// the error produced by the section lookup.
Error RewriteInstance::discoverRtFiniAddress() {
  // DT_FINI takes precedence over DT_FINI_ARRAY.
  if (BC->FiniAddress) {
    BC->FiniFunctionAddress = BC->FiniAddress;
    return Error::success();
  }

  const bool HasFiniArray = BC->FiniArrayAddress && BC->FiniArraySize;
  if (!HasFiniArray)
    return createStringError(
        std::errc::not_supported,
        "Instrumentation needs either DT_FINI or DT_FINI_ARRAY");

  // The array has to be large enough to hold at least one code pointer.
  if (*BC->FiniArraySize < BC->AsmInfo->getCodePointerSize())
    return createStringError(std::errc::not_supported,
                             "Need at least 1 DT_FINI_ARRAY slot");

  ErrorOr<BinarySection &> FiniArraySection =
      BC->getSectionForAddress(*BC->FiniArrayAddress);
  if (std::error_code LookupEC = FiniArraySection.getError())
    return errorCodeToError(LookupEC);

  // Prefer the dynamic relocation for the first slot.
  const Relocation *DynamicReloc = FiniArraySection->getDynamicRelocationAt(0);
  if (DynamicReloc) {
    BC->FiniFunctionAddress = DynamicReloc->Addend;
    return Error::success();
  }

  // Otherwise fall back to a static relocation at the same offset.
  const Relocation *StaticReloc = FiniArraySection->getRelocationAt(0);
  if (StaticReloc) {
    BC->FiniFunctionAddress = StaticReloc->Value;
    return Error::success();
  }

  return createStringError(std::errc::not_supported,
                           "No relocation for first DT_FINI_ARRAY slot");
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::updateRtFiniReloc() {
|
|
|
|
|
// Updating DT_FINI is handled by patchELFDynamic.
|
|
|
|
|
if (BC->FiniAddress)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
const RuntimeLibrary *RT = BC->getRuntimeLibrary();
|
|
|
|
|
if (!RT || !RT->getRuntimeFiniAddress())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
assert(BC->FiniArrayAddress && BC->FiniArraySize &&
|
|
|
|
|
"inconsistent .fini_array state");
|
|
|
|
|
|
|
|
|
|
ErrorOr<BinarySection &> FiniArraySection =
|
|
|
|
|
BC->getSectionForAddress(*BC->FiniArrayAddress);
|
|
|
|
|
assert(FiniArraySection && ".fini_array removed");
|
|
|
|
|
|
|
|
|
|
if (std::optional<Relocation> Reloc =
|
|
|
|
|
FiniArraySection->takeDynamicRelocationAt(0)) {
|
|
|
|
|
assert(Reloc->Addend == BC->FiniFunctionAddress &&
|
|
|
|
|
"inconsistent .fini_array dynamic relocation");
|
|
|
|
|
Reloc->Addend = RT->getRuntimeFiniAddress();
|
|
|
|
|
FiniArraySection->addDynamicRelocation(*Reloc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Update the static relocation by adding a pending relocation which will get
|
|
|
|
|
// patched when flushPendingRelocations is called in rewriteFile. Note that
|
|
|
|
|
// flushPendingRelocations will calculate the value to patch as
|
|
|
|
|
// "Symbol + Addend". Since we don't have a symbol, just set the addend to the
|
|
|
|
|
// desired value.
|
|
|
|
|
FiniArraySection->addPendingRelocation(Relocation{
|
|
|
|
|
/*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
|
|
|
|
|
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-08 19:11:13 -08:00
|
|
|
void RewriteInstance::registerFragments() {
|
2025-05-13 13:23:18 -07:00
|
|
|
if (!BC->HasSplitFunctions ||
|
|
|
|
|
opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive)
|
2023-02-08 19:11:13 -08:00
|
|
|
return;
|
|
|
|
|
|
2024-04-29 20:14:31 +02:00
|
|
|
// Process fragments with ambiguous parents separately as they are typically a
|
|
|
|
|
// vanishing minority of cases and require expensive symbol table lookups.
|
|
|
|
|
std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
|
2023-02-08 19:11:13 -08:00
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
|
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
if (!Function.isFragment())
|
|
|
|
|
continue;
|
|
|
|
|
for (StringRef Name : Function.getNames()) {
|
2024-04-29 20:14:31 +02:00
|
|
|
StringRef BaseName = NR.restore(Name);
|
|
|
|
|
const bool IsGlobal = BaseName == Name;
|
2024-05-30 17:32:20 -07:00
|
|
|
SmallVector<StringRef> Matches;
|
|
|
|
|
if (!FunctionFragmentTemplate.match(BaseName, &Matches))
|
2023-02-08 19:11:13 -08:00
|
|
|
continue;
|
2024-05-30 17:32:20 -07:00
|
|
|
StringRef ParentName = Matches[1];
|
2023-02-08 19:11:13 -08:00
|
|
|
const BinaryData *BD = BC->getBinaryDataByName(ParentName);
|
2024-04-29 20:14:31 +02:00
|
|
|
const uint64_t NumPossibleLocalParents =
|
|
|
|
|
NR.getUniquifiedNameCount(ParentName);
|
|
|
|
|
// The most common case: single local parent fragment.
|
|
|
|
|
if (!BD && NumPossibleLocalParents == 1) {
|
|
|
|
|
BD = BC->getBinaryDataByName(NR.getUniqueName(ParentName, 1));
|
|
|
|
|
} else if (BD && (!NumPossibleLocalParents || IsGlobal)) {
|
|
|
|
|
// Global parent and either no local candidates (second most common), or
|
|
|
|
|
// the fragment is global as well (uncommon).
|
|
|
|
|
} else {
|
|
|
|
|
// Any other case: need to disambiguate using FILE symbols.
|
|
|
|
|
AmbiguousFragments.emplace_back(ParentName, &Function);
|
2023-02-08 19:11:13 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
2024-04-29 20:14:31 +02:00
|
|
|
if (BD) {
|
|
|
|
|
BinaryFunction *BF = BC->getFunctionForSymbol(BD->getSymbol());
|
|
|
|
|
if (BF) {
|
|
|
|
|
BC->registerFragment(Function, *BF);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2023-02-08 19:11:13 -08:00
|
|
|
}
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: parent function not found for " << Function
|
|
|
|
|
<< '\n';
|
2023-02-08 19:11:13 -08:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-04-29 20:14:31 +02:00
|
|
|
|
|
|
|
|
if (AmbiguousFragments.empty())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (!BC->hasSymbolsWithFileName()) {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: input file has split functions but does not "
|
|
|
|
|
"have FILE symbols. If the binary was stripped, preserve "
|
2024-05-22 13:57:52 -07:00
|
|
|
"FILE symbols with --keep-file-symbols strip option\n";
|
2024-04-29 20:14:31 +02:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The first global symbol is identified by the symbol table sh_info value.
|
|
|
|
|
// Used as local symbol search stopping point.
|
|
|
|
|
auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
|
|
|
|
|
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
|
|
|
|
|
auto *SymTab = llvm::find_if(cantFail(Obj.sections()), [](const auto &Sec) {
|
|
|
|
|
return Sec.sh_type == ELF::SHT_SYMTAB;
|
|
|
|
|
});
|
|
|
|
|
assert(SymTab);
|
|
|
|
|
// Symtab sh_info contains the value one greater than the symbol table index
|
|
|
|
|
// of the last local symbol.
|
|
|
|
|
ELFSymbolRef LocalSymEnd = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info);
|
|
|
|
|
|
2024-07-17 23:17:12 +04:00
|
|
|
for (auto &Fragment : AmbiguousFragments) {
|
|
|
|
|
const StringRef &ParentName = Fragment.first;
|
|
|
|
|
BinaryFunction *BF = Fragment.second;
|
2024-04-29 20:14:31 +02:00
|
|
|
const uint64_t Address = BF->getAddress();
|
|
|
|
|
|
|
|
|
|
// Get fragment's own symbol
|
2024-07-16 22:14:43 -07:00
|
|
|
const auto SymIt = llvm::find_if(
|
|
|
|
|
llvm::make_range(FileSymRefs.equal_range(Address)), [&](auto SI) {
|
|
|
|
|
StringRef Name = cantFail(SI.second.getName());
|
|
|
|
|
return Name.contains(ParentName);
|
|
|
|
|
});
|
2024-04-29 20:14:31 +02:00
|
|
|
if (SymIt == FileSymRefs.end()) {
|
|
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: symbol lookup failed for function at address 0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << '\n';
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Find containing FILE symbol
|
|
|
|
|
ELFSymbolRef Symbol = SymIt->second;
|
|
|
|
|
auto FSI = llvm::upper_bound(FileSymbols, Symbol);
|
|
|
|
|
if (FSI == FileSymbols.begin()) {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: owning FILE symbol not found for symbol "
|
|
|
|
|
<< cantFail(Symbol.getName()) << '\n';
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ELFSymbolRef StopSymbol = LocalSymEnd;
|
|
|
|
|
if (FSI != FileSymbols.end())
|
|
|
|
|
StopSymbol = *FSI;
|
|
|
|
|
|
|
|
|
|
uint64_t ParentAddress{0};
|
2024-04-30 01:18:13 +02:00
|
|
|
|
2025-06-20 12:46:56 -07:00
|
|
|
// Check if containing FILE symbol is BOLT emitted synthetic symbol marking
|
|
|
|
|
// local fragments of global parents.
|
|
|
|
|
if (cantFail(FSI[-1].getName()) == getBOLTFileSymbolName())
|
|
|
|
|
goto registerParent;
|
|
|
|
|
|
2024-04-30 01:18:13 +02:00
|
|
|
// BOLT split fragment symbols are emitted just before the main function
|
|
|
|
|
// symbol.
|
|
|
|
|
for (ELFSymbolRef NextSymbol = Symbol; NextSymbol < StopSymbol;
|
|
|
|
|
NextSymbol.moveNext()) {
|
|
|
|
|
StringRef Name = cantFail(NextSymbol.getName());
|
|
|
|
|
if (Name == ParentName) {
|
|
|
|
|
ParentAddress = cantFail(NextSymbol.getValue());
|
|
|
|
|
goto registerParent;
|
|
|
|
|
}
|
|
|
|
|
if (Name.starts_with(ParentName))
|
|
|
|
|
// With multi-way splitting, there are multiple fragments with different
|
|
|
|
|
// suffixes. Parent follows the last fragment.
|
|
|
|
|
continue;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-29 20:14:31 +02:00
|
|
|
// Iterate over local file symbols and check symbol names to match parent.
|
|
|
|
|
for (ELFSymbolRef Symbol(FSI[-1]); Symbol < StopSymbol; Symbol.moveNext()) {
|
|
|
|
|
if (cantFail(Symbol.getName()) == ParentName) {
|
|
|
|
|
ParentAddress = cantFail(Symbol.getAddress());
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-30 01:18:13 +02:00
|
|
|
registerParent:
|
2024-04-29 20:14:31 +02:00
|
|
|
// No local parent is found, use global parent function.
|
|
|
|
|
if (!ParentAddress)
|
|
|
|
|
if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName))
|
|
|
|
|
ParentAddress = ParentBD->getAddress();
|
|
|
|
|
|
|
|
|
|
if (BinaryFunction *ParentBF =
|
|
|
|
|
BC->getBinaryFunctionAtAddress(ParentAddress)) {
|
|
|
|
|
BC->registerFragment(*BF, *ParentBF);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
BC->errs() << "BOLT-ERROR: parent function not found for " << *BF << '\n';
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
|
|
|
|
|
2022-01-25 03:22:47 +03:00
|
|
|
void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
|
|
|
|
|
uint64_t EntryAddress,
|
|
|
|
|
uint64_t EntrySize) {
|
|
|
|
|
if (!TargetAddress)
|
|
|
|
|
return;
|
2021-06-30 14:41:41 -07:00
|
|
|
|
2022-03-03 00:34:41 +03:00
|
|
|
auto setPLTSymbol = [&](BinaryFunction *BF, StringRef Name) {
|
|
|
|
|
const unsigned PtrSize = BC->AsmInfo->getCodePointerSize();
|
|
|
|
|
MCSymbol *TargetSymbol = BC->registerNameAtAddress(
|
|
|
|
|
Name.str() + "@GOT", TargetAddress, PtrSize, PtrSize);
|
|
|
|
|
BF->setPLTSymbol(TargetSymbol);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
BinaryFunction *BF = BC->getBinaryFunctionAtAddress(EntryAddress);
|
|
|
|
|
if (BF && BC->isAArch64()) {
|
2023-11-08 11:41:43 +04:00
|
|
|
// Handle IFUNC trampoline with symbol
|
2022-03-03 00:34:41 +03:00
|
|
|
setPLTSymbol(BF, BF->getOneName());
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-25 03:22:47 +03:00
|
|
|
const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress);
|
2023-11-08 11:41:43 +04:00
|
|
|
if (!Rel)
|
2022-01-25 03:22:47 +03:00
|
|
|
return;
|
2021-07-14 01:35:34 -07:00
|
|
|
|
2023-11-08 11:41:43 +04:00
|
|
|
MCSymbol *Symbol = Rel->Symbol;
|
|
|
|
|
if (!Symbol) {
|
2024-09-23 18:22:43 +08:00
|
|
|
if (BC->isRISCV() || !Rel->Addend || !Rel->isIRelative())
|
2023-11-08 11:41:43 +04:00
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
// IFUNC trampoline without symbol
|
|
|
|
|
BinaryFunction *TargetBF = BC->getBinaryFunctionAtAddress(Rel->Addend);
|
|
|
|
|
if (!TargetBF) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
2023-11-08 11:41:43 +04:00
|
|
|
<< "BOLT-WARNING: Expected BF to be presented as IFUNC resolver at "
|
|
|
|
|
<< Twine::utohexstr(Rel->Addend) << ", skipping\n";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Symbol = TargetBF->getSymbol();
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-25 03:22:47 +03:00
|
|
|
ErrorOr<BinarySection &> Section = BC->getSectionForAddress(EntryAddress);
|
|
|
|
|
assert(Section && "cannot get section for address");
|
2022-10-24 17:54:25 -07:00
|
|
|
if (!BF)
|
2023-11-08 11:41:43 +04:00
|
|
|
BF = BC->createBinaryFunction(Symbol->getName().str() + "@PLT", *Section,
|
|
|
|
|
EntryAddress, 0, EntrySize,
|
2022-10-24 17:54:25 -07:00
|
|
|
Section->getAlignment());
|
|
|
|
|
else
|
2023-11-08 11:41:43 +04:00
|
|
|
BF->addAlternativeName(Symbol->getName().str() + "@PLT");
|
|
|
|
|
setPLTSymbol(BF, Symbol->getName());
|
2022-01-25 03:22:47 +03:00
|
|
|
}
|
2021-06-30 14:41:41 -07:00
|
|
|
|
2024-02-01 08:26:21 -08:00
|
|
|
void RewriteInstance::disassemblePLTInstruction(const BinarySection &Section,
|
|
|
|
|
uint64_t InstrOffset,
|
|
|
|
|
MCInst &Instruction,
|
|
|
|
|
uint64_t &InstrSize) {
|
2022-01-25 03:22:47 +03:00
|
|
|
const uint64_t SectionAddress = Section.getAddress();
|
|
|
|
|
const uint64_t SectionSize = Section.getSize();
|
|
|
|
|
StringRef PLTContents = Section.getContents();
|
|
|
|
|
ArrayRef<uint8_t> PLTData(
|
|
|
|
|
reinterpret_cast<const uint8_t *>(PLTContents.data()), SectionSize);
|
|
|
|
|
|
2024-02-01 08:26:21 -08:00
|
|
|
const uint64_t InstrAddr = SectionAddress + InstrOffset;
|
|
|
|
|
if (!BC->DisAsm->getInstruction(Instruction, InstrSize,
|
|
|
|
|
PLTData.slice(InstrOffset), InstrAddr,
|
|
|
|
|
nulls())) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: unable to disassemble instruction in PLT section "
|
|
|
|
|
<< Section.getName() << formatv(" at offset {0:x}\n", InstrOffset);
|
2024-02-01 08:26:21 -08:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Scan the AArch64 PLT section \p Section entry by entry.
///
/// An entry is the run of instructions up to and including the first indirect
/// branch. Each complete entry is handed to analyzePLTEntry() to obtain its
/// target address and then to createPLTBinaryFunction(). No-op instructions
/// following an entry are skipped before the next entry starts.
void RewriteInstance::disassemblePLTSectionAArch64(BinarySection &Section) {
  const uint64_t SectionStart = Section.getAddress();
  const uint64_t SectionBytes = Section.getSize();

  uint64_t Offset = 0;
  // Locate new plt entry
  while (Offset < SectionBytes) {
    InstructionListType EntryInstrs;
    MCInst Inst;
    const uint64_t EntryStartOffset = Offset;
    uint64_t EntryBytes = 0;
    // Always assigned by the first iteration below, which is guaranteed to
    // run because Offset < SectionBytes holds here.
    uint64_t InstSize;

    // Loop through entry instructions
    for (; Offset < SectionBytes; Offset += InstSize) {
      disassemblePLTInstruction(Section, Offset, Inst, InstSize);
      EntryBytes += InstSize;

      if (BC->MIB->isIndirectBranch(Inst)) {
        // The indirect branch terminates the entry; the instructions
        // collected before it are passed along for analysis.
        const uint64_t EntryAddress = SectionStart + EntryStartOffset;
        const uint64_t TargetAddress = BC->MIB->analyzePLTEntry(
            Inst, EntryInstrs.begin(), EntryInstrs.end(), EntryAddress);
        createPLTBinaryFunction(TargetAddress, EntryAddress, EntryBytes);
        break;
      }

      EntryInstrs.emplace_back(Inst);
    }

    // Branch instruction
    Offset += InstSize;

    // Skip nops if any
    for (; Offset < SectionBytes; Offset += InstSize) {
      disassemblePLTInstruction(Section, Offset, Inst, InstSize);
      if (!BC->MIB->isNoop(Inst))
        break;
    }
  }
}
|
|
|
|
|
|
2023-06-16 11:49:19 +02:00
|
|
|
/// Scan the RISC-V PLT section \p Section.
///
/// The first 32 bytes are skipped, after which the section is processed in
/// fixed 16-byte entries. All instructions of an entry are decoded and passed
/// to analyzePLTEntry() to obtain the target address, which is then handed to
/// createPLTBinaryFunction().
///
/// Decoding goes through the shared disassemblePLTInstruction() helper (as the
/// AArch64 and X86 walkers do) rather than a private copy of the same logic;
/// the helper prints an error and exits on a decoding failure.
void RewriteInstance::disassemblePLTSectionRISCV(BinarySection &Section) {
  const uint64_t SectionAddress = Section.getAddress();
  const uint64_t SectionSize = Section.getSize();

  // Skip the first special entry since no relocation points to it.
  uint64_t InstrOffset = 32;

  while (InstrOffset < SectionSize) {
    InstructionListType Instructions;
    MCInst Instruction;
    const uint64_t EntryOffset = InstrOffset;
    const uint64_t EntrySize = 16;
    uint64_t InstrSize;

    // Decode every instruction of this entry. After the loop, Instruction
    // holds the last decoded instruction, which is also the final element of
    // Instructions.
    while (InstrOffset < EntryOffset + EntrySize) {
      disassemblePLTInstruction(Section, InstrOffset, Instruction, InstrSize);
      Instructions.emplace_back(Instruction);
      InstrOffset += InstrSize;
    }

    const uint64_t EntryAddress = SectionAddress + EntryOffset;
    const uint64_t TargetAddress = BC->MIB->analyzePLTEntry(
        Instruction, Instructions.begin(), Instructions.end(), EntryAddress);

    createPLTBinaryFunction(TargetAddress, EntryAddress, EntrySize);
  }
}
|
|
|
|
|
|
2022-01-25 03:22:47 +03:00
|
|
|
/// Scan the X86 PLT section \p Section in entries of \p EntrySize bytes.
///
/// Within each entry, instructions are decoded until an indirect branch is
/// found. The memory operand target of that branch is evaluated and passed to
/// createPLTBinaryFunction() together with the entry address and size. Entries
/// in which no indirect branch fits are skipped.
///
/// \p EntrySize is a by-value parameter that may be bumped from 8 to 16 while
/// decoding the first entry; the new value then applies to the remainder of
/// the scan, including the loop bounds.
void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section,
                                               uint64_t EntrySize) {
  const uint64_t SectionStart = Section.getAddress();
  const uint64_t SectionBytes = Section.getSize();

  for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= SectionBytes;
       EntryOffset += EntrySize) {
    MCInst Inst;
    uint64_t InstSize;
    uint64_t Offset = EntryOffset;

    // The bound is re-evaluated on every iteration because EntrySize can
    // change inside the loop body.
    for (; Offset < EntryOffset + EntrySize; Offset += InstSize) {
      disassemblePLTInstruction(Section, Offset, Inst, InstSize);

      // Check if the entry size needs adjustment: a terminate-branch
      // instruction seen in the very first entry of an 8-byte-entry section
      // switches the scan to 16-byte entries.
      const bool InFirstEntry = EntryOffset == 0;
      if (InFirstEntry && BC->MIB->isTerminateBranch(Inst) && EntrySize == 8)
        EntrySize = 16;

      if (BC->MIB->isIndirectBranch(Inst))
        break;
    }

    // No indirect branch ended within this entry: nothing to register.
    if (Offset + InstSize > EntryOffset + EntrySize)
      continue;

    const uint64_t BranchAddress = SectionStart + Offset;
    uint64_t TargetAddress;
    if (!BC->MIB->evaluateMemOperandTarget(Inst, TargetAddress, BranchAddress,
                                           InstSize)) {
      BC->errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x"
                 << Twine::utohexstr(BranchAddress) << '\n';
      exit(1);
    }

    createPLTBinaryFunction(TargetAddress, SectionStart + EntryOffset,
                            EntrySize);
  }
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::disassemblePLT() {
|
|
|
|
|
auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) {
|
|
|
|
|
if (BC->isAArch64())
|
|
|
|
|
return disassemblePLTSectionAArch64(Section);
|
2023-06-16 11:49:19 +02:00
|
|
|
if (BC->isRISCV())
|
|
|
|
|
return disassemblePLTSectionRISCV(Section);
|
2024-04-15 13:11:29 -04:00
|
|
|
if (BC->isX86())
|
|
|
|
|
return disassemblePLTSectionX86(Section, EntrySize);
|
|
|
|
|
llvm_unreachable("Unmplemented PLT");
|
2022-01-25 03:22:47 +03:00
|
|
|
};
|
|
|
|
|
|
2021-06-30 14:41:41 -07:00
|
|
|
for (BinarySection &Section : BC->allocatableSections()) {
|
|
|
|
|
const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName());
|
|
|
|
|
if (!PLTSI)
|
|
|
|
|
continue;
|
2017-08-04 11:21:05 -07:00
|
|
|
|
2021-06-30 14:41:41 -07:00
|
|
|
analyzeOnePLTSection(Section, PLTSI->EntrySize);
|
2022-10-24 17:54:25 -07:00
|
|
|
|
|
|
|
|
BinaryFunction *PltBF;
|
|
|
|
|
auto BFIter = BC->getBinaryFunctions().find(Section.getAddress());
|
|
|
|
|
if (BFIter != BC->getBinaryFunctions().end()) {
|
|
|
|
|
PltBF = &BFIter->second;
|
|
|
|
|
} else {
|
|
|
|
|
// If we did not register any function at the start of the section,
|
|
|
|
|
// then it must be a general PLT entry. Add a function at the location.
|
|
|
|
|
PltBF = BC->createBinaryFunction(
|
2021-06-30 14:41:41 -07:00
|
|
|
"__BOLT_PSEUDO_" + Section.getName().str(), Section,
|
|
|
|
|
Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment());
|
2017-08-04 11:21:05 -07:00
|
|
|
}
|
2022-10-24 17:54:25 -07:00
|
|
|
PltBF->setPseudo(true);
|
2017-08-04 11:21:05 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-20 14:18:56 -07:00
|
|
|
/// Compute the maximum size of every binary function and register secondary
/// entry points for symbols that fall inside a function.
///
/// \param MarkerSyms  marker symbols keyed by address; an address marked as
///                    MarkerSymType::DATA is never added as an entry point.
///
/// For each function, the symbols located after its start address are walked
/// until one is reached that belongs to the next function or is not valid
/// within the current function's scope. The maximum size is then bounded by
/// the end of the origin section, that stopping symbol, and the next function.
void RewriteInstance::adjustFunctionBoundaries(
    DenseMap<uint64_t, MarkerSymType> &MarkerSyms) {
  auto &Functions = BC->getBinaryFunctions();
  for (auto FuncIt = Functions.begin(), FuncEnd = Functions.end();
       FuncIt != FuncEnd; ++FuncIt) {
    BinaryFunction &Function = FuncIt->second;

    const auto FollowingIt = std::next(FuncIt);
    const BinaryFunction *NextFunction =
        FollowingIt != FuncEnd ? &FollowingIt->second : nullptr;

    // Check if there's a symbol or a function with a larger address in the
    // same section. If there is - it determines the maximum size for the
    // current function. Otherwise, it is the size of the containing section
    // that defines it.
    //
    // NOTE: ignore some symbols that could be tolerated inside the body
    //       of a function.
    auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress());
    for (; NextSymRefI != FileSymRefs.end(); ++NextSymRefI) {
      SymbolRef &Symbol = NextSymRefI->second;
      const uint64_t SymbolAddress = NextSymRefI->first;
      const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();

      // Symbols at or past the next function belong to that function.
      if (NextFunction && SymbolAddress >= NextFunction->getAddress())
        break;

      // A symbol that is not tolerated inside this function ends the scan and
      // becomes the boundary candidate used below.
      if (!Function.isSymbolValidInScope(Symbol, SymbolSize))
        break;

      // Skip basic block labels. This happens on RISC-V with linker relaxation
      // enabled because every branch needs a relocation and corresponding
      // symbol. We don't want to add such symbols as entry points.
      const auto PrivateLabelPrefix = BC->AsmInfo->getPrivateLabelPrefix();
      if (!PrivateLabelPrefix.empty() &&
          cantFail(Symbol.getName()).starts_with(PrivateLabelPrefix))
        continue;

      auto MarkerIt = MarkerSyms.find(NextSymRefI->first);
      const bool IsDataMarker = MarkerIt != MarkerSyms.end() &&
                                MarkerIt->second == MarkerSymType::DATA;
      if (!IsDataMarker) {
        // This is potentially another entry point into the function.
        uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
                          << Function << " at offset 0x"
                          << Twine::utohexstr(EntryOffset) << '\n');
        Function.addEntryPointAtOffset(EntryOffset);
      }
    }

    // Function runs at most till the end of the containing section.
    uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress();
    // Or till the next object marked by a symbol.
    if (NextSymRefI != FileSymRefs.end())
      NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress);
    // Or till the next function not marked by a symbol.
    if (NextFunction)
      NextObjectAddress =
          std::min(NextFunction->getAddress(), NextObjectAddress);

    const uint64_t MaxSize = NextObjectAddress - Function.getAddress();
    if (MaxSize < Function.getSize()) {
      // The declared size reaches past the computed boundary: keep the
      // declared size as the maximum and mark the function non-simple.
      BC->errs() << "BOLT-ERROR: symbol seen in the middle of the function "
                 << Function << ". Skipping.\n";
      Function.setSimple(false);
      Function.setMaxSize(Function.getSize());
      continue;
    }

    Function.setMaxSize(MaxSize);
    if (Function.getSize() == 0 && Function.isSimple()) {
      // Some assembly functions have their size set to 0, use the max
      // size as their real size.
      if (opts::Verbosity >= 1)
        BC->outs() << "BOLT-INFO: setting size of function " << Function
                   << " to " << Function.getMaxSize() << " (was 0)\n";
      Function.setSize(Function.getMaxSize());
    }
  }
}
|
|
|
|
|
|
2016-11-11 14:33:34 -08:00
|
|
|
void RewriteInstance::relocateEHFrameSection() {
|
2022-09-22 12:05:12 -07:00
|
|
|
assert(EHFrameSection && "Non-empty .eh_frame section expected.");
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
BinarySection *RelocatedEHFrameSection =
|
|
|
|
|
getSection(".relocated" + getEHFrameSectionName());
|
|
|
|
|
assert(RelocatedEHFrameSection &&
|
|
|
|
|
"Relocated eh_frame section should be preregistered.");
|
2018-02-01 16:33:43 -08:00
|
|
|
DWARFDataExtractor DE(EHFrameSection->getContents(),
|
|
|
|
|
BC->AsmInfo->isLittleEndian(),
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
BC->AsmInfo->getCodePointerSize());
|
2016-11-11 14:33:34 -08:00
|
|
|
auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) {
|
|
|
|
|
if (DwarfType == dwarf::DW_EH_PE_omit)
|
|
|
|
|
return;
|
|
|
|
|
|
2020-04-16 00:02:35 -07:00
|
|
|
// Only fix references that are relative to other locations.
|
2016-11-11 14:33:34 -08:00
|
|
|
if (!(DwarfType & dwarf::DW_EH_PE_pcrel) &&
|
|
|
|
|
!(DwarfType & dwarf::DW_EH_PE_textrel) &&
|
|
|
|
|
!(DwarfType & dwarf::DW_EH_PE_funcrel) &&
|
2021-12-23 12:38:33 -08:00
|
|
|
!(DwarfType & dwarf::DW_EH_PE_datarel))
|
2016-11-11 14:33:34 -08:00
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (!(DwarfType & dwarf::DW_EH_PE_sdata4))
|
|
|
|
|
return;
|
|
|
|
|
|
2025-03-14 18:15:59 +00:00
|
|
|
uint32_t RelType;
|
2016-11-11 14:33:34 -08:00
|
|
|
switch (DwarfType & 0x0f) {
|
|
|
|
|
default:
|
|
|
|
|
llvm_unreachable("unsupported DWARF encoding type");
|
|
|
|
|
case dwarf::DW_EH_PE_sdata4:
|
|
|
|
|
case dwarf::DW_EH_PE_udata4:
|
2020-10-07 15:40:51 -07:00
|
|
|
RelType = Relocation::getPC32();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Offset -= 4;
|
2016-11-11 14:33:34 -08:00
|
|
|
break;
|
|
|
|
|
case dwarf::DW_EH_PE_sdata8:
|
|
|
|
|
case dwarf::DW_EH_PE_udata8:
|
2020-10-07 15:40:51 -07:00
|
|
|
RelType = Relocation::getPC64();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
Offset -= 8;
|
2016-11-11 14:33:34 -08:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-16 00:02:35 -07:00
|
|
|
// Create a relocation against an absolute value since the goal is to
|
|
|
|
|
// preserve the contents of the section independent of the new values
|
|
|
|
|
// of referenced symbols.
|
2022-09-22 12:05:12 -07:00
|
|
|
RelocatedEHFrameSection->addRelocation(Offset, nullptr, RelType, Value);
|
2016-11-11 14:33:34 -08:00
|
|
|
};
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc);
|
2020-12-01 16:29:39 -08:00
|
|
|
check_error(std::move(E), "failed to patch EH frame");
|
2016-11-11 14:33:34 -08:00
|
|
|
}
|
|
|
|
|
|
2022-03-08 09:12:19 -08:00
|
|
|
/// Register all sections of the input file with the binary context and derive
/// the global processing mode from the special sections that are present.
///
/// Steps performed, in order:
///  - register every section and note whether any debug section exists;
///  - mark GNU RELRO sections;
///  - detect code relocations, the symbol table, .eh_frame and the BOLT
///    address translation (BAT) section;
///  - decide whether relocation mode is enabled;
///  - read the EH frame and section metadata.
///
/// \returns an error if the contents of a section cannot be read, otherwise
/// the result of readELFDynamic(). Several unrecoverable conditions (BAT
/// parse failure, missing relocations with relocation mode forced on,
/// stripped input without --allow-stripped) print an error and exit the
/// process instead of returning.
Error RewriteInstance::readSpecialSections() {
  NamedRegionTimer T("readSpecialSections", "read special sections",
                     TimerGroupName, TimerGroupDesc, opts::TimeRewrite);

  bool HasTextRelocations = false;
  bool HasSymbolTable = false;
  bool HasDebugInfo = false;

  // Process special sections.
  for (const SectionRef &Section : InputFile->sections()) {
    Expected<StringRef> SectionNameOrErr = Section.getName();
    check_error(SectionNameOrErr.takeError(), "cannot get section name");
    StringRef SectionName = *SectionNameOrErr;

    // Make sure the contents are readable before registering the section.
    if (Error E = Section.getContents().takeError())
      return E;
    BC->registerSection(Section);
    LLVM_DEBUG(
        dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x"
               << Twine::utohexstr(Section.getAddress()) << ":0x"
               << Twine::utohexstr(Section.getAddress() + Section.getSize())
               << "\n");
    if (isDebugSection(SectionName))
      HasDebugInfo = true;
  }

  // Set IsRelro section attribute based on PT_GNU_RELRO segment.
  markGnuRelroSections();

  if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly)
    BC->errs() << "BOLT-WARNING: debug info will be stripped from the binary. "
                  "Use -update-debug-sections to keep it.\n";

  HasTextRelocations = static_cast<bool>(BC->getUniqueSectionByName(
      ".rela" + std::string(BC->getMainCodeSectionName())));
  HasSymbolTable = static_cast<bool>(BC->getUniqueSectionByName(".symtab"));
  EHFrameSection = BC->getUniqueSectionByName(".eh_frame");

  if (ErrorOr<BinarySection &> BATSec =
          BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
    BC->HasBATSection = true;
    // Do not read BAT when plotting a heatmap
    if (opts::HeatmapMode != opts::HeatmapModeKind::HM_Exclusive) {
      if (std::error_code EC = BAT->parse(BC->outs(), BATSec->getContents())) {
        BC->errs() << "BOLT-ERROR: failed to parse BOLT address translation "
                      "table.\n";
        exit(1);
      }
    }
  }

  if (opts::PrintSections) {
    BC->outs() << "BOLT-INFO: Sections from original binary:\n";
    BC->printSections(BC->outs());
  }

  // Relocation mode explicitly requested but impossible to honor.
  if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) {
    BC->errs()
        << "BOLT-ERROR: relocations against code are missing from the input "
           "file. Cannot proceed in relocations mode (-relocs).\n";
    exit(1);
  }

  // Relocation mode is on whenever code relocations exist, unless it was
  // explicitly turned off.
  BC->HasRelocations =
      HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE);

  if (BC->IsLinuxKernel && BC->HasRelocations) {
    BC->outs() << "BOLT-INFO: disabling relocation mode for Linux kernel\n";
    BC->HasRelocations = false;
  }

  BC->IsStripped = !HasSymbolTable;

  if (BC->IsStripped && !opts::AllowStripped) {
    // Terminate the message with a newline so it does not run into whatever
    // is printed next on the terminal.
    BC->errs()
        << "BOLT-ERROR: stripped binaries are not supported. If you know "
           "what you're doing, use --allow-stripped to proceed\n";
    exit(1);
  }

  // Force non-relocation mode for heatmap generation
  if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive)
    BC->HasRelocations = false;

  if (BC->HasRelocations)
    BC->outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "")
               << "relocation mode\n";

  // Read EH frame for function boundaries info.
  Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame();
  if (!EHFrameOrError)
    report_error("expected valid eh_frame section", EHFrameOrError.takeError());
  CFIRdWrt.reset(new CFIReaderWriter(*BC, *EHFrameOrError.get()));

  processSectionMetadata();

  // Read .dynamic/PT_DYNAMIC.
  return readELFDynamic();
}
|
|
|
|
|
|
2018-04-13 15:46:19 -07:00
|
|
|
void RewriteInstance::adjustCommandLineOptions() {
|
2021-12-23 12:38:33 -08:00
|
|
|
if (BC->isAArch64() && !BC->HasRelocations)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully "
|
|
|
|
|
"supported\n";
|
2018-04-13 15:46:19 -07:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
|
Adding automatic huge page support
Summary:
This patch enables automated hugify for Bolt.
When running Bolt against a binary with -hugify specified, Bolt will inject a call to a runtime library function at the entry of the binary. The runtime library calls madvise to map the hot code region into a 2M huge page. We support both new kernel with THP support and old kernels. For kernels with THP support we simply make a madvise call, while for old kernels, we first copy the code out, remap the memory with huge page, and then copy the code back.
With this change, we no longer need to manually call into hugify_self and precompile it with --hot-text. Instead, we could simply combine --hugify option with existing optimizations, and at runtime it will automatically move hot code into 2M pages.
Some details around the changes made:
1. Add an command line option to support --hugify. --hugify will automatically turn on --hot-text to get the proper hot code symbols. However, running with both --hugify and --hot-text is not allowed, since --hot-text is used on binaries that has precompiled call to hugify_self, which contradicts with the purpose of --hugify.
2. Moved the common utility functions out of instr.cpp to common.h, which will also be used by hugify.cpp. Added a few new system calls definitions.
3. Added a new class that inherits RuntimeLibrary, and implemented the necessary emit and link logic for hugify.
4. Added a simple test for hugify.
(cherry picked from FBD21384529)
2020-05-02 11:14:38 -07:00
|
|
|
RtLibrary->adjustCommandLineOptions(*BC);
|
2019-06-19 20:10:49 -07:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
if (BC->isX86() && BC->MAB->allowAutoPadding()) {
|
[BOLT] Decoder cache friendly alignment wrt Intel JCC Erratum
Summary:
This diff ports reviews.llvm.org/D70157 to our LLVM tree, which
makes the integrated assembler able to align X86 control-flow changing
instructions in a way to reduce the performance impact of the ucode
update on Intel processors that implement the JCC erratum mitigation.
See white paper "Mitigations for Jump Conditional Code Erratum" by Intel
published November 2019.
To port this patch, I changed classifySecondInstInMacroFusion to analyze
instruction opcodes directly instead of analyzing the CondCond operand
(in more recent versions of LLVM, all conditional branches share the
same opcode, but with a different conditional operand). I also pulled to
our tree Alignment.h as a dependency, and the macroop analyzing helpers.
x86-align-branch-boundary and -x86-align-branch are the two flags that
control nop insertion to avoid disabling the decoder cache, following
the original patch. In BOLT, I added the flag
x86-align-branch-boundary-hot-only to request the alignment to only be
applied to hot code, which is turned on by default. The reason is
because such alignment is expensive to perform on large modules, but if
we limit it to hot code, the relaxation pass runtime becomes tolerable.
(cherry picked from FBD19828850)
2020-02-10 18:50:53 -08:00
|
|
|
if (!BC->HasRelocations) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in "
|
|
|
|
|
"non-relocation mode\n";
|
[BOLT] Decoder cache friendly alignment wrt Intel JCC Erratum
Summary:
This diff ports reviews.llvm.org/D70157 to our LLVM tree, which
makes the integrated assembler able to align X86 control-flow changing
instructions in a way to reduce the performance impact of the ucode
update on Intel processors that implement the JCC erratum mitigation.
See white paper "Mitigations for Jump Conditional Code Erratum" by Intel
published November 2019.
To port this patch, I changed classifySecondInstInMacroFusion to analyze
instruction opcodes directly instead of analyzing the CondCond operand
(in more recent versions of LLVM, all conditional branches share the
same opcode, but with a different conditional operand). I also pulled to
our tree Alignment.h as a dependency, and the macroop analyzing helpers.
x86-align-branch-boundary and -x86-align-branch are the two flags that
control nop insertion to avoid disabling the decoder cache, following
the original patch. In BOLT, I added the flag
x86-align-branch-boundary-hot-only to request the alignment to only be
applied to hot code, which is turned on by default. The reason is
because such alignment is expensive to perform on large modules, but if
we limit it to hot code, the relaxation pass runtime becomes tolerable.
(cherry picked from FBD19828850)
2020-02-10 18:50:53 -08:00
|
|
|
exit(1);
|
|
|
|
|
}
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs()
|
|
|
|
|
<< "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
|
|
|
|
|
"may take several minutes\n";
|
2018-04-13 15:46:19 -07:00
|
|
|
}
|
2019-03-15 13:43:36 -07:00
|
|
|
|
2018-06-25 14:55:48 -07:00
|
|
|
if (opts::SplitEH && !BC->HasRelocations) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n";
|
2018-06-25 14:55:48 -07:00
|
|
|
opts::SplitEH = false;
|
|
|
|
|
}
|
2020-11-04 11:44:02 -08:00
|
|
|
|
2019-06-28 09:21:27 -07:00
|
|
|
if (opts::StrictMode && !BC->HasRelocations) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-WARNING: disabling strict mode (-strict) in non-relocation "
|
|
|
|
|
"mode\n";
|
2019-06-28 09:21:27 -07:00
|
|
|
opts::StrictMode = false;
|
|
|
|
|
}
|
|
|
|
|
|
2019-06-11 13:24:10 -07:00
|
|
|
if (BC->HasRelocations && opts::AggregateOnly &&
|
|
|
|
|
!opts::StrictMode.getNumOccurrences()) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: enabling strict relocation mode for aggregation "
|
|
|
|
|
"purposes\n";
|
2019-06-11 13:24:10 -07:00
|
|
|
opts::StrictMode = true;
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-25 17:00:05 -07:00
|
|
|
if (!BC->HasRelocations &&
|
|
|
|
|
opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: function reordering only works when "
|
|
|
|
|
<< "relocations are enabled\n";
|
2019-04-25 17:00:05 -07:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
2024-12-16 21:49:53 -08:00
|
|
|
if (!BC->HasRelocations &&
|
|
|
|
|
opts::ICF == IdenticalCodeFolding::ICFLevel::Safe) {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: binary built without relocations. Safe ICF is "
|
|
|
|
|
"not supported\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-29 20:38:50 +03:00
|
|
|
if (opts::Instrument ||
|
|
|
|
|
(opts::ReorderFunctions != ReorderFunctions::RT_NONE &&
|
|
|
|
|
!opts::HotText.getNumOccurrences())) {
|
2019-04-25 17:00:05 -07:00
|
|
|
opts::HotText = true;
|
|
|
|
|
} else if (opts::HotText && !BC->HasRelocations) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n";
|
2019-03-14 18:51:05 -07:00
|
|
|
opts::HotText = false;
|
|
|
|
|
}
|
2019-03-15 13:43:36 -07:00
|
|
|
|
2025-02-27 16:13:57 -08:00
|
|
|
if (opts::Instrument && opts::UseGnuStack) {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: cannot avoid having writeable and executable "
|
|
|
|
|
"segment in instrumented binary if program headers will be "
|
|
|
|
|
"updated in place\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
2019-03-15 13:43:36 -07:00
|
|
|
if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
|
|
|
|
|
opts::HotTextMoveSections.addValue(".stub");
|
|
|
|
|
opts::HotTextMoveSections.addValue(".mover");
|
2019-04-16 10:39:05 -07:00
|
|
|
opts::HotTextMoveSections.addValue(".never_hugify");
|
2019-03-15 13:43:36 -07:00
|
|
|
}
|
2020-02-24 17:12:41 -08:00
|
|
|
|
|
|
|
|
if (opts::UseOldText && !BC->OldTextSectionAddress) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-WARNING: cannot use old .text as the section was not found"
|
|
|
|
|
"\n";
|
2020-02-24 17:12:41 -08:00
|
|
|
opts::UseOldText = false;
|
|
|
|
|
}
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
if (opts::UseOldText && !BC->HasRelocations) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n";
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
opts::UseOldText = false;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
if (!opts::AlignText.getNumOccurrences())
|
2020-04-19 15:02:50 -07:00
|
|
|
opts::AlignText = BC->PageAlign;
|
2020-05-03 15:49:58 -07:00
|
|
|
|
2022-03-15 22:17:51 +03:00
|
|
|
if (opts::AlignText < opts::AlignFunctions)
|
|
|
|
|
opts::AlignText = (unsigned)opts::AlignFunctions;
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode &&
|
2021-12-23 12:38:33 -08:00
|
|
|
!opts::UseOldText)
|
2020-05-03 15:49:58 -07:00
|
|
|
opts::Lite = true;
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
|
|
|
|
|
if (opts::Lite && opts::UseOldText) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. "
|
|
|
|
|
"Disabling -use-old-text.\n";
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
opts::UseOldText = false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-17 15:09:06 -07:00
|
|
|
if (opts::Lite && opts::StrictMode) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: -strict and -lite cannot be used at the same time\n";
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
exit(1);
|
2020-05-03 15:49:58 -07:00
|
|
|
}
|
2020-08-06 14:43:33 -07:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::Lite)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: enabling lite mode\n";
|
2024-03-22 15:29:26 -07:00
|
|
|
|
2024-03-29 16:41:15 -07:00
|
|
|
if (BC->IsLinuxKernel) {
|
|
|
|
|
if (!opts::KeepNops.getNumOccurrences())
|
|
|
|
|
opts::KeepNops = true;
|
|
|
|
|
|
2025-08-19 14:41:13 -07:00
|
|
|
// Linux kernel may resume execution after a trap or x86 HLT instruction.
|
|
|
|
|
if (!opts::TerminalHLT.getNumOccurrences())
|
|
|
|
|
opts::TerminalHLT = false;
|
2024-03-29 16:41:15 -07:00
|
|
|
if (!opts::TerminalTrap.getNumOccurrences())
|
|
|
|
|
opts::TerminalTrap = false;
|
|
|
|
|
}
|
2018-04-13 15:46:19 -07:00
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
namespace {
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,
|
|
|
|
|
const RelocationRef &RelRef) {
|
2021-04-08 00:19:26 -07:00
|
|
|
using ELFShdrTy = typename ELFT::Shdr;
|
|
|
|
|
using Elf_Rela = typename ELFT::Rela;
|
2016-09-27 19:09:38 -07:00
|
|
|
int64_t Addend = 0;
|
2020-12-01 16:29:39 -08:00
|
|
|
const ELFFile<ELFT> &EF = Obj->getELFFile();
|
2016-09-27 19:09:38 -07:00
|
|
|
DataRefImpl Rel = RelRef.getRawDataRefImpl();
|
2021-04-08 00:19:26 -07:00
|
|
|
const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
|
2016-09-27 19:09:38 -07:00
|
|
|
switch (RelocationSection->sh_type) {
|
2021-12-14 16:52:51 -08:00
|
|
|
default:
|
|
|
|
|
llvm_unreachable("unexpected relocation section type");
|
2016-09-27 19:09:38 -07:00
|
|
|
case ELF::SHT_REL:
|
|
|
|
|
break;
|
|
|
|
|
case ELF::SHT_RELA: {
|
2021-04-08 00:19:26 -07:00
|
|
|
const Elf_Rela *RelA = Obj->getRela(Rel);
|
2016-09-27 19:09:38 -07:00
|
|
|
Addend = RelA->r_addend;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return Addend;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
|
2021-12-14 16:52:51 -08:00
|
|
|
const RelocationRef &Rel) {
|
2023-04-21 13:42:20 -04:00
|
|
|
return getRelocationAddend(cast<ELF64LEObjectFile>(Obj), Rel);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2022-07-11 09:49:41 -07:00
|
|
|
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
uint32_t getRelocationSymbol(const ELFObjectFile<ELFT> *Obj,
|
|
|
|
|
const RelocationRef &RelRef) {
|
|
|
|
|
using ELFShdrTy = typename ELFT::Shdr;
|
|
|
|
|
uint32_t Symbol = 0;
|
|
|
|
|
const ELFFile<ELFT> &EF = Obj->getELFFile();
|
|
|
|
|
DataRefImpl Rel = RelRef.getRawDataRefImpl();
|
|
|
|
|
const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
|
|
|
|
|
switch (RelocationSection->sh_type) {
|
|
|
|
|
default:
|
|
|
|
|
llvm_unreachable("unexpected relocation section type");
|
|
|
|
|
case ELF::SHT_REL:
|
|
|
|
|
Symbol = Obj->getRel(Rel)->getSymbol(EF.isMips64EL());
|
|
|
|
|
break;
|
|
|
|
|
case ELF::SHT_RELA:
|
|
|
|
|
Symbol = Obj->getRela(Rel)->getSymbol(EF.isMips64EL());
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return Symbol;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj,
|
|
|
|
|
const RelocationRef &Rel) {
|
2023-04-21 13:42:20 -04:00
|
|
|
return getRelocationSymbol(cast<ELF64LEObjectFile>(Obj), Rel);
|
2022-07-11 09:49:41 -07:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
} // anonymous namespace
|
|
|
|
|
|
2021-09-08 13:37:19 +03:00
|
|
|
bool RewriteInstance::analyzeRelocation(
|
2025-03-14 18:15:59 +00:00
|
|
|
const RelocationRef &Rel, uint32_t &RType, std::string &SymbolName,
|
2021-09-08 13:37:19 +03:00
|
|
|
bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend,
|
|
|
|
|
uint64_t &ExtractedValue, bool &Skip) const {
|
|
|
|
|
Skip = false;
|
2019-04-11 17:11:08 -07:00
|
|
|
if (!Relocation::isSupported(RType))
|
2018-01-24 05:42:11 -08:00
|
|
|
return false;
|
|
|
|
|
|
2024-08-07 18:02:42 +08:00
|
|
|
auto IsWeakReference = [](const SymbolRef &Symbol) {
|
|
|
|
|
Expected<uint32_t> SymFlagsOrErr = Symbol.getFlags();
|
|
|
|
|
if (!SymFlagsOrErr)
|
|
|
|
|
return false;
|
|
|
|
|
return (*SymFlagsOrErr & SymbolRef::SF_Undefined) &&
|
|
|
|
|
(*SymFlagsOrErr & SymbolRef::SF_Weak);
|
|
|
|
|
};
|
|
|
|
|
|
2018-03-20 14:34:58 -07:00
|
|
|
const bool IsAArch64 = BC->isAArch64();
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const size_t RelSize = Relocation::getSizeForType(RType);
|
2019-04-09 12:29:40 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<uint64_t> Value =
|
|
|
|
|
BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
|
2019-04-09 12:29:40 -07:00
|
|
|
assert(Value && "failed to extract relocated value");
|
2021-09-08 13:37:19 +03:00
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
|
2018-09-21 12:00:20 -07:00
|
|
|
Addend = getRelocationAddend(InputFile, Rel);
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const bool IsPCRelative = Relocation::isPCRelative(RType);
|
|
|
|
|
const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0;
|
2018-09-21 12:00:20 -07:00
|
|
|
bool SkipVerification = false;
|
|
|
|
|
auto SymbolIter = Rel.getSymbol();
|
|
|
|
|
if (SymbolIter == InputFile->symbol_end()) {
|
2019-06-27 03:20:17 -07:00
|
|
|
SymbolAddress = ExtractedValue - Addend + PCRelOffset;
|
2021-04-08 00:19:26 -07:00
|
|
|
MCSymbol *RelSymbol =
|
|
|
|
|
BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat");
|
2020-12-01 16:29:39 -08:00
|
|
|
SymbolName = std::string(RelSymbol->getName());
|
2018-09-21 12:00:20 -07:00
|
|
|
IsSectionRelocation = false;
|
|
|
|
|
} else {
|
2021-04-08 00:19:26 -07:00
|
|
|
const SymbolRef &Symbol = *SymbolIter;
|
2020-12-01 16:29:39 -08:00
|
|
|
SymbolName = std::string(cantFail(Symbol.getName()));
|
2018-09-21 12:00:20 -07:00
|
|
|
SymbolAddress = cantFail(Symbol.getAddress());
|
|
|
|
|
SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other);
|
|
|
|
|
// Section symbols are marked as ST_Debug.
|
|
|
|
|
IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
|
2022-01-25 03:22:47 +03:00
|
|
|
// Check for PLT entry registered with symbol name
|
2024-08-07 18:02:42 +08:00
|
|
|
if (!SymbolAddress && !IsWeakReference(Symbol) &&
|
|
|
|
|
(IsAArch64 || BC->isRISCV())) {
|
2022-04-03 19:11:31 +03:00
|
|
|
const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName);
|
2022-01-25 03:22:47 +03:00
|
|
|
SymbolAddress = BD ? BD->getAddress() : 0;
|
|
|
|
|
}
|
2019-06-27 03:20:17 -07:00
|
|
|
}
|
2019-11-14 16:07:11 -08:00
|
|
|
// For PIE or dynamic libs, the linker may choose not to put the relocation
|
|
|
|
|
// result at the address if it is a X86_64_64 one because it will emit a
|
|
|
|
|
// dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to
|
|
|
|
|
// resolve it at run time. The static relocation result goes as the addend
|
|
|
|
|
// of the dynamic relocation in this case. We can't verify these cases.
|
|
|
|
|
// FIXME: perhaps we can try to find if it really emitted a corresponding
|
|
|
|
|
// RELATIVE relocation at this offset with the correct value as the addend.
|
|
|
|
|
if (!BC->HasFixedLoadAddress && RelSize == 8)
|
|
|
|
|
SkipVerification = true;
|
2019-06-27 03:20:17 -07:00
|
|
|
|
|
|
|
|
if (IsSectionRelocation && !IsAArch64) {
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
|
2019-06-27 03:20:17 -07:00
|
|
|
assert(Section && "section expected for section relocation");
|
|
|
|
|
SymbolName = "section " + std::string(Section->getName());
|
|
|
|
|
// Convert section symbol relocations to regular relocations inside
|
|
|
|
|
// non-section symbols.
|
|
|
|
|
if (Section->containsAddress(ExtractedValue) && !IsPCRelative) {
|
|
|
|
|
SymbolAddress = ExtractedValue;
|
|
|
|
|
Addend = 0;
|
|
|
|
|
} else {
|
|
|
|
|
Addend = ExtractedValue - (SymbolAddress - PCRelOffset);
|
2018-09-21 12:00:20 -07:00
|
|
|
}
|
|
|
|
|
}
|
2018-01-24 05:42:11 -08:00
|
|
|
|
2025-03-14 18:44:33 -07:00
|
|
|
// GOT relocation can cause the underlying instruction to be modified by the
|
|
|
|
|
// linker, resulting in the extracted value being different from the actual
|
|
|
|
|
// symbol. It's also possible to have a GOT entry for a symbol defined in the
|
|
|
|
|
// binary. In the latter case, the instruction can be using the GOT version
|
|
|
|
|
// causing the extracted value mismatch. Similar cases can happen for TLS.
|
|
|
|
|
// Pass the relocation information as is to the disassembler and let it decide
|
|
|
|
|
// how to use it for the operand symbolization.
|
|
|
|
|
if (Relocation::isGOT(RType) || Relocation::isTLS(RType)) {
|
2021-09-02 21:04:33 +03:00
|
|
|
SkipVerification = true;
|
2018-10-11 18:12:09 -07:00
|
|
|
} else if (!SymbolAddress) {
|
2018-09-21 12:00:20 -07:00
|
|
|
assert(!IsSectionRelocation);
|
2018-10-11 18:12:09 -07:00
|
|
|
if (ExtractedValue || Addend == 0 || IsPCRelative) {
|
2021-12-14 16:52:51 -08:00
|
|
|
SymbolAddress =
|
|
|
|
|
truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize);
|
2018-01-24 05:42:11 -08:00
|
|
|
} else {
|
|
|
|
|
// This is weird case. The extracted value is zero but the addend is
|
|
|
|
|
// non-zero and the relocation is not pc-rel. Using the previous logic,
|
|
|
|
|
// the SymbolAddress would end up as a huge number. Seen in
|
|
|
|
|
// exceptions_pic.test.
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x"
|
|
|
|
|
<< Twine::utohexstr(Rel.getOffset())
|
|
|
|
|
<< " value does not match addend for "
|
|
|
|
|
<< "relocation to undefined symbol.\n");
|
2018-01-24 05:42:11 -08:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-30 10:29:47 -07:00
|
|
|
auto verifyExtractedValue = [&]() {
|
2018-09-21 12:00:20 -07:00
|
|
|
if (SkipVerification)
|
|
|
|
|
return true;
|
|
|
|
|
|
2023-06-16 11:49:19 +02:00
|
|
|
if (IsAArch64 || BC->isRISCV())
|
2018-07-30 10:29:47 -07:00
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (SymbolName == "__hot_start" || SymbolName == "__hot_end")
|
|
|
|
|
return true;
|
|
|
|
|
|
2020-06-30 19:58:43 -07:00
|
|
|
if (RType == ELF::R_X86_64_PLT32)
|
|
|
|
|
return true;
|
|
|
|
|
|
2018-07-30 10:29:47 -07:00
|
|
|
return truncateToSize(ExtractedValue, RelSize) ==
|
|
|
|
|
truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize);
|
|
|
|
|
};
|
2018-01-24 05:42:11 -08:00
|
|
|
|
2021-06-29 12:11:56 -07:00
|
|
|
(void)verifyExtractedValue;
|
2018-07-30 10:29:47 -07:00
|
|
|
assert(verifyExtractedValue() && "mismatched extracted relocation value");
|
2018-01-24 05:42:11 -08:00
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-22 13:46:06 -07:00
|
|
|
void RewriteInstance::processDynamicRelocations() {
|
2023-03-15 00:08:11 +04:00
|
|
|
// Read .relr.dyn section containing compressed R_*_RELATIVE relocations.
|
|
|
|
|
if (DynamicRelrSize > 0) {
|
|
|
|
|
ErrorOr<BinarySection &> DynamicRelrSectionOrErr =
|
|
|
|
|
BC->getSectionForAddress(*DynamicRelrAddress);
|
|
|
|
|
if (!DynamicRelrSectionOrErr)
|
|
|
|
|
report_error("unable to find section corresponding to DT_RELR",
|
|
|
|
|
DynamicRelrSectionOrErr.getError());
|
|
|
|
|
if (DynamicRelrSectionOrErr->getSize() != DynamicRelrSize)
|
|
|
|
|
report_error("section size mismatch for DT_RELRSZ",
|
|
|
|
|
errc::executable_format_error);
|
|
|
|
|
readDynamicRelrRelocations(*DynamicRelrSectionOrErr);
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-30 14:38:50 -07:00
|
|
|
// Read relocations for PLT - DT_JMPREL.
|
|
|
|
|
if (PLTRelocationsSize > 0) {
|
|
|
|
|
ErrorOr<BinarySection &> PLTRelSectionOrErr =
|
|
|
|
|
BC->getSectionForAddress(*PLTRelocationsAddress);
|
2021-12-23 12:38:33 -08:00
|
|
|
if (!PLTRelSectionOrErr)
|
2021-06-30 14:38:50 -07:00
|
|
|
report_error("unable to find section corresponding to DT_JMPREL",
|
|
|
|
|
PLTRelSectionOrErr.getError());
|
2021-12-23 12:38:33 -08:00
|
|
|
if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize)
|
2021-06-30 14:38:50 -07:00
|
|
|
report_error("section size mismatch for DT_PLTRELSZ",
|
|
|
|
|
errc::executable_format_error);
|
2022-07-11 09:49:41 -07:00
|
|
|
readDynamicRelocations(PLTRelSectionOrErr->getSectionRef(),
|
|
|
|
|
/*IsJmpRel*/ true);
|
2021-06-30 14:38:50 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The rest of dynamic relocations - DT_RELA.
|
2023-11-10 11:47:12 +04:00
|
|
|
// The static executable might have .rela.dyn secion and not have PT_DYNAMIC
|
|
|
|
|
if (!DynamicRelocationsSize && BC->IsStaticExecutable) {
|
|
|
|
|
ErrorOr<BinarySection &> DynamicRelSectionOrErr =
|
|
|
|
|
BC->getUniqueSectionByName(getRelaDynSectionName());
|
|
|
|
|
if (DynamicRelSectionOrErr) {
|
|
|
|
|
DynamicRelocationsAddress = DynamicRelSectionOrErr->getAddress();
|
|
|
|
|
DynamicRelocationsSize = DynamicRelSectionOrErr->getSize();
|
|
|
|
|
const SectionRef &SectionRef = DynamicRelSectionOrErr->getSectionRef();
|
|
|
|
|
DynamicRelativeRelocationsCount = std::distance(
|
|
|
|
|
SectionRef.relocation_begin(), SectionRef.relocation_end());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-30 14:38:50 -07:00
|
|
|
if (DynamicRelocationsSize > 0) {
|
|
|
|
|
ErrorOr<BinarySection &> DynamicRelSectionOrErr =
|
|
|
|
|
BC->getSectionForAddress(*DynamicRelocationsAddress);
|
2021-12-23 12:38:33 -08:00
|
|
|
if (!DynamicRelSectionOrErr)
|
2021-06-30 14:38:50 -07:00
|
|
|
report_error("unable to find section corresponding to DT_RELA",
|
|
|
|
|
DynamicRelSectionOrErr.getError());
|
2023-06-16 11:49:19 +02:00
|
|
|
auto DynamicRelSectionSize = DynamicRelSectionOrErr->getSize();
|
|
|
|
|
// On RISC-V DT_RELASZ seems to include both .rela.dyn and .rela.plt
|
|
|
|
|
if (DynamicRelocationsSize == DynamicRelSectionSize + PLTRelocationsSize)
|
|
|
|
|
DynamicRelocationsSize = DynamicRelSectionSize;
|
|
|
|
|
if (DynamicRelSectionSize != DynamicRelocationsSize)
|
2021-06-30 14:38:50 -07:00
|
|
|
report_error("section size mismatch for DT_RELASZ",
|
|
|
|
|
errc::executable_format_error);
|
2022-07-11 09:49:41 -07:00
|
|
|
readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef(),
|
|
|
|
|
/*IsJmpRel*/ false);
|
2020-06-23 12:22:58 -07:00
|
|
|
}
|
2021-06-22 13:46:06 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::processRelocations() {
|
|
|
|
|
if (!BC->HasRelocations)
|
|
|
|
|
return;
|
2020-06-23 12:22:58 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const SectionRef &Section : InputFile->sections()) {
|
2024-04-12 11:35:23 -04:00
|
|
|
section_iterator SecIter = cantFail(Section.getRelocatedSection());
|
|
|
|
|
if (SecIter == InputFile->section_end())
|
|
|
|
|
continue;
|
|
|
|
|
if (BinarySection(*BC, Section).isAllocatable())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
readRelocations(Section);
|
2020-06-23 12:22:58 -07:00
|
|
|
}
|
2021-08-22 02:44:30 +03:00
|
|
|
|
|
|
|
|
if (NumFailedRelocations)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations
|
|
|
|
|
<< " relocations\n";
|
2020-06-23 12:22:58 -07:00
|
|
|
}
|
|
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
void RewriteInstance::readDynamicRelocations(const SectionRef &Section,
|
|
|
|
|
bool IsJmpRel) {
|
2020-06-23 12:22:58 -07:00
|
|
|
assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected");
|
|
|
|
|
|
2021-06-29 12:11:56 -07:00
|
|
|
LLVM_DEBUG({
|
|
|
|
|
StringRef SectionName = cantFail(Section.getName());
|
|
|
|
|
dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
|
|
|
|
|
<< ":\n";
|
|
|
|
|
});
|
2020-06-23 12:22:58 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const RelocationRef &Rel : Section.relocations()) {
|
2025-03-14 18:15:59 +00:00
|
|
|
const uint32_t RType = Relocation::getType(Rel);
|
2021-02-17 15:36:58 -08:00
|
|
|
if (Relocation::isNone(RType))
|
|
|
|
|
continue;
|
2020-06-23 12:22:58 -07:00
|
|
|
|
|
|
|
|
StringRef SymbolName = "<none>";
|
|
|
|
|
MCSymbol *Symbol = nullptr;
|
|
|
|
|
uint64_t SymbolAddress = 0;
|
|
|
|
|
const uint64_t Addend = getRelocationAddend(InputFile, Rel);
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
symbol_iterator SymbolIter = Rel.getSymbol();
|
2020-06-23 12:22:58 -07:00
|
|
|
if (SymbolIter != InputFile->symbol_end()) {
|
|
|
|
|
SymbolName = cantFail(SymbolIter->getName());
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryData *BD = BC->getBinaryDataByName(SymbolName);
|
2021-06-30 14:38:50 -07:00
|
|
|
Symbol = BD ? BD->getSymbol()
|
|
|
|
|
: BC->getOrCreateUndefinedGlobalSymbol(SymbolName);
|
2020-06-23 12:22:58 -07:00
|
|
|
SymbolAddress = cantFail(SymbolIter->getAddress());
|
|
|
|
|
(void)SymbolAddress;
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(
|
2020-06-23 12:22:58 -07:00
|
|
|
SmallString<16> TypeName;
|
|
|
|
|
Rel.getTypeName(TypeName);
|
|
|
|
|
dbgs() << "BOLT-DEBUG: dynamic relocation at 0x"
|
|
|
|
|
<< Twine::utohexstr(Rel.getOffset()) << " : " << TypeName
|
|
|
|
|
<< " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress)
|
|
|
|
|
<< " : + 0x" << Twine::utohexstr(Addend) << '\n'
|
|
|
|
|
);
|
|
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
if (IsJmpRel)
|
|
|
|
|
IsJmpRelocation[RType] = true;
|
|
|
|
|
|
|
|
|
|
if (Symbol)
|
|
|
|
|
SymbolIndex[Symbol] = getRelocationSymbol(InputFile, Rel);
|
|
|
|
|
|
2025-03-19 14:55:59 +00:00
|
|
|
const uint64_t ReferencedAddress = SymbolAddress + Addend;
|
|
|
|
|
BinaryFunction *Func =
|
|
|
|
|
BC->getBinaryFunctionContainingAddress(ReferencedAddress);
|
|
|
|
|
|
|
|
|
|
if (Relocation::isRelative(RType) && SymbolAddress == 0) {
|
|
|
|
|
if (Func) {
|
|
|
|
|
if (!Func->isInConstantIsland(ReferencedAddress)) {
|
|
|
|
|
if (const uint64_t ReferenceOffset =
|
|
|
|
|
ReferencedAddress - Func->getAddress()) {
|
|
|
|
|
Func->addEntryPointAtOffset(ReferenceOffset);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: referenced address at 0x"
|
|
|
|
|
<< Twine::utohexstr(ReferencedAddress)
|
|
|
|
|
<< " is in constant island of function " << *Func << "\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else if (Relocation::isRelative(RType) && SymbolAddress != 0) {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: symbol address non zero for RELATIVE "
|
|
|
|
|
"relocation type\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend);
|
2020-02-24 17:10:02 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-15 00:08:11 +04:00
|
|
|
/// Decode the compressed relative relocation table stored in \p Section
/// (DT_RELR) and register one relative dynamic relocation per decoded
/// address. The addend of each relocation is the pointer-sized value read
/// from the section contents at the relocated address.
///
/// Table entries are DynamicRelrEntrySize bytes wide. An even entry is an
/// address: it is relocated and the running address moves one pointer past
/// it. An odd entry is a bitmap: after dropping the low tag bit, each
/// remaining set bit marks one pointer-sized slot, counted from the running
/// address, that is relocated; afterwards the running address advances by
/// (entry bit width - 1) slots.
///
/// Malformed input (a zero entry size, or a relocated address that belongs to
/// no section) is reported through report_error instead of being guarded by
/// an assertion only.
void RewriteInstance::readDynamicRelrRelocations(BinarySection &Section) {
  assert(Section.isAllocatable() && "allocatable expected");

  LLVM_DEBUG({
    StringRef SectionName = Section.getName();
    dbgs() << "BOLT-DEBUG: reading relocations in section " << SectionName
           << ":\n";
  });

  // The entry size is used as a divisor below; reject a zero value up front.
  if (DynamicRelrEntrySize == 0) {
    report_error("invalid entry size for DT_RELRENT",
                 errc::executable_format_error);
    return;
  }

  const uint32_t RType = Relocation::getRelative();
  const uint8_t PSize = BC->AsmInfo->getCodePointerSize();
  const uint64_t MaxDelta = ((CHAR_BIT * DynamicRelrEntrySize) - 1) * PSize;

  // Read the pointer-sized value currently stored at Address; it serves as
  // the addend of the relative relocation applied there.
  auto ExtractAddendValue = [&](uint64_t Address) -> uint64_t {
    ErrorOr<BinarySection &> DataSection = BC->getSectionForAddress(Address);
    if (!DataSection)
      report_error("cannot get section for data address from RELR",
                   DataSection.getError());
    DataExtractor DE = DataExtractor(DataSection->getContents(),
                                     BC->AsmInfo->isLittleEndian(), PSize);
    uint64_t Offset = Address - DataSection->getAddress();
    return DE.getUnsigned(&Offset, PSize);
  };

  auto AddRelocation = [&](uint64_t Address) {
    uint64_t Addend = ExtractAddendValue(Address);
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: R_*_RELATIVE relocation at 0x"
                      << Twine::utohexstr(Address) << " to 0x"
                      << Twine::utohexstr(Addend) << '\n';);
    BC->addDynamicRelocation(Address, nullptr, RType, Addend);
  };

  DataExtractor DE = DataExtractor(Section.getContents(),
                                   BC->AsmInfo->isLittleEndian(), PSize);
  uint64_t Offset = 0, Address = 0;
  uint64_t RelrCount = DynamicRelrSize / DynamicRelrEntrySize;
  while (RelrCount--) {
    assert(DE.isValidOffset(Offset));
    uint64_t Entry = DE.getUnsigned(&Offset, DynamicRelrEntrySize);
    if ((Entry & 1) == 0) {
      // Address entry.
      AddRelocation(Entry);
      Address = Entry + PSize;
    } else {
      // Bitmap entry: shift the tag bit out, then walk the remaining bits.
      const uint64_t StartAddress = Address;
      while (Entry >>= 1) {
        if (Entry & 1)
          AddRelocation(Address);

        Address += PSize;
      }

      // The bitmap always covers MaxDelta bytes, however many bits were set.
      Address = StartAddress + MaxDelta;
    }
  }
}
|
|
|
|
|
|
2022-08-17 16:28:22 -07:00
|
|
|
void RewriteInstance::printRelocationInfo(const RelocationRef &Rel,
|
|
|
|
|
StringRef SymbolName,
|
|
|
|
|
uint64_t SymbolAddress,
|
|
|
|
|
uint64_t Addend,
|
|
|
|
|
uint64_t ExtractedValue) const {
|
|
|
|
|
SmallString<16> TypeName;
|
|
|
|
|
Rel.getTypeName(TypeName);
|
|
|
|
|
const uint64_t Address = SymbolAddress + Addend;
|
|
|
|
|
const uint64_t Offset = Rel.getOffset();
|
|
|
|
|
ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress);
|
|
|
|
|
BinaryFunction *Func =
|
|
|
|
|
BC->getBinaryFunctionContainingAddress(Offset, false, BC->isAArch64());
|
|
|
|
|
dbgs() << formatv("Relocation: offset = {0:x}; type = {1}; value = {2:x}; ",
|
|
|
|
|
Offset, TypeName, ExtractedValue)
|
|
|
|
|
<< formatv("symbol = {0} ({1}); symbol address = {2:x}; ", SymbolName,
|
|
|
|
|
Section ? Section->getName() : "", SymbolAddress)
|
|
|
|
|
<< formatv("addend = {0:x}; address = {1:x}; in = ", Addend, Address);
|
|
|
|
|
if (Func)
|
|
|
|
|
dbgs() << Func->getPrintName();
|
|
|
|
|
else
|
|
|
|
|
dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName();
|
|
|
|
|
dbgs() << '\n';
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-24 17:10:02 -08:00
|
|
|
void RewriteInstance::readRelocations(const SectionRef &Section) {
|
2021-06-29 12:11:56 -07:00
|
|
|
LLVM_DEBUG({
|
|
|
|
|
StringRef SectionName = cantFail(Section.getName());
|
|
|
|
|
dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName
|
|
|
|
|
<< ":\n";
|
|
|
|
|
});
|
2018-09-21 12:00:20 -07:00
|
|
|
if (BinarySection(*BC, Section).isAllocatable()) {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n");
|
2016-09-27 19:09:38 -07:00
|
|
|
return;
|
|
|
|
|
}
|
2021-04-08 00:19:26 -07:00
|
|
|
section_iterator SecIter = cantFail(Section.getRelocatedSection());
|
2016-09-27 19:09:38 -07:00
|
|
|
assert(SecIter != InputFile->section_end() && "relocated section expected");
|
2021-04-08 00:19:26 -07:00
|
|
|
SectionRef RelocatedSection = *SecIter;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
StringRef RelocatedSectionName = cantFail(RelocatedSection.getName());
|
|
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is "
|
|
|
|
|
<< RelocatedSectionName << '\n');
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2018-09-21 12:00:20 -07:00
|
|
|
if (!BinarySection(*BC, RelocatedSection).isAllocatable()) {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against "
|
|
|
|
|
<< "non-allocatable section\n");
|
2016-09-27 19:09:38 -07:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName)
|
2020-12-01 16:29:39 -08:00
|
|
|
.Cases(".plt", ".rela.plt", ".got.plt",
|
|
|
|
|
".eh_frame", ".gcc_except_table", true)
|
|
|
|
|
.Default(false);
|
2016-09-27 19:09:38 -07:00
|
|
|
if (SkipRelocs) {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
|
dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n");
|
2016-09-27 19:09:38 -07:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
for (const RelocationRef &Rel : Section.relocations())
|
|
|
|
|
handleRelocation(RelocatedSection, Rel);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
|
|
|
|
|
const RelocationRef &Rel) {
|
2018-03-20 14:34:58 -07:00
|
|
|
const bool IsAArch64 = BC->isAArch64();
|
2025-04-14 10:24:47 -07:00
|
|
|
const bool IsX86 = BC->isX86();
|
2018-01-24 05:42:11 -08:00
|
|
|
const bool IsFromCode = RelocatedSection.isText();
|
2025-04-14 10:24:47 -07:00
|
|
|
const bool IsWritable = BinarySection(*BC, RelocatedSection).isWritable();
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
SmallString<16> TypeName;
|
|
|
|
|
Rel.getTypeName(TypeName);
|
2025-03-14 18:15:59 +00:00
|
|
|
uint32_t RType = Relocation::getType(Rel);
|
2022-08-17 20:43:21 -07:00
|
|
|
if (Relocation::skipRelocationType(RType))
|
|
|
|
|
return;
|
2019-04-11 17:11:08 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
// Adjust the relocation type as the linker might have skewed it.
|
2025-04-14 10:24:47 -07:00
|
|
|
if (IsX86 && (RType & ELF::R_X86_64_converted_reloc_bit)) {
|
2022-08-17 20:43:21 -07:00
|
|
|
if (opts::Verbosity >= 1)
|
|
|
|
|
dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n";
|
|
|
|
|
RType &= ~ELF::R_X86_64_converted_reloc_bit;
|
|
|
|
|
}
|
2018-01-24 05:42:11 -08:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
if (Relocation::isTLS(RType)) {
|
|
|
|
|
// No special handling required for TLS relocations on X86.
|
2025-04-14 10:24:47 -07:00
|
|
|
if (IsX86)
|
2022-08-17 20:43:21 -07:00
|
|
|
return;
|
2021-09-02 21:04:33 +03:00
|
|
|
|
2023-10-05 08:53:51 +00:00
|
|
|
// The non-got related TLS relocations on AArch64 and RISC-V also could be
|
|
|
|
|
// skipped.
|
2022-08-17 20:43:21 -07:00
|
|
|
if (!Relocation::isGOT(RType))
|
|
|
|
|
return;
|
|
|
|
|
}
|
2019-06-28 09:21:27 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
if (!IsAArch64 && BC->getDynamicRelocationAt(Rel.getOffset())) {
|
2022-08-17 20:45:05 -07:00
|
|
|
LLVM_DEBUG({
|
|
|
|
|
dbgs() << formatv("BOLT-DEBUG: address {0:x} has a ", Rel.getOffset())
|
|
|
|
|
<< "dynamic relocation against it. Ignoring static relocation.\n";
|
|
|
|
|
});
|
2022-08-17 20:43:21 -07:00
|
|
|
return;
|
|
|
|
|
}
|
2020-06-23 12:22:58 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
std::string SymbolName;
|
|
|
|
|
uint64_t SymbolAddress;
|
|
|
|
|
int64_t Addend;
|
|
|
|
|
uint64_t ExtractedValue;
|
|
|
|
|
bool IsSectionRelocation;
|
|
|
|
|
bool Skip;
|
|
|
|
|
if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation,
|
|
|
|
|
SymbolAddress, Addend, ExtractedValue, Skip)) {
|
2022-08-17 20:45:05 -07:00
|
|
|
LLVM_DEBUG({
|
|
|
|
|
dbgs() << "BOLT-WARNING: failed to analyze relocation @ offset = "
|
|
|
|
|
<< formatv("{0:x}; type name = {1}\n", Rel.getOffset(), TypeName);
|
|
|
|
|
});
|
2022-08-17 20:43:21 -07:00
|
|
|
++NumFailedRelocations;
|
|
|
|
|
return;
|
|
|
|
|
}
|
2021-09-08 13:37:19 +03:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
if (Skip) {
|
2022-08-17 20:45:05 -07:00
|
|
|
LLVM_DEBUG({
|
|
|
|
|
dbgs() << "BOLT-DEBUG: skipping relocation @ offset = "
|
|
|
|
|
<< formatv("{0:x}; type name = {1}\n", Rel.getOffset(), TypeName);
|
|
|
|
|
});
|
2022-08-17 20:43:21 -07:00
|
|
|
return;
|
|
|
|
|
}
|
2017-09-13 11:21:47 -07:00
|
|
|
|
2025-04-14 10:24:47 -07:00
|
|
|
if (!IsFromCode && !IsWritable && (IsX86 || IsAArch64) &&
|
|
|
|
|
Relocation::isPCRelative(RType)) {
|
|
|
|
|
BinaryData *BD = BC->getBinaryDataContainingAddress(Rel.getOffset());
|
|
|
|
|
if (BD && (BD->nameStartsWith("_ZTV") || // vtable
|
|
|
|
|
BD->nameStartsWith("_ZTCN"))) { // construction vtable
|
|
|
|
|
BinaryFunction *BF = BC->getBinaryFunctionContainingAddress(
|
|
|
|
|
SymbolAddress, /*CheckPastEnd*/ false, /*UseMaxSize*/ true);
|
2025-04-29 08:13:44 -07:00
|
|
|
if (BF) {
|
|
|
|
|
if (BF->getAddress() != SymbolAddress) {
|
|
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: the virtual function table entry at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(Rel.getOffset())
|
|
|
|
|
<< " points to the middle of a function @ 0x"
|
|
|
|
|
<< Twine::utohexstr(BF->getAddress()) << "\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
BC->addRelocation(Rel.getOffset(), BF->getSymbol(), RType, Addend,
|
|
|
|
|
ExtractedValue);
|
|
|
|
|
return;
|
2025-04-14 10:24:47 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
const uint64_t Address = SymbolAddress + Addend;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
LLVM_DEBUG({
|
|
|
|
|
dbgs() << "BOLT-DEBUG: ";
|
|
|
|
|
printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend, ExtractedValue);
|
|
|
|
|
});
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
BinaryFunction *ContainingBF = nullptr;
|
|
|
|
|
if (IsFromCode) {
|
|
|
|
|
ContainingBF =
|
|
|
|
|
BC->getBinaryFunctionContainingAddress(Rel.getOffset(),
|
|
|
|
|
/*CheckPastEnd*/ false,
|
|
|
|
|
/*UseMaxSize*/ true);
|
|
|
|
|
assert(ContainingBF && "cannot find function for address in code");
|
|
|
|
|
if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) {
|
|
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << formatv(
|
|
|
|
|
"BOLT-INFO: {0} has relocations in padding area\n", *ContainingBF);
|
2022-08-17 20:43:21 -07:00
|
|
|
ContainingBF->setSize(ContainingBF->getMaxSize());
|
|
|
|
|
ContainingBF->setSimple(false);
|
|
|
|
|
return;
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2022-08-17 20:43:21 -07:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
MCSymbol *ReferencedSymbol = nullptr;
|
2023-08-17 18:14:53 -07:00
|
|
|
if (!IsSectionRelocation) {
|
2025-03-14 18:44:33 -07:00
|
|
|
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) {
|
2022-08-17 20:43:21 -07:00
|
|
|
ReferencedSymbol = BD->getSymbol();
|
2025-03-14 18:44:33 -07:00
|
|
|
} else if (BC->isGOTSymbol(SymbolName)) {
|
2023-08-17 18:14:53 -07:00
|
|
|
if (BinaryData *BD = BC->getGOTSymbol())
|
|
|
|
|
ReferencedSymbol = BD->getSymbol();
|
2025-03-14 18:44:33 -07:00
|
|
|
} else if (BinaryData *BD = BC->getBinaryDataAtAddress(SymbolAddress)) {
|
|
|
|
|
ReferencedSymbol = BD->getSymbol();
|
|
|
|
|
}
|
2023-08-17 18:14:53 -07:00
|
|
|
}
|
2022-04-12 18:42:19 -07:00
|
|
|
|
2023-02-07 23:23:44 +03:00
|
|
|
ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
|
|
|
|
|
symbol_iterator SymbolIter = Rel.getSymbol();
|
|
|
|
|
if (SymbolIter != InputFile->symbol_end()) {
|
|
|
|
|
SymbolRef Symbol = *SymbolIter;
|
|
|
|
|
section_iterator Section =
|
|
|
|
|
cantFail(Symbol.getSection(), "cannot get symbol section");
|
|
|
|
|
if (Section != InputFile->section_end()) {
|
|
|
|
|
Expected<StringRef> SectionName = Section->getName();
|
|
|
|
|
if (SectionName && !SectionName->empty())
|
|
|
|
|
ReferencedSection = BC->getUniqueSectionByName(*SectionName);
|
2024-08-07 16:25:46 +04:00
|
|
|
} else if (BC->isRISCV() && ReferencedSymbol && ContainingBF &&
|
2023-06-16 11:49:19 +02:00
|
|
|
(cantFail(Symbol.getFlags()) & SymbolRef::SF_Absolute)) {
|
|
|
|
|
// This might be a relocation for an ABS symbol like __global_pointer$ on
|
|
|
|
|
// RISC-V
|
|
|
|
|
ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol,
|
2025-03-14 18:15:59 +00:00
|
|
|
Relocation::getType(Rel), 0,
|
2023-06-16 11:49:19 +02:00
|
|
|
cantFail(Symbol.getValue()));
|
|
|
|
|
return;
|
2023-02-07 23:23:44 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!ReferencedSection)
|
|
|
|
|
ReferencedSection = BC->getSectionForAddress(SymbolAddress);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
const bool IsToCode = ReferencedSection && ReferencedSection->isText();
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
// Special handling of PC-relative relocations.
|
2025-04-14 10:24:47 -07:00
|
|
|
if (IsX86 && Relocation::isPCRelative(RType)) {
|
2022-08-17 20:43:21 -07:00
|
|
|
if (!IsFromCode && IsToCode) {
|
|
|
|
|
// PC-relative relocations from data to code are tricky since the
|
|
|
|
|
// original information is typically lost after linking, even with
|
|
|
|
|
// '--emit-relocs'. Such relocations are normally used by PIC-style
|
|
|
|
|
// jump tables and they reference both the jump table and jump
|
|
|
|
|
// targets by computing the difference between the two. If we blindly
|
|
|
|
|
// apply the relocation, it will appear that it references an arbitrary
|
|
|
|
|
// location in the code, possibly in a different function from the one
|
|
|
|
|
// containing the jump table.
|
|
|
|
|
//
|
|
|
|
|
// For that reason, we only register the fact that there is a
|
|
|
|
|
// PC-relative relocation at a given address against the code.
|
|
|
|
|
// The actual referenced label/address will be determined during jump
|
|
|
|
|
// table analysis.
|
|
|
|
|
BC->addPCRelativeDataRelocation(Rel.getOffset());
|
|
|
|
|
} else if (ContainingBF && !IsSectionRelocation && ReferencedSymbol) {
|
|
|
|
|
// If we know the referenced symbol, register the relocation from
|
|
|
|
|
// the code. It's required to properly handle cases where
|
|
|
|
|
// "symbol + addend" references an object different from "symbol".
|
|
|
|
|
ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
|
|
|
|
|
Addend, ExtractedValue);
|
|
|
|
|
} else {
|
2022-08-17 20:45:05 -07:00
|
|
|
LLVM_DEBUG({
|
|
|
|
|
dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at"
|
|
|
|
|
<< formatv("{0:x} for {1}\n", Rel.getOffset(), SymbolName);
|
|
|
|
|
});
|
2022-04-12 18:42:19 -07:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool ForceRelocation = BC->forceSymbolRelocations(SymbolName);
|
2023-06-16 11:49:19 +02:00
|
|
|
if ((BC->isAArch64() || BC->isRISCV()) && Relocation::isGOT(RType))
|
2022-08-17 20:43:21 -07:00
|
|
|
ForceRelocation = true;
|
|
|
|
|
|
|
|
|
|
if (!ReferencedSection && !ForceRelocation) {
|
|
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Occasionally we may see a reference past the last byte of the function
|
|
|
|
|
// typically as a result of __builtin_unreachable(). Check it here.
|
|
|
|
|
BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress(
|
|
|
|
|
Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64);
|
|
|
|
|
|
|
|
|
|
if (!IsSectionRelocation) {
|
|
|
|
|
if (BinaryFunction *BF =
|
|
|
|
|
BC->getBinaryFunctionContainingAddress(SymbolAddress)) {
|
|
|
|
|
if (BF != ReferencedBF) {
|
|
|
|
|
// It's possible we are referencing a function without referencing any
|
|
|
|
|
// code, e.g. when taking a bitmask action on a function address.
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-WARNING: non-standard function reference (e.g. bitmask)"
|
|
|
|
|
<< formatv(" detected against function {0} from ", *BF);
|
2022-08-17 20:43:21 -07:00
|
|
|
if (IsFromCode)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << formatv("function {0}\n", *ContainingBF);
|
2022-08-17 20:43:21 -07:00
|
|
|
else
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << formatv("data section at {0:x}\n", Rel.getOffset());
|
2022-08-17 20:43:21 -07:00
|
|
|
LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend,
|
|
|
|
|
ExtractedValue));
|
|
|
|
|
ReferencedBF = BF;
|
2019-06-27 03:20:17 -07:00
|
|
|
}
|
2018-05-14 11:10:26 -07:00
|
|
|
}
|
2022-08-17 20:43:21 -07:00
|
|
|
} else if (ReferencedBF) {
|
|
|
|
|
assert(ReferencedSection && "section expected for section relocation");
|
|
|
|
|
if (*ReferencedBF->getOriginSection() != *ReferencedSection) {
|
|
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n");
|
|
|
|
|
ReferencedBF = nullptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-05-14 11:10:26 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
// Workaround for a member function pointer de-virtualization bug. We check
|
|
|
|
|
// if a non-pc-relative relocation in the code is pointing to (fptr - 1).
|
|
|
|
|
if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) &&
|
|
|
|
|
(!ReferencedBF || (ReferencedBF->getAddress() != Address))) {
|
|
|
|
|
if (const BinaryFunction *RogueBF =
|
|
|
|
|
BC->getBinaryFunctionAtAddress(Address + 1)) {
|
|
|
|
|
// Do an extra check that the function was referenced previously.
|
|
|
|
|
// It's a linear search, but it should rarely happen.
|
2023-01-18 14:07:48 -08:00
|
|
|
auto CheckReloc = [&](const Relocation &Rel) {
|
|
|
|
|
return Rel.Symbol == RogueBF->getSymbol() &&
|
|
|
|
|
!Relocation::isPCRelative(Rel.Type);
|
|
|
|
|
};
|
|
|
|
|
bool Found = llvm::any_of(
|
|
|
|
|
llvm::make_second_range(ContainingBF->Relocations), CheckReloc);
|
2019-09-17 14:24:31 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
if (Found) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-WARNING: detected possible compiler de-virtualization "
|
|
|
|
|
"bug: -1 addend used with non-pc-relative relocation against "
|
|
|
|
|
<< formatv("function {0} in function {1}\n", *RogueBF,
|
|
|
|
|
*ContainingBF);
|
2022-08-17 20:43:21 -07:00
|
|
|
return;
|
2019-09-17 14:24:31 -07:00
|
|
|
}
|
|
|
|
|
}
|
2022-08-17 20:43:21 -07:00
|
|
|
}
|
2019-09-17 14:24:31 -07:00
|
|
|
|
2025-03-14 18:44:33 -07:00
|
|
|
if (ForceRelocation && !ReferencedBF) {
|
|
|
|
|
// Create the relocation symbol if it's not defined in the binary.
|
|
|
|
|
if (SymbolAddress == 0)
|
|
|
|
|
ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0);
|
|
|
|
|
|
2025-04-04 11:42:21 -07:00
|
|
|
LLVM_DEBUG(
|
|
|
|
|
dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
|
|
|
|
|
<< (ReferencedSymbol ? ReferencedSymbol->getName() : "<none>")
|
|
|
|
|
<< " with addend " << Addend << '\n');
|
2022-08-17 20:43:21 -07:00
|
|
|
} else if (ReferencedBF) {
|
|
|
|
|
ReferencedSymbol = ReferencedBF->getSymbol();
|
|
|
|
|
uint64_t RefFunctionOffset = 0;
|
|
|
|
|
|
|
|
|
|
// Adjust the point of reference to a code location inside a function.
|
|
|
|
|
if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */ true)) {
|
|
|
|
|
RefFunctionOffset = Address - ReferencedBF->getAddress();
|
[BOLT] Improve handling of relocations targeting specific instructions (#66395)
On RISC-V, there are certain relocations that target a specific
instruction instead of a more abstract location like a function or basic
block. Take the following example that loads a value from symbol `foo`:
```
nop
1: auipc t0, %pcrel_hi(foo)
ld t0, %pcrel_lo(1b)(t0)
```
This results in two relocations:
- auipc: `R_RISCV_PCREL_HI20` referencing `foo`;
- ld: `R_RISCV_PCREL_LO12_I` referencing to local label `1` which points
to the auipc instruction.
It is of utmost importance that the `R_RISCV_PCREL_LO12_I` keeps
referring to the auipc instruction; if not, the program will fail to
assemble. However, BOLT currently does not guarantee this.
BOLT currently assumes that all local symbols are jump targets and
always starts a new basic block at symbol locations. The example above
results in a CFG that looks like this:
```
.BB0:
nop
.BB1:
auipc t0, %pcrel_hi(foo)
ld t0, %pcrel_lo(.BB1)(t0)
```
While this currently works (i.e., the `R_RISCV_PCREL_LO12_I` relocation
points to the correct instruction), it has two downsides:
- Too many basic blocks are created (the example above is logically only
one yet two are created);
- If instructions are inserted in `.BB1` (e.g., by instrumentation),
things will break since the label will not point to the auipc anymore.
This patch proposes to fix this issue by teaching BOLT to track labels
that should always point to a specific instruction. This is implemented
as follows:
- Add a new annotation type (`kLabel`) that allows us to annotate
instructions with an `MCSymbol *`;
- Whenever we encounter a relocation type that is used to refer to a
specific instruction (`Relocation::isInstructionReference`), we
register it without a symbol;
- During disassembly, whenever we encounter an instruction with such a
relocation, create a symbol for its target and store it in an offset
to symbol map (to ensure multiple relocations referencing the same
instruction use the same label);
- After disassembly, iterate this map to attach labels to instructions
via the new annotation type;
- During emission, emit these labels right before the instruction.
I believe the use of annotations works quite well for this use case as
it allows us to reliably track instruction labels. If we were to store
them as offsets in basic blocks, it would be error prone to keep them
updated whenever instructions are inserted or removed.
I have chosen to add labels as first-class annotations (as opposed to a
generic one) because the documentation of `MCAnnotation` suggests that
generic annotations are to be used for optional metadata that can be
discarded without affecting correctness. As this is not the case for
labels, a first-class annotation seemed more appropriate.
2023-10-06 06:46:16 +00:00
|
|
|
if (Relocation::isInstructionReference(RType)) {
|
|
|
|
|
// Instruction labels are created while disassembling so we just leave
|
|
|
|
|
// the symbol empty for now. Since the extracted value is typically
|
|
|
|
|
// unrelated to the referenced symbol (e.g., %pcrel_lo in RISC-V
|
|
|
|
|
// references an instruction but the patched value references the low
|
|
|
|
|
// bits of a data address), we set the extracted value to the symbol
|
|
|
|
|
// address in order to be able to correctly reconstruct the reference
|
|
|
|
|
// later.
|
|
|
|
|
ReferencedSymbol = nullptr;
|
|
|
|
|
ExtractedValue = Address;
|
|
|
|
|
} else if (RefFunctionOffset) {
|
2022-08-17 20:43:21 -07:00
|
|
|
if (ContainingBF && ContainingBF != ReferencedBF) {
|
|
|
|
|
ReferencedSymbol =
|
|
|
|
|
ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
|
|
|
|
|
} else {
|
|
|
|
|
ReferencedSymbol =
|
|
|
|
|
ReferencedBF->getOrCreateLocalLabel(Address,
|
|
|
|
|
/*CreatePastEnd =*/true);
|
2023-06-22 09:33:32 +02:00
|
|
|
|
|
|
|
|
// If ContainingBF != nullptr, it equals ReferencedBF (see
|
|
|
|
|
// if-condition above) so we're handling a relocation from a function
|
|
|
|
|
// to itself. RISC-V uses such relocations for branches, for example.
|
|
|
|
|
// These should not be registered as externally referenced offsets.
|
|
|
|
|
if (!ContainingBF)
|
|
|
|
|
ReferencedBF->registerReferencedOffset(RefFunctionOffset);
|
2018-05-14 11:10:26 -07:00
|
|
|
}
|
2022-08-17 20:43:21 -07:00
|
|
|
if (opts::Verbosity > 1 &&
|
2023-01-18 14:21:28 -08:00
|
|
|
BinarySection(*BC, RelocatedSection).isWritable())
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-WARNING: writable reference into the middle of the "
|
|
|
|
|
<< formatv("function {0} detected at address {1:x}\n",
|
|
|
|
|
*ReferencedBF, Rel.getOffset());
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2022-08-17 20:43:21 -07:00
|
|
|
SymbolAddress = Address;
|
|
|
|
|
Addend = 0;
|
|
|
|
|
}
|
|
|
|
|
LLVM_DEBUG({
|
|
|
|
|
dbgs() << " referenced function " << *ReferencedBF;
|
|
|
|
|
if (Address != ReferencedBF->getAddress())
|
2022-08-17 20:45:05 -07:00
|
|
|
dbgs() << formatv(" at offset {0:x}", RefFunctionOffset);
|
2022-08-17 20:43:21 -07:00
|
|
|
dbgs() << '\n';
|
|
|
|
|
});
|
|
|
|
|
} else {
|
|
|
|
|
if (IsToCode && SymbolAddress) {
|
|
|
|
|
// This can happen e.g. with PIC-style jump tables.
|
|
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for "
|
|
|
|
|
"relocation against code\n");
|
|
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
// In AArch64 there are zero reasons to keep a reference to the
|
|
|
|
|
// "original" symbol plus addend. The original symbol is probably just a
|
|
|
|
|
// section symbol. If we are here, this means we are probably accessing
|
|
|
|
|
// data, so it is imperative to keep the original address.
|
|
|
|
|
if (IsAArch64) {
|
2022-08-17 20:45:05 -07:00
|
|
|
SymbolName = formatv("SYMBOLat{0:x}", Address);
|
2022-08-17 20:43:21 -07:00
|
|
|
SymbolAddress = Address;
|
|
|
|
|
Addend = 0;
|
|
|
|
|
}
|
2018-03-20 14:34:58 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) {
|
|
|
|
|
// Note: this assertion is trying to check sanity of BinaryData objects
|
2025-04-17 14:01:00 +08:00
|
|
|
// but AArch64 and RISCV have inferred and incomplete object locations
|
|
|
|
|
// coming from GOT/TLS or any other non-trivial relocation (that requires
|
|
|
|
|
// creation of sections and whose symbol address is not really what should
|
|
|
|
|
// be encoded in the instruction). So we essentially disabled this check
|
2022-08-17 20:43:21 -07:00
|
|
|
// for AArch64 and live with bogus names for objects.
|
2025-04-17 14:01:00 +08:00
|
|
|
assert((IsAArch64 || BC->isRISCV() || IsSectionRelocation ||
|
2022-08-17 20:43:21 -07:00
|
|
|
BD->nameStartsWith(SymbolName) ||
|
|
|
|
|
BD->nameStartsWith("PG" + SymbolName) ||
|
|
|
|
|
(BD->nameStartsWith("ANONYMOUS") &&
|
2023-12-13 23:34:49 -08:00
|
|
|
(BD->getSectionName().starts_with(".plt") ||
|
|
|
|
|
BD->getSectionName().ends_with(".plt")))) &&
|
2022-08-17 20:45:05 -07:00
|
|
|
"BOLT symbol names of all non-section relocations must match up "
|
|
|
|
|
"with symbol names referenced in the relocation");
|
2022-08-17 20:43:21 -07:00
|
|
|
|
|
|
|
|
if (IsSectionRelocation)
|
|
|
|
|
BC->markAmbiguousRelocations(*BD, Address);
|
|
|
|
|
|
|
|
|
|
ReferencedSymbol = BD->getSymbol();
|
|
|
|
|
Addend += (SymbolAddress - BD->getAddress());
|
|
|
|
|
SymbolAddress = BD->getAddress();
|
|
|
|
|
assert(Address == SymbolAddress + Addend);
|
|
|
|
|
} else {
|
|
|
|
|
// These are mostly local data symbols but undefined symbols
|
|
|
|
|
// in relocation sections can get through here too, from .plt.
|
|
|
|
|
assert(
|
2023-06-16 11:49:19 +02:00
|
|
|
(IsAArch64 || BC->isRISCV() || IsSectionRelocation ||
|
2023-12-13 23:34:49 -08:00
|
|
|
BC->getSectionNameForAddress(SymbolAddress)->starts_with(".plt")) &&
|
2022-08-17 20:43:21 -07:00
|
|
|
"known symbols should not resolve to anonymous locals");
|
|
|
|
|
|
|
|
|
|
if (IsSectionRelocation) {
|
|
|
|
|
ReferencedSymbol =
|
|
|
|
|
BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat");
|
2017-11-14 20:05:11 -08:00
|
|
|
} else {
|
2022-08-17 20:43:21 -07:00
|
|
|
SymbolRef Symbol = *Rel.getSymbol();
|
|
|
|
|
const uint64_t SymbolSize =
|
|
|
|
|
IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize();
|
|
|
|
|
const uint64_t SymbolAlignment = IsAArch64 ? 1 : Symbol.getAlignment();
|
|
|
|
|
const uint32_t SymbolFlags = cantFail(Symbol.getFlags());
|
|
|
|
|
std::string Name;
|
|
|
|
|
if (SymbolFlags & SymbolRef::SF_Global) {
|
|
|
|
|
Name = SymbolName;
|
2018-09-21 12:00:20 -07:00
|
|
|
} else {
|
2022-08-17 20:43:21 -07:00
|
|
|
if (StringRef(SymbolName)
|
2023-12-13 23:34:49 -08:00
|
|
|
.starts_with(BC->AsmInfo->getPrivateGlobalPrefix()))
|
2022-08-17 20:43:21 -07:00
|
|
|
Name = NR.uniquify("PG" + SymbolName);
|
|
|
|
|
else
|
|
|
|
|
Name = NR.uniquify(SymbolName);
|
2018-04-20 20:03:31 -07:00
|
|
|
}
|
2022-08-17 20:43:21 -07:00
|
|
|
ReferencedSymbol = BC->registerNameAtAddress(
|
|
|
|
|
Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags);
|
|
|
|
|
}
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
if (IsSectionRelocation) {
|
|
|
|
|
BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName());
|
|
|
|
|
BC->markAmbiguousRelocations(*BD, Address);
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2022-08-17 20:43:21 -07:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
auto checkMaxDataRelocations = [&]() {
|
|
|
|
|
++NumDataRelocations;
|
2023-01-18 14:07:48 -08:00
|
|
|
LLVM_DEBUG(if (opts::MaxDataRelocations &&
|
|
|
|
|
NumDataRelocations + 1 == opts::MaxDataRelocations) {
|
|
|
|
|
dbgs() << "BOLT-DEBUG: processing ending on data relocation "
|
|
|
|
|
<< NumDataRelocations << ": ";
|
2022-08-17 20:43:21 -07:00
|
|
|
printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress,
|
|
|
|
|
Addend, ExtractedValue);
|
2023-01-18 14:07:48 -08:00
|
|
|
});
|
2017-11-14 20:05:11 -08:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
return (!opts::MaxDataRelocations ||
|
|
|
|
|
NumDataRelocations < opts::MaxDataRelocations);
|
|
|
|
|
};
|
2017-11-14 20:05:11 -08:00
|
|
|
|
2022-08-17 20:43:21 -07:00
|
|
|
if ((ReferencedSection && refersToReorderedSection(ReferencedSection)) ||
|
2023-06-22 09:32:32 +02:00
|
|
|
(opts::ForceToDataRelocations && checkMaxDataRelocations()) ||
|
|
|
|
|
// RISC-V has ADD/SUB data-to-data relocations
|
|
|
|
|
BC->isRISCV())
|
2022-08-17 20:43:21 -07:00
|
|
|
ForceRelocation = true;
|
2018-07-12 10:13:03 -07:00
|
|
|
|
2024-04-15 13:12:53 -04:00
|
|
|
if (IsFromCode)
|
2022-08-17 20:43:21 -07:00
|
|
|
ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
|
|
|
|
|
Addend, ExtractedValue);
|
2024-04-15 13:12:53 -04:00
|
|
|
else if (IsToCode || ForceRelocation)
|
2022-08-17 20:43:21 -07:00
|
|
|
BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend,
|
|
|
|
|
ExtractedValue);
|
2024-04-15 13:12:53 -04:00
|
|
|
else
|
2022-08-17 20:43:21 -07:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n");
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
[BOLT] Skip _init; avoiding GOT breakage for static binaries (#117751)
_init is used during startup of binaries. Unfortunately, its
address can be shared (at least on AArch64 glibc static binaries) with a
data
reference that lives in the GOT. The GOT rewriting is currently unable
to distinguish between data addresses and function addresses. This leads
to the data address being incorrectly rewritten, causing a crash on
startup of the binary:
Unexpected reloc type in static binary.
To avoid this, don't consider _init for being moved, by skipping it.
~We could add further conditions to narrow the skipped case for known
crashes, but as a straw man I thought it'd be best to keep the condition
as simple as possible and see if there any objections to this.~
(Edit: this broke the test
bolt/test/runtime/X86/retpoline-synthetic.test,
because _init was skipped from the retpoline pass and it has an indirect
call in it, so I include a check for static binaries now, which avoids
the test failure,
but perhaps this could/should be narrowed further?)
For now, skip _init for static binaries on any architecture; we could
add further conditions to narrow the skipped case for known crashes, but
as a straw man I thought it'd be best to keep the condition as simple as
possible and see if there are any objections to this.
Updates #100096.
2024-11-28 14:59:07 +00:00
|
|
|
/// Return the BinaryFunction for `_init` when \p BC describes a static
/// executable whose `_init` symbol lives in the `.init` section; otherwise
/// return nullptr. The result may also be nullptr if no function is registered
/// at the symbol's address.
static BinaryFunction *getInitFunctionIfStaticBinary(BinaryContext &BC) {
  // Workaround for https://github.com/llvm/llvm-project/issues/100096
  // ("[BOLT] GOT array pointer incorrectly rewritten"). In aarch64 static
  // glibc binaries the address of _init (in .init) can alias a data pointer
  // marking the end of an array. GOT rewriting currently cannot tell the two
  // apart and would redirect the data pointer to the moved _init, crashing the
  // binary at runtime. Leaving _init in place should be harmless.
  const BinaryData *InitData =
      BC.IsStaticExecutable ? BC.getBinaryDataByName("_init") : nullptr;
  const bool InitLivesInInitSection =
      InitData && InitData->getSectionName() == ".init";
  if (!InitLivesInInitSection)
    return nullptr;

  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skip _init in for GOT workaround.\n");
  return BC.getBinaryFunctionAtAddress(InitData->getAddress());
}
|
|
|
|
|
|
2020-05-03 13:54:45 -07:00
|
|
|
void RewriteInstance::selectFunctionsToProcess() {
|
|
|
|
|
// Extend the list of functions to process or skip from a file.
|
|
|
|
|
auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile,
|
|
|
|
|
cl::list<std::string> &FunctionNames) {
|
|
|
|
|
if (FunctionNamesFile.empty())
|
|
|
|
|
return;
|
|
|
|
|
std::ifstream FuncsFile(FunctionNamesFile, std::ios::in);
|
|
|
|
|
std::string FuncName;
|
2021-12-23 12:38:33 -08:00
|
|
|
while (std::getline(FuncsFile, FuncName))
|
2020-05-03 13:54:45 -07:00
|
|
|
FunctionNames.push_back(FuncName);
|
|
|
|
|
};
|
|
|
|
|
populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames);
|
|
|
|
|
populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames);
|
2021-06-04 18:49:29 -07:00
|
|
|
populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR);
|
2020-05-03 13:54:45 -07:00
|
|
|
|
2021-06-04 18:49:29 -07:00
|
|
|
// Make a set of functions to process to speed up lookups.
|
|
|
|
|
std::unordered_set<std::string> ForceFunctionsNR(
|
|
|
|
|
opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end());
|
|
|
|
|
|
|
|
|
|
if ((!opts::ForceFunctionNames.empty() ||
|
|
|
|
|
!opts::ForceFunctionNamesNR.empty()) &&
|
|
|
|
|
!opts::SkipFunctionNames.empty()) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: cannot select functions to process and skip at the "
|
|
|
|
|
"same time. Please use only one type of selection.\n";
|
2020-05-03 13:54:45 -07:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-30 12:23:58 -08:00
|
|
|
uint64_t LiteThresholdExecCount = 0;
|
|
|
|
|
if (opts::LiteThresholdPct) {
|
|
|
|
|
if (opts::LiteThresholdPct > 100)
|
|
|
|
|
opts::LiteThresholdPct = 100;
|
2020-05-03 13:54:45 -07:00
|
|
|
|
2020-12-30 12:23:58 -08:00
|
|
|
std::vector<const BinaryFunction *> TopFunctions;
|
|
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
|
|
|
|
const BinaryFunction &Function = BFI.second;
|
|
|
|
|
if (ProfileReader->mayHaveProfileData(Function))
|
|
|
|
|
TopFunctions.push_back(&Function);
|
|
|
|
|
}
|
2022-06-23 22:15:47 -07:00
|
|
|
llvm::sort(
|
|
|
|
|
TopFunctions, [](const BinaryFunction *A, const BinaryFunction *B) {
|
|
|
|
|
return A->getKnownExecutionCount() < B->getKnownExecutionCount();
|
|
|
|
|
});
|
2020-12-30 12:23:58 -08:00
|
|
|
|
|
|
|
|
size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100;
|
|
|
|
|
if (Index)
|
|
|
|
|
--Index;
|
|
|
|
|
LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount();
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: limiting processing to functions with at least "
|
|
|
|
|
<< LiteThresholdExecCount << " invocations\n";
|
2020-12-30 12:23:58 -08:00
|
|
|
}
|
2021-12-14 16:52:51 -08:00
|
|
|
LiteThresholdExecCount = std::max(
|
|
|
|
|
LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount));
|
2020-12-30 12:23:58 -08:00
|
|
|
|
2022-12-28 20:49:30 -08:00
|
|
|
StringSet<> ReorderFunctionsUserSet;
|
2023-01-25 11:37:55 -08:00
|
|
|
StringSet<> ReorderFunctionsLTOCommonSet;
|
2022-12-28 20:49:30 -08:00
|
|
|
if (opts::ReorderFunctions == ReorderFunctions::RT_USER) {
|
2024-02-12 14:51:15 -08:00
|
|
|
std::vector<std::string> FunctionNames;
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->logBOLTErrorsAndQuitOnFatal(
|
|
|
|
|
ReorderFunctions::readFunctionOrderFile(FunctionNames));
|
2024-02-12 14:51:15 -08:00
|
|
|
for (const std::string &Function : FunctionNames) {
|
2022-12-28 20:49:30 -08:00
|
|
|
ReorderFunctionsUserSet.insert(Function);
|
2023-01-25 11:37:55 -08:00
|
|
|
if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Function))
|
|
|
|
|
ReorderFunctionsLTOCommonSet.insert(*LTOCommonName);
|
|
|
|
|
}
|
2022-12-28 20:49:30 -08:00
|
|
|
}
|
|
|
|
|
|
2020-12-30 12:23:58 -08:00
|
|
|
uint64_t NumFunctionsToProcess = 0;
|
2023-02-08 19:11:13 -08:00
|
|
|
auto mustSkip = [&](const BinaryFunction &Function) {
|
2023-06-12 13:16:09 -07:00
|
|
|
if (opts::MaxFunctions.getNumOccurrences() &&
|
|
|
|
|
NumFunctionsToProcess >= opts::MaxFunctions)
|
2023-02-08 19:11:13 -08:00
|
|
|
return true;
|
|
|
|
|
for (std::string &Name : opts::SkipFunctionNames)
|
|
|
|
|
if (Function.hasNameRegex(Name))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
auto shouldProcess = [&](const BinaryFunction &Function) {
|
|
|
|
|
if (mustSkip(Function))
|
2020-05-03 13:54:45 -07:00
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// If the list is not empty, only process functions from the list.
|
2021-06-04 18:49:29 -07:00
|
|
|
if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
|
|
|
|
|
// Regex check (-funcs and -funcs-file options).
|
2021-12-23 12:38:33 -08:00
|
|
|
for (std::string &Name : opts::ForceFunctionNames)
|
|
|
|
|
if (Function.hasNameRegex(Name))
|
2020-05-03 13:54:45 -07:00
|
|
|
return true;
|
2021-12-23 12:38:33 -08:00
|
|
|
|
2021-06-04 18:49:29 -07:00
|
|
|
// Non-regex check (-funcs-no-regex and -funcs-file-no-regex).
|
2023-09-14 11:56:57 -07:00
|
|
|
for (const StringRef Name : Function.getNames())
|
|
|
|
|
if (ForceFunctionsNR.count(Name.str()))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return false;
|
2020-05-03 13:54:45 -07:00
|
|
|
}
|
|
|
|
|
|
2020-05-03 15:49:58 -07:00
|
|
|
if (opts::Lite) {
|
2022-12-28 20:49:30 -08:00
|
|
|
// Forcibly include functions specified in the -function-order file.
|
|
|
|
|
if (opts::ReorderFunctions == ReorderFunctions::RT_USER) {
|
2023-09-14 11:56:57 -07:00
|
|
|
for (const StringRef Name : Function.getNames())
|
|
|
|
|
if (ReorderFunctionsUserSet.contains(Name))
|
|
|
|
|
return true;
|
2023-01-25 11:37:55 -08:00
|
|
|
for (const StringRef Name : Function.getNames())
|
|
|
|
|
if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
|
|
|
|
|
if (ReorderFunctionsLTOCommonSet.contains(*LTOCommonName))
|
|
|
|
|
return true;
|
2022-12-28 20:49:30 -08:00
|
|
|
}
|
|
|
|
|
|
2020-12-30 12:23:58 -08:00
|
|
|
if (ProfileReader && !ProfileReader->mayHaveProfileData(Function))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (Function.getKnownExecutionCount() < LiteThresholdExecCount)
|
2020-05-03 15:49:58 -07:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-03 13:54:45 -07:00
|
|
|
return true;
|
|
|
|
|
};
|
|
|
|
|
|
[BOLT] Skip _init; avoiding GOT breakage for static binaries (#117751)
_init is used during startup of binaires. Unfortunately, its
address can be shared (at least on AArch64 glibc static binaries) with a
data
reference that lives in the GOT. The GOT rewriting is currently unable
to distinguish between data addresses and function addresses. This leads
to the data address being incorrectly rewritten, causing a crash on
startup of the binary:
Unexpected reloc type in static binary.
To avoid this, don't consider _init for being moved, by skipping it.
~We could add further conditions to narrow the skipped case for known
crashes, but as a straw man I thought it'd be best to keep the condition
as simple as possible and see if there any objections to this.~
(Edit: this broke the test
bolt/test/runtime/X86/retpoline-synthetic.test,
because _init was skipped from the retpoline pass and it has an indirect
call in it, so I include a check for static binaries now, which avoids
the test failure,
but perhaps this could/should be narrowed further?)
For now, skip _init for static binaries on any architecture; we could
add further conditions to narrow the skipped case for known crashes, but
as a straw man I thought it'd be best to keep the condition as simple as
possible and see if there any objections to this.
Updates #100096.
2024-11-28 14:59:07 +00:00
|
|
|
if (BinaryFunction *Init = getInitFunctionIfStaticBinary(*BC))
|
|
|
|
|
Init->setIgnored();
|
|
|
|
|
|
2020-05-03 13:54:45 -07:00
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryFunction &Function = BFI.second;
|
2020-05-03 13:54:45 -07:00
|
|
|
|
2020-12-30 12:23:58 -08:00
|
|
|
// Pseudo functions are explicitly marked by us not to be processed.
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
if (Function.isPseudo()) {
|
|
|
|
|
Function.IsIgnored = true;
|
|
|
|
|
Function.HasExternalRefRelocations = true;
|
2020-05-03 13:54:45 -07:00
|
|
|
continue;
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
}
|
2020-05-03 13:54:45 -07:00
|
|
|
|
2023-02-08 19:11:13 -08:00
|
|
|
// Decide what to do with fragments after parent functions are processed.
|
|
|
|
|
if (Function.isFragment())
|
|
|
|
|
continue;
|
|
|
|
|
|
2020-05-03 13:54:45 -07:00
|
|
|
if (!shouldProcess(Function)) {
|
2023-02-08 19:11:13 -08:00
|
|
|
if (opts::Verbosity >= 1) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: skipping processing " << Function
|
|
|
|
|
<< " per user request\n";
|
2023-02-08 19:11:13 -08:00
|
|
|
}
|
|
|
|
|
Function.setIgnored();
|
|
|
|
|
} else {
|
|
|
|
|
++NumFunctionsToProcess;
|
2023-06-12 13:16:09 -07:00
|
|
|
if (opts::MaxFunctions.getNumOccurrences() &&
|
|
|
|
|
NumFunctionsToProcess == opts::MaxFunctions)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: processing ending on " << Function << '\n';
|
2023-02-08 19:11:13 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!BC->HasSplitFunctions)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
// Fragment overrides:
|
|
|
|
|
// - If the fragment must be skipped, then the parent must be skipped as well.
|
|
|
|
|
// Otherwise, fragment should follow the parent function:
|
|
|
|
|
// - if the parent is skipped, skip fragment,
|
|
|
|
|
// - if the parent is processed, process the fragment(s) as well.
|
|
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
|
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
if (!Function.isFragment())
|
|
|
|
|
continue;
|
|
|
|
|
if (mustSkip(Function)) {
|
|
|
|
|
for (BinaryFunction *Parent : Function.ParentFragments) {
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: skipping processing " << *Parent
|
|
|
|
|
<< " together with fragment function\n";
|
2023-02-08 19:11:13 -08:00
|
|
|
}
|
|
|
|
|
Parent->setIgnored();
|
|
|
|
|
--NumFunctionsToProcess;
|
|
|
|
|
}
|
|
|
|
|
Function.setIgnored();
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool IgnoredParent =
|
|
|
|
|
llvm::any_of(Function.ParentFragments, [&](BinaryFunction *Parent) {
|
|
|
|
|
return Parent->isIgnored();
|
|
|
|
|
});
|
|
|
|
|
if (IgnoredParent) {
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: skipping processing " << Function
|
|
|
|
|
<< " together with parent function\n";
|
2023-02-08 19:11:13 -08:00
|
|
|
}
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
Function.setIgnored();
|
2020-05-03 13:54:45 -07:00
|
|
|
} else {
|
|
|
|
|
++NumFunctionsToProcess;
|
2023-02-08 19:11:13 -08:00
|
|
|
if (opts::Verbosity >= 1) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: processing " << Function
|
|
|
|
|
<< " as a sibling of non-ignored function\n";
|
2023-02-08 19:11:13 -08:00
|
|
|
}
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: processing ending on " << Function << '\n';
|
2020-05-03 13:54:45 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-14 18:48:05 -07:00
|
|
|
void RewriteInstance::readDebugInfo() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName,
|
|
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
2016-03-14 18:48:05 -07:00
|
|
|
if (!opts::UpdateDebugSections)
|
|
|
|
|
return;
|
|
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
BC->preprocessDebugInfo();
|
2016-03-14 18:48:05 -07:00
|
|
|
}
|
|
|
|
|
|
2019-01-15 23:43:40 -08:00
|
|
|
void RewriteInstance::preprocessProfileData() {
|
2020-05-07 23:00:29 -07:00
|
|
|
if (!ProfileReader)
|
|
|
|
|
return;
|
|
|
|
|
|
2019-01-15 23:43:40 -08:00
|
|
|
NamedRegionTimer T("preprocessprofile", "pre-process profile data",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2020-05-07 23:00:29 -07:00
|
|
|
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: pre-processing profile using "
|
|
|
|
|
<< ProfileReader->getReaderName() << '\n';
|
2020-05-07 23:00:29 -07:00
|
|
|
|
|
|
|
|
if (BAT->enabledFor(InputFile)) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: profile collection done on a binary already "
|
|
|
|
|
"processed by BOLT\n";
|
2020-05-07 23:00:29 -07:00
|
|
|
ProfileReader->setBAT(&*BAT);
|
2020-05-03 15:49:58 -07:00
|
|
|
}
|
|
|
|
|
|
2025-05-10 13:39:15 -07:00
|
|
|
if (Error E = ProfileReader->preprocessProfile(*BC))
|
2020-05-07 23:00:29 -07:00
|
|
|
report_error("cannot pre-process profile", std::move(E));
|
|
|
|
|
|
2024-05-23 16:40:08 -07:00
|
|
|
if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() &&
|
|
|
|
|
!opts::AllowStripped) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: input binary does not have local file symbols "
|
|
|
|
|
"but profile data includes function names with embedded file "
|
|
|
|
|
"names. It appears that the input binary was stripped while a "
|
2024-05-23 16:40:08 -07:00
|
|
|
"profiled binary was not. If you know what you are doing and "
|
|
|
|
|
"wish to proceed, use -allow-stripped option.\n";
|
2020-05-07 23:00:29 -07:00
|
|
|
exit(1);
|
2019-04-12 17:33:46 -07:00
|
|
|
}
|
2019-01-15 23:43:40 -08:00
|
|
|
}
|
|
|
|
|
|
2023-06-27 22:55:53 -07:00
|
|
|
void RewriteInstance::initializeMetadataManager() {
|
2024-01-30 18:04:29 -08:00
|
|
|
if (BC->IsLinuxKernel)
|
2023-06-28 14:35:05 -07:00
|
|
|
MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC));
|
|
|
|
|
|
2024-06-03 21:39:47 -07:00
|
|
|
MetadataManager.registerRewriter(createBuildIDRewriter(*BC));
|
|
|
|
|
|
2023-06-27 23:37:14 -07:00
|
|
|
MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));
|
|
|
|
|
|
2023-06-27 22:56:47 -07:00
|
|
|
MetadataManager.registerRewriter(createSDTRewriter(*BC));
|
2023-06-27 22:55:53 -07:00
|
|
|
}
|
|
|
|
|
|
2024-06-03 21:39:47 -07:00
|
|
|
void RewriteInstance::processSectionMetadata() {
|
2024-07-31 22:12:34 -07:00
|
|
|
NamedRegionTimer T("processmetadata-section", "process section metadata",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2023-06-27 22:55:53 -07:00
|
|
|
initializeMetadataManager();
|
|
|
|
|
|
2024-06-03 21:39:47 -07:00
|
|
|
MetadataManager.runSectionInitializers();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::processMetadataPreCFG() {
|
2024-07-31 22:12:34 -07:00
|
|
|
NamedRegionTimer T("processmetadata-precfg", "process metadata pre-CFG",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2023-06-27 22:55:53 -07:00
|
|
|
MetadataManager.runInitializersPreCFG();
|
|
|
|
|
|
|
|
|
|
processProfileDataPreCFG();
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-12 21:36:29 -07:00
|
|
|
void RewriteInstance::processMetadataPostCFG() {
|
2024-07-31 22:12:34 -07:00
|
|
|
NamedRegionTimer T("processmetadata-postcfg", "process metadata post-CFG",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2023-07-12 21:36:29 -07:00
|
|
|
MetadataManager.runInitializersPostCFG();
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
void RewriteInstance::processProfileDataPreCFG() {
|
|
|
|
|
if (!ProfileReader)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
|
|
|
|
|
2025-05-10 13:39:15 -07:00
|
|
|
if (Error E = ProfileReader->readProfilePreCFG(*BC))
|
2020-05-07 23:00:29 -07:00
|
|
|
report_error("cannot read profile pre-CFG", std::move(E));
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-13 23:12:01 -08:00
|
|
|
void RewriteInstance::processProfileData() {
|
2020-05-07 23:00:29 -07:00
|
|
|
if (!ProfileReader)
|
|
|
|
|
return;
|
|
|
|
|
|
2019-01-15 23:43:40 -08:00
|
|
|
NamedRegionTimer T("processprofile", "process profile data", TimerGroupName,
|
|
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
2020-05-03 15:49:58 -07:00
|
|
|
|
2025-05-10 13:39:15 -07:00
|
|
|
if (Error E = ProfileReader->readProfile(*BC))
|
2020-05-07 23:00:29 -07:00
|
|
|
report_error("cannot read profile", std::move(E));
|
2017-07-17 11:22:22 -07:00
|
|
|
|
2023-06-28 17:50:39 -07:00
|
|
|
if (opts::PrintProfile || opts::PrintAll) {
|
|
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
|
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
if (Function.empty())
|
|
|
|
|
continue;
|
|
|
|
|
|
2024-02-12 14:53:53 -08:00
|
|
|
Function.print(BC->outs(), "after attaching profile");
|
2023-06-28 17:50:39 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-03-21 14:32:13 -07:00
|
|
|
if (!opts::SaveProfile.empty() && !BAT->enabledFor(InputFile)) {
|
2020-05-07 23:00:29 -07:00
|
|
|
YAMLProfileWriter PW(opts::SaveProfile);
|
2018-04-09 19:10:19 -07:00
|
|
|
PW.writeProfile(*this);
|
2017-07-17 11:22:22 -07:00
|
|
|
}
|
2022-09-19 13:36:50 -07:00
|
|
|
if (opts::AggregateOnly &&
|
2024-03-21 14:32:13 -07:00
|
|
|
opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML &&
|
|
|
|
|
!BAT->enabledFor(InputFile)) {
|
2022-09-19 13:36:50 -07:00
|
|
|
YAMLProfileWriter PW(opts::OutputFilename);
|
|
|
|
|
PW.writeProfile(*this);
|
|
|
|
|
}
|
2020-05-07 23:00:29 -07:00
|
|
|
|
|
|
|
|
// Release memory used by profile reader.
|
|
|
|
|
ProfileReader.reset();
|
|
|
|
|
|
2024-05-22 13:53:41 -07:00
|
|
|
if (opts::AggregateOnly) {
|
|
|
|
|
PrintProgramStats PPS(&*BAT);
|
|
|
|
|
BC->logBOLTErrorsAndQuitOnFatal(PPS.runOnFunctions(*BC));
|
2024-07-31 22:14:52 -07:00
|
|
|
TimerGroup::printAll(outs());
|
2020-05-07 23:00:29 -07:00
|
|
|
exit(0);
|
2024-05-22 13:53:41 -07:00
|
|
|
}
|
2017-07-17 11:22:22 -07:00
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
void RewriteInstance::disassembleFunctions() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("disassembleFunctions", "disassemble functions",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2019-04-03 15:52:01 -07:00
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
2015-11-23 17:54:18 -08:00
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData();
|
2017-10-20 12:11:34 -07:00
|
|
|
if (!FunctionData) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: corresponding section is non-executable or "
|
|
|
|
|
<< "empty for function " << Function << '\n';
|
2020-05-03 15:49:58 -07:00
|
|
|
exit(1);
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2016-09-15 15:47:10 -07:00
|
|
|
// Treat zero-sized functions as non-simple ones.
|
|
|
|
|
if (Function.getSize() == 0) {
|
|
|
|
|
Function.setSimple(false);
|
|
|
|
|
continue;
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Offset of the function in the file.
|
2017-11-28 09:57:21 -08:00
|
|
|
const auto *FileBegin =
|
2021-12-14 16:52:51 -08:00
|
|
|
reinterpret_cast<const uint8_t *>(InputFile->getData().data());
|
2017-10-20 12:11:34 -07:00
|
|
|
Function.setFileOffset(FunctionData->begin() - FileBegin);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2020-05-03 15:49:58 -07:00
|
|
|
if (!shouldDisassemble(Function)) {
|
|
|
|
|
NamedRegionTimer T("scan", "scan functions", "buildfuncs",
|
|
|
|
|
"Scan Binary Functions", opts::TimeBuild);
|
|
|
|
|
Function.scanExternalRefs();
|
|
|
|
|
Function.setSimple(false);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-12 14:51:15 -08:00
|
|
|
bool DisasmFailed{false};
|
|
|
|
|
handleAllErrors(Function.disassemble(), [&](const BOLTError &E) {
|
|
|
|
|
DisasmFailed = true;
|
|
|
|
|
if (E.isFatal()) {
|
2024-02-12 14:53:53 -08:00
|
|
|
E.log(BC->errs());
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
if (opts::processAllFunctions()) {
|
|
|
|
|
BC->errs() << BC->generateBugReportMessage(
|
|
|
|
|
"function cannot be properly disassembled. "
|
|
|
|
|
"Unable to continue in relocation mode.",
|
|
|
|
|
Function);
|
2024-02-12 14:51:15 -08:00
|
|
|
exit(1);
|
|
|
|
|
}
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: could not disassemble function " << Function
|
|
|
|
|
<< ". Will ignore.\n";
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
// Forcefully ignore the function.
|
2025-06-02 12:33:54 -07:00
|
|
|
Function.scanExternalRefs();
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
Function.setIgnored();
|
2024-02-12 14:51:15 -08:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (DisasmFailed)
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
continue;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
if (opts::PrintAll || opts::PrintDisasm)
|
2024-02-12 14:53:53 -08:00
|
|
|
Function.print(BC->outs(), "after disassembly");
|
2018-02-14 12:06:17 -08:00
|
|
|
}
|
|
|
|
|
|
2022-07-07 00:01:33 +03:00
|
|
|
BC->processInterproceduralReferences();
|
2019-06-12 18:21:02 -07:00
|
|
|
BC->populateJumpTables();
|
|
|
|
|
|
|
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
|
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
|
|
|
|
|
if (!shouldDisassemble(Function))
|
|
|
|
|
continue;
|
|
|
|
|
|
2020-01-14 17:12:03 -08:00
|
|
|
Function.postProcessEntryPoints();
|
2019-06-12 18:21:02 -07:00
|
|
|
Function.postProcessJumpTables();
|
|
|
|
|
}
|
|
|
|
|
|
[BOLT] Support multiple parents for split jump table
There are two assumptions regarding jump table:
(a) It is accessed by only one fragment, say, Parent
(b) All entries target instructions in Parent
For (a), BOLT stores jump table entries as relative offset to Parent.
For (b), BOLT treats jump table entries target somewhere out of Parent
as INVALID_OFFSET, including fragment of same split function.
In this update, we extend (a) and (b) to include fragment of same split
functinon. For (a), we store jump table entries in absolute offset
instead. In addition, jump table will store all fragments that access
it. A fragment uses this information to only create label for jump table
entries that target to that fragment.
For (b), using absolute offset allows jump table entries to target
fragments of same split function, i.e., extend support for split jump
table. This can be done using relocation (fragment start/size) and
fragment detection heuristics (e.g., using symbol name pattern for
non-stripped binaries).
For jump table targets that can only be reached by one fragment, we
mark them as local label; otherwise, they would be the secondary
function entry to the target fragment.
Test Plan
```
ninja check-bolt
```
Reviewed By: Amir
Differential Revision: https://reviews.llvm.org/D128474
2022-07-13 23:35:51 -07:00
|
|
|
BC->clearJumpTableTempData();
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
BC->adjustCodePadding();
|
[BOLT] Add code padding verification
Summary:
In non-relocation mode, we allow data objects to be embedded in the
code. Such objects could be unmarked, and could occupy an area between
functions, the area which is considered to be code padding.
When we disassemble code, we detect references into the padding area
and adjust it, so that it is not overwritten during the code emission.
We assume the reference to be pointing to the beginning of the object.
However, assembly-written functions may reference the middle of an
object and use negative offsets to reference data fields. Thus,
conservatively, we reduce the possibly-overwritten padding area to
a minimum if the object reference was detected.
Since we also allow functions with unknown code in non-relocation mode,
it is possible that we miss references to some objects in code.
To cover such cases, we need to verify the padding area before we
allow to overwrite it.
(cherry picked from FBD16477787)
2019-07-23 20:48:41 -07:00
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
2018-02-14 12:06:17 -08:00
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
|
2019-01-15 23:43:40 -08:00
|
|
|
if (!shouldDisassemble(Function))
|
2018-02-14 12:06:17 -08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (!Function.isSimple()) {
|
2022-06-10 15:48:13 -07:00
|
|
|
assert((!BC->HasRelocations || Function.getSize() == 0 ||
|
[BOLT] Support multiple parents for split jump table
There are two assumptions regarding jump table:
(a) It is accessed by only one fragment, say, Parent
(b) All entries target instructions in Parent
For (a), BOLT stores jump table entries as relative offset to Parent.
For (b), BOLT treats jump table entries target somewhere out of Parent
as INVALID_OFFSET, including fragment of same split function.
In this update, we extend (a) and (b) to include fragment of same split
functinon. For (a), we store jump table entries in absolute offset
instead. In addition, jump table will store all fragments that access
it. A fragment uses this information to only create label for jump table
entries that target to that fragment.
For (b), using absolute offset allows jump table entries to target
fragments of same split function, i.e., extend support for split jump
table. This can be done using relocation (fragment start/size) and
fragment detection heuristics (e.g., using symbol name pattern for
non-stripped binaries).
For jump table targets that can only be reached by one fragment, we
mark them as local label; otherwise, they would be the secondary
function entry to the target fragment.
Test Plan
```
ninja check-bolt
```
Reviewed By: Amir
Differential Revision: https://reviews.llvm.org/D128474
2022-07-13 23:35:51 -07:00
|
|
|
Function.hasIndirectTargetToSplitFragment()) &&
|
2018-02-14 12:06:17 -08:00
|
|
|
"unexpected non-simple function in relocation mode");
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
// Fill in CFI information for this function
|
2021-12-23 12:38:33 -08:00
|
|
|
if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) {
|
|
|
|
|
if (BC->HasRelocations) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << BC->generateBugReportMessage("unable to fill CFI.",
|
|
|
|
|
Function);
|
|
|
|
|
exit(1);
|
2021-12-23 12:38:33 -08:00
|
|
|
} else {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: unable to fill CFI for function "
|
|
|
|
|
<< Function << ". Skipping.\n";
|
2021-12-23 12:38:33 -08:00
|
|
|
Function.setSimple(false);
|
|
|
|
|
continue;
|
2016-02-22 18:25:43 -08:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Parse LSDA.
|
2022-07-14 18:04:58 -07:00
|
|
|
if (Function.getLSDAAddress() != 0 &&
|
2023-11-15 23:21:50 +04:00
|
|
|
!BC->getFragmentsToSkip().count(&Function)) {
|
|
|
|
|
ErrorOr<BinarySection &> LSDASection =
|
|
|
|
|
BC->getSectionForAddress(Function.getLSDAAddress());
|
|
|
|
|
check_error(LSDASection.getError(), "failed to get LSDA section");
|
|
|
|
|
ArrayRef<uint8_t> LSDAData = ArrayRef<uint8_t>(
|
|
|
|
|
LSDASection->getData(), LSDASection->getContents().size());
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->logBOLTErrorsAndQuitOnFatal(
|
|
|
|
|
Function.parseLSDA(LSDAData, LSDASection->getAddress()));
|
2023-11-15 23:21:50 +04:00
|
|
|
}
|
2020-05-07 23:00:29 -07:00
|
|
|
}
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
void RewriteInstance::buildFunctionsCFG() {
|
|
|
|
|
NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs",
|
|
|
|
|
"Build Binary Functions", opts::TimeBuild);
|
2019-07-12 07:25:50 -07:00
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
// Create annotation indices to allow lock-free execution
|
|
|
|
|
BC->MIB->getOrCreateAnnotationIndex("JTIndexReg");
|
2021-12-18 17:05:00 -08:00
|
|
|
BC->MIB->getOrCreateAnnotationIndex("NOP");
|
2019-07-12 07:25:50 -07:00
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
ParallelUtilities::WorkFuncWithAllocTy WorkFun =
|
|
|
|
|
[&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {
|
2024-02-12 14:51:15 -08:00
|
|
|
bool HadErrors{false};
|
|
|
|
|
handleAllErrors(BF.buildCFG(AllocId), [&](const BOLTError &E) {
|
|
|
|
|
if (!E.getMessage().empty())
|
2024-02-12 14:53:53 -08:00
|
|
|
E.log(BC->errs());
|
2024-02-12 14:51:15 -08:00
|
|
|
if (E.isFatal())
|
|
|
|
|
exit(1);
|
|
|
|
|
HadErrors = true;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (HadErrors)
|
2020-05-07 23:00:29 -07:00
|
|
|
return;
|
2019-07-12 07:25:50 -07:00
|
|
|
|
2022-03-09 20:27:15 -08:00
|
|
|
if (opts::PrintAll) {
|
|
|
|
|
auto L = BC->scopeLock();
|
2024-02-12 14:53:53 -08:00
|
|
|
BF.print(BC->outs(), "while building cfg");
|
2022-03-09 20:27:15 -08:00
|
|
|
}
|
2020-05-07 23:00:29 -07:00
|
|
|
};
|
2017-11-28 09:57:21 -08:00
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
|
|
|
|
|
return !shouldDisassemble(BF) || !BF.isSimple();
|
|
|
|
|
};
|
2019-07-12 07:25:50 -07:00
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
|
|
|
|
|
*BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
|
|
|
|
|
SkipPredicate, "disassembleFunctions-buildCFG",
|
|
|
|
|
/*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll);
|
2018-06-06 03:17:32 -07:00
|
|
|
|
|
|
|
|
BC->postProcessSymbolTable();
|
2017-11-28 09:57:21 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::postProcessFunctions() {
|
[BOLT] Support multiple parents for split jump table
There are two assumptions regarding jump table:
(a) It is accessed by only one fragment, say, Parent
(b) All entries target instructions in Parent
For (a), BOLT stores jump table entries as relative offset to Parent.
For (b), BOLT treats jump table entries target somewhere out of Parent
as INVALID_OFFSET, including fragment of same split function.
In this update, we extend (a) and (b) to include fragment of same split
functinon. For (a), we store jump table entries in absolute offset
instead. In addition, jump table will store all fragments that access
it. A fragment uses this information to only create label for jump table
entries that target to that fragment.
For (b), using absolute offset allows jump table entries to target
fragments of same split function, i.e., extend support for split jump
table. This can be done using relocation (fragment start/size) and
fragment detection heuristics (e.g., using symbol name pattern for
non-stripped binaries).
For jump table targets that can only be reached by one fragment, we
mark them as local label; otherwise, they would be the secondary
function entry to the target fragment.
Test Plan
```
ninja check-bolt
```
Reviewed By: Amir
Differential Revision: https://reviews.llvm.org/D128474
2022-07-13 23:35:51 -07:00
|
|
|
// We mark fragments as non-simple here, not during disassembly,
|
|
|
|
|
// So we can build their CFGs.
|
|
|
|
|
BC->skipMarkedFragments();
|
|
|
|
|
BC->clearFragmentsToSkip();
|
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
BC->TotalScore = 0;
|
|
|
|
|
BC->SumExecutionCount = 0;
|
2019-04-03 15:52:01 -07:00
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
2017-11-28 09:57:21 -08:00
|
|
|
BinaryFunction &Function = BFI.second;
|
|
|
|
|
|
2023-02-10 17:09:03 +04:00
|
|
|
// Set function as non-simple if it has dynamic relocations
|
|
|
|
|
// in constant island, we don't want this function to be optimized
|
|
|
|
|
// e.g. function splitting is unsupported.
|
|
|
|
|
if (Function.hasDynamicRelocationAtIsland())
|
|
|
|
|
Function.setSimple(false);
|
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
if (Function.empty())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
Function.postProcessCFG();
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
if (opts::PrintAll || opts::PrintCFG)
|
2024-02-12 14:53:53 -08:00
|
|
|
Function.print(BC->outs(), "after building cfg");
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2025-08-22 10:51:09 +01:00
|
|
|
if (opts::shouldDumpDot(Function))
|
2020-10-21 17:08:32 -07:00
|
|
|
Function.dumpGraphForPass("00_build-cfg");
|
2016-07-01 08:40:56 -07:00
|
|
|
|
2016-05-26 10:58:01 -07:00
|
|
|
if (opts::PrintLoopInfo) {
|
|
|
|
|
Function.calculateLoopInfo();
|
2024-02-12 14:53:53 -08:00
|
|
|
Function.printLoopInfo(BC->outs());
|
2016-05-26 10:58:01 -07:00
|
|
|
}
|
|
|
|
|
|
2017-11-28 09:57:21 -08:00
|
|
|
BC->TotalScore += Function.getFunctionScore();
|
2017-05-01 16:52:54 -07:00
|
|
|
BC->SumExecutionCount += Function.getKnownExecutionCount();
|
2016-01-16 14:58:22 -08:00
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
|
|
|
|
|
if (opts::PrintGlobals) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: Global symbols:\n";
|
|
|
|
|
BC->printGlobalSymbols(BC->outs());
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::runOptimizationPasses() {
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
NamedRegionTimer T("runOptimizationPasses", "run optimization passes",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->logBOLTErrorsAndQuitOnFatal(BinaryFunctionPassManager::runAllPasses(*BC));
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2025-02-24 08:26:28 +01:00
|
|
|
void RewriteInstance::runBinaryAnalyses() {
|
|
|
|
|
NamedRegionTimer T("runBinaryAnalyses", "run binary analysis passes",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
|
|
|
|
BinaryFunctionPassManager Manager(*BC);
|
|
|
|
|
// FIXME: add a pass that warns about which functions do not have CFG,
|
|
|
|
|
// and therefore, analysis is most likely to be less accurate.
|
|
|
|
|
using GSK = opts::GadgetScannerKind;
|
2025-04-03 16:40:34 +03:00
|
|
|
using PAuthScanner = PAuthGadgetScanner::Analysis;
|
|
|
|
|
|
|
|
|
|
// If no command line option was given, act as if "all" was specified.
|
|
|
|
|
bool RunAll = !opts::GadgetScannersToRun.getBits() ||
|
|
|
|
|
opts::GadgetScannersToRun.isSet(GSK::GS_ALL);
|
|
|
|
|
|
|
|
|
|
if (RunAll || opts::GadgetScannersToRun.isSet(GSK::GS_PAUTH)) {
|
|
|
|
|
Manager.registerPass(
|
|
|
|
|
std::make_unique<PAuthScanner>(/*OnlyPacRetChecks=*/false));
|
|
|
|
|
} else if (RunAll || opts::GadgetScannersToRun.isSet(GSK::GS_PACRET)) {
|
|
|
|
|
Manager.registerPass(
|
|
|
|
|
std::make_unique<PAuthScanner>(/*OnlyPacRetChecks=*/true));
|
2025-02-24 08:26:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
BC->logBOLTErrorsAndQuitOnFatal(Manager.runPasses());
|
|
|
|
|
}
|
2024-12-12 11:06:27 +01:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
void RewriteInstance::preregisterSections() {
|
|
|
|
|
// Preregister sections before emission to set their order in the output.
|
|
|
|
|
const unsigned ROFlags = BinarySection::getFlags(/*IsReadOnly*/ true,
|
|
|
|
|
/*IsText*/ false,
|
|
|
|
|
/*IsAllocatable*/ true);
|
|
|
|
|
if (BinarySection *EHFrameSection = getSection(getEHFrameSectionName())) {
|
|
|
|
|
// New .eh_frame.
|
|
|
|
|
BC->registerOrUpdateSection(getNewSecPrefix() + getEHFrameSectionName(),
|
|
|
|
|
ELF::SHT_PROGBITS, ROFlags);
|
|
|
|
|
// Fully register a relocatable copy of the original .eh_frame.
|
|
|
|
|
BC->registerSection(".relocated.eh_frame", *EHFrameSection);
|
|
|
|
|
}
|
|
|
|
|
BC->registerOrUpdateSection(getNewSecPrefix() + ".gcc_except_table",
|
|
|
|
|
ELF::SHT_PROGBITS, ROFlags);
|
|
|
|
|
BC->registerOrUpdateSection(getNewSecPrefix() + ".rodata", ELF::SHT_PROGBITS,
|
|
|
|
|
ROFlags);
|
|
|
|
|
BC->registerOrUpdateSection(getNewSecPrefix() + ".rodata.cold",
|
|
|
|
|
ELF::SHT_PROGBITS, ROFlags);
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-24 14:03:43 -07:00
|
|
|
void RewriteInstance::emitAndLink() {
|
|
|
|
|
NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName,
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2023-07-11 09:32:44 +02:00
|
|
|
SmallString<0> ObjectBuffer;
|
|
|
|
|
raw_svector_ostream OS(ObjectBuffer);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
|
|
|
|
// Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB)
|
|
|
|
|
// and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these
|
|
|
|
|
// two instances.
|
2023-07-11 09:32:44 +02:00
|
|
|
std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(OS);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2018-01-23 15:10:24 -08:00
|
|
|
if (EHFrameSection) {
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
if (opts::UseOldText || opts::StrictMode) {
|
2020-04-19 12:55:43 -07:00
|
|
|
// The section is going to be regenerated from scratch.
|
|
|
|
|
// Empty the contents, but keep the section reference.
|
2020-05-21 16:25:05 -07:00
|
|
|
EHFrameSection->clearContents();
|
2020-04-19 12:55:43 -07:00
|
|
|
} else {
|
|
|
|
|
// Make .eh_frame relocatable.
|
|
|
|
|
relocateEHFrameSection();
|
|
|
|
|
}
|
2016-11-11 14:33:34 -08:00
|
|
|
}
|
|
|
|
|
|
2020-03-11 15:51:32 -07:00
|
|
|
emitBinaryContext(*Streamer, *BC, getOrgSecPrefix());
|
2020-03-06 15:06:37 -08:00
|
|
|
|
2022-06-07 00:31:02 -07:00
|
|
|
Streamer->finish();
|
2022-04-08 21:07:27 -07:00
|
|
|
if (Streamer->getContext().hadError()) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: Emission failed.\n";
|
2022-04-08 21:07:27 -07:00
|
|
|
exit(1);
|
|
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2023-07-11 09:32:44 +02:00
|
|
|
if (opts::KeepTmp) {
|
|
|
|
|
SmallString<128> OutObjectPath;
|
|
|
|
|
sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath);
|
|
|
|
|
std::error_code EC;
|
|
|
|
|
raw_fd_ostream FOS(OutObjectPath, EC);
|
|
|
|
|
check_error(EC, "cannot create output object file");
|
|
|
|
|
FOS << ObjectBuffer;
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs()
|
|
|
|
|
<< "BOLT-INFO: intermediary output object file saved for debugging "
|
|
|
|
|
"purposes: "
|
|
|
|
|
<< OutObjectPath << "\n";
|
2023-07-11 09:32:44 +02:00
|
|
|
}
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
ErrorOr<BinarySection &> TextSection =
|
|
|
|
|
BC->getUniqueSectionByName(BC->getMainCodeSectionName());
|
|
|
|
|
if (BC->HasRelocations && TextSection)
|
2024-04-11 06:29:51 -04:00
|
|
|
BC->renameSection(*TextSection,
|
|
|
|
|
getOrgSecPrefix() + BC->getMainCodeSectionName());
|
2022-09-22 12:05:12 -07:00
|
|
|
|
2016-02-08 10:02:48 -08:00
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
2017-05-08 22:51:36 -07:00
|
|
|
// Assign addresses to new sections.
|
2016-02-08 10:02:48 -08:00
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
// Get output object as ObjectFile.
|
|
|
|
|
std::unique_ptr<MemoryBuffer> ObjectMemBuffer =
|
2023-07-11 09:32:44 +02:00
|
|
|
MemoryBuffer::getMemBuffer(ObjectBuffer, "in-memory object file", false);
|
2015-11-23 17:54:18 -08:00
|
|
|
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
To load an object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
auto EFMM = std::make_unique<ExecutableFileMemoryManager>(*BC);
|
|
|
|
|
EFMM->setNewSecPrefix(getNewSecPrefix());
|
|
|
|
|
EFMM->setOrgSecPrefix(getOrgSecPrefix());
|
|
|
|
|
|
|
|
|
|
Linker = std::make_unique<JITLinkLinker>(*BC, std::move(EFMM));
|
|
|
|
|
Linker->loadObject(ObjectMemBuffer->getMemBufferRef(),
|
|
|
|
|
[this](auto MapSection) { mapFileSections(MapSection); });
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
// Update output addresses based on the new section map and
|
|
|
|
|
// layout. Only do this for the object created by ourselves.
|
2023-08-28 10:04:02 +02:00
|
|
|
updateOutputValues(*Linker);
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2023-08-28 10:04:02 +02:00
|
|
|
if (opts::UpdateDebugSections) {
|
2024-07-01 18:02:34 -07:00
|
|
|
DebugInfoRewriter->updateLineTableOffsets(
|
|
|
|
|
static_cast<MCObjectStreamer &>(*Streamer).getAssembler());
|
2023-08-28 10:04:02 +02:00
|
|
|
}
|
2021-10-13 13:19:06 -07:00
|
|
|
|
2025-02-27 16:13:57 -08:00
|
|
|
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) {
|
|
|
|
|
StartLinkingRuntimeLib = true;
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
To load an object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) {
|
2022-09-22 12:05:12 -07:00
|
|
|
// Map newly registered sections.
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
To load an object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
this->mapAllocatableSections(MapSection);
|
2020-12-01 16:29:39 -08:00
|
|
|
});
|
2025-02-27 16:13:57 -08:00
|
|
|
}
|
2019-07-24 14:03:43 -07:00
|
|
|
|
2019-03-14 18:51:05 -07:00
|
|
|
// Once the code is emitted, we can rename function sections to actual
|
|
|
|
|
// output sections and de-register sections used for emission.
|
2020-10-09 16:06:27 -07:00
|
|
|
for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
|
|
|
|
|
ErrorOr<BinarySection &> Section = Function->getCodeSection();
|
2021-12-14 16:52:51 -08:00
|
|
|
if (Section &&
|
2021-12-23 12:38:33 -08:00
|
|
|
(Function->getImageAddress() == 0 || Function->getImageSize() == 0))
|
2020-10-09 16:06:27 -07:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Restore origin section for functions that were emitted or supposed to
|
|
|
|
|
// be emitted to patch sections.
|
|
|
|
|
if (Section)
|
|
|
|
|
BC->deregisterSection(*Section);
|
|
|
|
|
assert(Function->getOriginSectionName() && "expected origin section");
|
2022-08-18 21:26:18 -07:00
|
|
|
Function->CodeSectionName = Function->getOriginSectionName()->str();
|
2022-08-24 17:47:01 -07:00
|
|
|
for (const FunctionFragment &FF :
|
2022-08-18 21:26:18 -07:00
|
|
|
Function->getLayout().getSplitFragments()) {
|
|
|
|
|
if (ErrorOr<BinarySection &> ColdSection =
|
2022-08-18 21:48:19 -07:00
|
|
|
Function->getCodeSection(FF.getFragmentNum()))
|
2020-10-09 16:06:27 -07:00
|
|
|
BC->deregisterSection(*ColdSection);
|
2019-03-14 18:51:05 -07:00
|
|
|
}
|
2022-08-18 21:26:18 -07:00
|
|
|
if (Function->getLayout().isSplit())
|
2022-08-18 21:48:19 -07:00
|
|
|
Function->setColdCodeSectionName(getBOLTTextSectionName());
|
2019-03-14 18:51:05 -07:00
|
|
|
}
|
|
|
|
|
|
2017-10-16 16:53:50 -07:00
|
|
|
if (opts::PrintCacheMetrics) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: cache metrics after emitting functions:\n";
|
|
|
|
|
CacheMetrics::printAll(BC->outs(), BC->getSortedFunctions());
|
2017-10-16 16:53:50 -07:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2024-01-29 17:27:33 -08:00
|
|
|
/// Run the metadata finalizers that must execute before emission by calling
/// MetadataManager.runFinalizersPreEmit(). The call is wrapped in a
/// NamedRegionTimer named "finalizemetadata-preemit" whose enabled flag is
/// opts::TimeRewrite.
void RewriteInstance::finalizeMetadataPreEmit() {
|
2024-07-31 22:12:34 -07:00
|
|
|
NamedRegionTimer T("finalizemetadata-preemit", "finalize metadata pre-emit",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2024-01-29 17:27:33 -08:00
|
|
|
MetadataManager.runFinalizersPreEmit();
|
|
|
|
|
}
|
|
|
|
|
|
2019-11-03 21:57:15 -08:00
|
|
|
/// Post-emit metadata update. In order, this:
///   1. calls MetadataManager.runFinalizersAfterEmit();
///   2. if opts::UpdateDebugSections is set, calls
///      DebugInfoRewriter->updateDebugInfo();
///   3. if opts::WriteBoltInfoSection is set, calls addBoltInfoSection().
/// The whole function runs under the "updatemetadata-postemit"
/// NamedRegionTimer, whose enabled flag is opts::TimeRewrite.
void RewriteInstance::updateMetadata() {
|
2024-07-31 22:12:34 -07:00
|
|
|
NamedRegionTimer T("updatemetadata-postemit", "update metadata post-emit",
|
|
|
|
|
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
|
2023-06-27 22:55:53 -07:00
|
|
|
MetadataManager.runFinalizersAfterEmit();
|
|
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
if (opts::UpdateDebugSections) {
|
|
|
|
|
// Nested timer: this T shadows the outer T and is scoped to this block, so
// it covers only the updateDebugInfo() call below.
NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName,
|
|
|
|
|
TimerGroupDesc, opts::TimeRewrite);
|
|
|
|
|
DebugInfoRewriter->updateDebugInfo();
|
|
|
|
|
}
|
[BOLT][non-reloc] Change function splitting in non-relocation mode
Summary:
This diff applies to non-relocation mode mostly. In this mode, we are
limited by original function boundaries, i.e. if a function becomes
larger after optimizations (e.g. because of the newly introduced
branches) then we might not be able to write the optimized version,
unless we split the function. At the same time, we do not benefit from
function splitting as we do in the relocation mode since we are not
moving functions/fragments, and the hot code does not become more
compact.
For the reasons described above, we used to execute multiple re-write
attempts to optimize the binary and we would only split functions that
were too large to fit into their original space.
After the first attempt, we would know functions that did not fit
into their original space. Then we would re-run all our passes again
feeding back the function information and forcefully splitting
such functions. Some functions still wouldn't fit even after the
splitting (mostly because of the branch relaxation for conditional tail
calls that does not happen in non-relocation mode). Yet we have emitted
debug info as if they were successfully overwritten. That's why we had
one more stage to write the functions again, marking failed-to-emit
functions non-simple. Sadly, there was a bug in the way 2nd and 3rd
attempts interacted, and we were not splitting the functions correctly
and as a result we were emitting less optimized code.
One of the reasons we had the multi-pass rewrite scheme in place, was
that we did not have an ability to precisely estimate the code size
before the actual code emission. Recently, BinaryContext obtained such
functionality, and now we can use it instead of relying on the
multi-pass rewrite. This eliminates redundant work of re-running
the same function passes multiple times.
Because function splitting runs before a number of optimization passes
that run on post-CFG state (those rely on the splitting pass), we
cannot estimate the non-split code size with 100% accuracy. However,
it is good enough for over 99% of the cases to extract most of the
performance gains for the binary.
As a result of eliminating the multi-pass rewrite, the processing time
in non-relocation mode with `-split-functions=2` is greatly reduced.
With debug info update, it is less than half of what it used to be.
New semantics for `-split-functions=<n>`:
-split-functions - split functions into hot and cold regions
=0 - do not split any function
=1 - in non-relocation mode only split functions too large to fit
into original code space
=2 - same as 1 (backwards compatibility)
=3 - split all functions
(cherry picked from FBD17362607)
2019-09-11 15:42:22 -07:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::WriteBoltInfoSection)
|
2020-05-07 23:00:29 -07:00
|
|
|
addBoltInfoSection();
|
[BOLT][non-reloc] Change function splitting in non-relocation mode
Summary:
This diff applies to non-relocation mode mostly. In this mode, we are
limited by original function boundaries, i.e. if a function becomes
larger after optimizations (e.g. because of the newly introduced
branches) then we might not be able to write the optimized version,
unless we split the function. At the same time, we do not benefit from
function splitting as we do in the relocation mode since we are not
moving functions/fragments, and the hot code does not become more
compact.
For the reasons described above, we used to execute multiple re-write
attempts to optimize the binary and we would only split functions that
were too large to fit into their original space.
After the first attempt, we would know functions that did not fit
into their original space. Then we would re-run all our passes again
feeding back the function information and forcefully splitting
such functions. Some functions still wouldn't fit even after the
splitting (mostly because of the branch relaxation for conditional tail
calls that does not happen in non-relocation mode). Yet we have emitted
debug info as if they were successfully overwritten. That's why we had
one more stage to write the functions again, marking failed-to-emit
functions non-simple. Sadly, there was a bug in the way 2nd and 3rd
attempts interacted, and we were not splitting the functions correctly
and as a result we were emitting less optimized code.
One of the reasons we had the multi-pass rewrite scheme in place, was
that we did not have an ability to precisely estimate the code size
before the actual code emission. Recently, BinaryContext obtained such
functionality, and now we can use it instead of relying on the
multi-pass rewrite. This eliminates redundant work of re-running
the same function passes multiple times.
Because function splitting runs before a number of optimization passes
that run on post-CFG state (those rely on the splitting pass), we
cannot estimate the non-split code size with 100% accuracy. However,
it is good enough for over 99% of the cases to extract most of the
performance gains for the binary.
As a result of eliminating the multi-pass rewrite, the processing time
in non-relocation mode with `-split-functions=2` is greatly reduced.
With debug info update, it is less than half of what it used to be.
New semantics for `-split-functions=<n>`:
-split-functions - split functions into hot and cold regions
=0 - do not split any function
=1 - in non-relocation mode only split functions too large to fit
into original code space
=2 - same as 1 (backwards compatibility)
=3 - split all functions
(cherry picked from FBD17362607)
2019-09-11 15:42:22 -07:00
|
|
|
}
|
|
|
|
|
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
To load an object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) {
|
2022-09-22 12:05:12 -07:00
|
|
|
BC->deregisterUnusedSections();
|
|
|
|
|
|
|
|
|
|
// If no new .eh_frame was written, remove relocated original .eh_frame.
|
|
|
|
|
BinarySection *RelocatedEHFrameSection =
|
|
|
|
|
getSection(".relocated" + getEHFrameSectionName());
|
|
|
|
|
if (RelocatedEHFrameSection && RelocatedEHFrameSection->hasValidSectionID()) {
|
|
|
|
|
BinarySection *NewEHFrameSection =
|
|
|
|
|
getSection(getNewSecPrefix() + getEHFrameSectionName());
|
|
|
|
|
if (!NewEHFrameSection || !NewEHFrameSection->isFinalized()) {
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
To load an object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
// JITLink will still have to process relocations for the section, hence
|
2022-09-22 12:05:12 -07:00
|
|
|
// we need to assign it the address that wouldn't result in relocation
|
|
|
|
|
// processing failure.
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
To load an object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
MapSection(*RelocatedEHFrameSection, NextAvailableAddress);
|
2022-09-22 12:05:12 -07:00
|
|
|
BC->deregisterSection(*RelocatedEHFrameSection);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
To load an object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
mapCodeSections(MapSection);
|
2022-09-22 12:05:12 -07:00
|
|
|
|
|
|
|
|
// Map the rest of the sections.
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
To load an object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
mapAllocatableSections(MapSection);
|
2024-04-29 14:44:04 -07:00
|
|
|
|
2024-05-01 18:22:38 -07:00
|
|
|
if (!BC->BOLTReserved.empty()) {
|
|
|
|
|
const uint64_t AllocatedSize =
|
|
|
|
|
NextAvailableAddress - BC->BOLTReserved.start();
|
|
|
|
|
if (BC->BOLTReserved.size() < AllocatedSize) {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: reserved space (" << BC->BOLTReserved.size()
|
|
|
|
|
<< " byte" << (BC->BOLTReserved.size() == 1 ? "" : "s")
|
2024-04-29 14:44:04 -07:00
|
|
|
<< ") is smaller than required for new allocations ("
|
|
|
|
|
<< AllocatedSize << " bytes)\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-04-20 20:03:31 -07:00
|
|
|
}
|
2018-06-20 12:03:24 -07:00
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
/// Return the text sections of BC that have a valid section ID, stably
/// sorted into output order by the compareSections predicate defined below.
///
/// The predicate applies these rules in sequence:
///  1. If both names start with the cold code section name, order them by
///     name length and then lexicographically; the direction is reversed
///     when opts::HotFunctionsAtEnd is set.
///  2. The hot-text mover section precedes everything else.
///  3. The main and warm code sections go first, or last when
///     opts::HotFunctionsAtEnd is set.
std::vector<BinarySection *> RewriteInstance::getCodeSections() {
|
2019-03-15 13:43:36 -07:00
|
|
|
std::vector<BinarySection *> CodeSections;
|
2021-12-23 12:38:33 -08:00
|
|
|
// Collect only the text sections that report a valid section ID.
for (BinarySection &Section : BC->textSections())
|
2019-03-15 13:43:36 -07:00
|
|
|
if (Section.hasValidSectionID())
|
|
|
|
|
CodeSections.emplace_back(&Section);
|
|
|
|
|
|
|
|
|
|
|
// Returns true when section A must be placed before section B.
auto compareSections = [&](const BinarySection *A, const BinarySection *B) {
|
[BOLT] Fixing relative ordering of cold sections under multi-way function splitting
Order code sections with names in the form of ".text.cold.i" based on the value of i
[Context] SplitFunctions.cpp implements splitting strategies that can potentially split each function into maximum N>2 fragments.
When such N-way splitting happens, new code sections with names ".text.cold.1", ..., ".text.cold.i", ... "text.cold.N-2" will be created
A section with name ".text.cold.i" contains the the (i+2)th fragment of each function.
As an example, if each function is splitted into N=3 fragments: hot, warm, cold, then code sections will now include
- a section with name ".text" containing hot fragments
- a section with name ".text.cold" containing warm fragments
- a section with name ".text.cold.1" containing cold fragments
The order of these new sections in the output binary currently depends on the order in which they are encountered by the emitter.
For example, under N=3-way splitting, if the first function is 2-way splitted into hot and cold and the second function is 3-way splitted into hot, warm, and cold
then the cold fragment is encountered first, resulting in the final section to be in the following order
.text (hot), .text.cold.1 (cold), .text.cold (warm)
The above is suboptimal because the distance of jumps/calls between the hot and the warm sections will be much bigger than when ordering the sections as follows
.text (hot), .text.cold (warm), .text.cold.1 (cold)
This diff orders the sections with names in the form of ".text.cold" or ".text.cold.i" based on the value of i (assuming the i-value of ".text.cold" is 0).
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D152941
2023-06-22 14:26:21 -07:00
|
|
|
// If both A and B have names starting with ".text.cold", then
|
|
|
|
|
// - if opts::HotFunctionsAtEnd is true, we want order
|
|
|
|
|
// ".text.cold.T", ".text.cold.T-1", ... ".text.cold.1", ".text.cold"
|
|
|
|
|
// - if opts::HotFunctionsAtEnd is false, we want order
|
|
|
|
|
// ".text.cold", ".text.cold.1", ... ".text.cold.T-1", ".text.cold.T"
|
2023-12-13 23:34:49 -08:00
|
|
|
if (A->getName().starts_with(BC->getColdCodeSectionName()) &&
|
|
|
|
|
B->getName().starts_with(BC->getColdCodeSectionName())) {
|
[BOLT] Fixing relative ordering of cold sections under multi-way function splitting
Order code sections with names in the form of ".text.cold.i" based on the value of i
[Context] SplitFunctions.cpp implements splitting strategies that can potentially split each function into maximum N>2 fragments.
When such N-way splitting happens, new code sections with names ".text.cold.1", ..., ".text.cold.i", ... "text.cold.N-2" will be created
A section with name ".text.cold.i" contains the the (i+2)th fragment of each function.
As an example, if each function is splitted into N=3 fragments: hot, warm, cold, then code sections will now include
- a section with name ".text" containing hot fragments
- a section with name ".text.cold" containing warm fragments
- a section with name ".text.cold.1" containing cold fragments
The order of these new sections in the output binary currently depends on the order in which they are encountered by the emitter.
For example, under N=3-way splitting, if the first function is 2-way splitted into hot and cold and the second function is 3-way splitted into hot, warm, and cold
then the cold fragment is encountered first, resulting in the final section to be in the following order
.text (hot), .text.cold.1 (cold), .text.cold (warm)
The above is suboptimal because the distance of jumps/calls between the hot and the warm sections will be much bigger than when ordering the sections as follows
.text (hot), .text.cold (warm), .text.cold.1 (cold)
This diff orders the sections with names in the form of ".text.cold" or ".text.cold.i" based on the value of i (assuming the i-value of ".text.cold" is 0).
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D152941
2023-06-22 14:26:21 -07:00
|
|
|
// Names of different length are ordered by length alone; only names of
// equal length fall through to the lexicographic comparison below.
if (A->getName().size() != B->getName().size())
|
|
|
|
|
return (opts::HotFunctionsAtEnd)
|
|
|
|
|
? (A->getName().size() > B->getName().size())
|
|
|
|
|
: (A->getName().size() < B->getName().size());
|
|
|
|
|
return (opts::HotFunctionsAtEnd) ? (A->getName() > B->getName())
|
|
|
|
|
: (A->getName() < B->getName());
|
|
|
|
|
}
|
|
|
|
|
|
|
2019-03-15 13:43:36 -07:00
|
|
|
// Place movers before anything else.
|
|
|
|
|
if (A->getName() == BC->getHotTextMoverSectionName())
|
|
|
|
|
return true;
|
|
|
|
|
if (B->getName() == BC->getHotTextMoverSectionName())
|
|
|
|
|
return false;
|
|
|
|
|
|
|
2023-11-29 22:42:36 -05:00
|
|
|
// Depending on opts::HotFunctionsAtEnd, place main and warm sections in
|
|
|
|
|
// order.
|
|
|
|
|
if (opts::HotFunctionsAtEnd) {
|
|
|
|
|
// Main section goes last; the warm section goes just before it.
if (B->getName() == BC->getMainCodeSectionName())
|
|
|
|
|
return true;
|
|
|
|
|
if (A->getName() == BC->getMainCodeSectionName())
|
|
|
|
|
return false;
|
|
|
|
|
return (B->getName() == BC->getWarmCodeSectionName());
|
|
|
|
|
} else {
|
|
|
|
|
// Main section goes first; the warm section follows it.
if (A->getName() == BC->getMainCodeSectionName())
|
|
|
|
|
return true;
|
|
|
|
|
if (B->getName() == BC->getMainCodeSectionName())
|
|
|
|
|
return false;
|
|
|
|
|
return (A->getName() == BC->getWarmCodeSectionName());
|
|
|
|
|
}
|
2019-03-15 13:43:36 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
// Determine the order of sections.
|
2022-06-23 22:15:47 -07:00
|
|
|
llvm::stable_sort(CodeSections, compareSections);
|
2019-03-15 13:43:36 -07:00
|
|
|
|
|
|
|
|
return CodeSections;
|
|
|
|
|
}
|
|
|
|
|
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
To load an object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
/// Assign output addresses to code sections and report them through
/// \p MapSection.
///
/// - When BC->HasRelocations is false, all work is delegated to
///   mapCodeSectionsInPlace().
/// - Otherwise, sections of injected functions that already carry a
///   non-zero output address are mapped at that address first. The
///   remaining sections returned by getCodeSections() are then laid out
///   either inside the original .text (opts::UseOldText) or starting at
///   NextAvailableAddress, which is advanced past the new code.
///
/// NOTE(review): PaddingSize is written by allocateAt and is not reset
/// between the old-.text attempt and the fallback allocateAt call.
/// Confirm the final "padding code" message cannot be printed from a
/// stale value.
void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) {
|
2025-06-30 17:09:41 -07:00
|
|
|
if (!BC->HasRelocations) {
|
|
|
|
|
mapCodeSectionsInPlace(MapSection);
|
|
|
|
|
return;
|
|
|
|
|
}
|
2020-10-09 16:06:27 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Map sections for functions with pre-assigned addresses.
|
|
|
|
|
for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) {
|
|
|
|
|
const uint64_t OutputAddress = InjectedFunction->getOutputAddress();
|
|
|
|
|
// A zero output address means no address was pre-assigned.
if (!OutputAddress)
|
|
|
|
|
continue;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
ErrorOr<BinarySection &> FunctionSection =
|
|
|
|
|
InjectedFunction->getCodeSection();
|
|
|
|
|
assert(FunctionSection && "function should have section");
|
|
|
|
|
FunctionSection->setOutputAddress(OutputAddress);
|
|
|
|
|
MapSection(*FunctionSection, OutputAddress);
|
|
|
|
|
InjectedFunction->setImageAddress(FunctionSection->getAllocAddress());
|
|
|
|
|
InjectedFunction->setImageSize(FunctionSection->getOutputSize());
|
|
|
|
|
}
|
2019-03-21 21:13:45 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Populate the list of sections to be allocated.
|
|
|
|
|
std::vector<BinarySection *> CodeSections = getCodeSections();
|
2019-03-21 21:13:45 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Remove sections that were pre-allocated (patch sections).
|
|
|
|
|
// A non-zero output address (e.g. set by the loop above) marks a section as
// already placed.
llvm::erase_if(CodeSections, [](BinarySection *Section) {
|
|
|
|
|
return Section->getOutputAddress();
|
|
|
|
|
});
|
|
|
|
|
LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n";
|
|
|
|
|
for (const BinarySection *Section : CodeSections) dbgs()
|
|
|
|
|
<< Section->getName() << '\n';);
|
2019-03-14 20:32:04 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
uint64_t PaddingSize = 0; // size of padding required at the end
|
2025-06-28 13:53:56 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Allocate sections starting at a given Address.
|
|
|
|
|
// Returns the first address past the allocated sections, including any
// hot-text padding; may update PaddingSize.
auto allocateAt = [&](uint64_t Address) {
|
|
|
|
|
const char *LastNonColdSectionName = BC->HasWarmSection
|
|
|
|
|
? BC->getWarmCodeSectionName()
|
|
|
|
|
: BC->getMainCodeSectionName();
|
|
|
|
|
for (BinarySection *Section : CodeSections) {
|
|
|
|
|
Address = alignTo(Address, Section->getAlignment());
|
|
|
|
|
Section->setOutputAddress(Address);
|
|
|
|
|
Address += Section->getOutputSize();
|
|
|
|
|
|
|
|
|
|
|
// Hugify: Additional huge page from right side due to
|
|
|
|
|
// weird ASLR mapping addresses (4KB aligned)
|
|
|
|
|
if (opts::Hugify && !BC->HasFixedLoadAddress &&
|
|
|
|
|
Section->getName() == LastNonColdSectionName)
|
|
|
|
|
Address = alignTo(Address, Section->getAlignment());
|
|
|
|
|
}
|
2019-03-14 20:32:04 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Make sure we allocate enough space for huge pages.
|
|
|
|
|
ErrorOr<BinarySection &> TextSection =
|
|
|
|
|
BC->getUniqueSectionByName(LastNonColdSectionName);
|
|
|
|
|
if (opts::HotText && TextSection && TextSection->hasValidSectionID()) {
|
|
|
|
|
uint64_t HotTextEnd =
|
|
|
|
|
TextSection->getOutputAddress() + TextSection->getOutputSize();
|
|
|
|
|
HotTextEnd = alignTo(HotTextEnd, BC->PageAlign);
|
|
|
|
|
// Pad only when the page-aligned end of hot text lies beyond everything
// allocated so far.
if (HotTextEnd > Address) {
|
|
|
|
|
PaddingSize = HotTextEnd - Address;
|
|
|
|
|
Address = HotTextEnd;
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2019-03-14 18:51:05 -07:00
|
|
|
}
|
2025-06-30 17:09:41 -07:00
|
|
|
return Address;
|
|
|
|
|
};
|
2019-03-14 18:51:05 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Try to allocate sections before the \p Address and return an address for
|
|
|
|
|
// the allocation of the first section, or 0 if [0, Address) range is not
|
|
|
|
|
// big enough to fit all sections.
|
|
|
|
|
auto allocateBefore = [&](uint64_t Address) -> uint64_t {
|
|
|
|
|
// Walk the sections back to front, moving Address downwards each time.
for (BinarySection *Section : llvm::reverse(CodeSections)) {
|
|
|
|
|
if (Section->getOutputSize() > Address)
|
|
|
|
|
return 0;
|
|
|
|
|
Address -= Section->getOutputSize();
|
|
|
|
|
Address = alignDown(Address, Section->getAlignment());
|
|
|
|
|
Section->setOutputAddress(Address);
|
|
|
|
|
}
|
|
|
|
|
return Address;
|
|
|
|
|
};
|
2019-03-14 18:51:05 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Check if we can fit code in the original .text
|
|
|
|
|
bool AllocationDone = false;
|
|
|
|
|
if (opts::UseOldText) {
|
|
|
|
|
uint64_t StartAddress;
|
|
|
|
|
uint64_t EndAddress;
|
|
|
|
|
if (opts::HotFunctionsAtEnd) {
|
|
|
|
|
EndAddress = BC->OldTextSectionAddress + BC->OldTextSectionSize;
|
|
|
|
|
StartAddress = allocateBefore(EndAddress);
|
|
|
|
|
} else {
|
|
|
|
|
StartAddress = BC->OldTextSectionAddress;
|
|
|
|
|
EndAddress = allocateAt(BC->OldTextSectionAddress);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// NOTE(review): when allocateBefore fails it returns 0, so CodeSize equals
// EndAddress here. Presumably that always exceeds OldTextSectionSize;
// confirm for a .text located at address 0.
const uint64_t CodeSize = EndAddress - StartAddress;
|
|
|
|
|
if (CodeSize <= BC->OldTextSectionSize) {
|
|
|
|
|
BC->outs() << "BOLT-INFO: using original .text for new code with 0x"
|
|
|
|
|
<< Twine::utohexstr(opts::AlignText) << " alignment";
|
|
|
|
|
if (StartAddress != BC->OldTextSectionAddress)
|
|
|
|
|
BC->outs() << " at 0x" << Twine::utohexstr(StartAddress);
|
|
|
|
|
BC->outs() << '\n';
|
|
|
|
|
AllocationDone = true;
|
|
|
|
|
} else {
|
|
|
|
|
BC->errs() << "BOLT-WARNING: original .text too small to fit the new code"
|
|
|
|
|
<< " using 0x" << Twine::utohexstr(opts::AlignText)
|
|
|
|
|
<< " alignment. " << CodeSize << " bytes needed, have "
|
|
|
|
|
<< BC->OldTextSectionSize << " bytes available.\n";
|
|
|
|
|
// Clearing the option leaves AllocationDone false, so the fallback
// allocation below runs; it also enables the padding message at the end.
opts::UseOldText = false;
|
2016-11-09 11:19:02 -08:00
|
|
|
}
|
2025-06-30 17:09:41 -07:00
|
|
|
}
|
2016-11-09 11:19:02 -08:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Fallback: place the code at NextAvailableAddress and advance it past the
// newly allocated sections.
if (!AllocationDone)
|
|
|
|
|
NextAvailableAddress = allocateAt(NextAvailableAddress);
|
2018-07-08 12:14:08 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Report the final addresses through MapSection and record each section's
// output file offset.
|
|
|
|
|
for (BinarySection *Section : CodeSections) {
|
|
|
|
|
LLVM_DEBUG(dbgs() << "BOLT: mapping " << Section->getName() << " at 0x"
|
|
|
|
|
<< Twine::utohexstr(Section->getAllocAddress())
|
|
|
|
|
<< " to 0x"
|
|
|
|
|
<< Twine::utohexstr(Section->getOutputAddress()) << '\n');
|
|
|
|
|
MapSection(*Section, Section->getOutputAddress());
|
|
|
|
|
Section->setOutputFileOffset(
|
|
|
|
|
getFileOffsetForAddress(Section->getOutputAddress()));
|
2019-03-21 21:13:45 -07:00
|
|
|
}
|
2016-10-07 09:34:16 -07:00
|
|
|
|
2025-06-30 17:09:41 -07:00
|
|
|
// Report hot-text padding. Only an informational message is printed here;
// this function does not create a padding section.
|
|
|
|
|
if (PaddingSize && !opts::UseOldText)
|
|
|
|
|
BC->outs() << "BOLT-INFO: padding code to 0x"
|
|
|
|
|
<< Twine::utohexstr(NextAvailableAddress)
|
|
|
|
|
<< " to accommodate hot text\n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::mapCodeSectionsInPlace(
|
|
|
|
|
BOLTLinker::SectionMapper MapSection) {
|
2019-03-21 21:13:45 -07:00
|
|
|
// Processing in non-relocation mode.
|
2021-04-08 00:19:26 -07:00
|
|
|
uint64_t NewTextSectionStartAddress = NextAvailableAddress;
|
2016-03-11 11:30:30 -08:00
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryFunction &Function = BFI.second;
|
2020-05-03 13:54:45 -07:00
|
|
|
if (!Function.isEmitted())
|
2019-03-21 21:13:45 -07:00
|
|
|
continue;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<BinarySection &> FuncSection = Function.getCodeSection();
|
2019-03-21 21:13:45 -07:00
|
|
|
assert(FuncSection && "cannot find section for function");
|
|
|
|
|
FuncSection->setOutputAddress(Function.getAddress());
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT: mapping 0x"
|
|
|
|
|
<< Twine::utohexstr(FuncSection->getAllocAddress())
|
|
|
|
|
<< " to 0x" << Twine::utohexstr(Function.getAddress())
|
|
|
|
|
<< '\n');
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
For loading object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
MapSection(*FuncSection, Function.getAddress());
|
2019-03-21 21:13:45 -07:00
|
|
|
Function.setImageAddress(FuncSection->getAllocAddress());
|
|
|
|
|
Function.setImageSize(FuncSection->getOutputSize());
|
2024-11-13 09:58:44 -08:00
|
|
|
assert(Function.getImageSize() <= Function.getMaxSize() &&
|
|
|
|
|
"Unexpected large function");
|
2019-03-21 21:13:45 -07:00
|
|
|
|
|
|
|
|
if (!Function.isSplit())
|
|
|
|
|
continue;
|
|
|
|
|
|
2022-08-24 18:07:06 -07:00
|
|
|
assert(Function.getLayout().isHotColdSplit() &&
|
|
|
|
|
"Cannot allocate more than two fragments per function in "
|
|
|
|
|
"non-relocation mode.");
|
|
|
|
|
|
|
|
|
|
FunctionFragment &FF =
|
|
|
|
|
Function.getLayout().getFragment(FragmentNum::cold());
|
|
|
|
|
ErrorOr<BinarySection &> ColdSection =
|
|
|
|
|
Function.getCodeSection(FF.getFragmentNum());
|
|
|
|
|
assert(ColdSection && "cannot find section for cold part");
|
|
|
|
|
// Cold fragments are aligned at 16 bytes.
|
|
|
|
|
NextAvailableAddress = alignTo(NextAvailableAddress, 16);
|
2024-11-13 09:58:44 -08:00
|
|
|
FF.setAddress(NextAvailableAddress);
|
|
|
|
|
FF.setImageAddress(ColdSection->getAllocAddress());
|
|
|
|
|
FF.setImageSize(ColdSection->getOutputSize());
|
|
|
|
|
FF.setFileOffset(getFileOffsetForAddress(NextAvailableAddress));
|
|
|
|
|
ColdSection->setOutputAddress(FF.getAddress());
|
2019-03-21 21:13:45 -07:00
|
|
|
|
2022-08-24 18:07:06 -07:00
|
|
|
LLVM_DEBUG(
|
|
|
|
|
dbgs() << formatv(
|
|
|
|
|
"BOLT: mapping cold fragment {0:x+} to {1:x+} with size {2:x+}\n",
|
|
|
|
|
FF.getImageAddress(), FF.getAddress(), FF.getImageSize()));
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
For loading object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
MapSection(*ColdSection, FF.getAddress());
|
2019-03-21 21:13:45 -07:00
|
|
|
|
2022-08-24 18:07:06 -07:00
|
|
|
NextAvailableAddress += FF.getImageSize();
|
2019-03-21 21:13:45 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add the new text section aggregating all existing code sections.
|
|
|
|
|
// This is pseudo-section that serves a purpose of creating a corresponding
|
|
|
|
|
// entry in section header table.
|
2024-04-29 14:44:04 -07:00
|
|
|
const uint64_t NewTextSectionSize =
|
2021-04-08 00:19:26 -07:00
|
|
|
NextAvailableAddress - NewTextSectionStartAddress;
|
2019-03-21 21:13:45 -07:00
|
|
|
if (NewTextSectionSize) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
|
|
|
|
|
/*IsText=*/true,
|
|
|
|
|
/*IsAllocatable=*/true);
|
|
|
|
|
BinarySection &Section =
|
2020-03-11 15:51:32 -07:00
|
|
|
BC->registerOrUpdateSection(getBOLTTextSectionName(),
|
|
|
|
|
ELF::SHT_PROGBITS,
|
|
|
|
|
Flags,
|
|
|
|
|
/*Data=*/nullptr,
|
|
|
|
|
NewTextSectionSize,
|
|
|
|
|
16);
|
2019-03-21 21:13:45 -07:00
|
|
|
Section.setOutputAddress(NewTextSectionStartAddress);
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
Section.setOutputFileOffset(
|
2021-12-14 16:52:51 -08:00
|
|
|
getFileOffsetForAddress(NewTextSectionStartAddress));
|
2016-02-12 19:01:53 -08:00
|
|
|
}
|
2018-04-20 20:03:31 -07:00
|
|
|
}
|
2015-12-18 17:00:46 -08:00
|
|
|
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
For loading object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
void RewriteInstance::mapAllocatableSections(
|
|
|
|
|
BOLTLinker::SectionMapper MapSection) {
|
2024-11-19 12:59:05 -08:00
|
|
|
|
|
|
|
|
if (opts::UseOldText || opts::StrictMode) {
|
|
|
|
|
auto tryRewriteSection = [&](BinarySection &OldSection,
|
|
|
|
|
BinarySection &NewSection) {
|
|
|
|
|
if (OldSection.getSize() < NewSection.getOutputSize())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
BC->outs() << "BOLT-INFO: rewriting " << OldSection.getName()
|
|
|
|
|
<< " in-place\n";
|
|
|
|
|
|
|
|
|
|
NewSection.setOutputAddress(OldSection.getAddress());
|
|
|
|
|
NewSection.setOutputFileOffset(OldSection.getInputFileOffset());
|
|
|
|
|
MapSection(NewSection, OldSection.getAddress());
|
|
|
|
|
|
|
|
|
|
// Pad contents with zeros.
|
|
|
|
|
NewSection.addPadding(OldSection.getSize() - NewSection.getOutputSize());
|
|
|
|
|
|
|
|
|
|
// Prevent the original section name from appearing in the section header
|
|
|
|
|
// table.
|
|
|
|
|
OldSection.setAnonymous(true);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (EHFrameSection) {
|
|
|
|
|
BinarySection *NewEHFrameSection =
|
|
|
|
|
getSection(getNewSecPrefix() + getEHFrameSectionName());
|
|
|
|
|
assert(NewEHFrameSection && "New contents expected for .eh_frame");
|
|
|
|
|
tryRewriteSection(*EHFrameSection, *NewEHFrameSection);
|
|
|
|
|
}
|
|
|
|
|
BinarySection *EHSection = getSection(".gcc_except_table");
|
|
|
|
|
BinarySection *NewEHSection =
|
|
|
|
|
getSection(getNewSecPrefix() + ".gcc_except_table");
|
|
|
|
|
if (EHSection) {
|
|
|
|
|
assert(NewEHSection && "New contents expected for .gcc_except_table");
|
|
|
|
|
tryRewriteSection(*EHSection, *NewEHSection);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
// Allocate read-only sections first, then writable sections.
|
|
|
|
|
enum : uint8_t { ST_READONLY, ST_READWRITE };
|
|
|
|
|
for (uint8_t SType = ST_READONLY; SType <= ST_READWRITE; ++SType) {
|
2023-02-06 17:13:46 +04:00
|
|
|
const uint64_t LastNextAvailableAddress = NextAvailableAddress;
|
|
|
|
|
if (SType == ST_READWRITE) {
|
|
|
|
|
// Align R+W segment to regular page size
|
|
|
|
|
NextAvailableAddress = alignTo(NextAvailableAddress, BC->RegularPageSize);
|
|
|
|
|
NewWritableSegmentAddress = NextAvailableAddress;
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
for (BinarySection &Section : BC->allocatableSections()) {
|
2023-08-21 10:10:48 +02:00
|
|
|
if (Section.isLinkOnly())
|
|
|
|
|
continue;
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
if (!Section.hasValidSectionID())
|
|
|
|
|
continue;
|
2020-12-01 16:29:39 -08:00
|
|
|
|
2023-01-18 14:21:28 -08:00
|
|
|
if (Section.isWritable() == (SType == ST_READONLY))
|
2022-09-22 12:05:12 -07:00
|
|
|
continue;
|
2016-09-16 15:54:32 -07:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
if (Section.getOutputAddress()) {
|
|
|
|
|
LLVM_DEBUG({
|
|
|
|
|
dbgs() << "BOLT-DEBUG: section " << Section.getName()
|
|
|
|
|
<< " is already mapped at 0x"
|
|
|
|
|
<< Twine::utohexstr(Section.getOutputAddress()) << '\n';
|
|
|
|
|
});
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2015-12-18 17:00:46 -08:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
if (Section.hasSectionRef()) {
|
|
|
|
|
LLVM_DEBUG({
|
|
|
|
|
dbgs() << "BOLT-DEBUG: mapping original section " << Section.getName()
|
|
|
|
|
<< " to 0x" << Twine::utohexstr(Section.getAddress()) << '\n';
|
|
|
|
|
});
|
|
|
|
|
Section.setOutputAddress(Section.getAddress());
|
|
|
|
|
Section.setOutputFileOffset(Section.getInputFileOffset());
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
For loading object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
MapSection(Section, Section.getAddress());
|
2022-09-22 12:05:12 -07:00
|
|
|
} else {
|
2025-02-27 16:13:57 -08:00
|
|
|
uint64_t Alignment = Section.getAlignment();
|
|
|
|
|
if (opts::Instrument && StartLinkingRuntimeLib) {
|
|
|
|
|
Alignment = BC->RegularPageSize;
|
|
|
|
|
StartLinkingRuntimeLib = false;
|
|
|
|
|
}
|
|
|
|
|
NextAvailableAddress = alignTo(NextAvailableAddress, Alignment);
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
LLVM_DEBUG({
|
2025-02-27 16:13:57 -08:00
|
|
|
dbgs() << "BOLT-DEBUG: mapping section " << Section.getName()
|
|
|
|
|
<< " (0x" << Twine::utohexstr(Section.getAllocAddress())
|
|
|
|
|
<< ") to 0x" << Twine::utohexstr(NextAvailableAddress) << ":0x"
|
2022-09-22 12:05:12 -07:00
|
|
|
<< Twine::utohexstr(NextAvailableAddress +
|
|
|
|
|
Section.getOutputSize())
|
|
|
|
|
<< '\n';
|
|
|
|
|
});
|
2018-01-23 15:10:24 -08:00
|
|
|
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
For loading object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
MapSection(Section, NextAvailableAddress);
|
2022-09-22 12:05:12 -07:00
|
|
|
Section.setOutputAddress(NextAvailableAddress);
|
|
|
|
|
Section.setOutputFileOffset(
|
|
|
|
|
getFileOffsetForAddress(NextAvailableAddress));
|
2016-04-06 18:03:44 -07:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
NextAvailableAddress += Section.getOutputSize();
|
|
|
|
|
}
|
2016-03-28 17:45:22 -07:00
|
|
|
}
|
2023-02-06 17:13:46 +04:00
|
|
|
|
|
|
|
|
if (SType == ST_READONLY) {
|
2025-06-26 12:09:11 -07:00
|
|
|
if (NewTextSegmentAddress)
|
2023-02-06 17:13:46 +04:00
|
|
|
NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
|
|
|
|
|
} else if (SType == ST_READWRITE) {
|
|
|
|
|
NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress;
|
|
|
|
|
// Restore NextAvailableAddress if no new writable sections
|
|
|
|
|
if (!NewWritableSegmentSize)
|
|
|
|
|
NextAvailableAddress = LastNextAvailableAddress;
|
|
|
|
|
}
|
2019-07-24 14:03:43 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-28 10:04:02 +02:00
|
|
|
void RewriteInstance::updateOutputValues(const BOLTLinker &Linker) {
|
2023-10-24 12:22:43 -07:00
|
|
|
if (std::optional<AddressMap> Map = AddressMap::parse(*BC))
|
|
|
|
|
BC->setIOAddressMap(std::move(*Map));
|
2023-08-21 10:10:48 +02:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
for (BinaryFunction *Function : BC->getAllBinaryFunctions())
|
2023-08-28 10:04:02 +02:00
|
|
|
Function->updateOutputValues(Linker);
|
2017-05-08 22:51:36 -07:00
|
|
|
}
|
|
|
|
|
|
2025-07-02 11:22:12 -07:00
|
|
|
void RewriteInstance::updateSegmentInfo() {
|
|
|
|
|
// NOTE Currently .eh_frame_hdr appends to the last segment, recalculate
|
|
|
|
|
// last segments size based on the NextAvailableAddress variable.
|
|
|
|
|
if (!NewWritableSegmentSize) {
|
|
|
|
|
if (NewTextSegmentAddress)
|
|
|
|
|
NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress;
|
|
|
|
|
} else {
|
|
|
|
|
NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (NewTextSegmentSize) {
|
|
|
|
|
SegmentInfo TextSegment = {NewTextSegmentAddress,
|
|
|
|
|
NewTextSegmentSize,
|
|
|
|
|
NewTextSegmentOffset,
|
|
|
|
|
NewTextSegmentSize,
|
|
|
|
|
BC->PageAlign,
|
|
|
|
|
true,
|
|
|
|
|
false};
|
|
|
|
|
if (!opts::Instrument) {
|
|
|
|
|
BC->NewSegments.push_back(TextSegment);
|
|
|
|
|
} else {
|
|
|
|
|
ErrorOr<BinarySection &> Sec =
|
|
|
|
|
BC->getUniqueSectionByName(".bolt.instr.counters");
|
|
|
|
|
assert(Sec && "expected one and only one `.bolt.instr.counters` section");
|
|
|
|
|
const uint64_t Addr = Sec->getOutputAddress();
|
|
|
|
|
const uint64_t Offset = Sec->getOutputFileOffset();
|
|
|
|
|
const uint64_t Size = Sec->getOutputSize();
|
|
|
|
|
assert(Addr > TextSegment.Address &&
|
|
|
|
|
Addr + Size < TextSegment.Address + TextSegment.Size &&
|
|
|
|
|
"`.bolt.instr.counters` section is expected to be included in the "
|
|
|
|
|
"new text segment");
|
|
|
|
|
|
|
|
|
|
// Set correct size for the previous header since we are breaking the
|
|
|
|
|
// new text segment into three segments.
|
|
|
|
|
uint64_t Delta = Addr - TextSegment.Address;
|
|
|
|
|
TextSegment.Size = Delta;
|
|
|
|
|
TextSegment.FileSize = Delta;
|
|
|
|
|
BC->NewSegments.push_back(TextSegment);
|
|
|
|
|
|
|
|
|
|
// Create RW segment that includes the `.bolt.instr.counters` section.
|
|
|
|
|
SegmentInfo RWSegment = {Addr, Size, Offset, Size, BC->RegularPageSize,
|
|
|
|
|
false, true};
|
|
|
|
|
BC->NewSegments.push_back(RWSegment);
|
|
|
|
|
|
|
|
|
|
// Create RX segment that includes all RX sections from runtime library.
|
|
|
|
|
const uint64_t AddrRX = alignTo(Addr + Size, BC->RegularPageSize);
|
|
|
|
|
const uint64_t OffsetRX = alignTo(Offset + Size, BC->RegularPageSize);
|
|
|
|
|
const uint64_t SizeRX =
|
|
|
|
|
NewTextSegmentSize - (AddrRX - TextSegment.Address);
|
|
|
|
|
SegmentInfo RXSegment = {
|
|
|
|
|
AddrRX, SizeRX, OffsetRX, SizeRX, BC->RegularPageSize, true, false};
|
|
|
|
|
BC->NewSegments.push_back(RXSegment);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (NewWritableSegmentSize) {
|
|
|
|
|
SegmentInfo DataSegmentInfo = {
|
|
|
|
|
NewWritableSegmentAddress,
|
|
|
|
|
NewWritableSegmentSize,
|
|
|
|
|
getFileOffsetForAddress(NewWritableSegmentAddress),
|
|
|
|
|
NewWritableSegmentSize,
|
|
|
|
|
BC->RegularPageSize,
|
|
|
|
|
false,
|
|
|
|
|
true};
|
|
|
|
|
BC->NewSegments.push_back(DataSegmentInfo);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
void RewriteInstance::patchELFPHDRTable() {
|
2023-04-20 18:11:54 -04:00
|
|
|
auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
|
2021-04-08 00:19:26 -07:00
|
|
|
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
|
|
|
|
|
raw_fd_ostream &OS = Out->os();
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
Phnum = Obj.getHeader().e_phnum;
|
2025-07-24 11:47:27 -07:00
|
|
|
|
|
|
|
|
if (BC->NewSegments.empty()) {
|
|
|
|
|
BC->outs() << "BOLT-INFO: not adding new segments\n";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (opts::UseGnuStack) {
|
2016-02-12 19:01:53 -08:00
|
|
|
assert(!PHDRTableAddress && "unexpected address for program header table");
|
2025-07-24 11:47:27 -07:00
|
|
|
if (BC->NewSegments.size() > 1) {
|
2024-04-29 14:44:04 -07:00
|
|
|
BC->errs() << "BOLT-ERROR: unable to add writable segment\n";
|
2023-02-06 17:13:46 +04:00
|
|
|
exit(1);
|
|
|
|
|
}
|
2025-07-24 11:47:27 -07:00
|
|
|
} else {
|
|
|
|
|
Phnum += BC->NewSegments.size();
|
2016-02-12 19:01:53 -08:00
|
|
|
}
|
2023-02-06 17:13:46 +04:00
|
|
|
|
2025-07-24 11:47:27 -07:00
|
|
|
if (!PHDRTableOffset)
|
|
|
|
|
PHDRTableOffset = Obj.getHeader().e_phoff;
|
2025-06-27 09:12:08 -07:00
|
|
|
|
2024-04-30 10:51:08 -07:00
|
|
|
const uint64_t SavedPos = OS.tell();
|
2016-02-12 19:01:53 -08:00
|
|
|
OS.seek(PHDRTableOffset);
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2025-07-02 11:22:12 -07:00
|
|
|
auto createPhdr = [](const SegmentInfo &SI) {
|
|
|
|
|
ELF64LEPhdrTy Phdr;
|
|
|
|
|
Phdr.p_type = ELF::PT_LOAD;
|
|
|
|
|
Phdr.p_offset = SI.FileOffset;
|
|
|
|
|
Phdr.p_vaddr = SI.Address;
|
|
|
|
|
Phdr.p_paddr = SI.Address;
|
|
|
|
|
Phdr.p_filesz = SI.FileSize;
|
|
|
|
|
Phdr.p_memsz = SI.Size;
|
|
|
|
|
Phdr.p_flags = ELF::PF_R;
|
|
|
|
|
if (SI.IsExecutable)
|
|
|
|
|
Phdr.p_flags |= ELF::PF_X;
|
|
|
|
|
if (SI.IsWritable)
|
|
|
|
|
Phdr.p_flags |= ELF::PF_W;
|
|
|
|
|
Phdr.p_align = SI.Alignment;
|
|
|
|
|
|
|
|
|
|
return Phdr;
|
2020-06-26 16:52:07 -07:00
|
|
|
};
|
|
|
|
|
|
2024-04-26 16:29:42 -07:00
|
|
|
auto writeNewSegmentPhdrs = [&]() {
|
2025-07-02 11:22:12 -07:00
|
|
|
for (const SegmentInfo &SI : BC->NewSegments) {
|
|
|
|
|
ELF64LEPhdrTy Phdr = createPhdr(SI);
|
|
|
|
|
OS.write(reinterpret_cast<const char *>(&Phdr), sizeof(Phdr));
|
2024-04-26 16:29:42 -07:00
|
|
|
}
|
2023-02-06 17:13:46 +04:00
|
|
|
};
|
|
|
|
|
|
2024-04-26 16:29:42 -07:00
|
|
|
bool ModdedGnuStack = false;
|
|
|
|
|
bool AddedSegment = false;
|
|
|
|
|
|
2016-02-08 10:02:48 -08:00
|
|
|
// Copy existing program headers with modifications.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) {
|
|
|
|
|
ELF64LE::Phdr NewPhdr = Phdr;
|
2024-04-26 16:29:42 -07:00
|
|
|
switch (Phdr.p_type) {
|
|
|
|
|
case ELF::PT_PHDR:
|
|
|
|
|
if (PHDRTableAddress) {
|
|
|
|
|
NewPhdr.p_offset = PHDRTableOffset;
|
|
|
|
|
NewPhdr.p_vaddr = PHDRTableAddress;
|
|
|
|
|
NewPhdr.p_paddr = PHDRTableAddress;
|
|
|
|
|
NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum;
|
|
|
|
|
NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case ELF::PT_GNU_EH_FRAME: {
|
2024-04-26 14:13:23 -07:00
|
|
|
ErrorOr<BinarySection &> EHFrameHdrSec = BC->getUniqueSectionByName(
|
|
|
|
|
getNewSecPrefix() + getEHFrameHdrSectionName());
|
2021-12-14 16:52:51 -08:00
|
|
|
if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() &&
|
2018-02-01 16:33:43 -08:00
|
|
|
EHFrameHdrSec->isFinalized()) {
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset();
|
2019-03-14 18:51:05 -07:00
|
|
|
NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress();
|
|
|
|
|
NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress();
|
2018-02-01 16:33:43 -08:00
|
|
|
NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize();
|
|
|
|
|
NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize();
|
2016-07-12 16:43:53 -07:00
|
|
|
}
|
2024-04-26 16:29:42 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case ELF::PT_GNU_STACK:
|
|
|
|
|
if (opts::UseGnuStack) {
|
2025-02-27 16:13:57 -08:00
|
|
|
// Overwrite the header with the new segment header.
|
2025-07-02 11:22:12 -07:00
|
|
|
assert(BC->NewSegments.size() == 1 &&
|
|
|
|
|
"Expected exactly one new segment");
|
|
|
|
|
NewPhdr = createPhdr(BC->NewSegments.front());
|
2024-04-26 16:29:42 -07:00
|
|
|
ModdedGnuStack = true;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case ELF::PT_DYNAMIC:
|
|
|
|
|
if (!opts::UseGnuStack) {
|
|
|
|
|
// Insert new headers before DYNAMIC.
|
|
|
|
|
writeNewSegmentPhdrs();
|
|
|
|
|
AddedSegment = true;
|
2023-02-06 17:13:46 +04:00
|
|
|
}
|
2024-04-26 16:29:42 -07:00
|
|
|
break;
|
2016-02-08 10:02:48 -08:00
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr));
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
|
2020-06-26 16:52:07 -07:00
|
|
|
if (!opts::UseGnuStack && !AddedSegment) {
|
2024-04-26 16:29:42 -07:00
|
|
|
// Append new headers to the end of the table.
|
|
|
|
|
writeNewSegmentPhdrs();
|
2020-06-26 16:52:07 -07:00
|
|
|
}
|
|
|
|
|
|
2024-04-26 16:29:42 -07:00
|
|
|
if (opts::UseGnuStack && !ModdedGnuStack) {
|
|
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: could not find PT_GNU_STACK program header to modify\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
2024-04-30 10:51:08 -07:00
|
|
|
|
|
|
|
|
OS.seek(SavedPos);
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
|
|
|
|
|
2016-09-16 15:54:32 -07:00
|
|
|
namespace {
|
2017-04-06 10:49:59 -07:00
|
|
|
|
|
|
|
|
/// Write padding to \p OS such that its current \p Offset becomes aligned
|
|
|
|
|
/// at \p Alignment. Return new (aligned) offset.
|
2021-12-14 16:52:51 -08:00
|
|
|
uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset,
|
2017-04-06 10:49:59 -07:00
|
|
|
uint64_t Alignment) {
|
2017-05-16 17:29:31 -07:00
|
|
|
if (!Alignment)
|
|
|
|
|
return Offset;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint64_t PaddingSize =
|
|
|
|
|
offsetToAlignment(Offset, llvm::Align(Alignment));
|
2017-04-06 10:49:59 -07:00
|
|
|
for (unsigned I = 0; I < PaddingSize; ++I)
|
2016-09-16 15:54:32 -07:00
|
|
|
OS.write((unsigned char)0);
|
2017-04-06 10:49:59 -07:00
|
|
|
return Offset + PaddingSize;
|
2016-09-16 15:54:32 -07:00
|
|
|
}
|
2017-04-06 10:49:59 -07:00
|
|
|
|
2016-09-16 15:54:32 -07:00
|
|
|
}
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
void RewriteInstance::rewriteNoteSections() {
|
2023-04-20 18:11:54 -04:00
|
|
|
auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
|
2021-04-08 00:19:26 -07:00
|
|
|
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
|
|
|
|
|
raw_fd_ostream &OS = Out->os();
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2024-04-29 14:44:04 -07:00
|
|
|
uint64_t NextAvailableOffset = std::max(
|
|
|
|
|
getFileOffsetForAddress(NextAvailableAddress), FirstNonAllocatableOffset);
|
2016-03-03 10:13:11 -08:00
|
|
|
OS.seek(NextAvailableOffset);
|
|
|
|
|
|
|
|
|
|
// Copy over non-allocatable section contents and update file offsets.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) {
|
2016-03-03 10:13:11 -08:00
|
|
|
if (Section.sh_type == ELF::SHT_NULL)
|
|
|
|
|
continue;
|
|
|
|
|
if (Section.sh_flags & ELF::SHF_ALLOC)
|
|
|
|
|
continue;
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
SectionRef SecRef = ELF64LEFile->toSectionRef(&Section);
|
|
|
|
|
BinarySection *BSec = BC->getSectionForSectionRef(SecRef);
|
|
|
|
|
assert(BSec && !BSec->isAllocatable() &&
|
|
|
|
|
"Matching non-allocatable BinarySection should exist.");
|
|
|
|
|
|
2019-10-29 14:49:49 -07:00
|
|
|
StringRef SectionName =
|
2020-12-01 16:29:39 -08:00
|
|
|
cantFail(Obj.getSectionName(Section), "cannot get section name");
|
2019-10-29 14:49:49 -07:00
|
|
|
if (shouldStrip(Section, SectionName))
|
2016-09-27 19:09:38 -07:00
|
|
|
continue;
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Insert padding as needed.
|
2017-04-06 10:49:59 -07:00
|
|
|
NextAvailableOffset =
|
2021-12-14 16:52:51 -08:00
|
|
|
appendPadding(OS, NextAvailableOffset, Section.sh_addralign);
|
2016-03-03 10:13:11 -08:00
|
|
|
|
2016-05-16 17:02:17 -07:00
|
|
|
// New section size.
|
2016-03-11 11:30:30 -08:00
|
|
|
uint64_t Size = 0;
|
2021-04-01 11:43:00 -07:00
|
|
|
bool DataWritten = false;
|
2016-11-11 14:33:34 -08:00
|
|
|
// Copy over section contents unless it's one of the sections we overwrite.
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
if (!willOverwriteSection(SectionName)) {
|
2016-03-11 11:30:30 -08:00
|
|
|
Size = Section.sh_size;
|
2021-11-15 17:19:24 -08:00
|
|
|
StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size);
|
|
|
|
|
std::string Data;
|
2022-09-22 12:05:12 -07:00
|
|
|
if (BSec->getPatcher()) {
|
2021-11-15 17:19:24 -08:00
|
|
|
Data = BSec->getPatcher()->patchBinary(Dataref);
|
|
|
|
|
Dataref = StringRef(Data);
|
|
|
|
|
}
|
2017-04-06 10:49:59 -07:00
|
|
|
|
2021-04-01 11:43:00 -07:00
|
|
|
// Section was expanded, so need to treat it as overwrite.
|
2021-11-15 17:19:24 -08:00
|
|
|
if (Size != Dataref.size()) {
|
2022-09-22 12:05:12 -07:00
|
|
|
BSec = &BC->registerOrUpdateNoteSection(
|
2021-11-15 17:19:24 -08:00
|
|
|
SectionName, copyByteArray(Dataref), Dataref.size());
|
2021-04-01 11:43:00 -07:00
|
|
|
Size = 0;
|
|
|
|
|
} else {
|
2021-11-15 17:19:24 -08:00
|
|
|
OS << Dataref;
|
2021-04-01 11:43:00 -07:00
|
|
|
DataWritten = true;
|
|
|
|
|
|
|
|
|
|
// Add padding as the section extension might rely on the alignment.
|
|
|
|
|
Size = appendPadding(OS, Size, Section.sh_addralign);
|
|
|
|
|
}
|
2016-03-11 11:30:30 -08:00
|
|
|
}
|
2016-03-03 10:13:11 -08:00
|
|
|
|
2016-03-09 16:06:41 -08:00
|
|
|
// Perform section post-processing.
|
2022-09-22 12:05:12 -07:00
|
|
|
assert(BSec->getAlignment() <= Section.sh_addralign &&
|
|
|
|
|
"alignment exceeds value in file");
|
|
|
|
|
|
|
|
|
|
if (BSec->getAllocAddress()) {
|
|
|
|
|
assert(!DataWritten && "Writing section twice.");
|
|
|
|
|
(void)DataWritten;
|
2024-11-19 12:59:05 -08:00
|
|
|
Size += BSec->write(OS);
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
BSec->setOutputFileOffset(NextAvailableOffset);
|
|
|
|
|
BSec->flushPendingRelocations(OS, [this](const MCSymbol *S) {
|
|
|
|
|
return getNewValueForSymbol(S->getName());
|
|
|
|
|
});
|
|
|
|
|
|
2024-02-21 11:54:34 -08:00
|
|
|
// Section contents are no longer needed, but we need to update the size so
|
|
|
|
|
// that it will be reflected in the section header table.
|
|
|
|
|
BSec->updateContents(nullptr, Size);
|
2016-03-03 10:13:11 -08:00
|
|
|
|
|
|
|
|
NextAvailableOffset += Size;
|
|
|
|
|
}
|
2017-05-16 17:29:31 -07:00
|
|
|
|
|
|
|
|
// Write new note sections.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinarySection &Section : BC->nonAllocatableSections()) {
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
if (Section.getOutputFileOffset() || !Section.getAllocAddress())
|
2017-05-16 17:29:31 -07:00
|
|
|
continue;
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
assert(!Section.hasPendingRelocations() && "cannot have pending relocs");
|
2017-05-16 17:29:31 -07:00
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
NextAvailableOffset =
|
|
|
|
|
appendPadding(OS, NextAvailableOffset, Section.getAlignment());
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
Section.setOutputFileOffset(NextAvailableOffset);
|
2017-05-16 17:29:31 -07:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(
|
|
|
|
|
dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName()
|
|
|
|
|
<< " of size " << Section.getOutputSize() << " at offset 0x"
|
|
|
|
|
<< Twine::utohexstr(Section.getOutputFileOffset()) << '\n');
|
2017-05-16 17:29:31 -07:00
|
|
|
|
2024-11-19 12:59:05 -08:00
|
|
|
NextAvailableOffset += Section.write(OS);
|
2017-05-16 17:29:31 -07:00
|
|
|
}
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
|
|
|
|
|
2017-02-07 12:20:46 -08:00
|
|
|
template <typename ELFT>
|
2017-05-16 17:29:31 -07:00
|
|
|
void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) {
|
2017-02-07 12:20:46 -08:00
|
|
|
// Pre-populate section header string table.
|
2021-12-23 12:38:33 -08:00
|
|
|
for (const BinarySection &Section : BC->sections())
|
2022-09-22 12:05:12 -07:00
|
|
|
if (!Section.isAnonymous())
|
|
|
|
|
SHStrTab.add(Section.getOutputName());
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
SHStrTab.finalize();
|
2017-02-07 12:20:46 -08:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const size_t SHStrTabSize = SHStrTab.getSize();
|
2017-05-16 17:29:31 -07:00
|
|
|
uint8_t *DataCopy = new uint8_t[SHStrTabSize];
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-06 15:00:23 -08:00
|
|
|
memset(DataCopy, 0, SHStrTabSize);
|
|
|
|
|
SHStrTab.write(DataCopy);
|
2018-02-01 16:33:43 -08:00
|
|
|
BC->registerOrUpdateNoteSection(".shstrtab",
|
|
|
|
|
DataCopy,
|
|
|
|
|
SHStrTabSize,
|
|
|
|
|
/*Alignment=*/1,
|
|
|
|
|
/*IsReadOnly=*/true,
|
|
|
|
|
ELF::SHT_STRTAB);
|
2017-02-07 12:20:46 -08:00
|
|
|
}
|
|
|
|
|
|
2018-08-08 17:55:24 -07:00
|
|
|
void RewriteInstance::addBoltInfoSection() {
|
|
|
|
|
std::string DescStr;
|
|
|
|
|
raw_string_ostream DescOS(DescStr);
|
2017-05-24 14:14:16 -07:00
|
|
|
|
2018-08-08 17:55:24 -07:00
|
|
|
DescOS << "BOLT revision: " << BoltRevision << ", "
|
|
|
|
|
<< "command line:";
|
2021-12-23 12:38:33 -08:00
|
|
|
for (int I = 0; I < Argc; ++I)
|
2018-08-08 17:55:24 -07:00
|
|
|
DescOS << " " << Argv[I];
|
|
|
|
|
|
2019-08-02 11:20:13 -07:00
|
|
|
// Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n'
|
2021-04-08 00:19:26 -07:00
|
|
|
const std::string BoltInfo =
|
2019-08-02 11:20:13 -07:00
|
|
|
BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/);
|
2018-08-08 17:55:24 -07:00
|
|
|
BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo),
|
|
|
|
|
BoltInfo.size(),
|
|
|
|
|
/*Alignment=*/1,
|
|
|
|
|
/*IsReadOnly=*/true, ELF::SHT_NOTE);
|
2017-05-24 14:14:16 -07:00
|
|
|
}
|
|
|
|
|
|
2019-04-12 17:33:46 -07:00
|
|
|
/// Register the BOLT address translation note section with no contents
/// (null data, size zero). encodeBATSection() later registers the same
/// section name with the encoded table.
void RewriteInstance::addBATSection() {
  BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr,
                                  0,
                                  /*Alignment=*/1,
                                  /*IsReadOnly=*/true, ELF::SHT_NOTE);
}
|
|
|
|
|
|
|
|
|
|
void RewriteInstance::encodeBATSection() {
|
|
|
|
|
std::string DescStr;
|
|
|
|
|
raw_string_ostream DescOS(DescStr);
|
|
|
|
|
|
2022-07-06 15:53:27 -07:00
|
|
|
BAT->write(*BC, DescOS);
|
2019-04-12 17:33:46 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const std::string BoltInfo =
|
2019-08-02 11:20:13 -07:00
|
|
|
BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT);
|
2019-04-12 17:33:46 -07:00
|
|
|
BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME,
|
|
|
|
|
copyByteArray(BoltInfo), BoltInfo.size(),
|
|
|
|
|
/*Alignment=*/1,
|
|
|
|
|
/*IsReadOnly=*/true, ELF::SHT_NOTE);
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: BAT section size (bytes): " << BoltInfo.size()
|
|
|
|
|
<< '\n';
|
2019-04-12 17:33:46 -07:00
|
|
|
}
|
|
|
|
|
|
2019-10-29 14:49:49 -07:00
|
|
|
template <typename ELFShdrTy>
|
|
|
|
|
bool RewriteInstance::shouldStrip(const ELFShdrTy &Section,
|
|
|
|
|
StringRef SectionName) {
|
|
|
|
|
// Strip non-allocatable relocation sections.
|
|
|
|
|
if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
// Strip debug sections if not updating them.
|
|
|
|
|
if (isDebugSection(SectionName) && !opts::UpdateDebugSections)
|
|
|
|
|
return true;
|
|
|
|
|
|
2021-10-16 17:02:45 +03:00
|
|
|
// Strip symtab section if needed
|
|
|
|
|
if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB)
|
|
|
|
|
return true;
|
|
|
|
|
|
2019-10-29 14:49:49 -07:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
[BOLT][NFC] Remove redundant dependent template type
Summary:
Reduce code size by removing redundant dependent template type
from RewriteInstance methods.
Code size savings (via bloaty on llvm-bolt Debug build):
```
symbol,vmsize,filesize -> vmsize,filesize (delta vmsize,filesize)
updateELFSymbolTable 57096,59600 -> 56656,59048 (440,552)
updateELFSymbolTable::lambda 35957,55277 -> 35949,54485 (8,792)
getOutputSections 20592,21440 -> 20372,21156 (220,284)
getOutputSections::lambda 1792,5300 -> 1792,5372 (0,-72)
total delta (668,1556)
```
Reviewed By: maksfb
FBD33589393
2022-01-14 12:57:37 -08:00
|
|
|
template <typename ELFT>
|
|
|
|
|
std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr>
|
2021-12-14 16:52:51 -08:00
|
|
|
RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
|
|
|
|
|
std::vector<uint32_t> &NewSectionIndex) {
|
[BOLT][NFC] Remove redundant dependent template type
Summary:
Reduce code size by removing redundant dependent template type
from RewriteInstance methods.
Code size savings (via bloaty on llvm-bolt Debug build):
```
symbol,vmsize,filesize -> vmsize,filesize (delta vmsize,filesize)
updateELFSymbolTable 57096,59600 -> 56656,59048 (440,552)
updateELFSymbolTable::lambda 35957,55277 -> 35949,54485 (8,792)
getOutputSections 20592,21440 -> 20372,21156 (220,284)
getOutputSections::lambda 1792,5300 -> 1792,5372 (0,-72)
total delta (668,1556)
```
Reviewed By: maksfb
FBD33589393
2022-01-14 12:57:37 -08:00
|
|
|
using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
|
2021-04-08 00:19:26 -07:00
|
|
|
const ELFFile<ELFT> &Obj = File->getELFFile();
|
|
|
|
|
typename ELFT::ShdrRange Sections = cantFail(Obj.sections());
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
// Keep track of section header entries attached to the corresponding section.
|
|
|
|
|
std::vector<std::pair<BinarySection *, ELFShdrTy>> OutputSections;
|
2024-02-01 12:08:41 -08:00
|
|
|
auto addSection = [&](const ELFShdrTy &Section, BinarySection &BinSec) {
|
2021-04-08 00:19:26 -07:00
|
|
|
ELFShdrTy NewSection = Section;
|
2024-02-01 12:08:41 -08:00
|
|
|
NewSection.sh_name = SHStrTab.getOffset(BinSec.getOutputName());
|
|
|
|
|
OutputSections.emplace_back(&BinSec, std::move(NewSection));
|
2019-03-14 18:51:05 -07:00
|
|
|
};
|
2016-09-16 15:54:32 -07:00
|
|
|
|
2019-03-14 18:51:05 -07:00
|
|
|
// Copy over entries for original allocatable sections using modified name.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const ELFShdrTy &Section : Sections) {
|
2016-02-12 19:01:53 -08:00
|
|
|
// Always ignore this section.
|
|
|
|
|
if (Section.sh_type == ELF::SHT_NULL) {
|
2022-09-22 12:05:12 -07:00
|
|
|
OutputSections.emplace_back(nullptr, Section);
|
2016-02-12 19:01:53 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
if (!(Section.sh_flags & ELF::SHF_ALLOC))
|
2016-09-27 19:09:38 -07:00
|
|
|
continue;
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
SectionRef SecRef = File->toSectionRef(&Section);
|
|
|
|
|
BinarySection *BinSec = BC->getSectionForSectionRef(SecRef);
|
|
|
|
|
assert(BinSec && "Matching BinarySection should exist.");
|
|
|
|
|
|
2024-11-19 12:59:05 -08:00
|
|
|
// Exclude anonymous sections.
|
|
|
|
|
if (BinSec->isAnonymous())
|
|
|
|
|
continue;
|
|
|
|
|
|
2024-02-01 12:08:41 -08:00
|
|
|
addSection(Section, *BinSec);
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
for (BinarySection &Section : BC->allocatableSections()) {
|
2017-11-14 20:05:11 -08:00
|
|
|
if (!Section.isFinalized())
|
2018-02-01 16:33:43 -08:00
|
|
|
continue;
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
if (Section.hasSectionRef() || Section.isAnonymous()) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::Verbosity)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: not writing section header for section "
|
|
|
|
|
<< Section.getOutputName() << '\n';
|
2016-03-03 10:13:11 -08:00
|
|
|
continue;
|
2016-09-02 14:15:29 -07:00
|
|
|
}
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: writing section header for "
|
|
|
|
|
<< Section.getOutputName() << '\n';
|
2017-06-27 16:25:59 -07:00
|
|
|
ELFShdrTy NewSection;
|
2016-03-03 10:13:11 -08:00
|
|
|
NewSection.sh_type = ELF::SHT_PROGBITS;
|
2019-03-14 18:51:05 -07:00
|
|
|
NewSection.sh_addr = Section.getOutputAddress();
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
NewSection.sh_offset = Section.getOutputFileOffset();
|
2019-03-14 18:51:05 -07:00
|
|
|
NewSection.sh_size = Section.getOutputSize();
|
2016-03-03 10:13:11 -08:00
|
|
|
NewSection.sh_entsize = 0;
|
2019-03-14 18:51:05 -07:00
|
|
|
NewSection.sh_flags = Section.getELFFlags();
|
2016-03-03 10:13:11 -08:00
|
|
|
NewSection.sh_link = 0;
|
|
|
|
|
NewSection.sh_info = 0;
|
2019-03-14 18:51:05 -07:00
|
|
|
NewSection.sh_addralign = Section.getAlignment();
|
2024-02-01 12:08:41 -08:00
|
|
|
addSection(NewSection, Section);
|
2019-03-14 18:51:05 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Sort all allocatable sections by their offset.
|
2022-09-22 12:05:12 -07:00
|
|
|
llvm::stable_sort(OutputSections, [](const auto &A, const auto &B) {
|
|
|
|
|
return A.second.sh_offset < B.second.sh_offset;
|
|
|
|
|
});
|
2019-03-14 18:51:05 -07:00
|
|
|
|
|
|
|
|
// Fix section sizes to prevent overlapping.
|
2021-03-04 16:31:12 -08:00
|
|
|
ELFShdrTy *PrevSection = nullptr;
|
2022-09-22 12:05:12 -07:00
|
|
|
BinarySection *PrevBinSec = nullptr;
|
2021-03-04 16:31:12 -08:00
|
|
|
for (auto &SectionKV : OutputSections) {
|
|
|
|
|
ELFShdrTy &Section = SectionKV.second;
|
|
|
|
|
|
2024-04-12 08:34:43 -04:00
|
|
|
// Ignore NOBITS sections as they don't take any space in the file.
|
2024-02-01 12:08:41 -08:00
|
|
|
if (Section.sh_type == ELF::SHT_NOBITS)
|
2019-03-14 18:51:05 -07:00
|
|
|
continue;
|
|
|
|
|
|
2024-02-01 12:08:41 -08:00
|
|
|
// Note that address continuity is not guaranteed as sections could be
|
|
|
|
|
// placed in different loadable segments.
|
2021-03-04 16:31:12 -08:00
|
|
|
if (PrevSection &&
|
2024-02-01 12:08:41 -08:00
|
|
|
PrevSection->sh_offset + PrevSection->sh_size > Section.sh_offset) {
|
2024-04-12 08:34:43 -04:00
|
|
|
if (opts::Verbosity > 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: adjusting size for section "
|
|
|
|
|
<< PrevBinSec->getOutputName() << '\n';
|
2024-02-01 12:08:41 -08:00
|
|
|
PrevSection->sh_size = Section.sh_offset - PrevSection->sh_offset;
|
2019-03-14 18:51:05 -07:00
|
|
|
}
|
2021-03-04 16:31:12 -08:00
|
|
|
|
|
|
|
|
PrevSection = &Section;
|
2022-09-22 12:05:12 -07:00
|
|
|
PrevBinSec = SectionKV.first;
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2017-05-16 17:29:31 -07:00
|
|
|
uint64_t LastFileOffset = 0;
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Copy over entries for non-allocatable sections performing necessary
|
2016-09-27 19:09:38 -07:00
|
|
|
// adjustments.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const ELFShdrTy &Section : Sections) {
|
2016-03-03 10:13:11 -08:00
|
|
|
if (Section.sh_type == ELF::SHT_NULL)
|
|
|
|
|
continue;
|
|
|
|
|
if (Section.sh_flags & ELF::SHF_ALLOC)
|
|
|
|
|
continue;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2018-04-20 20:03:31 -07:00
|
|
|
StringRef SectionName =
|
2020-12-01 16:29:39 -08:00
|
|
|
cantFail(Obj.getSectionName(Section), "cannot get section name");
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2019-10-29 14:49:49 -07:00
|
|
|
if (shouldStrip(Section, SectionName))
|
2019-04-26 15:30:12 -07:00
|
|
|
continue;
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
SectionRef SecRef = File->toSectionRef(&Section);
|
|
|
|
|
BinarySection *BinSec = BC->getSectionForSectionRef(SecRef);
|
|
|
|
|
assert(BinSec && "Matching BinarySection should exist.");
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
ELFShdrTy NewSection = Section;
|
2022-09-22 12:05:12 -07:00
|
|
|
NewSection.sh_offset = BinSec->getOutputFileOffset();
|
|
|
|
|
NewSection.sh_size = BinSec->getOutputSize();
|
2016-02-12 19:01:53 -08:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
if (NewSection.sh_type == ELF::SHT_SYMTAB)
|
2018-10-22 18:48:12 -07:00
|
|
|
NewSection.sh_info = NumLocalSymbols;
|
|
|
|
|
|
2024-02-01 12:08:41 -08:00
|
|
|
addSection(NewSection, *BinSec);
|
2017-05-16 17:29:31 -07:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
LastFileOffset = BinSec->getOutputFileOffset();
|
2016-02-12 19:01:53 -08:00
|
|
|
}
|
|
|
|
|
|
2017-05-16 17:29:31 -07:00
|
|
|
// Create entries for new non-allocatable sections.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinarySection &Section : BC->nonAllocatableSections()) {
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
if (Section.getOutputFileOffset() <= LastFileOffset)
|
2017-05-16 17:29:31 -07:00
|
|
|
continue;
|
2017-02-07 12:20:46 -08:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: writing section header for "
|
|
|
|
|
<< Section.getOutputName() << '\n';
|
2021-12-23 12:38:33 -08:00
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
ELFShdrTy NewSection;
|
2018-02-01 16:33:43 -08:00
|
|
|
NewSection.sh_type = Section.getELFType();
|
2017-05-16 17:29:31 -07:00
|
|
|
NewSection.sh_addr = 0;
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
NewSection.sh_offset = Section.getOutputFileOffset();
|
2018-02-01 16:33:43 -08:00
|
|
|
NewSection.sh_size = Section.getOutputSize();
|
2017-05-16 17:29:31 -07:00
|
|
|
NewSection.sh_entsize = 0;
|
2018-02-01 16:33:43 -08:00
|
|
|
NewSection.sh_flags = Section.getELFFlags();
|
2017-05-16 17:29:31 -07:00
|
|
|
NewSection.sh_link = 0;
|
|
|
|
|
NewSection.sh_info = 0;
|
2018-02-01 16:33:43 -08:00
|
|
|
NewSection.sh_addralign = Section.getAlignment();
|
2019-03-14 18:51:05 -07:00
|
|
|
|
2024-02-01 12:08:41 -08:00
|
|
|
addSection(NewSection, Section);
|
2019-03-14 18:51:05 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Assign indices to sections.
|
2022-09-22 12:05:12 -07:00
|
|
|
for (uint32_t Index = 1; Index < OutputSections.size(); ++Index)
|
|
|
|
|
OutputSections[Index].first->setIndex(Index);
|
2019-03-14 18:51:05 -07:00
|
|
|
|
|
|
|
|
// Update section index mapping
|
|
|
|
|
NewSectionIndex.clear();
|
|
|
|
|
NewSectionIndex.resize(Sections.size(), 0);
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const ELFShdrTy &Section : Sections) {
|
2019-03-14 18:51:05 -07:00
|
|
|
if (Section.sh_type == ELF::SHT_NULL)
|
|
|
|
|
continue;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
size_t OrgIndex = std::distance(Sections.begin(), &Section);
|
2022-09-22 12:05:12 -07:00
|
|
|
|
|
|
|
|
SectionRef SecRef = File->toSectionRef(&Section);
|
|
|
|
|
BinarySection *BinSec = BC->getSectionForSectionRef(SecRef);
|
|
|
|
|
assert(BinSec && "BinarySection should exist for an input section.");
|
2019-03-14 18:51:05 -07:00
|
|
|
|
|
|
|
|
// Some sections are stripped
|
2022-09-22 12:05:12 -07:00
|
|
|
if (!BinSec->hasValidIndex())
|
2019-03-14 18:51:05 -07:00
|
|
|
continue;
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
NewSectionIndex[OrgIndex] = BinSec->getIndex();
|
2017-05-16 17:29:31 -07:00
|
|
|
}
|
|
|
|
|
|
2019-03-14 18:51:05 -07:00
|
|
|
std::vector<ELFShdrTy> SectionsOnly(OutputSections.size());
|
2023-02-02 12:02:02 -08:00
|
|
|
llvm::copy(llvm::make_second_range(OutputSections), SectionsOnly.begin());
|
2019-03-14 18:51:05 -07:00
|
|
|
|
|
|
|
|
return SectionsOnly;
|
2017-06-27 16:25:59 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Rewrite section header table inserting new entries as needed. The sections
|
|
|
|
|
// header table size itself may affect the offsets of other sections,
|
|
|
|
|
// so we are placing it at the end of the binary.
|
|
|
|
|
//
|
|
|
|
|
// As we rewrite entries we need to track how many sections were inserted
|
|
|
|
|
// as it changes the sh_link value. We map old indices to new ones for
|
|
|
|
|
// existing sections.
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
|
2020-02-26 20:43:18 -08:00
|
|
|
using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
|
2021-04-08 00:19:26 -07:00
|
|
|
using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr;
|
|
|
|
|
raw_fd_ostream &OS = Out->os();
|
|
|
|
|
const ELFFile<ELFT> &Obj = File->getELFFile();
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2024-04-11 14:44:11 -04:00
|
|
|
// Mapping from old section indices to new ones
|
2019-03-14 18:51:05 -07:00
|
|
|
std::vector<uint32_t> NewSectionIndex;
|
2021-04-08 00:19:26 -07:00
|
|
|
std::vector<ELFShdrTy> OutputSections =
|
|
|
|
|
getOutputSections(File, NewSectionIndex);
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(
|
2017-06-07 20:06:29 -07:00
|
|
|
dbgs() << "BOLT-DEBUG: old to new section index mapping:\n";
|
2021-12-23 12:38:33 -08:00
|
|
|
for (uint64_t I = 0; I < NewSectionIndex.size(); ++I)
|
2017-06-07 20:06:29 -07:00
|
|
|
dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n';
|
|
|
|
|
);
|
|
|
|
|
|
2023-04-18 18:13:16 -04:00
|
|
|
// Align starting address for section header table. There's no architecutal
|
|
|
|
|
// need to align this, it is just for pleasant human readability.
|
2021-04-08 00:19:26 -07:00
|
|
|
uint64_t SHTOffset = OS.tell();
|
2023-04-18 18:13:16 -04:00
|
|
|
SHTOffset = appendPadding(OS, SHTOffset, 16);
|
2017-06-07 20:06:29 -07:00
|
|
|
|
|
|
|
|
// Write all section header entries while patching section references.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (ELFShdrTy &Section : OutputSections) {
|
2017-06-07 20:06:29 -07:00
|
|
|
Section.sh_link = NewSectionIndex[Section.sh_link];
|
2024-04-11 14:44:11 -04:00
|
|
|
if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA)
|
|
|
|
|
Section.sh_info = NewSectionIndex[Section.sh_info];
|
2017-06-07 20:06:29 -07:00
|
|
|
OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section));
|
2017-05-16 17:29:31 -07:00
|
|
|
}
|
2017-02-22 11:29:52 -08:00
|
|
|
|
2016-02-12 19:01:53 -08:00
|
|
|
// Fix ELF header.
|
2021-04-08 00:19:26 -07:00
|
|
|
ELFEhdrTy NewEhdr = Obj.getHeader();
|
2017-05-08 22:51:36 -07:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2021-12-23 12:38:33 -08:00
|
|
|
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
|
Refactor runtime library
Summary:
As we are adding more types of runtime libraries, it would be better to move the runtime library out of RewriteInstance so that it could grow separately. This also requires splitting the current implementation of Instrumentation.cpp to two separate pieces, one as normal Pass, one as the runtime library. The Instrumentation Pass would pass over the generated data to the runtime library, which will use to emit binary and perform linking.
This patch does the following:
1. Turn Instrumentation class into an optimization pass. Register the pass in the pass manager instead of in RewriteInstance.
2. Split all the data that are generated by Instrumentation that's needed by runtime library into a separate data structure called InstrumentationSummary. At the creation of Instrumentation pass, we create an instance of such data structure, which will be moved over to the runtime at the end of the pass.
3. Added a runtime library member to BinaryContext. Set the member at the end of Instrumentation pass.
4. In BinaryEmitter, make BinaryContext to also emit runtime library binary.
5. Created a base class RuntimeLibrary, that defines the interface of a runtime library, along with a few common helper functions.
6. Created InstrumentationRuntimeLibrary which inherits from RuntimeLibrary, that does all the work (mostly copied over) for emit and linking.
7. Added a new directory called RuntimeLibs, and put all the runtime library related files into it.
(cherry picked from FBD21694762)
2020-05-21 14:28:47 -07:00
|
|
|
NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress();
|
2021-12-23 12:38:33 -08:00
|
|
|
else
|
Refactor runtime library
Summary:
As we are adding more types of runtime libraries, it would be better to move the runtime library out of RewriteInstance so that it could grow separately. This also requires splitting the current implementation of Instrumentation.cpp to two separate pieces, one as normal Pass, one as the runtime library. The Instrumentation Pass would pass over the generated data to the runtime library, which will use to emit binary and perform linking.
This patch does the following:
1. Turn Instrumentation class into an optimization pass. Register the pass in the pass manager instead of in RewriteInstance.
2. Split all the data that are generated by Instrumentation that's needed by runtime library into a separate data structure called InstrumentationSummary. At the creation of Instrumentation pass, we create an instance of such data structure, which will be moved over to the runtime at the end of the pass.
3. Added a runtime library member to BinaryContext. Set the member at the end of Instrumentation pass.
4. In BinaryEmitter, make BinaryContext to also emit runtime library binary.
5. Created a base class RuntimeLibrary, that defines the interface of a runtime library, along with a few common helper functions.
6. Created InstrumentationRuntimeLibrary which inherits from RuntimeLibrary, that does all the work (mostly copied over) for emit and linking.
7. Added a new directory called RuntimeLibs, and put all the runtime library related files into it.
(cherry picked from FBD21694762)
2020-05-21 14:28:47 -07:00
|
|
|
NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
|
2020-12-01 16:29:39 -08:00
|
|
|
assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) &&
|
2020-06-23 12:22:58 -07:00
|
|
|
"cannot find new address for entry point");
|
2017-05-08 22:51:36 -07:00
|
|
|
}
|
2024-02-01 12:11:26 -08:00
|
|
|
if (PHDRTableOffset) {
|
|
|
|
|
NewEhdr.e_phoff = PHDRTableOffset;
|
|
|
|
|
NewEhdr.e_phnum = Phnum;
|
|
|
|
|
}
|
2016-03-03 10:13:11 -08:00
|
|
|
NewEhdr.e_shoff = SHTOffset;
|
2017-06-27 16:25:59 -07:00
|
|
|
NewEhdr.e_shnum = OutputSections.size();
|
2017-06-07 20:06:29 -07:00
|
|
|
NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx];
|
2016-02-12 19:01:53 -08:00
|
|
|
OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
[BOLT][NFC] Remove redundant dependent template type
Summary:
Reduce code size by removing redundant dependent template type
from RewriteInstance methods.
Code size savings (via bloaty on llvm-bolt Debug build):
```
symbol,vmsize,filesize -> vmsize,filesize (delta vmsize,filesize)
updateELFSymbolTable 57096,59600 -> 56656,59048 (440,552)
updateELFSymbolTable::lambda 35957,55277 -> 35949,54485 (8,792)
getOutputSections 20592,21440 -> 20372,21156 (220,284)
getOutputSections::lambda 1792,5300 -> 1792,5372 (0,-72)
total delta (668,1556)
```
Reviewed By: maksfb
FBD33589393
2022-01-14 12:57:37 -08:00
|
|
|
template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy>
|
2020-02-26 20:43:18 -08:00
|
|
|
void RewriteInstance::updateELFSymbolTable(
|
[BOLT][NFC] Remove redundant dependent template type
Summary:
Reduce code size by removing redundant dependent template type
from RewriteInstance methods.
Code size savings (via bloaty on llvm-bolt Debug build):
```
symbol,vmsize,filesize -> vmsize,filesize (delta vmsize,filesize)
updateELFSymbolTable 57096,59600 -> 56656,59048 (440,552)
updateELFSymbolTable::lambda 35957,55277 -> 35949,54485 (8,792)
getOutputSections 20592,21440 -> 20372,21156 (220,284)
getOutputSections::lambda 1792,5300 -> 1792,5372 (0,-72)
total delta (668,1556)
```
Reviewed By: maksfb
FBD33589393
2022-01-14 12:57:37 -08:00
|
|
|
ELFObjectFile<ELFT> *File, bool IsDynSym,
|
|
|
|
|
const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection,
|
|
|
|
|
const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write,
|
2020-02-26 20:43:18 -08:00
|
|
|
StrTabFuncTy AddToStrTab) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const ELFFile<ELFT> &Obj = File->getELFFile();
|
2021-12-14 16:52:51 -08:00
|
|
|
using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
StringRef StringSection =
|
|
|
|
|
cantFail(Obj.getStringTableForSymtab(SymTabSection));
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
unsigned NumHotTextSymsUpdated = 0;
|
|
|
|
|
unsigned NumHotDataSymsUpdated = 0;
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
std::map<const BinaryFunction *, uint64_t> IslandSizes;
|
|
|
|
|
auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) {
|
|
|
|
|
auto Itr = IslandSizes.find(&BF);
|
|
|
|
|
if (Itr != IslandSizes.end())
|
|
|
|
|
return Itr->second;
|
|
|
|
|
return IslandSizes[&BF] = BF.estimateConstantIslandSize();
|
|
|
|
|
};
|
2017-11-14 20:05:11 -08:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
// Symbols for the new symbol table.
|
|
|
|
|
std::vector<ELFSymTy> Symbols;
|
|
|
|
|
|
2024-04-25 04:53:15 +02:00
|
|
|
bool EmittedColdFileSymbol = false;
|
|
|
|
|
|
2020-10-22 16:35:29 -07:00
|
|
|
auto getNewSectionIndex = [&](uint32_t OldIndex) {
|
2022-07-22 11:23:38 -07:00
|
|
|
// For dynamic symbol table, the section index could be wrong on the input,
|
|
|
|
|
// and its value is ignored by the runtime if it's different from
|
|
|
|
|
// SHN_UNDEF and SHN_ABS.
|
|
|
|
|
// However, we still need to update dynamic symbol table, so return a
|
|
|
|
|
// section index, even though the index is broken.
|
|
|
|
|
if (IsDynSym && OldIndex >= NewSectionIndex.size())
|
|
|
|
|
return OldIndex;
|
|
|
|
|
|
2020-10-22 16:35:29 -07:00
|
|
|
assert(OldIndex < NewSectionIndex.size() && "section index out of bounds");
|
|
|
|
|
const uint32_t NewIndex = NewSectionIndex[OldIndex];
|
|
|
|
|
|
|
|
|
|
// We may have stripped the section that dynsym was referencing due to
|
|
|
|
|
// the linker bug. In that case return the old index avoiding marking
|
|
|
|
|
// the symbol as undefined.
|
|
|
|
|
if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF)
|
|
|
|
|
return OldIndex;
|
|
|
|
|
return NewIndex;
|
|
|
|
|
};
|
|
|
|
|
|
2023-11-29 22:42:36 -05:00
|
|
|
// Get the extra symbol name of a split fragment; used in addExtraSymbols.
|
|
|
|
|
auto getSplitSymbolName = [&](const FunctionFragment &FF,
|
|
|
|
|
const ELFSymTy &FunctionSymbol) {
|
|
|
|
|
SmallString<256> SymbolName;
|
|
|
|
|
if (BC->HasWarmSection)
|
|
|
|
|
SymbolName =
|
|
|
|
|
formatv("{0}.{1}", cantFail(FunctionSymbol.getName(StringSection)),
|
|
|
|
|
FF.getFragmentNum() == FragmentNum::warm() ? "warm" : "cold");
|
|
|
|
|
else
|
|
|
|
|
SymbolName = formatv("{0}.cold.{1}",
|
|
|
|
|
cantFail(FunctionSymbol.getName(StringSection)),
|
|
|
|
|
FF.getFragmentNum().get() - 1);
|
|
|
|
|
return SymbolName;
|
|
|
|
|
};
|
|
|
|
|
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
// Add extra symbols for the function.
|
2020-06-24 12:36:15 -07:00
|
|
|
//
|
|
|
|
|
// Note that addExtraSymbols() could be called multiple times for the same
|
|
|
|
|
// function with different FunctionSymbol matching the main function entry
|
|
|
|
|
// point.
|
2020-02-26 20:43:18 -08:00
|
|
|
auto addExtraSymbols = [&](const BinaryFunction &Function,
|
|
|
|
|
const ELFSymTy &FunctionSymbol) {
|
2020-04-04 20:12:38 -07:00
|
|
|
if (Function.isFolded()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryFunction *ICFParent = Function.getFoldedIntoFunction();
|
2020-04-04 20:12:38 -07:00
|
|
|
while (ICFParent->isFolded())
|
|
|
|
|
ICFParent = ICFParent->getFoldedIntoFunction();
|
2021-04-08 00:19:26 -07:00
|
|
|
ELFSymTy ICFSymbol = FunctionSymbol;
|
2020-04-04 20:12:38 -07:00
|
|
|
SmallVector<char, 256> Buf;
|
|
|
|
|
ICFSymbol.st_name =
|
2021-12-14 16:52:51 -08:00
|
|
|
AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection)))
|
|
|
|
|
.concat(".icf.0")
|
|
|
|
|
.toStringRef(Buf));
|
2020-04-04 20:12:38 -07:00
|
|
|
ICFSymbol.st_value = ICFParent->getOutputAddress();
|
|
|
|
|
ICFSymbol.st_size = ICFParent->getOutputSize();
|
2020-06-09 19:12:06 -07:00
|
|
|
ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex();
|
2020-04-04 20:12:38 -07:00
|
|
|
Symbols.emplace_back(ICFSymbol);
|
|
|
|
|
}
|
2022-08-24 18:07:06 -07:00
|
|
|
if (Function.isSplit()) {
|
2024-04-25 04:53:15 +02:00
|
|
|
// Prepend synthetic FILE symbol to prevent local cold fragments from
|
|
|
|
|
// colliding with existing symbols with the same name.
|
|
|
|
|
if (!EmittedColdFileSymbol &&
|
|
|
|
|
FunctionSymbol.getBinding() == ELF::STB_GLOBAL) {
|
|
|
|
|
ELFSymTy FileSymbol;
|
|
|
|
|
FileSymbol.st_shndx = ELF::SHN_ABS;
|
|
|
|
|
FileSymbol.st_name = AddToStrTab(getBOLTFileSymbolName());
|
|
|
|
|
FileSymbol.st_value = 0;
|
|
|
|
|
FileSymbol.st_size = 0;
|
|
|
|
|
FileSymbol.st_other = 0;
|
|
|
|
|
FileSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FILE);
|
|
|
|
|
Symbols.emplace_back(FileSymbol);
|
|
|
|
|
EmittedColdFileSymbol = true;
|
|
|
|
|
}
|
2022-08-24 17:47:01 -07:00
|
|
|
for (const FunctionFragment &FF :
|
2022-08-18 21:26:18 -07:00
|
|
|
Function.getLayout().getSplitFragments()) {
|
2022-08-24 18:07:06 -07:00
|
|
|
if (FF.getAddress()) {
|
|
|
|
|
ELFSymTy NewColdSym = FunctionSymbol;
|
2023-11-29 22:42:36 -05:00
|
|
|
const SmallString<256> SymbolName =
|
|
|
|
|
getSplitSymbolName(FF, FunctionSymbol);
|
2022-08-24 18:07:06 -07:00
|
|
|
NewColdSym.st_name = AddToStrTab(SymbolName);
|
|
|
|
|
NewColdSym.st_shndx =
|
|
|
|
|
Function.getCodeSection(FF.getFragmentNum())->getIndex();
|
|
|
|
|
NewColdSym.st_value = FF.getAddress();
|
|
|
|
|
NewColdSym.st_size = FF.getImageSize();
|
|
|
|
|
NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
|
|
|
|
|
Symbols.emplace_back(NewColdSym);
|
|
|
|
|
}
|
2022-08-18 21:26:18 -07:00
|
|
|
}
|
2020-02-26 20:43:18 -08:00
|
|
|
}
|
|
|
|
|
if (Function.hasConstantIsland()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
uint64_t DataMark = Function.getOutputDataAddress();
|
|
|
|
|
uint64_t CISize = getConstantIslandSize(Function);
|
|
|
|
|
uint64_t CodeMark = DataMark + CISize;
|
|
|
|
|
ELFSymTy DataMarkSym = FunctionSymbol;
|
2020-02-26 20:43:18 -08:00
|
|
|
DataMarkSym.st_name = AddToStrTab("$d");
|
|
|
|
|
DataMarkSym.st_value = DataMark;
|
|
|
|
|
DataMarkSym.st_size = 0;
|
|
|
|
|
DataMarkSym.setType(ELF::STT_NOTYPE);
|
|
|
|
|
DataMarkSym.setBinding(ELF::STB_LOCAL);
|
2021-04-08 00:19:26 -07:00
|
|
|
ELFSymTy CodeMarkSym = DataMarkSym;
|
2020-02-26 20:43:18 -08:00
|
|
|
CodeMarkSym.st_name = AddToStrTab("$x");
|
|
|
|
|
CodeMarkSym.st_value = CodeMark;
|
|
|
|
|
Symbols.emplace_back(DataMarkSym);
|
|
|
|
|
Symbols.emplace_back(CodeMarkSym);
|
|
|
|
|
}
|
|
|
|
|
if (Function.hasConstantIsland() && Function.isSplit()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
uint64_t DataMark = Function.getOutputColdDataAddress();
|
|
|
|
|
uint64_t CISize = getConstantIslandSize(Function);
|
|
|
|
|
uint64_t CodeMark = DataMark + CISize;
|
|
|
|
|
ELFSymTy DataMarkSym = FunctionSymbol;
|
2020-02-26 20:43:18 -08:00
|
|
|
DataMarkSym.st_name = AddToStrTab("$d");
|
|
|
|
|
DataMarkSym.st_value = DataMark;
|
|
|
|
|
DataMarkSym.st_size = 0;
|
|
|
|
|
DataMarkSym.setType(ELF::STT_NOTYPE);
|
|
|
|
|
DataMarkSym.setBinding(ELF::STB_LOCAL);
|
2021-04-08 00:19:26 -07:00
|
|
|
ELFSymTy CodeMarkSym = DataMarkSym;
|
2020-02-26 20:43:18 -08:00
|
|
|
CodeMarkSym.st_name = AddToStrTab("$x");
|
|
|
|
|
CodeMarkSym.st_value = CodeMark;
|
|
|
|
|
Symbols.emplace_back(DataMarkSym);
|
|
|
|
|
Symbols.emplace_back(CodeMarkSym);
|
|
|
|
|
}
|
|
|
|
|
};
|
2018-07-08 12:14:08 -07:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
// For regular (non-dynamic) symbol table, exclude symbols referring
|
|
|
|
|
// to non-allocatable sections.
|
|
|
|
|
auto shouldStrip = [&](const ELFSymTy &Symbol) {
|
|
|
|
|
if (Symbol.isAbsolute() || !Symbol.isDefined())
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// If we cannot link the symbol to a section, leave it as is.
|
2021-04-08 00:19:26 -07:00
|
|
|
Expected<const typename ELFT::Shdr *> Section =
|
|
|
|
|
Obj.getSection(Symbol.st_shndx);
|
2020-02-26 20:43:18 -08:00
|
|
|
if (!Section)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// Remove the section symbol if and only if the corresponding section was stripped.
|
|
|
|
|
if (Symbol.getType() == ELF::STT_SECTION) {
|
2020-10-22 16:35:29 -07:00
|
|
|
if (!getNewSectionIndex(Symbol.st_shndx))
|
2020-02-26 20:43:18 -08:00
|
|
|
return true;
|
|
|
|
|
return false;
|
2018-07-08 12:14:08 -07:00
|
|
|
}
|
|
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
// Symbols in non-allocatable sections are typically remnants of relocations
|
|
|
|
|
// emitted under "-emit-relocs" linker option. Delete those as we delete
|
|
|
|
|
// relocations against non-allocatable sections.
|
|
|
|
|
if (!((*Section)->sh_flags & ELF::SHF_ALLOC))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
};
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) {
|
2020-02-26 20:43:18 -08:00
|
|
|
// For regular (non-dynamic) symbol table strip unneeded symbols.
|
2020-10-22 16:35:29 -07:00
|
|
|
if (!IsDynSym && shouldStrip(Symbol))
|
2020-02-26 20:43:18 -08:00
|
|
|
continue;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const BinaryFunction *Function =
|
|
|
|
|
BC->getBinaryFunctionAtAddress(Symbol.st_value);
|
2020-02-26 20:43:18 -08:00
|
|
|
// Ignore false function references, e.g. when the section address matches
|
|
|
|
|
// the address of the function.
|
|
|
|
|
if (Function && Symbol.getType() == ELF::STT_SECTION)
|
|
|
|
|
Function = nullptr;
|
|
|
|
|
|
|
|
|
|
// For non-dynamic symtab, make sure the symbol section matches that of
|
|
|
|
|
// the function. It can mismatch e.g. if the symbol is a section marker
|
|
|
|
|
// in which case we treat the symbol separately from the function.
|
|
|
|
|
// For dynamic symbol table, the section index could be wrong on the input,
|
|
|
|
|
// and its value is ignored by the runtime if it's different from
|
|
|
|
|
// SHN_UNDEF and SHN_ABS.
|
2020-10-22 16:35:29 -07:00
|
|
|
if (!IsDynSym && Function &&
|
2020-10-09 16:06:27 -07:00
|
|
|
Symbol.st_shndx !=
|
2021-12-14 16:52:51 -08:00
|
|
|
Function->getOriginSection()->getSectionRef().getIndex())
|
2020-02-26 20:43:18 -08:00
|
|
|
Function = nullptr;
|
|
|
|
|
|
|
|
|
|
// Create a new symbol based on the existing symbol.
|
2021-04-08 00:19:26 -07:00
|
|
|
ELFSymTy NewSymbol = Symbol;
|
2020-02-26 20:43:18 -08:00
|
|
|
|
2024-05-20 16:55:11 -07:00
|
|
|
// Handle special symbols based on their name.
|
|
|
|
|
Expected<StringRef> SymbolName = Symbol.getName(StringSection);
|
|
|
|
|
assert(SymbolName && "cannot get symbol name");
|
|
|
|
|
|
|
|
|
|
auto updateSymbolValue = [&](const StringRef Name,
|
|
|
|
|
std::optional<uint64_t> Value = std::nullopt) {
|
|
|
|
|
NewSymbol.st_value = Value ? *Value : getNewValueForSymbol(Name);
|
|
|
|
|
NewSymbol.st_shndx = ELF::SHN_ABS;
|
|
|
|
|
BC->outs() << "BOLT-INFO: setting " << Name << " to 0x"
|
|
|
|
|
<< Twine::utohexstr(NewSymbol.st_value) << '\n';
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (*SymbolName == "__hot_start" || *SymbolName == "__hot_end") {
|
|
|
|
|
if (opts::HotText) {
|
|
|
|
|
updateSymbolValue(*SymbolName);
|
|
|
|
|
++NumHotTextSymsUpdated;
|
|
|
|
|
}
|
|
|
|
|
goto registerSymbol;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end") {
|
|
|
|
|
if (opts::HotData) {
|
|
|
|
|
updateSymbolValue(*SymbolName);
|
|
|
|
|
++NumHotDataSymsUpdated;
|
|
|
|
|
}
|
|
|
|
|
goto registerSymbol;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (*SymbolName == "_end") {
|
|
|
|
|
if (NextAvailableAddress > Symbol.st_value)
|
|
|
|
|
updateSymbolValue(*SymbolName, NextAvailableAddress);
|
|
|
|
|
goto registerSymbol;
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-16 00:05:01 -07:00
|
|
|
if (Function) {
|
2020-06-09 19:12:06 -07:00
|
|
|
// If the symbol matched a function that was not emitted, update the
|
|
|
|
|
// corresponding section index but otherwise leave it unchanged.
|
2020-04-16 00:05:01 -07:00
|
|
|
if (Function->isEmitted()) {
|
|
|
|
|
NewSymbol.st_value = Function->getOutputAddress();
|
|
|
|
|
NewSymbol.st_size = Function->getOutputSize();
|
|
|
|
|
NewSymbol.st_shndx = Function->getCodeSection()->getIndex();
|
2020-06-09 19:12:06 -07:00
|
|
|
} else if (Symbol.st_shndx < ELF::SHN_LORESERVE) {
|
2020-10-22 16:35:29 -07:00
|
|
|
NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
|
2020-04-16 00:05:01 -07:00
|
|
|
}
|
2020-02-26 20:43:18 -08:00
|
|
|
|
|
|
|
|
// Add new symbols to the symbol table if necessary.
|
2020-10-22 16:35:29 -07:00
|
|
|
if (!IsDynSym)
|
2020-02-26 20:43:18 -08:00
|
|
|
addExtraSymbols(*Function, NewSymbol);
|
2020-04-16 00:05:01 -07:00
|
|
|
} else {
|
2020-02-26 20:43:18 -08:00
|
|
|
// Check if the function symbol matches address inside a function, i.e.
|
|
|
|
|
// it marks a secondary entry point.
|
2021-12-14 16:52:51 -08:00
|
|
|
Function =
|
|
|
|
|
(Symbol.getType() == ELF::STT_FUNC)
|
|
|
|
|
? BC->getBinaryFunctionContainingAddress(Symbol.st_value,
|
|
|
|
|
/*CheckPastEnd=*/false,
|
|
|
|
|
/*UseMaxSize=*/true)
|
|
|
|
|
: nullptr;
|
2020-02-26 20:43:18 -08:00
|
|
|
|
|
|
|
|
if (Function && Function->isEmitted()) {
|
2022-08-18 21:26:18 -07:00
|
|
|
assert(Function->getLayout().isHotColdSplit() &&
|
|
|
|
|
"Adding symbols based on cold fragment when there are more than "
|
|
|
|
|
"2 fragments");
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint64_t OutputAddress =
|
|
|
|
|
Function->translateInputToOutputAddress(Symbol.st_value);
|
2020-02-26 20:43:18 -08:00
|
|
|
|
|
|
|
|
NewSymbol.st_value = OutputAddress;
|
|
|
|
|
// Force secondary entry points to have zero size.
|
|
|
|
|
NewSymbol.st_size = 0;
|
2022-08-24 18:07:06 -07:00
|
|
|
|
|
|
|
|
// Find fragment containing entrypoint
|
|
|
|
|
FunctionLayout::fragment_const_iterator FF = llvm::find_if(
|
|
|
|
|
Function->getLayout().fragments(), [&](const FunctionFragment &FF) {
|
|
|
|
|
uint64_t Lo = FF.getAddress();
|
|
|
|
|
uint64_t Hi = Lo + FF.getImageSize();
|
|
|
|
|
return Lo <= OutputAddress && OutputAddress < Hi;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (FF == Function->getLayout().fragment_end()) {
|
|
|
|
|
assert(
|
|
|
|
|
OutputAddress >= Function->getCodeSection()->getOutputAddress() &&
|
|
|
|
|
OutputAddress < (Function->getCodeSection()->getOutputAddress() +
|
|
|
|
|
Function->getCodeSection()->getOutputSize()) &&
|
2023-11-09 13:29:46 -06:00
|
|
|
"Cannot locate fragment containing secondary entrypoint");
|
2022-08-24 18:07:06 -07:00
|
|
|
FF = Function->getLayout().fragment_begin();
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
NewSymbol.st_shndx =
|
2022-08-24 18:07:06 -07:00
|
|
|
Function->getCodeSection(FF->getFragmentNum())->getIndex();
|
2016-09-27 19:09:38 -07:00
|
|
|
} else {
|
2020-02-26 20:43:18 -08:00
|
|
|
// Check if the symbol belongs to moved data object and update it.
|
|
|
|
|
BinaryData *BD = opts::ReorderData.empty()
|
2021-12-14 16:52:51 -08:00
|
|
|
? nullptr
|
|
|
|
|
: BC->getBinaryDataAtAddress(Symbol.st_value);
|
2020-02-26 20:43:18 -08:00
|
|
|
if (BD && BD->isMoved() && !BD->isJumpTable()) {
|
|
|
|
|
assert((!BD->getSize() || !Symbol.st_size ||
|
|
|
|
|
Symbol.st_size == BD->getSize()) &&
|
2018-04-20 20:03:31 -07:00
|
|
|
"sizes must match");
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
BinarySection &OutputSection = BD->getOutputSection();
|
2019-03-14 18:51:05 -07:00
|
|
|
assert(OutputSection.getIndex());
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs()
|
|
|
|
|
<< "BOLT-DEBUG: moving " << BD->getName() << " from "
|
|
|
|
|
<< *BC->getSectionNameForAddress(Symbol.st_value) << " ("
|
|
|
|
|
<< Symbol.st_shndx << ") to " << OutputSection.getName()
|
|
|
|
|
<< " (" << OutputSection.getIndex() << ")\n");
|
2019-03-14 18:51:05 -07:00
|
|
|
NewSymbol.st_shndx = OutputSection.getIndex();
|
2018-04-20 20:03:31 -07:00
|
|
|
NewSymbol.st_value = BD->getOutputAddress();
|
2020-02-26 20:43:18 -08:00
|
|
|
} else {
|
|
|
|
|
// Otherwise just update the section for the symbol.
|
2021-12-23 12:38:33 -08:00
|
|
|
if (Symbol.st_shndx < ELF::SHN_LORESERVE)
|
2020-10-22 16:35:29 -07:00
|
|
|
NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
// Detect local syms in the text section that we didn't update
|
2020-02-26 20:43:18 -08:00
|
|
|
// and that were preserved by the linker to support relocations against
|
|
|
|
|
// .text. Remove them from the symtab.
|
|
|
|
|
if (Symbol.getType() == ELF::STT_NOTYPE &&
|
2021-12-14 16:52:51 -08:00
|
|
|
Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) {
|
2020-02-26 20:43:18 -08:00
|
|
|
if (BC->getBinaryFunctionContainingAddress(Symbol.st_value,
|
|
|
|
|
/*CheckPastEnd=*/false,
|
2020-09-14 15:48:32 -07:00
|
|
|
/*UseMaxSize=*/true)) {
|
2020-02-26 20:43:18 -08:00
|
|
|
// Can only delete the symbol if not patching. Such symbols should
|
|
|
|
|
// not exist in the dynamic symbol table.
|
2020-10-22 16:35:29 -07:00
|
|
|
assert(!IsDynSym && "cannot delete symbol");
|
2019-04-26 19:52:36 -07:00
|
|
|
continue;
|
2020-02-26 20:43:18 -08:00
|
|
|
}
|
2017-06-16 20:04:43 -07:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2020-02-26 20:43:18 -08:00
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2024-05-20 16:55:11 -07:00
|
|
|
registerSymbol:
|
2021-12-23 12:38:33 -08:00
|
|
|
if (IsDynSym)
|
2020-12-01 16:29:39 -08:00
|
|
|
Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) *
|
2020-02-26 20:43:18 -08:00
|
|
|
sizeof(ELFSymTy),
|
|
|
|
|
NewSymbol);
|
2021-12-23 12:38:33 -08:00
|
|
|
else
|
2020-02-26 20:43:18 -08:00
|
|
|
Symbols.emplace_back(NewSymbol);
|
|
|
|
|
}
|
2017-10-10 18:06:45 -07:00
|
|
|
|
2020-10-22 16:35:29 -07:00
|
|
|
if (IsDynSym) {
|
2020-02-26 20:43:18 -08:00
|
|
|
assert(Symbols.empty());
|
|
|
|
|
return;
|
|
|
|
|
}
|
2019-03-19 13:46:21 -07:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
// Add symbols of injected functions
|
|
|
|
|
for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) {
|
[BOLT][AArch64] Add partial support for lite mode (#133014)
In lite mode, we only emit code for a subset of functions while
preserving the original code in .bolt.org.text. This requires updating
code references in non-emitted functions to ensure that:
* Non-optimized versions of the optimized code never execute.
* Function pointer comparison semantics is preserved.
On x86-64, we can update code references in-place using "pending
relocations" added in scanExternalRefs(). However, on AArch64, this is
not always possible due to address range limitations and linker address
"relaxation".
There are two types of code-to-code references: control transfer (e.g.,
calls and branches) and function pointer materialization.
AArch64-specific control transfer instructions are covered by #116964.
For function pointer materialization, simply changing the immediate
field of an instruction is not always sufficient. In some cases, we need
to modify a pair of instructions, such as undoing linker relaxation and
converting NOP+ADR into ADRP+ADD sequence.
To achieve this, we use the instruction patch mechanism instead of
pending relocations. Instruction patches are emitted via the regular MC
layer, just like regular functions. However, they have a fixed address
and do not have an associated symbol table entry. This allows us to make
more complex changes to the code, ensuring that function pointers are
correctly updated. Such mechanism should also be portable to RISC-V and
other architectures.
To summarize, for AArch64, we extend the scanExternalRefs() process to
undo linker relaxation and use instruction patches to partially
overwrite unoptimized code.
2025-03-27 21:33:25 -07:00
|
|
|
if (Function->isAnonymous())
|
|
|
|
|
continue;
|
2020-02-26 20:43:18 -08:00
|
|
|
ELFSymTy NewSymbol;
|
2020-10-09 16:06:27 -07:00
|
|
|
BinarySection *OriginSection = Function->getOriginSection();
|
2021-12-14 16:52:51 -08:00
|
|
|
NewSymbol.st_shndx =
|
|
|
|
|
OriginSection
|
|
|
|
|
? getNewSectionIndex(OriginSection->getSectionRef().getIndex())
|
|
|
|
|
: Function->getCodeSection()->getIndex();
|
2020-02-26 20:43:18 -08:00
|
|
|
NewSymbol.st_value = Function->getOutputAddress();
|
|
|
|
|
NewSymbol.st_name = AddToStrTab(Function->getOneName());
|
|
|
|
|
NewSymbol.st_size = Function->getOutputSize();
|
|
|
|
|
NewSymbol.st_other = 0;
|
|
|
|
|
NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
|
|
|
|
|
Symbols.emplace_back(NewSymbol);
|
|
|
|
|
|
|
|
|
|
if (Function->isSplit()) {
|
2022-08-18 21:48:19 -07:00
|
|
|
assert(Function->getLayout().isHotColdSplit() &&
|
|
|
|
|
"Adding symbols based on cold fragment when there are more than "
|
|
|
|
|
"2 fragments");
|
2021-04-08 00:19:26 -07:00
|
|
|
ELFSymTy NewColdSym = NewSymbol;
|
2020-02-26 20:43:18 -08:00
|
|
|
NewColdSym.setType(ELF::STT_NOTYPE);
|
|
|
|
|
SmallVector<char, 256> Buf;
|
|
|
|
|
NewColdSym.st_name = AddToStrTab(
|
2021-12-14 16:52:51 -08:00
|
|
|
Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf));
|
2022-08-24 18:07:06 -07:00
|
|
|
const FunctionFragment &ColdFF =
|
|
|
|
|
Function->getLayout().getFragment(FragmentNum::cold());
|
|
|
|
|
NewColdSym.st_value = ColdFF.getAddress();
|
|
|
|
|
NewColdSym.st_size = ColdFF.getImageSize();
|
2020-02-26 20:43:18 -08:00
|
|
|
Symbols.emplace_back(NewColdSym);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-10 13:27:14 +04:00
|
|
|
auto AddSymbol = [&](const StringRef &Name, uint64_t Address) {
|
|
|
|
|
if (!Address)
|
|
|
|
|
return;
|
2020-02-26 20:43:18 -08:00
|
|
|
|
|
|
|
|
ELFSymTy Symbol;
|
2023-10-10 13:27:14 +04:00
|
|
|
Symbol.st_value = Address;
|
2020-02-26 20:43:18 -08:00
|
|
|
Symbol.st_shndx = ELF::SHN_ABS;
|
|
|
|
|
Symbol.st_name = AddToStrTab(Name);
|
|
|
|
|
Symbol.st_size = 0;
|
|
|
|
|
Symbol.st_other = 0;
|
|
|
|
|
Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE);
|
|
|
|
|
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: setting " << Name << " to 0x"
|
|
|
|
|
<< Twine::utohexstr(Symbol.st_value) << '\n';
|
2020-02-26 20:43:18 -08:00
|
|
|
|
|
|
|
|
Symbols.emplace_back(Symbol);
|
|
|
|
|
};
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2023-10-10 13:27:14 +04:00
|
|
|
// Add runtime library start and fini address symbols
|
|
|
|
|
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) {
|
|
|
|
|
AddSymbol("__bolt_runtime_start", RtLibrary->getRuntimeStartAddress());
|
|
|
|
|
AddSymbol("__bolt_runtime_fini", RtLibrary->getRuntimeFiniAddress());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) &&
|
|
|
|
|
"either none or both __hot_start/__hot_end symbols were expected");
|
|
|
|
|
assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) &&
|
|
|
|
|
"either none or both __hot_data_start/__hot_data_end symbols were "
|
|
|
|
|
"expected");
|
|
|
|
|
|
|
|
|
|
auto AddEmittedSymbol = [&](const StringRef &Name) {
|
|
|
|
|
AddSymbol(Name, getNewValueForSymbol(Name));
|
|
|
|
|
};
|
|
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
if (opts::HotText && !NumHotTextSymsUpdated) {
|
2023-10-10 13:27:14 +04:00
|
|
|
AddEmittedSymbol("__hot_start");
|
|
|
|
|
AddEmittedSymbol("__hot_end");
|
2020-02-26 20:43:18 -08:00
|
|
|
}
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
if (opts::HotData && !NumHotDataSymsUpdated) {
|
2023-10-10 13:27:14 +04:00
|
|
|
AddEmittedSymbol("__hot_data_start");
|
|
|
|
|
AddEmittedSymbol("__hot_data_end");
|
2020-02-26 20:43:18 -08:00
|
|
|
}
|
2017-10-10 18:06:45 -07:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
// Put local symbols at the beginning.
|
2022-06-23 22:15:47 -07:00
|
|
|
llvm::stable_sort(Symbols, [](const ELFSymTy &A, const ELFSymTy &B) {
|
|
|
|
|
if (A.getBinding() == ELF::STB_LOCAL && B.getBinding() != ELF::STB_LOCAL)
|
|
|
|
|
return true;
|
|
|
|
|
return false;
|
|
|
|
|
});
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
for (const ELFSymTy &Symbol : Symbols)
|
2020-02-26 20:43:18 -08:00
|
|
|
Write(0, Symbol);
|
|
|
|
|
}
|
2019-03-19 13:46:21 -07:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const ELFFile<ELFT> &Obj = File->getELFFile();
|
2020-02-26 20:43:18 -08:00
|
|
|
using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr;
|
2021-12-14 16:52:51 -08:00
|
|
|
using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym;
|
2019-03-19 13:46:21 -07:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
// Compute a preview of how section indices will change after rewriting, so
|
|
|
|
|
// we can properly update the symbol table based on new section indices.
|
|
|
|
|
std::vector<uint32_t> NewSectionIndex;
|
|
|
|
|
getOutputSections(File, NewSectionIndex);
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Update dynamic symbol table.
|
2020-02-26 20:43:18 -08:00
|
|
|
const ELFShdrTy *DynSymSection = nullptr;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Section.sh_type == ELF::SHT_DYNSYM) {
|
|
|
|
|
DynSymSection = &Section;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-06-26 16:52:07 -07:00
|
|
|
assert((DynSymSection || BC->IsStaticExecutable) &&
|
|
|
|
|
"dynamic symbol table expected");
|
|
|
|
|
if (DynSymSection) {
|
|
|
|
|
updateELFSymbolTable(
|
|
|
|
|
File,
|
2020-10-22 16:35:29 -07:00
|
|
|
/*IsDynSym=*/true,
|
2020-06-26 16:52:07 -07:00
|
|
|
*DynSymSection,
|
|
|
|
|
NewSectionIndex,
|
|
|
|
|
[&](size_t Offset, const ELFSymTy &Sym) {
|
|
|
|
|
Out->os().pwrite(reinterpret_cast<const char *>(&Sym),
|
|
|
|
|
sizeof(ELFSymTy),
|
|
|
|
|
DynSymSection->sh_offset + Offset);
|
|
|
|
|
},
|
|
|
|
|
[](StringRef) -> size_t { return 0; });
|
|
|
|
|
}
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2021-10-16 17:02:45 +03:00
|
|
|
if (opts::RemoveSymtab)
|
|
|
|
|
return;
|
|
|
|
|
|
2017-06-27 16:25:59 -07:00
|
|
|
// (re)create regular symbol table.
|
2020-02-26 20:43:18 -08:00
|
|
|
const ELFShdrTy *SymTabSection = nullptr;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const ELFShdrTy &Section : cantFail(Obj.sections())) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Section.sh_type == ELF::SHT_SYMTAB) {
|
|
|
|
|
SymTabSection = &Section;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!SymTabSection) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-WARNING: no symbol table found\n";
|
2016-09-27 19:09:38 -07:00
|
|
|
return;
|
|
|
|
|
}
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2020-02-26 20:43:18 -08:00
|
|
|
const ELFShdrTy *StrTabSection =
|
2020-12-01 16:29:39 -08:00
|
|
|
cantFail(Obj.getSection(SymTabSection->sh_link));
|
2017-06-27 16:25:59 -07:00
|
|
|
std::string NewContents;
|
2020-12-01 16:29:39 -08:00
|
|
|
std::string NewStrTab = std::string(
|
|
|
|
|
File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size));
|
2021-04-08 00:19:26 -07:00
|
|
|
StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection));
|
|
|
|
|
StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection));
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2018-10-22 18:48:12 -07:00
|
|
|
NumLocalSymbols = 0;
|
2020-02-26 20:43:18 -08:00
|
|
|
updateELFSymbolTable(
|
|
|
|
|
File,
|
2020-10-22 16:35:29 -07:00
|
|
|
/*IsDynSym=*/false,
|
2020-02-26 20:43:18 -08:00
|
|
|
*SymTabSection,
|
|
|
|
|
NewSectionIndex,
|
|
|
|
|
[&](size_t Offset, const ELFSymTy &Sym) {
|
|
|
|
|
if (Sym.getBinding() == ELF::STB_LOCAL)
|
|
|
|
|
++NumLocalSymbols;
|
|
|
|
|
NewContents.append(reinterpret_cast<const char *>(&Sym),
|
|
|
|
|
sizeof(ELFSymTy));
|
|
|
|
|
},
|
|
|
|
|
[&](StringRef Str) {
|
|
|
|
|
size_t Idx = NewStrTab.size();
|
2020-10-09 16:06:27 -07:00
|
|
|
NewStrTab.append(NameResolver::restore(Str).str());
|
2020-02-26 20:43:18 -08:00
|
|
|
NewStrTab.append(1, '\0');
|
|
|
|
|
return Idx;
|
|
|
|
|
});
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
BC->registerOrUpdateNoteSection(SecName,
|
|
|
|
|
copyByteArray(NewContents),
|
|
|
|
|
NewContents.size(),
|
|
|
|
|
/*Alignment=*/1,
|
|
|
|
|
/*IsReadOnly=*/true,
|
|
|
|
|
ELF::SHT_SYMTAB);
|
|
|
|
|
|
|
|
|
|
BC->registerOrUpdateNoteSection(StrSecName,
|
|
|
|
|
copyByteArray(NewStrTab),
|
|
|
|
|
NewStrTab.size(),
|
|
|
|
|
/*Alignment=*/1,
|
|
|
|
|
/*IsReadOnly=*/true,
|
|
|
|
|
ELF::SHT_STRTAB);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
2023-03-15 00:08:11 +04:00
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFAllocatableRelrSection(
|
|
|
|
|
ELFObjectFile<ELFT> *File) {
|
|
|
|
|
if (!DynamicRelrAddress)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
raw_fd_ostream &OS = Out->os();
|
|
|
|
|
const uint8_t PSize = BC->AsmInfo->getCodePointerSize();
|
|
|
|
|
const uint64_t MaxDelta = ((CHAR_BIT * DynamicRelrEntrySize) - 1) * PSize;
|
|
|
|
|
|
2023-09-27 10:27:56 +04:00
|
|
|
auto FixAddend = [&](const BinarySection &Section, const Relocation &Rel,
|
|
|
|
|
uint64_t FileOffset) {
|
2023-03-15 00:08:11 +04:00
|
|
|
// Fix relocation symbol value in place if no static relocation found
|
2023-09-27 10:27:56 +04:00
|
|
|
// on the same address. We won't check the BF relocations here since it
|
|
|
|
|
// is rare case and no optimization is required.
|
2023-03-15 00:08:11 +04:00
|
|
|
if (Section.getRelocationAt(Rel.Offset))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
// No fixup needed if symbol address was not changed
|
|
|
|
|
const uint64_t Addend = getNewFunctionOrDataAddress(Rel.Addend);
|
|
|
|
|
if (!Addend)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
OS.pwrite(reinterpret_cast<const char *>(&Addend), PSize, FileOffset);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Fill new relative relocation offsets set
|
|
|
|
|
std::set<uint64_t> RelOffsets;
|
|
|
|
|
for (const BinarySection &Section : BC->allocatableSections()) {
|
|
|
|
|
const uint64_t SectionInputAddress = Section.getAddress();
|
|
|
|
|
uint64_t SectionAddress = Section.getOutputAddress();
|
|
|
|
|
if (!SectionAddress)
|
|
|
|
|
SectionAddress = SectionInputAddress;
|
|
|
|
|
|
|
|
|
|
for (const Relocation &Rel : Section.dynamicRelocations()) {
|
|
|
|
|
if (!Rel.isRelative())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
uint64_t RelOffset =
|
|
|
|
|
getNewFunctionOrDataAddress(SectionInputAddress + Rel.Offset);
|
|
|
|
|
|
|
|
|
|
RelOffset = RelOffset == 0 ? SectionAddress + Rel.Offset : RelOffset;
|
|
|
|
|
assert((RelOffset & 1) == 0 && "Wrong relocation offset");
|
|
|
|
|
RelOffsets.emplace(RelOffset);
|
2023-09-27 10:27:56 +04:00
|
|
|
FixAddend(Section, Rel, RelOffset);
|
2023-03-15 00:08:11 +04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ErrorOr<BinarySection &> Section =
|
|
|
|
|
BC->getSectionForAddress(*DynamicRelrAddress);
|
|
|
|
|
assert(Section && "cannot get .relr.dyn section");
|
|
|
|
|
assert(Section->isRelr() && "Expected section to be SHT_RELR type");
|
|
|
|
|
uint64_t RelrDynOffset = Section->getInputFileOffset();
|
|
|
|
|
const uint64_t RelrDynEndOffset = RelrDynOffset + Section->getSize();
|
|
|
|
|
|
|
|
|
|
auto WriteRelr = [&](uint64_t Value) {
|
|
|
|
|
if (RelrDynOffset + DynamicRelrEntrySize > RelrDynEndOffset) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: Offset overflow for relr.dyn section\n";
|
2023-03-15 00:08:11 +04:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
OS.pwrite(reinterpret_cast<const char *>(&Value), DynamicRelrEntrySize,
|
|
|
|
|
RelrDynOffset);
|
|
|
|
|
RelrDynOffset += DynamicRelrEntrySize;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
for (auto RelIt = RelOffsets.begin(); RelIt != RelOffsets.end();) {
|
|
|
|
|
WriteRelr(*RelIt);
|
|
|
|
|
uint64_t Base = *RelIt++ + PSize;
|
|
|
|
|
while (1) {
|
|
|
|
|
uint64_t Bitmap = 0;
|
|
|
|
|
for (; RelIt != RelOffsets.end(); ++RelIt) {
|
|
|
|
|
const uint64_t Delta = *RelIt - Base;
|
|
|
|
|
if (Delta >= MaxDelta || Delta % PSize)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
Bitmap |= (1ULL << (Delta / PSize));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!Bitmap)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
WriteRelr((Bitmap << 1) | 1);
|
|
|
|
|
Base += MaxDelta;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Fill the rest of the section with empty bitmap value
|
|
|
|
|
while (RelrDynOffset != RelrDynEndOffset)
|
|
|
|
|
WriteRelr(1);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
template <typename ELFT>
|
2018-08-16 16:53:14 -07:00
|
|
|
void
|
|
|
|
|
RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) {
|
2021-04-08 00:19:26 -07:00
|
|
|
using Elf_Rela = typename ELFT::Rela;
|
|
|
|
|
raw_fd_ostream &OS = Out->os();
|
2022-07-11 09:49:41 -07:00
|
|
|
const ELFFile<ELFT> &EF = File->getELFFile();
|
2022-02-16 18:13:44 +03:00
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
uint64_t RelDynOffset = 0, RelDynEndOffset = 0;
|
|
|
|
|
uint64_t RelPltOffset = 0, RelPltEndOffset = 0;
|
|
|
|
|
|
|
|
|
|
auto setSectionFileOffsets = [&](uint64_t Address, uint64_t &Start,
|
|
|
|
|
uint64_t &End) {
|
|
|
|
|
ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
|
2023-02-10 17:09:03 +04:00
|
|
|
assert(Section && "cannot get relocation section");
|
2022-07-11 09:49:41 -07:00
|
|
|
Start = Section->getInputFileOffset();
|
|
|
|
|
End = Start + Section->getSize();
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (!DynamicRelocationsAddress && !PLTRelocationsAddress)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (DynamicRelocationsAddress)
|
|
|
|
|
setSectionFileOffsets(*DynamicRelocationsAddress, RelDynOffset,
|
|
|
|
|
RelDynEndOffset);
|
|
|
|
|
|
|
|
|
|
if (PLTRelocationsAddress)
|
|
|
|
|
setSectionFileOffsets(*PLTRelocationsAddress, RelPltOffset,
|
|
|
|
|
RelPltEndOffset);
|
|
|
|
|
|
|
|
|
|
DynamicRelativeRelocationsCount = 0;
|
|
|
|
|
|
|
|
|
|
auto writeRela = [&OS](const Elf_Rela *RelA, uint64_t &Offset) {
|
|
|
|
|
OS.pwrite(reinterpret_cast<const char *>(RelA), sizeof(*RelA), Offset);
|
|
|
|
|
Offset += sizeof(*RelA);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
auto writeRelocations = [&](bool PatchRelative) {
|
|
|
|
|
for (BinarySection &Section : BC->allocatableSections()) {
|
2023-02-10 17:09:03 +04:00
|
|
|
const uint64_t SectionInputAddress = Section.getAddress();
|
|
|
|
|
uint64_t SectionAddress = Section.getOutputAddress();
|
|
|
|
|
if (!SectionAddress)
|
|
|
|
|
SectionAddress = SectionInputAddress;
|
|
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
for (const Relocation &Rel : Section.dynamicRelocations()) {
|
|
|
|
|
const bool IsRelative = Rel.isRelative();
|
|
|
|
|
if (PatchRelative != IsRelative)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (IsRelative)
|
|
|
|
|
++DynamicRelativeRelocationsCount;
|
|
|
|
|
|
|
|
|
|
Elf_Rela NewRelA;
|
|
|
|
|
MCSymbol *Symbol = Rel.Symbol;
|
|
|
|
|
uint32_t SymbolIdx = 0;
|
|
|
|
|
uint64_t Addend = Rel.Addend;
|
2023-02-10 17:09:03 +04:00
|
|
|
uint64_t RelOffset =
|
|
|
|
|
getNewFunctionOrDataAddress(SectionInputAddress + Rel.Offset);
|
2022-07-11 09:49:41 -07:00
|
|
|
|
2023-02-10 17:09:03 +04:00
|
|
|
RelOffset = RelOffset == 0 ? SectionAddress + Rel.Offset : RelOffset;
|
2022-07-11 09:49:41 -07:00
|
|
|
if (Rel.Symbol) {
|
|
|
|
|
SymbolIdx = getOutputDynamicSymbolIndex(Symbol);
|
|
|
|
|
} else {
|
|
|
|
|
// Usually this case is used for R_*_(I)RELATIVE relocations
|
|
|
|
|
const uint64_t Address = getNewFunctionOrDataAddress(Addend);
|
|
|
|
|
if (Address)
|
|
|
|
|
Addend = Address;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
NewRelA.setSymbolAndType(SymbolIdx, Rel.Type, EF.isMips64EL());
|
2023-02-10 17:09:03 +04:00
|
|
|
NewRelA.r_offset = RelOffset;
|
2022-07-11 09:49:41 -07:00
|
|
|
NewRelA.r_addend = Addend;
|
|
|
|
|
|
2023-03-15 22:55:35 -07:00
|
|
|
const bool IsJmpRel = IsJmpRelocation.contains(Rel.Type);
|
2022-07-11 09:49:41 -07:00
|
|
|
uint64_t &Offset = IsJmpRel ? RelPltOffset : RelDynOffset;
|
|
|
|
|
const uint64_t &EndOffset =
|
|
|
|
|
IsJmpRel ? RelPltEndOffset : RelDynEndOffset;
|
|
|
|
|
if (!Offset || !EndOffset) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: Invalid offsets for dynamic relocation\n";
|
2022-07-11 09:49:41 -07:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (Offset + sizeof(NewRelA) > EndOffset) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: Offset overflow for dynamic relocation\n";
|
2022-07-11 09:49:41 -07:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
writeRela(&NewRelA, Offset);
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2022-07-11 09:49:41 -07:00
|
|
|
};
|
|
|
|
|
|
2023-03-15 00:08:11 +04:00
|
|
|
// Place R_*_RELATIVE relocations in RELA section if RELR is not presented.
|
|
|
|
|
// The dynamic linker expects all R_*_RELATIVE relocations in RELA
|
|
|
|
|
// to be emitted first.
|
|
|
|
|
if (!DynamicRelrAddress)
|
|
|
|
|
writeRelocations(/* PatchRelative */ true);
|
2022-07-11 09:49:41 -07:00
|
|
|
writeRelocations(/* PatchRelative */ false);
|
|
|
|
|
|
|
|
|
|
auto fillNone = [&](uint64_t &Offset, uint64_t EndOffset) {
|
|
|
|
|
if (!Offset)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
typename ELFObjectFile<ELFT>::Elf_Rela RelA;
|
|
|
|
|
RelA.setSymbolAndType(0, Relocation::getNone(), EF.isMips64EL());
|
|
|
|
|
RelA.r_offset = 0;
|
|
|
|
|
RelA.r_addend = 0;
|
|
|
|
|
while (Offset < EndOffset)
|
|
|
|
|
writeRela(&RelA, Offset);
|
|
|
|
|
|
|
|
|
|
assert(Offset == EndOffset && "Unexpected section overflow");
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Fill the rest of the sections with R_*_NONE relocations
|
|
|
|
|
fillNone(RelDynOffset, RelDynEndOffset);
|
|
|
|
|
fillNone(RelPltOffset, RelPltEndOffset);
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) {
|
2021-04-08 00:19:26 -07:00
|
|
|
raw_fd_ostream &OS = Out->os();
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
SectionRef GOTSection;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const SectionRef &Section : File->sections()) {
|
2020-12-01 16:29:39 -08:00
|
|
|
StringRef SectionName = cantFail(Section.getName());
|
2016-09-27 19:09:38 -07:00
|
|
|
if (SectionName == ".got") {
|
|
|
|
|
GOTSection = Section;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!GOTSection.getObject()) {
|
2022-07-11 09:49:41 -07:00
|
|
|
if (!BC->IsStaticExecutable)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-INFO: no .got section found\n";
|
2016-09-27 19:09:38 -07:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
StringRef GOTContents = cantFail(GOTSection.getContents());
|
2016-09-27 19:09:38 -07:00
|
|
|
for (const uint64_t *GOTEntry =
|
2021-12-14 16:52:51 -08:00
|
|
|
reinterpret_cast<const uint64_t *>(GOTContents.data());
|
2016-09-27 19:09:38 -07:00
|
|
|
GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() +
|
2021-12-14 16:52:51 -08:00
|
|
|
GOTContents.size());
|
2016-09-27 19:09:38 -07:00
|
|
|
++GOTEntry) {
|
2021-04-08 00:19:26 -07:00
|
|
|
if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x"
|
|
|
|
|
<< Twine::utohexstr(*GOTEntry) << " with 0x"
|
|
|
|
|
<< Twine::utohexstr(NewAddress) << '\n');
|
2016-09-27 19:09:38 -07:00
|
|
|
OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress),
|
2021-12-14 16:52:51 -08:00
|
|
|
reinterpret_cast<const char *>(GOTEntry) -
|
|
|
|
|
File->getData().data());
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename ELFT>
|
|
|
|
|
void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
|
2020-06-26 16:52:07 -07:00
|
|
|
if (BC->IsStaticExecutable)
|
|
|
|
|
return;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const ELFFile<ELFT> &Obj = File->getELFFile();
|
|
|
|
|
raw_fd_ostream &OS = Out->os();
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
|
2021-12-14 16:52:51 -08:00
|
|
|
using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
|
2016-09-27 19:09:38 -07:00
|
|
|
|
|
|
|
|
// Locate DYNAMIC by looking through program headers.
|
|
|
|
|
uint64_t DynamicOffset = 0;
|
2022-07-30 10:35:48 -07:00
|
|
|
const Elf_Phdr *DynamicPhdr = nullptr;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
|
2016-09-27 19:09:38 -07:00
|
|
|
if (Phdr.p_type == ELF::PT_DYNAMIC) {
|
|
|
|
|
DynamicOffset = Phdr.p_offset;
|
|
|
|
|
DynamicPhdr = &Phdr;
|
|
|
|
|
assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match");
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert(DynamicPhdr && "missing dynamic in ELF binary");
|
|
|
|
|
|
2017-08-04 11:21:05 -07:00
|
|
|
bool ZNowSet = false;
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
// Go through all dynamic entries and patch functions addresses with
|
|
|
|
|
// new ones.
|
2021-04-08 00:19:26 -07:00
|
|
|
typename ELFT::DynRange DynamicEntries =
|
2020-12-01 16:29:39 -08:00
|
|
|
cantFail(Obj.dynamicEntries(), "error accessing dynamic table");
|
|
|
|
|
auto DTB = DynamicEntries.begin();
|
|
|
|
|
for (const Elf_Dyn &Dyn : DynamicEntries) {
|
2021-04-08 00:19:26 -07:00
|
|
|
Elf_Dyn NewDE = Dyn;
|
2016-09-27 19:09:38 -07:00
|
|
|
bool ShouldPatch = true;
|
2020-12-01 16:29:39 -08:00
|
|
|
switch (Dyn.d_tag) {
|
2016-09-27 19:09:38 -07:00
|
|
|
default:
|
|
|
|
|
ShouldPatch = false;
|
|
|
|
|
break;
|
2022-07-11 09:49:41 -07:00
|
|
|
case ELF::DT_RELACOUNT:
|
|
|
|
|
NewDE.d_un.d_val = DynamicRelativeRelocationsCount;
|
|
|
|
|
break;
|
2016-09-27 19:09:38 -07:00
|
|
|
case ELF::DT_INIT:
|
2021-12-28 13:46:45 -08:00
|
|
|
case ELF::DT_FINI: {
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2021-04-08 00:19:26 -07:00
|
|
|
if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) {
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type "
|
|
|
|
|
<< Dyn.getTag() << '\n');
|
2017-08-04 11:21:05 -07:00
|
|
|
NewDE.d_un.d_ptr = NewAddress;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-28 13:46:45 -08:00
|
|
|
RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
|
|
|
|
|
if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) {
|
|
|
|
|
if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress())
|
|
|
|
|
NewDE.d_un.d_ptr = Addr;
|
Refactor runtime library
Summary:
As we are adding more types of runtime libraries, it would be better to move the runtime library out of RewriteInstance so that it could grow separately. This also requires splitting the current implementation of Instrumentation.cpp to two separate pieces, one as normal Pass, one as the runtime library. The Instrumentation Pass would pass over the generated data to the runtime library, which will use to emit binary and perform linking.
This patch does the following:
1. Turn Instrumentation class into an optimization pass. Register the pass in the pass manager instead of in RewriteInstance.
2. Split all the data that are generated by Instrumentation that's needed by runtime library into a separate data structure called InstrumentationSummary. At the creation of Instrumentation pass, we create an instance of such data structure, which will be moved over to the runtime at the end of the pass.
3. Added a runtime library member to BinaryContext. Set the member at the end of Instrumentation pass.
4. In BinaryEmitter, make BinaryContext to also emit runtime library binary.
5. Created a base class RuntimeLibrary, that defines the interface of a runtime library, along with a few common helper functions.
6. Created InstrumentationRuntimeLibrary which inherits from RuntimeLibrary, that does all the work (mostly copied over) for emit and linking.
7. Added a new directory called RuntimeLibs, and put all the runtime library related files into it.
(cherry picked from FBD21694762)
2020-05-21 14:28:47 -07:00
|
|
|
}
|
2021-12-28 13:46:45 -08:00
|
|
|
if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
|
|
|
|
|
if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
|
|
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
|
|
|
|
|
<< Twine::utohexstr(Addr) << '\n');
|
|
|
|
|
NewDE.d_un.d_ptr = Addr;
|
2021-06-19 04:08:35 +08:00
|
|
|
}
|
|
|
|
|
}
|
2017-08-04 11:21:05 -07:00
|
|
|
break;
|
2021-12-28 13:46:45 -08:00
|
|
|
}
|
2017-08-04 11:21:05 -07:00
|
|
|
case ELF::DT_FLAGS:
|
|
|
|
|
if (BC->RequiresZNow) {
|
|
|
|
|
NewDE.d_un.d_val |= ELF::DF_BIND_NOW;
|
|
|
|
|
ZNowSet = true;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_FLAGS_1:
|
|
|
|
|
if (BC->RequiresZNow) {
|
|
|
|
|
NewDE.d_un.d_val |= ELF::DF_1_NOW;
|
|
|
|
|
ZNowSet = true;
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
2021-12-23 12:38:33 -08:00
|
|
|
if (ShouldPatch)
|
2016-09-27 19:09:38 -07:00
|
|
|
OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE),
|
2020-12-01 16:29:39 -08:00
|
|
|
DynamicOffset + (&Dyn - DTB) * sizeof(Dyn));
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2017-08-04 11:21:05 -07:00
|
|
|
|
|
|
|
|
if (BC->RequiresZNow && !ZNowSet) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs()
|
|
|
|
|
<< "BOLT-ERROR: output binary requires immediate relocation "
|
|
|
|
|
"processing which depends on DT_FLAGS or DT_FLAGS_1 presence in "
|
|
|
|
|
".dynamic. Please re-link the binary with -znow.\n";
|
2017-08-04 11:21:05 -07:00
|
|
|
exit(1);
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
|
|
|
|
|
2019-12-13 17:27:03 -08:00
|
|
|
template <typename ELFT>
|
2022-03-08 09:17:41 -08:00
|
|
|
Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const ELFFile<ELFT> &Obj = File->getELFFile();
|
2019-12-13 17:27:03 -08:00
|
|
|
|
|
|
|
|
using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr;
|
2021-12-14 16:52:51 -08:00
|
|
|
using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn;
|
2019-12-13 17:27:03 -08:00
|
|
|
|
|
|
|
|
// Locate DYNAMIC by looking through program headers.
|
2022-07-30 10:35:48 -07:00
|
|
|
const Elf_Phdr *DynamicPhdr = nullptr;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) {
|
2019-12-13 17:27:03 -08:00
|
|
|
if (Phdr.p_type == ELF::PT_DYNAMIC) {
|
|
|
|
|
DynamicPhdr = &Phdr;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-26 16:52:07 -07:00
|
|
|
if (!DynamicPhdr) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: static input executable detected\n";
|
2021-06-21 01:59:38 +08:00
|
|
|
// TODO: static PIE executable might have dynamic header
|
2020-06-26 16:52:07 -07:00
|
|
|
BC->IsStaticExecutable = true;
|
2022-03-08 09:17:41 -08:00
|
|
|
return Error::success();
|
2020-03-08 19:04:39 -07:00
|
|
|
}
|
|
|
|
|
|
2022-03-08 09:17:41 -08:00
|
|
|
if (DynamicPhdr->p_memsz != DynamicPhdr->p_filesz)
|
|
|
|
|
return createStringError(errc::executable_format_error,
|
|
|
|
|
"dynamic section sizes should match");
|
2020-03-08 19:04:39 -07:00
|
|
|
|
|
|
|
|
// Go through all dynamic entries to locate entries of interest.
|
2022-03-09 20:24:20 -08:00
|
|
|
auto DynamicEntriesOrErr = Obj.dynamicEntries();
|
|
|
|
|
if (!DynamicEntriesOrErr)
|
|
|
|
|
return DynamicEntriesOrErr.takeError();
|
|
|
|
|
typename ELFT::DynRange DynamicEntries = DynamicEntriesOrErr.get();
|
2020-12-01 16:29:39 -08:00
|
|
|
|
|
|
|
|
for (const Elf_Dyn &Dyn : DynamicEntries) {
|
|
|
|
|
switch (Dyn.d_tag) {
|
2021-06-19 04:08:35 +08:00
|
|
|
case ELF::DT_INIT:
|
|
|
|
|
if (!BC->HasInterpHeader) {
|
|
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
|
|
|
|
|
BC->StartFunctionAddress = Dyn.getPtr();
|
|
|
|
|
}
|
|
|
|
|
break;
|
2020-06-23 12:22:58 -07:00
|
|
|
case ELF::DT_FINI:
|
2023-11-08 11:01:10 +00:00
|
|
|
BC->FiniAddress = Dyn.getPtr();
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_FINI_ARRAY:
|
|
|
|
|
BC->FiniArrayAddress = Dyn.getPtr();
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_FINI_ARRAYSZ:
|
|
|
|
|
BC->FiniArraySize = Dyn.getPtr();
|
2020-06-23 12:22:58 -07:00
|
|
|
break;
|
|
|
|
|
case ELF::DT_RELA:
|
2021-06-30 14:38:50 -07:00
|
|
|
DynamicRelocationsAddress = Dyn.getPtr();
|
2020-06-23 12:22:58 -07:00
|
|
|
break;
|
|
|
|
|
case ELF::DT_RELASZ:
|
2021-06-30 14:38:50 -07:00
|
|
|
DynamicRelocationsSize = Dyn.getVal();
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_JMPREL:
|
|
|
|
|
PLTRelocationsAddress = Dyn.getPtr();
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_PLTRELSZ:
|
|
|
|
|
PLTRelocationsSize = Dyn.getVal();
|
2020-06-23 12:22:58 -07:00
|
|
|
break;
|
2022-07-11 09:49:41 -07:00
|
|
|
case ELF::DT_RELACOUNT:
|
|
|
|
|
DynamicRelativeRelocationsCount = Dyn.getVal();
|
|
|
|
|
break;
|
2023-03-15 00:08:11 +04:00
|
|
|
case ELF::DT_RELR:
|
|
|
|
|
DynamicRelrAddress = Dyn.getPtr();
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_RELRSZ:
|
|
|
|
|
DynamicRelrSize = Dyn.getVal();
|
|
|
|
|
break;
|
|
|
|
|
case ELF::DT_RELRENT:
|
|
|
|
|
DynamicRelrEntrySize = Dyn.getVal();
|
|
|
|
|
break;
|
2020-06-23 12:22:58 -07:00
|
|
|
}
|
2019-12-13 17:27:03 -08:00
|
|
|
}
|
2021-06-30 14:38:50 -07:00
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
if (!DynamicRelocationsAddress || !DynamicRelocationsSize) {
|
|
|
|
|
DynamicRelocationsAddress.reset();
|
2021-06-30 14:38:50 -07:00
|
|
|
DynamicRelocationsSize = 0;
|
2022-07-11 09:49:41 -07:00
|
|
|
}
|
2021-06-30 14:38:50 -07:00
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
if (!PLTRelocationsAddress || !PLTRelocationsSize) {
|
|
|
|
|
PLTRelocationsAddress.reset();
|
2021-06-30 14:38:50 -07:00
|
|
|
PLTRelocationsSize = 0;
|
2022-07-11 09:49:41 -07:00
|
|
|
}
|
2023-03-15 00:08:11 +04:00
|
|
|
|
|
|
|
|
if (!DynamicRelrAddress || !DynamicRelrSize) {
|
|
|
|
|
DynamicRelrAddress.reset();
|
|
|
|
|
DynamicRelrSize = 0;
|
|
|
|
|
} else if (!DynamicRelrEntrySize) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: expected DT_RELRENT to be presented "
|
|
|
|
|
<< "in DYNAMIC section\n";
|
2023-03-15 00:08:11 +04:00
|
|
|
exit(1);
|
|
|
|
|
} else if (DynamicRelrSize % DynamicRelrEntrySize) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->errs() << "BOLT-ERROR: expected RELR table size to be divisible "
|
|
|
|
|
<< "by RELR entry size\n";
|
2023-03-15 00:08:11 +04:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-08 09:17:41 -08:00
|
|
|
return Error::success();
|
2019-12-13 17:27:03 -08:00
|
|
|
}
|
|
|
|
|
|
2016-09-27 19:09:38 -07:00
|
|
|
uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress);
|
2016-09-27 19:09:38 -07:00
|
|
|
if (!Function)
|
|
|
|
|
return 0;
|
2020-09-14 15:48:32 -07:00
|
|
|
|
2017-05-08 22:51:36 -07:00
|
|
|
return Function->getOutputAddress();
|
2016-02-08 10:02:48 -08:00
|
|
|
}
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
uint64_t RewriteInstance::getNewFunctionOrDataAddress(uint64_t OldAddress) {
|
|
|
|
|
if (uint64_t Function = getNewFunctionAddress(OldAddress))
|
|
|
|
|
return Function;
|
|
|
|
|
|
|
|
|
|
const BinaryData *BD = BC->getBinaryDataAtAddress(OldAddress);
|
|
|
|
|
if (BD && BD->isMoved())
|
|
|
|
|
return BD->getOutputAddress();
|
|
|
|
|
|
2024-04-24 14:03:33 -07:00
|
|
|
if (const BinaryFunction *BF =
|
|
|
|
|
BC->getBinaryFunctionContainingAddress(OldAddress)) {
|
|
|
|
|
if (BF->isEmitted()) {
|
2024-08-07 15:57:25 +08:00
|
|
|
// If OldAddress is the another entry point of
|
|
|
|
|
// the function, then BOLT could get the new address.
|
|
|
|
|
if (BF->isMultiEntry()) {
|
|
|
|
|
for (const BinaryBasicBlock &BB : *BF)
|
|
|
|
|
if (BB.isEntryPoint() &&
|
|
|
|
|
(BF->getAddress() + BB.getOffset()) == OldAddress)
|
2025-03-19 14:55:59 +00:00
|
|
|
return BB.getOutputStartAddress();
|
2024-08-07 15:57:25 +08:00
|
|
|
}
|
2024-04-24 14:03:33 -07:00
|
|
|
BC->errs() << "BOLT-ERROR: unable to get new address corresponding to "
|
|
|
|
|
"input address 0x"
|
|
|
|
|
<< Twine::utohexstr(OldAddress) << " in function " << *BF
|
|
|
|
|
<< ". Consider adding this function to --skip-funcs=...\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
void RewriteInstance::rewriteFile() {
|
2020-05-07 23:00:29 -07:00
|
|
|
std::error_code EC;
|
2020-12-01 16:29:39 -08:00
|
|
|
Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC,
|
2021-12-14 16:52:51 -08:00
|
|
|
sys::fs::OF_None);
|
2020-05-07 23:00:29 -07:00
|
|
|
check_error(EC, "cannot create output executable file");
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
raw_fd_ostream &OS = Out->os();
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2020-05-07 23:00:29 -07:00
|
|
|
// Copy allocatable part of the input.
|
|
|
|
|
OS << InputFile->getData().substr(0, FirstNonAllocatableOffset);
|
|
|
|
|
|
2021-04-01 11:43:00 -07:00
|
|
|
auto Streamer = BC->createStreamer(OS);
|
2016-02-12 19:01:53 -08:00
|
|
|
// Make sure output stream has enough reserved space, otherwise
|
|
|
|
|
// pwrite() will fail.
|
2024-04-29 14:44:04 -07:00
|
|
|
uint64_t Offset = std::max(getFileOffsetForAddress(NextAvailableAddress),
|
|
|
|
|
FirstNonAllocatableOffset);
|
|
|
|
|
Offset = OS.seek(Offset);
|
|
|
|
|
assert((Offset != (uint64_t)-1) && "Error resizing output file");
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2021-09-10 16:19:50 -07:00
|
|
|
// Overwrite functions with fixed output address. This is mostly used by
|
|
|
|
|
// non-relocation mode, with one exception: injected functions are covered
|
|
|
|
|
// here in both modes.
|
2020-10-09 16:06:27 -07:00
|
|
|
uint64_t CountOverwrittenFunctions = 0;
|
|
|
|
|
uint64_t OverwrittenScore = 0;
|
|
|
|
|
for (BinaryFunction *Function : BC->getAllBinaryFunctions()) {
|
|
|
|
|
if (Function->getImageAddress() == 0 || Function->getImageSize() == 0)
|
|
|
|
|
continue;
|
2016-04-05 19:35:45 -07:00
|
|
|
|
2024-12-13 13:14:02 -08:00
|
|
|
assert(Function->getImageSize() <= Function->getMaxSize() &&
|
|
|
|
|
"Unexpected large function");
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2022-08-24 18:07:06 -07:00
|
|
|
const auto HasAddress = [](const FunctionFragment &FF) {
|
|
|
|
|
return FF.empty() ||
|
|
|
|
|
(FF.getImageAddress() != 0 && FF.getImageSize() != 0);
|
|
|
|
|
};
|
|
|
|
|
const bool SplitFragmentsHaveAddress =
|
|
|
|
|
llvm::all_of(Function->getLayout().getSplitFragments(), HasAddress);
|
|
|
|
|
if (Function->isSplit() && !SplitFragmentsHaveAddress) {
|
|
|
|
|
const auto HasNoAddress = [](const FunctionFragment &FF) {
|
|
|
|
|
return FF.getImageAddress() == 0 && FF.getImageSize() == 0;
|
|
|
|
|
};
|
|
|
|
|
assert(llvm::all_of(Function->getLayout().getSplitFragments(),
|
|
|
|
|
HasNoAddress) &&
|
|
|
|
|
"Some split fragments have an address while others do not");
|
2022-09-19 10:42:49 -07:00
|
|
|
(void)HasNoAddress;
|
2020-10-09 16:06:27 -07:00
|
|
|
continue;
|
2022-08-24 18:07:06 -07:00
|
|
|
}
|
2016-09-08 14:52:26 -07:00
|
|
|
|
2020-10-09 16:06:27 -07:00
|
|
|
OverwrittenScore += Function->getFunctionScore();
|
2023-06-12 13:16:09 -07:00
|
|
|
++CountOverwrittenFunctions;
|
|
|
|
|
|
2020-10-09 16:06:27 -07:00
|
|
|
// Overwrite function in the output file.
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::Verbosity >= 2)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT: rewriting function \"" << *Function << "\"\n";
|
2021-12-23 12:38:33 -08:00
|
|
|
|
2020-10-09 16:06:27 -07:00
|
|
|
OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()),
|
2021-12-14 16:52:51 -08:00
|
|
|
Function->getImageSize(), Function->getFileOffset());
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2020-10-09 16:06:27 -07:00
|
|
|
// Write nops at the end of the function.
|
|
|
|
|
if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
uint64_t Pos = OS.tell();
|
2020-10-09 16:06:27 -07:00
|
|
|
OS.seek(Function->getFileOffset() + Function->getImageSize());
|
2023-06-12 18:21:58 -07:00
|
|
|
BC->MAB->writeNopData(
|
|
|
|
|
OS, Function->getMaxSize() - Function->getImageSize(), &*BC->STI);
|
2020-12-01 16:29:39 -08:00
|
|
|
|
2017-01-17 15:49:59 -08:00
|
|
|
OS.seek(Pos);
|
2020-10-09 16:06:27 -07:00
|
|
|
}
|
2017-01-17 15:49:59 -08:00
|
|
|
|
2023-06-12 13:16:09 -07:00
|
|
|
if (!Function->isSplit())
|
2020-10-09 16:06:27 -07:00
|
|
|
continue;
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2020-10-09 16:06:27 -07:00
|
|
|
// Write cold part
|
2023-06-12 13:16:09 -07:00
|
|
|
if (opts::Verbosity >= 2) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << formatv("BOLT: rewriting function \"{0}\" (split parts)\n",
|
|
|
|
|
*Function);
|
2023-06-12 13:16:09 -07:00
|
|
|
}
|
2021-12-23 12:38:33 -08:00
|
|
|
|
2022-08-24 18:07:06 -07:00
|
|
|
for (const FunctionFragment &FF :
|
|
|
|
|
Function->getLayout().getSplitFragments()) {
|
|
|
|
|
OS.pwrite(reinterpret_cast<char *>(FF.getImageAddress()),
|
|
|
|
|
FF.getImageSize(), FF.getFileOffset());
|
|
|
|
|
}
|
2020-10-09 16:06:27 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Print function statistics for non-relocation mode.
|
|
|
|
|
if (!BC->HasRelocations) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT: " << CountOverwrittenFunctions << " out of "
|
|
|
|
|
<< BC->getBinaryFunctions().size()
|
|
|
|
|
<< " functions were overwritten.\n";
|
2017-11-28 09:57:21 -08:00
|
|
|
if (BC->TotalScore != 0) {
|
2021-12-14 16:52:51 -08:00
|
|
|
double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0;
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << format("BOLT-INFO: rewritten functions cover %.2lf",
|
|
|
|
|
Coverage)
|
|
|
|
|
<< "% of the execution count of simple functions of "
|
|
|
|
|
"this binary\n";
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
|
|
|
|
}
|
2015-12-18 17:00:46 -08:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations && opts::TrapOldCode) {
|
2021-04-08 00:19:26 -07:00
|
|
|
uint64_t SavedPos = OS.tell();
|
2016-09-27 19:09:38 -07:00
|
|
|
// Overwrite function body to make sure we never execute these instructions.
|
2019-04-03 15:52:01 -07:00
|
|
|
for (auto &BFI : BC->getBinaryFunctions()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryFunction &BF = BFI.second;
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
if (!BF.getFileOffset() || !BF.isEmitted())
|
2016-09-27 19:09:38 -07:00
|
|
|
continue;
|
2017-01-17 15:49:59 -08:00
|
|
|
OS.seek(BF.getFileOffset());
|
2023-07-27 11:48:08 -04:00
|
|
|
StringRef TrapInstr = BC->MIB->getTrapFillValue();
|
|
|
|
|
unsigned NInstr = BF.getMaxSize() / TrapInstr.size();
|
|
|
|
|
for (unsigned I = 0; I < NInstr; ++I)
|
|
|
|
|
OS.write(TrapInstr.data(), TrapInstr.size());
|
2016-09-27 19:09:38 -07:00
|
|
|
}
|
2017-01-17 15:49:59 -08:00
|
|
|
OS.seek(SavedPos);
|
2016-03-03 10:13:11 -08:00
|
|
|
}
|
2015-12-18 17:00:46 -08:00
|
|
|
|
2021-09-10 16:19:50 -07:00
|
|
|
// Write all allocatable sections - reloc-mode text is written here as well
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinarySection &Section : BC->allocatableSections()) {
|
2025-04-15 12:59:05 +01:00
|
|
|
if (!Section.isFinalized() || !Section.getOutputData()) {
|
|
|
|
|
LLVM_DEBUG(if (opts::Verbosity > 1) {
|
|
|
|
|
dbgs() << "BOLT-INFO: new section is finalized or !getOutputData, skip "
|
|
|
|
|
<< Section.getName() << '\n';
|
|
|
|
|
});
|
2016-02-08 10:02:48 -08:00
|
|
|
continue;
|
2025-04-15 12:59:05 +01:00
|
|
|
}
|
|
|
|
|
if (Section.isLinkOnly()) {
|
|
|
|
|
LLVM_DEBUG(if (opts::Verbosity > 1) {
|
|
|
|
|
dbgs() << "BOLT-INFO: new section is link only, skip "
|
|
|
|
|
<< Section.getName() << '\n';
|
|
|
|
|
});
|
2023-08-21 10:10:48 +02:00
|
|
|
continue;
|
2025-04-15 12:59:05 +01:00
|
|
|
}
|
2020-02-18 09:20:17 -08:00
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT: writing new section " << Section.getName()
|
|
|
|
|
<< "\n data at 0x"
|
|
|
|
|
<< Twine::utohexstr(Section.getAllocAddress()) << "\n of size "
|
|
|
|
|
<< Section.getOutputSize() << "\n at offset "
|
2025-04-15 12:59:05 +01:00
|
|
|
<< Section.getOutputFileOffset() << " with content size "
|
|
|
|
|
<< Section.getOutputContents().size() << '\n';
|
2024-11-19 12:59:05 -08:00
|
|
|
OS.seek(Section.getOutputFileOffset());
|
|
|
|
|
Section.write(OS);
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
}
|
|
|
|
|
|
2021-12-23 12:38:33 -08:00
|
|
|
for (BinarySection &Section : BC->allocatableSections())
|
2021-12-14 16:52:51 -08:00
|
|
|
Section.flushPendingRelocations(OS, [this](const MCSymbol *S) {
|
|
|
|
|
return getNewValueForSymbol(S->getName());
|
|
|
|
|
});
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2016-11-11 14:33:34 -08:00
|
|
|
// If .eh_frame is present create .eh_frame_hdr.
|
2022-09-22 12:05:12 -07:00
|
|
|
if (EHFrameSection)
|
2018-02-01 16:33:43 -08:00
|
|
|
writeEHFrameHeader();
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2019-04-12 17:33:46 -07:00
|
|
|
// Add BOLT Addresses Translation maps to allow profile collection to
|
|
|
|
|
// happen in the output binary
|
|
|
|
|
if (opts::EnableBAT)
|
|
|
|
|
addBATSection();
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Patch program header table.
|
2025-07-02 11:22:12 -07:00
|
|
|
if (!BC->IsLinuxKernel) {
|
|
|
|
|
updateSegmentInfo();
|
2024-02-01 12:11:26 -08:00
|
|
|
patchELFPHDRTable();
|
2025-07-02 11:22:12 -07:00
|
|
|
}
|
2016-02-08 10:02:48 -08:00
|
|
|
|
2017-05-16 17:29:31 -07:00
|
|
|
// Finalize memory image of section string table.
|
|
|
|
|
finalizeSectionStringTable();
|
|
|
|
|
|
2017-09-20 10:43:01 -07:00
|
|
|
// Update symbol tables.
|
|
|
|
|
patchELFSymTabs();
|
2017-06-27 16:25:59 -07:00
|
|
|
|
2019-04-12 17:33:46 -07:00
|
|
|
if (opts::EnableBAT)
|
|
|
|
|
encodeBATSection();
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Copy non-allocatable sections once allocatable part is finished.
|
|
|
|
|
rewriteNoteSections();
|
|
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (BC->HasRelocations) {
|
2018-08-16 16:53:14 -07:00
|
|
|
patchELFAllocatableRelaSections();
|
2023-03-15 00:08:11 +04:00
|
|
|
patchELFAllocatableRelrSection();
|
2017-01-17 15:49:59 -08:00
|
|
|
patchELFGOT();
|
|
|
|
|
}
|
2016-09-27 19:09:38 -07:00
|
|
|
|
2022-07-11 09:49:41 -07:00
|
|
|
// Patch dynamic section/segment.
|
|
|
|
|
patchELFDynamic();
|
|
|
|
|
|
2016-03-03 10:13:11 -08:00
|
|
|
// Update ELF book-keeping info.
|
|
|
|
|
patchELFSectionHeaderTable();
|
2015-11-23 17:54:18 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
if (opts::PrintSections) {
|
2024-02-12 14:53:53 -08:00
|
|
|
BC->outs() << "BOLT-INFO: Sections after processing:\n";
|
|
|
|
|
BC->printSections(BC->outs());
|
2018-02-01 16:33:43 -08:00
|
|
|
}
|
|
|
|
|
|
2015-11-23 17:54:18 -08:00
|
|
|
Out->keep();
|
2023-09-17 00:12:16 +08:00
|
|
|
EC = sys::fs::setPermissions(
|
|
|
|
|
opts::OutputFilename,
|
|
|
|
|
static_cast<sys::fs::perms>(sys::fs::perms::all_all &
|
|
|
|
|
~sys::fs::getUmask()));
|
2020-12-01 16:29:39 -08:00
|
|
|
check_error(EC, "cannot set permissions of output file");
|
2015-11-23 17:54:18 -08:00
|
|
|
}
|
2016-03-02 18:40:10 -08:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
void RewriteInstance::writeEHFrameHeader() {
|
2022-09-22 12:05:12 -07:00
|
|
|
BinarySection *NewEHFrameSection =
|
|
|
|
|
getSection(getNewSecPrefix() + getEHFrameSectionName());
|
|
|
|
|
|
|
|
|
|
// No need to update the header if no new .eh_frame was created.
|
|
|
|
|
if (!NewEHFrameSection)
|
|
|
|
|
return;
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true,
|
2022-09-22 12:05:12 -07:00
|
|
|
NewEHFrameSection->getOutputAddress());
|
2020-12-01 16:29:39 -08:00
|
|
|
Error E = NewEHFrame.parse(DWARFDataExtractor(
|
2022-09-22 12:05:12 -07:00
|
|
|
NewEHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(),
|
2020-12-01 16:29:39 -08:00
|
|
|
BC->AsmInfo->getCodePointerSize()));
|
|
|
|
|
check_error(std::move(E), "failed to parse EH frame");
|
2018-02-01 16:33:43 -08:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
uint64_t RelocatedEHFrameAddress = 0;
|
|
|
|
|
StringRef RelocatedEHFrameContents;
|
|
|
|
|
BinarySection *RelocatedEHFrameSection =
|
|
|
|
|
getSection(".relocated" + getEHFrameSectionName());
|
|
|
|
|
if (RelocatedEHFrameSection) {
|
|
|
|
|
RelocatedEHFrameAddress = RelocatedEHFrameSection->getOutputAddress();
|
|
|
|
|
RelocatedEHFrameContents = RelocatedEHFrameSection->getOutputContents();
|
|
|
|
|
}
|
|
|
|
|
DWARFDebugFrame RelocatedEHFrame(BC->TheTriple->getArch(), true,
|
|
|
|
|
RelocatedEHFrameAddress);
|
|
|
|
|
Error Er = RelocatedEHFrame.parse(DWARFDataExtractor(
|
|
|
|
|
RelocatedEHFrameContents, BC->AsmInfo->isLittleEndian(),
|
|
|
|
|
BC->AsmInfo->getCodePointerSize()));
|
2020-12-01 16:29:39 -08:00
|
|
|
check_error(std::move(Er), "failed to parse EH frame");
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2024-04-26 14:13:23 -07:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT: writing a new " << getEHFrameHdrSectionName()
|
|
|
|
|
<< '\n');
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2024-11-18 20:42:38 -08:00
|
|
|
// Try to overwrite the original .eh_frame_hdr if the size permits.
|
|
|
|
|
uint64_t EHFrameHdrOutputAddress = 0;
|
|
|
|
|
uint64_t EHFrameHdrFileOffset = 0;
|
|
|
|
|
std::vector<char> NewEHFrameHdr;
|
|
|
|
|
BinarySection *OldEHFrameHdrSection = getSection(getEHFrameHdrSectionName());
|
|
|
|
|
if (OldEHFrameHdrSection) {
|
|
|
|
|
NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader(
|
|
|
|
|
RelocatedEHFrame, NewEHFrame, OldEHFrameHdrSection->getAddress());
|
|
|
|
|
if (NewEHFrameHdr.size() <= OldEHFrameHdrSection->getSize()) {
|
|
|
|
|
BC->outs() << "BOLT-INFO: rewriting " << getEHFrameHdrSectionName()
|
|
|
|
|
<< " in-place\n";
|
|
|
|
|
EHFrameHdrOutputAddress = OldEHFrameHdrSection->getAddress();
|
|
|
|
|
EHFrameHdrFileOffset = OldEHFrameHdrSection->getInputFileOffset();
|
|
|
|
|
} else {
|
|
|
|
|
OldEHFrameHdrSection->setOutputName(getOrgSecPrefix() +
|
|
|
|
|
getEHFrameHdrSectionName());
|
|
|
|
|
OldEHFrameHdrSection = nullptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2024-11-18 20:42:38 -08:00
|
|
|
// If there was not enough space, allocate more memory for .eh_frame_hdr.
|
|
|
|
|
if (!OldEHFrameHdrSection) {
|
|
|
|
|
NextAvailableAddress =
|
|
|
|
|
appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign);
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2024-11-18 20:42:38 -08:00
|
|
|
EHFrameHdrOutputAddress = NextAvailableAddress;
|
|
|
|
|
EHFrameHdrFileOffset = getFileOffsetForAddress(NextAvailableAddress);
|
|
|
|
|
|
|
|
|
|
NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader(
|
|
|
|
|
RelocatedEHFrame, NewEHFrame, EHFrameHdrOutputAddress);
|
|
|
|
|
|
|
|
|
|
NextAvailableAddress += NewEHFrameHdr.size();
|
|
|
|
|
if (!BC->BOLTReserved.empty() &&
|
|
|
|
|
(NextAvailableAddress > BC->BOLTReserved.end())) {
|
|
|
|
|
BC->errs() << "BOLT-ERROR: unable to fit " << getEHFrameHdrSectionName()
|
|
|
|
|
<< " into reserved space\n";
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Create a new entry in the section header table.
|
|
|
|
|
const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true,
|
|
|
|
|
/*IsText=*/false,
|
|
|
|
|
/*IsAllocatable=*/true);
|
|
|
|
|
BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection(
|
|
|
|
|
getNewSecPrefix() + getEHFrameHdrSectionName(), ELF::SHT_PROGBITS,
|
|
|
|
|
Flags, nullptr, NewEHFrameHdr.size(), /*Alignment=*/1);
|
|
|
|
|
EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset);
|
|
|
|
|
EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress);
|
|
|
|
|
EHFrameHdrSec.setOutputName(getEHFrameHdrSectionName());
|
|
|
|
|
}
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2024-04-29 14:44:04 -07:00
|
|
|
Out->os().seek(EHFrameHdrFileOffset);
|
2018-02-01 16:33:43 -08:00
|
|
|
Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size());
|
2016-11-11 14:33:34 -08:00
|
|
|
|
2024-11-18 20:42:38 -08:00
|
|
|
// Pad the contents if overwriting in-place.
|
2022-09-22 12:05:12 -07:00
|
|
|
if (OldEHFrameHdrSection)
|
2024-11-18 20:42:38 -08:00
|
|
|
Out->os().write_zeros(OldEHFrameHdrSection->getSize() -
|
|
|
|
|
NewEHFrameHdr.size());
|
2024-04-29 14:44:04 -07:00
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
// Merge new .eh_frame with the relocated original so that gdb can locate all
|
|
|
|
|
// FDEs.
|
|
|
|
|
if (RelocatedEHFrameSection) {
|
|
|
|
|
const uint64_t NewEHFrameSectionSize =
|
|
|
|
|
RelocatedEHFrameSection->getOutputAddress() +
|
|
|
|
|
RelocatedEHFrameSection->getOutputSize() -
|
|
|
|
|
NewEHFrameSection->getOutputAddress();
|
|
|
|
|
NewEHFrameSection->updateContents(NewEHFrameSection->getOutputData(),
|
|
|
|
|
NewEHFrameSectionSize);
|
|
|
|
|
BC->deregisterSection(*RelocatedEHFrameSection);
|
2020-04-19 12:55:43 -07:00
|
|
|
}
|
2018-02-01 16:33:43 -08:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is "
|
2022-09-22 12:05:12 -07:00
|
|
|
<< NewEHFrameSection->getOutputSize() << '\n');
|
2016-11-11 14:33:34 -08:00
|
|
|
}
|
|
|
|
|
|
2020-06-22 16:16:08 -07:00
|
|
|
uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) {
|
2025-02-20 17:14:33 -08:00
|
|
|
auto Value = Linker->lookupSymbolInfo(Name);
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
For loading object, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
if (Value)
|
2025-02-20 17:14:33 -08:00
|
|
|
return Value->Address;
|
2020-06-22 16:16:08 -07:00
|
|
|
|
|
|
|
|
// Return the original value if we haven't emitted the symbol.
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryData *BD = BC->getBinaryDataByName(Name);
|
2020-06-22 16:16:08 -07:00
|
|
|
if (!BD)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
return BD->getAddress();
|
|
|
|
|
}
|
|
|
|
|
|
2017-01-17 15:49:59 -08:00
|
|
|
uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const {
|
|
|
|
|
// Check if it's possibly part of the new segment.
|
2024-05-01 18:22:38 -07:00
|
|
|
if (NewTextSegmentAddress && Address >= NewTextSegmentAddress)
|
2017-01-17 15:49:59 -08:00
|
|
|
return Address - NewTextSegmentAddress + NewTextSegmentOffset;
|
|
|
|
|
|
|
|
|
|
// Find an existing segment that matches the address.
|
2020-06-26 16:52:07 -07:00
|
|
|
const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address);
|
|
|
|
|
if (SegmentInfoI == BC->SegmentMapInfo.begin())
|
2017-01-17 15:49:59 -08:00
|
|
|
return 0;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second;
|
2017-01-17 15:49:59 -08:00
|
|
|
if (Address < SegmentInfo.Address ||
|
|
|
|
|
Address >= SegmentInfo.Address + SegmentInfo.FileSize)
|
|
|
|
|
return 0;
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
return SegmentInfo.FileOffset + Address - SegmentInfo.Address;
|
2017-01-17 15:49:59 -08:00
|
|
|
}
|
|
|
|
|
|
2017-02-07 12:20:46 -08:00
|
|
|
/// Return true if the section called \p SectionName is going to be overwritten:
/// either it is listed in SectionsToOverwrite or DebugSectionsToOverwrite, or
/// it is a uniquely named section that is both allocatable and finalized.
bool RewriteInstance::willOverwriteSection(StringRef SectionName) {
  const bool IsListed =
      llvm::is_contained(SectionsToOverwrite, SectionName) ||
      llvm::is_contained(DebugSectionsToOverwrite, SectionName);
  if (IsListed)
    return true;

  ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName);
  if (!Section)
    return false;

  return Section->isAllocatable() && Section->isFinalized();
}
|
2019-04-26 15:30:12 -07:00
|
|
|
|
|
|
|
|
/// Return true if \p SectionName names a debug-information section: anything
/// starting with ".debug_" or ".zdebug_", or exactly ".gdb_index", ".stab" or
/// ".stabstr".
bool RewriteInstance::isDebugSection(StringRef SectionName) {
  const bool HasDebugPrefix = SectionName.starts_with(".debug_") ||
                              SectionName.starts_with(".zdebug_");
  if (HasDebugPrefix)
    return true;

  return SectionName == ".gdb_index" || SectionName == ".stab" ||
         SectionName == ".stabstr";
}
|