//===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BinaryContext class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryEmitter.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"
#include <algorithm>
#include <functional>
#include <iterator>
#include <unordered_set>

using namespace llvm;

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"

namespace opts {

static cl::opt<bool>
    NoHugePages("no-huge-pages",
                cl::desc("use regular size pages for code alignment"),
                cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool>
    PrintDebugInfo("print-debug-info",
                   cl::desc("print debug info when printing functions"),
                   cl::Hidden, cl::ZeroOrMore, cl::cat(BoltCategory));

cl::opt<bool> PrintRelocations(
    "print-relocations",
    cl::desc("print relocations when printing functions/objects"), cl::Hidden,
    cl::cat(BoltCategory));

static cl::opt<bool>
    PrintMemData("print-mem-data",
                 cl::desc("print memory data annotations when printing "
                          "functions"),
                 cl::Hidden, cl::ZeroOrMore, cl::cat(BoltCategory));

cl::opt<std::string> CompDirOverride(
    "comp-dir-override",
    cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
             "location, which is used with DW_AT_dwo_name to construct a path "
             "to *.dwo files."),
    cl::Hidden, cl::init(""), cl::cat(BoltCategory));
} // namespace opts

namespace llvm {

namespace bolt {

char BOLTError::ID = 0;

BOLTError::BOLTError(bool IsFatal, const Twine &S)
    : IsFatal(IsFatal), Msg(S.str()) {}

void BOLTError::log(raw_ostream &OS) const {
  if (IsFatal)
    OS << "FATAL ";
  StringRef ErrMsg = StringRef(Msg);
  // Prepend our error prefix if it is missing.
  if (ErrMsg.empty()) {
    OS << "BOLT-ERROR\n";
  } else {
    if (!ErrMsg.starts_with("BOLT-ERROR"))
      OS << "BOLT-ERROR: ";
    OS << ErrMsg << "\n";
  }
}

std::error_code BOLTError::convertToErrorCode() const {
  return inconvertibleErrorCode();
}

Error createNonFatalBOLTError(const Twine &S) {
  return make_error<BOLTError>(/*IsFatal*/ false, S);
}

Error createFatalBOLTError(const Twine &S) {
  return make_error<BOLTError>(/*IsFatal*/ true, S);
}
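
// A minimal usage sketch (illustrative, not part of the upstream sources):
// a pass typically constructs one of these errors and hands it to the
// context, which prints it and exits on fatal errors:
//
//   Error E = createFatalBOLTError("BOLT-ERROR: unsupported input");
//   BC.logBOLTErrorsAndQuitOnFatal(std::move(E)); // logs, then exit(1)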

void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
  handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) {
    if (!E.getMessage().empty())
      E.log(this->errs());
    if (E.isFatal())
      exit(1);
  });
}

BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
                             std::unique_ptr<DWARFContext> DwCtx,
                             std::unique_ptr<Triple> TheTriple,
                             std::shared_ptr<orc::SymbolStringPool> SSP,
                             const Target *TheTarget, std::string TripleName,
                             std::unique_ptr<MCCodeEmitter> MCE,
                             std::unique_ptr<MCObjectFileInfo> MOFI,
                             std::unique_ptr<const MCAsmInfo> AsmInfo,
                             std::unique_ptr<const MCInstrInfo> MII,
                             std::unique_ptr<const MCSubtargetInfo> STI,
                             std::unique_ptr<MCInstPrinter> InstPrinter,
                             std::unique_ptr<const MCInstrAnalysis> MIA,
                             std::unique_ptr<MCPlusBuilder> MIB,
                             std::unique_ptr<const MCRegisterInfo> MRI,
                             std::unique_ptr<MCDisassembler> DisAsm,
                             JournalingStreams Logger)
    : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
      TheTriple(std::move(TheTriple)), SSP(std::move(SSP)),
      TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)),
      MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)),
      STI(std::move(STI)), InstPrinter(std::move(InstPrinter)),
      MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)),
      DisAsm(std::move(DisAsm)), Logger(Logger), InitialDynoStats(isAArch64()) {
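  // Worked example of the alignment choice below (the constants live in
  // BinaryContext.h; the values here are illustrative): on x86-64 the regular
  // page is 4KiB and the huge page is 2MiB, so code gets 2MiB alignment
  // unless -no-huge-pages is passed.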
  RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
  PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
}

BinaryContext::~BinaryContext() {
  for (BinarySection *Section : Sections)
    delete Section;
  for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
    delete InjectedFunction;
  for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
    delete JTI.second;
  clearBinaryData();
}

/// Create BinaryContext for a given target triple \p TheTriple.
Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
    Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP,
    StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC,
    std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
  StringRef ArchName = "";
  std::string FeaturesStr = "";
  switch (TheTriple.getArch()) {
  case llvm::Triple::x86_64:
    if (Features)
      return createFatalBOLTError(
          "x86_64 target does not use SubtargetFeatures");
    ArchName = "x86-64";
    FeaturesStr = "+nopl";
    break;
  case llvm::Triple::aarch64:
    if (Features)
      return createFatalBOLTError(
          "AArch64 target does not use SubtargetFeatures");
    ArchName = "aarch64";
    FeaturesStr = "+all";
    break;
  case llvm::Triple::riscv64: {
    ArchName = "riscv64";
    if (!Features)
      return createFatalBOLTError("RISCV target needs SubtargetFeatures");
    // We rely on relaxation for some transformations (e.g., promoting all
    // calls to PseudoCALL and then making JITLink relax them). Since the
    // relax feature is not stored in the object file, we manually enable it.
    Features->AddFeature("relax");
    FeaturesStr = Features->getString();
    break;
  }
  default:
    return createStringError(std::errc::not_supported,
                             "BOLT-ERROR: Unrecognized machine in ELF file");
  }

  const std::string TripleName = TheTriple.str();

  std::string Error;
  const Target *TheTarget =
      TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
  if (!TheTarget)
    return createStringError(make_error_code(std::errc::not_supported),
                             Twine("BOLT-ERROR: ", Error));

  std::unique_ptr<const MCRegisterInfo> MRI(
      TheTarget->createMCRegInfo(TripleName));
  if (!MRI)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no register info for target ", TripleName));

  // Set up disassembler.
  std::unique_ptr<MCAsmInfo> AsmInfo(
      TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
  if (!AsmInfo)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no assembly info for target ", TripleName));
  // BOLT creates "func@PLT" symbols for PLT entries. In a function assembly
  // dump we want to emit such names using @PLT without double quotes, to
  // convey the variant kind to the assembler. BOLT doesn't rely on the linker,
  // so we can override the default AsmInfo behavior to emit names the way we
  // want.
  AsmInfo->setAllowAtInName(true);

  std::unique_ptr<const MCSubtargetInfo> STI(
      TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
  if (!STI)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no subtarget info for target ", TripleName));

  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
  if (!MII)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no instruction info for target ", TripleName));

  std::unique_ptr<MCContext> Ctx(
      new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
  std::unique_ptr<MCObjectFileInfo> MOFI(
      TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
  Ctx->setObjectFileInfo(MOFI.get());
  // We do not support the large code model on X86. Change this in the future.
  bool Large = false;
  if (TheTriple.getArch() == llvm::Triple::aarch64)
    Large = true;
  unsigned LSDAEncoding =
      Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
  if (IsPIC) {
    LSDAEncoding = dwarf::DW_EH_PE_pcrel |
                   (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
  }
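
  // For example (illustrative, not an exhaustive table): a small-code-model
  // PIC binary on x86-64 ends up with DW_EH_PE_pcrel | DW_EH_PE_sdata4, i.e.
  // LSDA references are encoded as 4-byte signed PC-relative offsets.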

  std::unique_ptr<MCDisassembler> DisAsm(
      TheTarget->createMCDisassembler(*STI, *Ctx));

  if (!DisAsm)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no disassembler info for target ", TripleName));

  std::unique_ptr<const MCInstrAnalysis> MIA(
      TheTarget->createMCInstrAnalysis(MII.get()));
  if (!MIA)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: failed to create instruction analysis for target ",
              TripleName));

  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
  std::unique_ptr<MCInstPrinter> InstructionPrinter(
      TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
                                     *MII, *MRI));
  if (!InstructionPrinter)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
  InstructionPrinter->setPrintImmHex(true);

  std::unique_ptr<MCCodeEmitter> MCE(
      TheTarget->createMCCodeEmitter(*MII, *Ctx));

  auto BC = std::make_unique<BinaryContext>(
      std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
      std::move(SSP), TheTarget, std::string(TripleName), std::move(MCE),
      std::move(MOFI), std::move(AsmInfo), std::move(MII), std::move(STI),
      std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
      std::move(DisAsm), Logger);

  BC->LSDAEncoding = LSDAEncoding;

  BC->MAB = std::unique_ptr<MCAsmBackend>(
      BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));

  BC->setFilename(InputFileName);

  BC->HasFixedLoadAddress = !IsPIC;
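
  // Note (an assumption, not upstream text): the context keeps two
  // disassembler instances. The plain DisAsm created above decodes raw bytes,
  // while SymbolicDisAsm below is the instance that later has a symbolizer
  // attached so operands can be rendered through symbols.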
  BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
      BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));

  if (!BC->SymbolicDisAsm)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no disassembler info for target ", TripleName));

  return std::move(BC);
}
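
// A minimal creation sketch (illustrative; the real call site typically
// lives in RewriteInstance and supplies values taken from the input binary;
// SSP, DwCtx, and Logger here are assumed to exist in the caller):
//
//   Expected<std::unique_ptr<BinaryContext>> BCOrErr =
//       BinaryContext::createBinaryContext(
//           Triple("x86_64-unknown-linux-gnu"), SSP, "a.out",
//           /*Features=*/nullptr, /*IsPIC=*/false, std::move(DwCtx), Logger);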

bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
  if (opts::HotText &&
      (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
    return true;

  if (opts::HotData &&
      (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
    return true;

  if (SymbolName == "_end")
    return true;

  return false;
}

std::unique_ptr<MCObjectWriter>
BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
  return MAB->createObjectWriter(OS);
}
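
// Illustrative example for validateObjectNesting below (editorial sketch,
// with made-up addresses): BinaryDataMap is keyed and therefore sorted by
// address, so nesting can be verified in a single forward scan. If object A
// covers [0x100, 0x200) and the next object B covers [0x120, 0x140) in the
// same section, then B is contained in A and must record A as its Parent.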
bool BinaryContext::validateObjectNesting() const {
  auto Itr = BinaryDataMap.begin();
  auto End = BinaryDataMap.end();
  bool Valid = true;
  while (Itr != End) {
    auto Next = std::next(Itr);
    while (Next != End &&
           Itr->second->getSection() == Next->second->getSection() &&
           Itr->second->containsRange(Next->second->getAddress(),
                                      Next->second->getSize())) {
      if (Next->second->Parent != Itr->second) {
        this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
                     << "BOLT-WARNING: " << *Itr->second << "\n"
                     << "BOLT-WARNING: " << *Next->second << "\n";
        Valid = false;
      }
      ++Next;
    }
    Itr = Next;
  }
  return Valid;
}

bool BinaryContext::validateHoles() const {
  bool Valid = true;
  for (BinarySection &Section : sections()) {
    for (const Relocation &Rel : Section.relocations()) {
      uint64_t RelAddr = Rel.Offset + Section.getAddress();
      const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
      if (!BD) {
        this->errs()
            << "BOLT-WARNING: no BinaryData found for relocation at address"
            << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName()
            << "\n";
        Valid = false;
      } else if (!BD->getAtomicRoot()) {
        this->errs()
            << "BOLT-WARNING: no atomic BinaryData found for relocation at "
            << "address 0x" << Twine::utohexstr(RelAddr) << " in "
            << Section.getName() << "\n";
        Valid = false;
      }
    }
  }
  return Valid;
}

void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
  const uint64_t Address = GAI->second->getAddress();
  const uint64_t Size = GAI->second->getSize();

  auto fixParents = [&](BinaryDataMapType::iterator Itr,
                        BinaryData *NewParent) {
    BinaryData *OldParent = Itr->second->Parent;
    Itr->second->Parent = NewParent;
    ++Itr;
    while (Itr != BinaryDataMap.end() && OldParent &&
           Itr->second->Parent == OldParent) {
      Itr->second->Parent = NewParent;
      ++Itr;
    }
  };

  // Check if the previous symbol contains the newly added symbol.
  if (GAI != BinaryDataMap.begin()) {
    BinaryData *Prev = std::prev(GAI)->second;
    while (Prev) {
      if (Prev->getSection() == GAI->second->getSection() &&
          Prev->containsRange(Address, Size)) {
        fixParents(GAI, Prev);
      } else {
        fixParents(GAI, nullptr);
      }
      Prev = Prev->Parent;
    }
  }

  // Check if the newly added symbol contains any subsequent symbols.
  if (Size != 0) {
    BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
    auto Itr = std::next(GAI);
    while (
        Itr != BinaryDataMap.end() &&
        BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
      Itr->second->Parent = BD;
      ++Itr;
    }
  }
}

iterator_range<BinaryContext::binary_data_iterator>
BinaryContext::getSubBinaryData(BinaryData *BD) {
  auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
  auto End = Start;
  while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
    ++End;
  return make_range(Start, End);
}

std::pair<const MCSymbol *, uint64_t>
BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
                                bool IsPCRel) {
  if (isAArch64()) {
    // Check if this is an access to a constant island and create bookkeeping
    // to keep track of it and emit it later as part of this function.
    if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
      return std::make_pair(IslandSym, 0);

    // Detect custom code written in assembly that refers to arbitrary
    // constant islands from other functions. Record this reference so we
    // can pull this constant island and emit it as part of this function
    // too.
    auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);

    if (IslandIter != AddressToConstantIslandMap.begin() &&
        (IslandIter == AddressToConstantIslandMap.end() ||
         IslandIter->first > Address))
      --IslandIter;

    if (IslandIter != AddressToConstantIslandMap.end()) {
      // Fall back to referencing the original constant island in the presence
      // of dynamic relocs, as we currently do not support cloning them.
      // Note that we might fail to link because of this if the original
      // constant island we are referring to is emitted too far away.
      if (IslandIter->second->hasDynamicRelocationAtIsland()) {
        MCSymbol *IslandSym =
            IslandIter->second->getOrCreateIslandAccess(Address);
        if (IslandSym)
          return std::make_pair(IslandSym, 0);
      } else if (MCSymbol *IslandSym =
                     IslandIter->second->getOrCreateProxyIslandAccess(Address,
                                                                      BF)) {
        BF.createIslandDependency(IslandSym, IslandIter->second);
        return std::make_pair(IslandSym, 0);
      }
    }
  }
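
  // Background sketch (illustrative note, not upstream text): on AArch64 a
  // "constant island" is data embedded in the code stream, such as a literal
  // pool. The map lookup above is what allows one function to reference, and
  // later re-emit, island bytes that originally belong to another function.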

  // Note that the address does not necessarily have to reside inside
  // a section, it could be an absolute address too.
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  if (Section && Section->isText()) {
    if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
      if (Address != BF.getAddress()) {
        // The address could potentially escape. Mark it as another entry
        // point into the function.
        if (opts::Verbosity >= 1) {
          this->outs() << "BOLT-INFO: potentially escaped address 0x"
                       << Twine::utohexstr(Address) << " in function " << BF
                       << '\n';
        }
        BF.HasInternalLabelReference = true;
        return std::make_pair(
            BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
      }
    } else {
      addInterproceduralReference(&BF, Address);
    }
  }

  // With relocations, catch jump table references outside of the basic block
  // containing the indirect jump.
  if (HasRelocations) {
    const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
    if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
      const MCSymbol *Symbol =
          getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);

      return std::make_pair(Symbol, 0);
    }
  }

  if (BinaryData *BD = getBinaryDataContainingAddress(Address))
    return std::make_pair(BD->getSymbol(), Address - BD->getAddress());

  // TODO: use DWARF info to get size/alignment here?
  MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
  LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
  return std::make_pair(TargetSymbol, 0);
}

MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
                                                  BinaryFunction &BF) {
  if (!isX86())
    return MemoryContentsType::UNKNOWN;

  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  if (!Section) {
    // No section: possibly an absolute address. Since we don't allow
    // internal function addresses to escape the function scope, we
    // consider it a tail call.
    if (opts::Verbosity > 1) {
      this->errs() << "BOLT-WARNING: no section for address 0x"
                   << Twine::utohexstr(Address) << " referenced from function "
                   << BF << '\n';
    }
    return MemoryContentsType::UNKNOWN;
  }

  if (Section->isVirtual()) {
    // The contents are filled at runtime.
    return MemoryContentsType::UNKNOWN;
  }

  // No support for jump tables in code yet.
  if (Section->isText())
    return MemoryContentsType::UNKNOWN;

  // Start with checking for a PIC jump table. We expect non-PIC jump tables
  // to have their high 32 bits set to 0.
  if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
    return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;

  if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
    return MemoryContentsType::POSSIBLE_JUMP_TABLE;

  return MemoryContentsType::UNKNOWN;
}

bool BinaryContext::analyzeJumpTable(const uint64_t Address,
                                     const JumpTable::JumpTableType Type,
                                     const BinaryFunction &BF,
                                     const uint64_t NextJTAddress,
                                     JumpTable::AddressesType *EntriesAsAddress,
                                     bool *HasEntryInFragment) const {
  // Target address of __builtin_unreachable.
  const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();

  // Is one of the targets __builtin_unreachable?
  bool HasUnreachable = false;

  // Does one of the entries match the function start address?
  bool HasStartAsEntry = false;

  // Number of targets other than __builtin_unreachable.
  uint64_t NumRealEntries = 0;

  // Size of the jump table without trailing __builtin_unreachable entries.
  size_t TrimmedSize = 0;

  auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
    if (!EntriesAsAddress)
      return;
    EntriesAsAddress->emplace_back(EntryAddress);
    if (!Unreachable)
      TrimmedSize = EntriesAsAddress->size();
  };

  auto printEntryDiagnostics = [&](raw_ostream &OS,
                                   const BinaryFunction *TargetBF) {
    OS << "FAIL: function doesn't contain this address\n";
    if (!TargetBF)
      return;
    OS << "  ! function containing this address: " << *TargetBF << '\n';
    if (!TargetBF->isFragment())
      return;
    OS << "  ! is a fragment with parents: ";
    ListSeparator LS;
    for (BinaryFunction *Parent : TargetBF->ParentFragments)
      OS << LS << *Parent;
    OS << '\n';
  };

  ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return false;

  // The upper bound is defined by the containing object, section limits, and
  // the next jump table in memory.
  uint64_t UpperBound = Section->getEndAddress();
  const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
  if (JumpTableBD && JumpTableBD->getSize()) {
    assert(JumpTableBD->getEndAddress() <= UpperBound &&
           "data object cannot cross a section boundary");
    UpperBound = JumpTableBD->getEndAddress();
  }
  if (NextJTAddress)
    UpperBound = std::min(NextJTAddress, UpperBound);

  LLVM_DEBUG({
    using JTT = JumpTable::JumpTableType;
    dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
                      Address, BF.getPrintName(),
                      Type == JTT::JTT_PIC ? "PIC" : "Normal");
  });
  const uint64_t EntrySize = getJumpTableEntrySize(Type);
  for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
       EntryAddress += EntrySize) {
    LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
                      << " -> ");
    // Check if there's a proper relocation against the jump table entry.
    if (HasRelocations) {
      if (Type == JumpTable::JTT_PIC &&
          !DataPCRelocations.count(EntryAddress)) {
        LLVM_DEBUG(
            dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
        break;
      }
      if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
        LLVM_DEBUG(
            dbgs()
            << "FAIL: JTT_NORMAL table, no relocation for this address\n");
        break;
      }
    }
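
    // Decoding sketch for the expression below (illustrative values): a PIC
    // table stores signed offsets relative to the table start, so a 4-byte
    // entry holding -0x40 in a table at 0x2000 targets 0x2000 - 0x40 =
    // 0x1fc0. A normal table stores the absolute target pointer directly.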
    const uint64_t Value =
        (Type == JumpTable::JTT_PIC)
            ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
            : *getPointerAtAddress(EntryAddress);

    // __builtin_unreachable() case.
    if (Value == UnreachableAddress) {
      addEntryAddress(Value, /*Unreachable*/ true);
      HasUnreachable = true;
      LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
      continue;
    }

    // Function start is another special case. It is allowed in the jump
    // table, but we need at least one other regular entry to distinguish
    // the table from, e.g., a function pointer array.
    if (Value == BF.getAddress()) {
      HasStartAsEntry = true;
      addEntryAddress(Value);
      continue;
    }

    // Function or one of its fragments.
    const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
    if (!TargetBF || !areRelatedFragments(TargetBF, &BF)) {
      LLVM_DEBUG(printEntryDiagnostics(dbgs(), TargetBF));
      (void)printEntryDiagnostics;
      break;
    }

    // Check there's an instruction at this offset.
    if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
        !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
      LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
      break;
    }

    ++NumRealEntries;
    LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));

    if (TargetBF != &BF && HasEntryInFragment)
      *HasEntryInFragment = true;
    addEntryAddress(Value);
  }

  // Trim direct/normal jump table to exclude trailing unreachable entries that
  // can collide with a function address.
  if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
      TrimmedSize != EntriesAsAddress->size() &&
      getBinaryFunctionAtAddress(UnreachableAddress))
    EntriesAsAddress->resize(TrimmedSize);

  // It's a jump table if the number of real entries is more than 1, or there's
  // one real entry and one or more special targets. If there are only special
  // targets, then it's not a jump table.
  return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
}
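
// Worked example for analyzeJumpTable's final acceptance check (illustrative):
// two real code targets pass (2 + 0 >= 2); one real target plus a
// __builtin_unreachable slot passes (1 + 1 >= 2); a lone code pointer, which
// could simply be a function pointer stored in data, is rejected (1 + 0 < 2).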

void BinaryContext::populateJumpTables() {
  LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
                    << '\n');
  for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
       ++JTI) {
    JumpTable *JT = JTI->second;

    if (!llvm::all_of(JT->Parents, std::mem_fn(&BinaryFunction::isSimple)))
      continue;

    uint64_t NextJTAddress = 0;
    auto NextJTI = std::next(JTI);
    if (NextJTI != JTE)
      NextJTAddress = NextJTI->second->getAddress();

    const bool Success =
        analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
                         NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
    if (!Success) {
      LLVM_DEBUG({
        dbgs() << "failed to analyze ";
        JT->print(dbgs());
        if (NextJTI != JTE) {
          dbgs() << "next ";
          NextJTI->second->print(dbgs());
        }
      });
      llvm_unreachable("jump table heuristic failure");
    }

    for (BinaryFunction *Frag : JT->Parents) {
      if (JT->IsSplit)
        Frag->setHasIndirectTargetToSplitFragment(true);
      for (uint64_t EntryAddress : JT->EntriesAsAddress)
        // If the target is __builtin_unreachable.
        if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
          Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
                                             Frag->getSize());
        } else if (EntryAddress >= Frag->getAddress() &&
                   EntryAddress < Frag->getAddress() + Frag->getSize()) {
          Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
        }
    }

    // In strict mode, erase the PC-relative relocation record. Later we check
    // that all such records are erased and thus have been accounted for.
    if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
      for (uint64_t Address = JT->getAddress();
           Address < JT->getAddress() + JT->getSize();
           Address += JT->EntrySize) {
        DataPCRelocations.erase(DataPCRelocations.find(Address));
      }
    }

    // Mark the function and all its fragments to be skipped.
    for (BinaryFunction *Frag : JT->Parents)
      if (Frag->hasIndirectTargetToSplitFragment())
        addFragmentsToSkip(Frag);
  }

  if (opts::StrictMode && DataPCRelocations.size()) {
    LLVM_DEBUG({
      dbgs() << DataPCRelocations.size()
             << " unclaimed PC-relative relocations left in data:\n";
      for (uint64_t Reloc : DataPCRelocations)
        dbgs() << Twine::utohexstr(Reloc) << '\n';
    });
    assert(0 && "unclaimed PC-relative relocations left in data\n");
  }
  clearList(DataPCRelocations);
}
|
|
|
|
|
|
|
|
|
|
void BinaryContext::skipMarkedFragments() {
|
[BOLT] Support multiple parents for split jump table
There are two assumptions regarding jump table:
(a) It is accessed by only one fragment, say, Parent
(b) All entries target instructions in Parent
For (a), BOLT stores jump table entries as relative offset to Parent.
For (b), BOLT treats jump table entries target somewhere out of Parent
as INVALID_OFFSET, including fragment of same split function.
In this update, we extend (a) and (b) to include fragment of same split
functinon. For (a), we store jump table entries in absolute offset
instead. In addition, jump table will store all fragments that access
it. A fragment uses this information to only create label for jump table
entries that target to that fragment.
For (b), using absolute offset allows jump table entries to target
fragments of same split function, i.e., extend support for split jump
table. This can be done using relocation (fragment start/size) and
fragment detection heuristics (e.g., using symbol name pattern for
non-stripped binaries).
For jump table targets that can only be reached by one fragment, we
mark them as local label; otherwise, they would be the secondary
function entry to the target fragment.
Test Plan
```
ninja check-bolt
```
Reviewed By: Amir
Differential Revision: https://reviews.llvm.org/D128474
2022-07-13 23:35:51 -07:00
|
|
|
std::vector<BinaryFunction *> FragmentQueue;
|
|
|
|
|
// Copy the functions to FragmentQueue.
|
|
|
|
|
FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
|
2021-12-01 21:14:56 -08:00
|
|
|
auto addToWorklist = [&](BinaryFunction *Function) -> void {
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
if (FragmentsToSkip.count(Function))
|
2021-12-01 21:14:56 -08:00
|
|
|
return;
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
FragmentQueue.push_back(Function);
|
|
|
|
|
addFragmentsToSkip(Function);
|
2021-12-01 21:14:56 -08:00
|
|
|
};
|
2020-11-06 11:19:03 -08:00
|
|
|
// Functions containing split jump tables need to be skipped with all
|
2021-12-01 21:14:56 -08:00
|
|
|
// fragments (transitively).
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
for (size_t I = 0; I != FragmentQueue.size(); I++) {
|
|
|
|
|
BinaryFunction *BF = FragmentQueue[I];
|
|
|
|
|
assert(FragmentsToSkip.count(BF) &&
|
2021-12-01 21:14:56 -08:00
|
|
|
"internal error in traversing function fragments");
|
|
|
|
|
if (opts::Verbosity >= 1)
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
|
2022-06-10 15:48:13 -07:00
|
|
|
BF->setSimple(false);
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
BF->setHasIndirectTargetToSplitFragment(true);
|
2022-06-10 15:48:13 -07:00
|
|
|
|
2022-06-23 22:15:47 -07:00
|
|
|
llvm::for_each(BF->Fragments, addToWorklist);
|
|
|
|
|
llvm::for_each(BF->ParentFragments, addToWorklist);
|
2020-11-06 11:19:03 -08:00
|
|
|
}
|
2022-02-14 10:31:43 -08:00
|
|
|
if (!FragmentsToSkip.empty())
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
|
|
|
|
|
<< " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
|
|
|
|
|
<< " due to cold fragments\n";
|
2019-06-12 18:21:02 -07:00
|
|
|
}
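To make the traversal above easier to follow, here is a minimal, self-contained sketch of the same worklist pattern. `Fragment`, `skipTransitively`, and the container choices are invented stand-ins for BOLT's `BinaryFunction`, `FragmentsToSkip`, and related helpers, not the actual API.
```
#include <set>
#include <vector>

// Invented stand-in for BinaryFunction's fragment links.
struct Fragment {
  std::vector<Fragment *> Fragments;       // child fragments
  std::vector<Fragment *> ParentFragments; // parent fragments
};

// Mark every fragment reachable from the initial set, in both directions.
// The queue grows while it is iterated, so it is walked by index; each
// fragment enters the queue at most once thanks to the set-insert check.
void skipTransitively(std::set<Fragment *> &ToSkip) {
  std::vector<Fragment *> Queue(ToSkip.begin(), ToSkip.end());
  for (size_t I = 0; I != Queue.size(); ++I) {
    auto Add = [&](Fragment *Related) {
      if (ToSkip.insert(Related).second)
        Queue.push_back(Related);
    };
    for (Fragment *Child : Queue[I]->Fragments)
      Add(Child);
    for (Fragment *Parent : Queue[I]->ParentFragments)
      Add(Parent);
  }
}
```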
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
|
2017-11-14 20:05:11 -08:00
|
|
|
uint64_t Size,
|
|
|
|
|
uint16_t Alignment,
|
2018-04-20 20:03:31 -07:00
|
|
|
unsigned Flags) {
|
2017-11-14 20:05:11 -08:00
|
|
|
auto Itr = BinaryDataMap.find(Address);
|
|
|
|
|
if (Itr != BinaryDataMap.end()) {
|
|
|
|
|
assert(Itr->second->getSize() == Size || !Size);
|
|
|
|
|
return Itr->second->getSymbol();
|
2015-10-14 15:35:14 -07:00
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
|
|
|
|
|
assert(!GlobalSymbols.count(Name) && "created name is not unique");
|
2018-04-20 20:03:31 -07:00
|
|
|
return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
|
2017-11-14 20:05:11 -08:00
|
|
|
}
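For illustration only, a standalone sketch of the naming scheme used above (the helper name is invented): the symbol name is the prefix followed by the hexadecimal address, so a prefix of "DATAat" and address 0x4004d0 yield "DATAat0x4004d0".
```
#include <cstdint>
#include <cstdio>
#include <string>

// Invented helper mirroring the Prefix + "0x" + Twine::utohexstr(Address)
// construction above.
std::string makeGlobalSymbolName(const std::string &Prefix, uint64_t Address) {
  char Hex[32];
  std::snprintf(Hex, sizeof(Hex), "0x%llx",
                static_cast<unsigned long long>(Address));
  return Prefix + Hex;
}
```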
|
2015-10-14 15:35:14 -07:00
|
|
|
|
2021-06-30 14:38:50 -07:00
|
|
|
MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
|
|
|
|
|
return Ctx->getOrCreateSymbol(Name);
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
BinaryFunction *BinaryContext::createBinaryFunction(
|
|
|
|
|
const std::string &Name, BinarySection &Section, uint64_t Address,
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, e.g., due to a data
object embedded in code or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
The '-skip=<func1,func2,...>' option can now be used to skip processing
of arbitrary functions in relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed; as such, these options are incompatible with '-lite', and
'-skip' will only disable optimizations of the listed functions, not
their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
|
2019-04-03 15:52:01 -07:00
|
|
|
auto Result = BinaryFunctions.emplace(
|
[BOLT] Support for lite mode with relocations
2020-06-15 00:15:47 -07:00
|
|
|
Address, BinaryFunction(Name, Section, Address, Size, *this));
|
2019-04-03 15:52:01 -07:00
|
|
|
assert(Result.second && "unexpected duplicate function");
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryFunction *BF = &Result.first->second;
|
2019-04-03 15:52:01 -07:00
|
|
|
registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
|
|
|
|
|
Alignment);
|
|
|
|
|
setSymbolToFunctionMap(BF->getSymbol(), BF);
|
|
|
|
|
return BF;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-02 16:56:41 -07:00
|
|
|
const MCSymbol *
|
|
|
|
|
BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
|
|
|
|
|
JumpTable::JumpTableType Type) {
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
// Two fragments of the same function access the same jump table.
|
2021-04-08 00:19:26 -07:00
|
|
|
if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
|
2019-05-02 17:42:06 -07:00
|
|
|
assert(JT->Type == Type && "jump table types have to match");
|
2019-06-28 09:21:27 -07:00
|
|
|
assert(Address == JT->getAddress() && "unexpected non-empty jump table");
|
2019-05-02 17:42:06 -07:00
|
|
|
|
2025-04-10 21:17:04 -07:00
|
|
|
if (llvm::is_contained(JT->Parents, &Function))
|
|
|
|
|
return JT->getFirstLabel();
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
|
2025-04-10 21:17:04 -07:00
|
|
|
// Prevent associating a jump table with a specific fragment twice.
|
|
|
|
|
auto isSibling = std::bind(&BinaryContext::areRelatedFragments, this,
|
|
|
|
|
&Function, std::placeholders::_1);
|
|
|
|
|
assert(llvm::all_of(JT->Parents, isSibling) &&
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
"cannot re-use jump table of a different function");
|
2025-04-12 23:35:48 -07:00
|
|
|
(void)isSibling;
|
2025-04-10 21:17:04 -07:00
|
|
|
if (opts::Verbosity > 2) {
|
|
|
|
|
this->outs() << "BOLT-INFO: multiple fragments access the same jump table"
|
|
|
|
|
<< ": " << *JT->Parents[0] << "; " << Function << '\n';
|
|
|
|
|
JT->print(this->outs());
|
|
|
|
|
}
|
|
|
|
|
if (JT->Parents.size() == 1)
|
|
|
|
|
JT->Parents.front()->setHasIndirectTargetToSplitFragment(true);
|
|
|
|
|
Function.setHasIndirectTargetToSplitFragment(true);
|
|
|
|
|
// Duplicate the entry for the parent function for easy access.
|
|
|
|
|
JT->Parents.push_back(&Function);
|
|
|
|
|
Function.JumpTables.emplace(Address, JT);
|
2019-07-02 16:56:41 -07:00
|
|
|
return JT->getFirstLabel();
|
2019-05-02 17:42:06 -07:00
|
|
|
}
|
|
|
|
|
|
2019-06-28 09:21:27 -07:00
|
|
|
// Re-use the existing symbol if possible.
|
2021-05-13 10:50:47 -07:00
|
|
|
MCSymbol *JTLabel = nullptr;
|
2021-04-08 00:19:26 -07:00
|
|
|
if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
|
2019-06-28 09:21:27 -07:00
|
|
|
if (!isInternalSymbolName(Object->getSymbol()->getName()))
|
|
|
|
|
JTLabel = Object->getSymbol();
|
|
|
|
|
}
|
2019-08-19 14:06:36 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint64_t EntrySize = getJumpTableEntrySize(Type);
|
2019-06-28 09:21:27 -07:00
|
|
|
if (!JTLabel) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const std::string JumpTableName = generateJumpTableName(Function, Address);
|
2020-01-10 16:17:47 -08:00
|
|
|
JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
|
2019-06-28 09:21:27 -07:00
|
|
|
}
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
|
|
|
|
|
<< " in function " << Function << '\n');
|
2019-05-02 17:42:06 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
JumpTable::LabelMapType{{0, JTLabel}},
|
2021-04-08 00:19:26 -07:00
|
|
|
*getSectionForAddress(Address));
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
JT->Parents.push_back(&Function);
|
|
|
|
|
if (opts::Verbosity > 2)
|
2024-02-12 14:53:53 -08:00
|
|
|
JT->print(this->outs());
|
2019-05-02 17:42:06 -07:00
|
|
|
JumpTables.emplace(Address, JT);
|
|
|
|
|
|
|
|
|
|
// Duplicate the entry for the parent function for easy access.
|
|
|
|
|
Function.JumpTables.emplace(Address, JT);
|
2019-07-02 16:56:41 -07:00
|
|
|
return JTLabel;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::pair<uint64_t, const MCSymbol *>
|
|
|
|
|
BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
|
|
|
|
|
const MCSymbol *OldLabel) {
|
2019-08-07 16:09:50 -07:00
|
|
|
auto L = scopeLock();
|
2019-07-02 16:56:41 -07:00
|
|
|
unsigned Offset = 0;
|
|
|
|
|
bool Found = false;
|
2021-04-08 00:19:26 -07:00
|
|
|
for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
|
2019-07-02 16:56:41 -07:00
|
|
|
if (Elmt.second != OldLabel)
|
|
|
|
|
continue;
|
|
|
|
|
Offset = Elmt.first;
|
|
|
|
|
Found = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
assert(Found && "Label not found");
|
2022-05-17 14:30:00 -07:00
|
|
|
(void)Found;
|
2021-04-08 00:19:26 -07:00
|
|
|
MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
|
|
|
|
|
JumpTable *NewJT =
|
|
|
|
|
new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
JumpTable::LabelMapType{{Offset, NewLabel}},
|
2021-04-08 00:19:26 -07:00
|
|
|
*getSectionForAddress(JT->getAddress()));
|
[BOLT] Support multiple parents for split jump table
2022-07-13 23:35:51 -07:00
|
|
|
NewJT->Parents = JT->Parents;
|
2019-07-02 16:56:41 -07:00
|
|
|
NewJT->Entries = JT->Entries;
|
|
|
|
|
NewJT->Counts = JT->Counts;
|
|
|
|
|
uint64_t JumpTableID = ++DuplicatedJumpTables;
|
|
|
|
|
// Invert it to differentiate from regular jump tables whose IDs are their
|
|
|
|
|
// addresses in the input binary memory space.
|
|
|
|
|
JumpTableID = ~JumpTableID;
|
|
|
|
|
JumpTables.emplace(JumpTableID, NewJT);
|
|
|
|
|
Function.JumpTables.emplace(JumpTableID, NewJT);
|
|
|
|
|
return std::make_pair(JumpTableID, NewLabel);
|
2019-05-02 17:42:06 -07:00
|
|
|
}
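A quick self-contained check of the ID scheme above: regular jump tables are keyed by their input-binary address, while duplicated tables receive bit-inverted counters, which occupy the very top of the 64-bit key space and so cannot collide with real addresses.
```
#include <cassert>
#include <cstdint>

int main() {
  uint64_t DuplicatedJumpTables = 0;
  // First two duplicated-table IDs: 0xfffffffffffffffe, 0xfffffffffffffffd.
  const uint64_t First = ~(++DuplicatedJumpTables);
  const uint64_t Second = ~(++DuplicatedJumpTables);
  assert(First == 0xfffffffffffffffeULL && Second == 0xfffffffffffffffdULL);
  return 0;
}
```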
|
|
|
|
|
|
|
|
|
|
std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
|
|
|
|
|
uint64_t Address) {
|
|
|
|
|
size_t Id;
|
|
|
|
|
uint64_t Offset = 0;
|
2021-04-08 00:19:26 -07:00
|
|
|
if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
|
2019-05-02 17:42:06 -07:00
|
|
|
Offset = Address - JT->getAddress();
|
2024-05-22 09:27:14 -07:00
|
|
|
auto JTLabelsIt = JT->Labels.find(Offset);
|
|
|
|
|
if (JTLabelsIt != JT->Labels.end())
|
|
|
|
|
return std::string(JTLabelsIt->second->getName());
|
|
|
|
|
|
|
|
|
|
auto JTIdsIt = JumpTableIds.find(JT->getAddress());
|
|
|
|
|
assert(JTIdsIt != JumpTableIds.end());
|
|
|
|
|
Id = JTIdsIt->second;
|
2019-05-02 17:42:06 -07:00
|
|
|
} else {
|
|
|
|
|
Id = JumpTableIds[Address] = BF.JumpTables.size();
|
|
|
|
|
}
|
2020-01-13 11:56:59 -08:00
|
|
|
return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
|
2019-05-02 17:42:06 -07:00
|
|
|
(Offset ? ("." + std::to_string(Offset)) : ""));
|
|
|
|
|
}
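A standalone sketch of the name format produced above, with an invented helper name: the offset component appears only when the address points into the middle of an existing table.
```
#include <cstdint>
#include <string>

// e.g. jumpTableName("main", 0, 0) == "JUMP_TABLE/main.0"
//      jumpTableName("main", 0, 8) == "JUMP_TABLE/main.0.8"
std::string jumpTableName(const std::string &Func, size_t Id,
                          uint64_t Offset) {
  return "JUMP_TABLE/" + Func + "." + std::to_string(Id) +
         (Offset ? "." + std::to_string(Offset) : "");
}
```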
|
|
|
|
|
|
[BOLT] Add code padding verification
Summary:
In non-relocation mode, we allow data objects to be embedded in the
code. Such objects could be unmarked and could occupy an area between
functions, an area that is considered code padding.
When we disassemble code, we detect references into the padding area
and adjust it so that it is not overwritten during code emission.
We assume the reference points to the beginning of the object.
However, assembly-written functions may reference the middle of an
object and use negative offsets to reference data fields. Thus,
conservatively, we reduce the possibly-overwritten padding area to
a minimum if an object reference was detected.
Since we also allow functions with unknown code in non-relocation
mode, it is possible that we miss references to some objects in code.
To cover such cases, we need to verify the padding area before we
allow it to be overwritten.
(cherry picked from FBD16477787)
2019-07-23 20:48:41 -07:00
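Before the full implementation below, here is a deliberately simplified, self-contained sketch of the verification idea. It skips only zero bytes and single-byte x86 NOPs (0x90) instead of disassembling real instructions, so it illustrates the interleaving loop, not BOLT's actual check, which also accepts breakpoint padding and short forward jumps.
```
#include <cstdint>
#include <functional>
#include <vector>

// Padding [Size, MaxSize) is accepted only if interleaved runs of
// skippable bytes cover it completely.
bool paddingLooksValid(const std::vector<uint8_t> &Data, uint64_t Size,
                       uint64_t MaxSize) {
  uint64_t Offset = Size;
  auto skipWhile = [&](const std::function<bool(uint8_t)> &Pred) {
    const uint64_t Start = Offset;
    while (Offset < MaxSize && Pred(Data[Offset]))
      ++Offset;
    return Offset - Start; // number of bytes consumed
  };
  while (skipWhile([](uint8_t B) { return B == 0x00; }) ||
         skipWhile([](uint8_t B) { return B == 0x90; })) // one-byte NOP
    ;
  return Offset == MaxSize;
}
```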
|
|
|
bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
|
|
|
|
|
// FIXME: aarch64 support is missing.
|
|
|
|
|
if (!isX86())
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (BF.getSize() == BF.getMaxSize())
|
|
|
|
|
return true;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
|
[BOLT] Add code padding verification
2019-07-23 20:48:41 -07:00
|
|
|
assert(FunctionData && "cannot get function as data");
|
|
|
|
|
|
|
|
|
|
uint64_t Offset = BF.getSize();
|
|
|
|
|
MCInst Instr;
|
2021-05-13 10:50:47 -07:00
|
|
|
uint64_t InstrSize = 0;
|
[BOLT] Add code padding verification
2019-07-23 20:48:41 -07:00
|
|
|
uint64_t InstrAddress = BF.getAddress() + Offset;
|
|
|
|
|
using std::placeholders::_1;
|
|
|
|
|
|
|
|
|
|
// Skip instructions that satisfy the predicate condition.
|
|
|
|
|
auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint64_t StartOffset = Offset;
|
[BOLT] Add code padding verification
2019-07-23 20:48:41 -07:00
|
|
|
for (; Offset < BF.getMaxSize();
|
|
|
|
|
Offset += InstrSize, InstrAddress += InstrSize) {
|
2021-12-14 16:52:51 -08:00
|
|
|
if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
|
|
|
|
|
InstrAddress, nulls()))
|
[BOLT] Add code padding verification
2019-07-23 20:48:41 -07:00
|
|
|
break;
|
|
|
|
|
if (!Predicate(Instr))
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return Offset - StartOffset;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Skip a sequence of zero bytes.
|
|
|
|
|
auto skipZeros = [&]() {
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint64_t StartOffset = Offset;
|
[BOLT] Add code padding verification
2019-07-23 20:48:41 -07:00
|
|
|
for (; Offset < BF.getMaxSize(); ++Offset)
|
|
|
|
|
if ((*FunctionData)[Offset] != 0)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
return Offset - StartOffset;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Accept the whole padding area filled with breakpoints.
|
|
|
|
|
auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
|
|
|
|
|
if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
|
|
|
|
|
|
|
|
|
|
// Some functions have a jump to the next function or to the padding area
|
|
|
|
|
// inserted after the body.
|
|
|
|
|
auto isSkipJump = [&](const MCInst &Instr) {
|
2021-05-13 10:50:47 -07:00
|
|
|
uint64_t TargetAddress = 0;
|
[BOLT] Add code padding verification
2019-07-23 20:48:41 -07:00
|
|
|
if (MIB->isUnconditionalBranch(Instr) &&
|
|
|
|
|
MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
|
|
|
|
|
if (TargetAddress >= InstrAddress + InstrSize &&
|
|
|
|
|
TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
|
2021-12-14 16:52:51 -08:00
|
|
|
while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
|
[BOLT] Add code padding verification
2019-07-23 20:48:41 -07:00
|
|
|
skipZeros())
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
if (Offset == BF.getMaxSize())
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs() << "BOLT-WARNING: bad padding at address 0x"
|
|
|
|
|
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
|
|
|
|
|
<< " starting at offset " << (Offset - BF.getSize())
|
|
|
|
|
<< " in function " << BF << '\n'
|
|
|
|
|
<< FunctionData->slice(BF.getSize(),
|
|
|
|
|
BF.getMaxSize() - BF.getSize())
|
|
|
|
|
<< '\n';
|
[BOLT] Add code padding verification
Summary:
In non-relocation mode, we allow data objects to be embedded in the
code. Such objects could be unmarked, and could occupy an area between
functions, the area which is considered to be code padding.
When we disassemble code, we detect references into the padding area
and adjust it, so that it is not overwritten during the code emission.
We assume the reference to be pointing to the beginning of the object.
However, assembly-written functions may reference the middle of an
object and use negative offsets to reference data fields. Thus,
conservatively, we reduce the possibly-overwritten padding area to
a minimum if the object reference was detected.
Since we also allow functions with unknown code in non-relocation mode,
it is possible that we miss references to some objects in code.
To cover such cases, we need to verify the padding area before we
allow to overwrite it.
(cherry picked from FBD16477787)
2019-07-23 20:48:41 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryContext::adjustCodePadding() {
|
|
|
|
|
for (auto &BFI : BinaryFunctions) {
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryFunction &BF = BFI.second;
|
[BOLT] Support for lite mode with relocations
2020-06-15 00:15:47 -07:00
|
|
|
if (!shouldEmit(BF))
|
[BOLT] Add code padding verification
2019-07-23 20:48:41 -07:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (!hasValidCodePadding(BF)) {
|
[BOLT] Support for lite mode with relocations
2020-06-15 00:15:47 -07:00
|
|
|
if (HasRelocations) {
|
2025-06-02 12:33:54 -07:00
|
|
|
this->errs() << "BOLT-WARNING: function " << BF
|
|
|
|
|
<< " has invalid padding. Ignoring the function\n";
|
[BOLT] Support for lite mode with relocations
2020-06-15 00:15:47 -07:00
|
|
|
BF.setIgnored();
|
|
|
|
|
} else {
|
|
|
|
|
BF.setMaxSize(BF.getSize());
|
|
|
|
|
}
|
[BOLT] Add code padding verification
2019-07-23 20:48:41 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
|
2017-11-14 20:05:11 -08:00
|
|
|
uint64_t Size,
|
2018-04-20 20:03:31 -07:00
|
|
|
uint16_t Alignment,
|
2024-08-07 20:52:19 -07:00
|
|
|
unsigned Flags) {
|
2020-01-10 16:17:47 -08:00
|
|
|
// Register the name with MCContext.
|
2021-04-08 00:19:26 -07:00
|
|
|
MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
|
2020-01-10 16:17:47 -08:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
auto GAI = BinaryDataMap.find(Address);
|
2024-08-07 20:52:19 -07:00
|
|
|
BinaryData *BD;
|
2017-11-14 20:05:11 -08:00
|
|
|
if (GAI == BinaryDataMap.end()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
|
2024-08-07 20:52:19 -07:00
|
|
|
BinarySection &Section =
|
|
|
|
|
SectionOrErr ? SectionOrErr.get() : absoluteSection();
|
2021-12-14 16:52:51 -08:00
|
|
|
BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
|
2024-08-07 20:52:19 -07:00
|
|
|
Section, Flags);
|
2017-11-14 20:05:11 -08:00
|
|
|
GAI = BinaryDataMap.emplace(Address, BD).first;
|
|
|
|
|
GlobalSymbols[Name] = BD;
|
|
|
|
|
updateObjectNesting(GAI);
|
2020-01-10 16:17:47 -08:00
|
|
|
} else {
|
|
|
|
|
BD = GAI->second;
|
|
|
|
|
if (!BD->hasName(Name)) {
|
|
|
|
|
GlobalSymbols[Name] = BD;
|
2024-12-27 01:54:23 +08:00
|
|
|
BD->updateSize(Size);
|
2020-01-10 16:17:47 -08:00
|
|
|
BD->Symbols.push_back(Symbol);
|
|
|
|
|
}
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
|
|
|
|
|
2015-10-14 15:35:14 -07:00
|
|
|
return Symbol;
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
const BinaryData *
|
2020-03-03 15:51:24 -08:00
|
|
|
BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
|
2017-11-14 20:05:11 -08:00
|
|
|
auto NI = BinaryDataMap.lower_bound(Address);
|
|
|
|
|
auto End = BinaryDataMap.end();
|
2020-03-03 15:51:24 -08:00
|
|
|
if ((NI != End && Address == NI->first) ||
|
2020-03-03 13:36:32 -08:00
|
|
|
((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
|
2021-12-20 11:07:46 -08:00
|
|
|
if (NI->second->containsAddress(Address))
|
2017-11-14 20:05:11 -08:00
|
|
|
return NI->second;
|
2016-09-29 11:19:06 -07:00
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
// If this is a sub-symbol, see if a parent data contains the address.
|
2021-04-08 00:19:26 -07:00
|
|
|
const BinaryData *BD = NI->second->getParent();
|
2017-11-14 20:05:11 -08:00
|
|
|
while (BD) {
|
2020-03-03 15:51:24 -08:00
|
|
|
if (BD->containsAddress(Address))
|
2017-11-14 20:05:11 -08:00
|
|
|
return BD;
|
|
|
|
|
BD = BD->getParent();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
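The lower_bound dance above is easy to misread, so here is a self-contained sketch (invented types) of the same containing-interval lookup on a plain std::map keyed by start address.
```
#include <cstdint>
#include <map>

struct Interval {
  uint64_t Size;
};

// Find the map entry whose [start, start + Size) range contains Address.
const Interval *containing(const std::map<uint64_t, Interval> &M,
                           uint64_t Address) {
  auto It = M.lower_bound(Address); // first entry at or after Address
  if (It == M.end() || It->first != Address) {
    if (It == M.begin())
      return nullptr; // everything starts after Address
    --It;             // step back to the candidate starting before Address
  }
  return Address - It->first < It->second.Size ? &It->second : nullptr;
}
```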
|
2016-09-29 11:19:06 -07:00
|
|
|
|
2023-08-17 18:14:53 -07:00
|
|
|
BinaryData *BinaryContext::getGOTSymbol() {
|
|
|
|
|
// First, try to find a global symbol with that name.
|
|
|
|
|
BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
|
|
|
|
|
if (GOTSymBD)
|
|
|
|
|
return GOTSymBD;
|
|
|
|
|
|
|
|
|
|
// This symbol might be hidden from the run-time linker, so fetch the local
|
|
|
|
|
// definition if available.
|
|
|
|
|
GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
|
|
|
|
|
if (!GOTSymBD)
|
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
|
|
// If the local symbol is not unique, fail.
|
|
|
|
|
unsigned Index = 2;
|
|
|
|
|
SmallString<30> Storage;
|
|
|
|
|
while (const BinaryData *BD =
|
|
|
|
|
getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
|
|
|
|
|
.concat(Twine(Index++))
|
|
|
|
|
.toStringRef(Storage)))
|
|
|
|
|
if (BD->getAddress() != GOTSymBD->getAddress())
|
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
|
|
return GOTSymBD;
|
|
|
|
|
}
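The probing loop above relies on the convention of registering duplicate local names as "name/1", "name/2", and so on. A self-contained sketch of the same uniqueness check over a plain map (the function and container here are invented stand-ins for BOLT's symbol table):
```
#include <cstdint>
#include <map>
#include <string>

// Return the GOT address if every "_GLOBAL_OFFSET_TABLE_/<n>" entry
// agrees on it; return 0 if the local definition is absent or ambiguous.
uint64_t findGOTAddress(const std::map<std::string, uint64_t> &Symbols) {
  auto It = Symbols.find("_GLOBAL_OFFSET_TABLE_/1");
  if (It == Symbols.end())
    return 0;
  const uint64_t Address = It->second;
  for (unsigned Index = 2;; ++Index) {
    auto Dup = Symbols.find("_GLOBAL_OFFSET_TABLE_/" + std::to_string(Index));
    if (Dup == Symbols.end())
      return Address;
    if (Dup->second != Address)
      return 0; // non-unique local symbol
  }
}
```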
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
|
|
|
|
|
auto NI = BinaryDataMap.find(Address);
|
|
|
|
|
assert(NI != BinaryDataMap.end());
|
|
|
|
|
if (NI == BinaryDataMap.end())
|
|
|
|
|
return false;
|
2018-03-13 18:59:22 -07:00
|
|
|
// TODO: it's possible that a jump table starts at the same address
|
|
|
|
|
// as a larger blob of private data. When we set the size of the
|
|
|
|
|
// jump table, it might be smaller than the total blob size. In this
|
|
|
|
|
// case we just leave the original size since (currently) it won't really
|
2021-11-08 19:54:05 -08:00
|
|
|
// affect anything.
|
2018-03-13 18:59:22 -07:00
|
|
|
assert((!NI->second->Size || NI->second->Size == Size ||
|
|
|
|
|
(NI->second->isJumpTable() && NI->second->Size > Size)) &&
|
|
|
|
|
"can't change the size of a symbol that has already had its "
|
|
|
|
|
"size set");
|
|
|
|
|
if (!NI->second->Size) {
|
|
|
|
|
NI->second->Size = Size;
|
|
|
|
|
updateObjectNesting(NI);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
2016-09-29 11:19:06 -07:00
|
|
|
}
|
|
|
|
|
|
2018-06-06 03:17:32 -07:00
|
|
|
void BinaryContext::generateSymbolHashes() {
|
|
|
|
|
auto isPadding = [](const BinaryData &BD) {
|
2021-04-08 00:19:26 -07:00
|
|
|
StringRef Contents = BD.getSection().getContents();
|
|
|
|
|
StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
|
2023-12-13 23:34:49 -08:00
|
|
|
return (BD.getName().starts_with("HOLEat") ||
|
2018-06-06 03:17:32 -07:00
|
|
|
SymData.find_first_not_of(0) == StringRef::npos);
|
|
|
|
|
};
|
|
|
|
|
|
2018-06-11 17:17:25 -07:00
|
|
|
uint64_t NumCollisions = 0;
|
2018-06-06 03:17:32 -07:00
|
|
|
for (auto &Entry : BinaryDataMap) {
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryData &BD = *Entry.second;
|
|
|
|
|
StringRef Name = BD.getName();
|
2018-06-06 03:17:32 -07:00
|
|
|
|
2019-06-28 09:21:27 -07:00
|
|
|
if (!isInternalSymbolName(Name))
|
2018-06-06 03:17:32 -07:00
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// First check if a non-anonymous alias exists and move it to the front.
|
2020-01-10 16:17:47 -08:00
|
|
|
if (BD.getSymbols().size() > 1) {
|
2022-06-23 22:15:47 -07:00
|
|
|
auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
|
|
|
|
|
return !isInternalSymbolName(Symbol->getName());
|
|
|
|
|
});
|
2020-01-10 16:17:47 -08:00
|
|
|
if (Itr != BD.getSymbols().end()) {
|
2021-04-08 00:19:26 -07:00
|
|
|
size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
|
2020-01-10 16:17:47 -08:00
|
|
|
std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
|
2018-06-06 03:17:32 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// We have to skip 0 size symbols since they will all collide.
|
|
|
|
|
if (BD.getSize() == 0) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint64_t Hash = BD.getSection().hash(BD);
|
|
|
|
|
const size_t Idx = Name.find("0x");
|
2021-12-14 16:52:51 -08:00
|
|
|
std::string NewName =
|
|
|
|
|
(Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
|
2018-06-06 03:17:32 -07:00
|
|
|
if (getBinaryDataByName(NewName)) {
|
|
|
|
|
// Ignore collisions for symbols that appear to be padding
|
|
|
|
|
// (i.e. all zeros or a "hole")
|
|
|
|
|
if (!isPadding(BD)) {
|
2018-06-11 17:17:25 -07:00
|
|
|
if (opts::Verbosity) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
|
|
|
|
|
<< " with new name (" << NewName << "), skipping.\n";
|
2018-06-11 17:17:25 -07:00
|
|
|
}
|
|
|
|
|
++NumCollisions;
|
2018-06-06 03:17:32 -07:00
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2021-12-14 16:52:51 -08:00
|
|
|
BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
|
2018-06-06 03:17:32 -07:00
|
|
|
GlobalSymbols[NewName] = &BD;
|
|
|
|
|
}
|
2018-06-11 17:17:25 -07:00
|
|
|
if (NumCollisions) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs() << "BOLT-WARNING: " << NumCollisions
|
|
|
|
|
<< " collisions detected while hashing binary objects";
|
2018-06-11 17:17:25 -07:00
|
|
|
if (!opts::Verbosity)
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs() << ". Use -v=1 to see the list.";
|
|
|
|
|
this->errs() << '\n';
|
2018-06-11 17:17:25 -07:00
|
|
|
}
|
2018-06-06 03:17:32 -07:00
|
|
|
}
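A standalone sketch of the renaming step above (the helper name is invented): the address-derived part of an internal name such as "SYMBOLat0x4004d0" is replaced with a content hash, so the name no longer depends on the symbol's input address.
```
#include <cstdint>
#include <cstdio>
#include <string>

// Keep the part of the name before "0x" and append the hash instead,
// e.g. hashedName("SYMBOLat0x4004d0", 0x1a2b) == "SYMBOLat_1a2b".
std::string hashedName(const std::string &Name, uint64_t Hash) {
  char Hex[32];
  std::snprintf(Hex, sizeof(Hex), "%llx",
                static_cast<unsigned long long>(Hash));
  return Name.substr(0, Name.find("0x")) + "_" + Hex;
}
```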
|
|
|
|
|
|
2021-12-01 21:14:56 -08:00
|
|
|
bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
|
2024-07-24 07:15:10 -07:00
|
|
|
BinaryFunction &Function) {
|
2021-12-01 21:14:56 -08:00
|
|
|
assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
|
2023-02-09 10:56:55 -08:00
|
|
|
if (TargetFunction.isChildOf(Function))
|
2021-12-01 21:14:56 -08:00
|
|
|
return true;
|
|
|
|
|
TargetFunction.addParentFragment(Function);
|
2020-11-06 10:27:33 -08:00
|
|
|
Function.addFragment(TargetFunction);
|
2024-07-24 07:15:10 -07:00
|
|
|
FragmentClasses.unionSets(&TargetFunction, &Function);
|
2020-11-06 10:27:33 -08:00
|
|
|
if (!HasRelocations) {
|
|
|
|
|
TargetFunction.setSimple(false);
|
|
|
|
|
Function.setSimple(false);
|
|
|
|
|
}
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->outs() << "BOLT-INFO: marking " << TargetFunction
|
|
|
|
|
<< " as a fragment of " << Function << '\n';
|
2020-11-06 10:27:33 -08:00
|
|
|
}
|
2021-12-01 21:14:56 -08:00
|
|
|
return true;
|
2020-11-06 10:27:33 -08:00
|
|
|
}
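The FragmentClasses update above uses LLVM's union-find container (assuming, as the unionSets() call suggests, that it is an llvm::EquivalenceClasses over BinaryFunction pointers). A minimal sketch with plain ints standing in for the pointers; the final query mirrors what a helper like areRelatedFragments() asks:
```
#include "llvm/ADT/EquivalenceClasses.h"

int main() {
  llvm::EquivalenceClasses<int> FragmentClasses;
  FragmentClasses.unionSets(1, 2); // fragment 2 belongs with function 1
  FragmentClasses.unionSets(2, 3); // and so does fragment 3, transitively
  return FragmentClasses.isEquivalent(1, 3) ? 0 : 1; // related -> exit 0
}
```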
|
|
|
|
|
|
2022-07-07 00:01:33 +03:00
|
|
|
void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
|
|
|
|
|
MCInst &LoadLowBits,
|
|
|
|
|
MCInst &LoadHiBits,
|
|
|
|
|
uint64_t Target) {
|
|
|
|
|
const MCSymbol *TargetSymbol;
|
|
|
|
|
uint64_t Addend = 0;
|
|
|
|
|
std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
|
|
|
|
|
/*IsPCRel*/ true);
|
|
|
|
|
int64_t Val;
|
|
|
|
|
MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
|
|
|
|
|
ELF::R_AARCH64_ADR_PREL_PG_HI21);
|
|
|
|
|
MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
|
|
|
|
|
Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
|
|
|
|
|
}
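For reference, a self-contained check of the address split those two relocations encode: the ADRP materializes the target's 4 KiB page (as a page-relative delta, hence ADR_PREL_PG_HI21), and the ADD supplies the low 12 bits within that page (ADD_ABS_LO12_NC). The sketch below verifies only the page/low-12 decomposition of the target address itself.
```
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Target = 0x41234abc;
  const uint64_t Page = Target & ~uint64_t(0xfff); // ADRP: page of target
  const uint64_t Lo12 = Target & 0xfff;            // ADD: offset in page
  assert(Page + Lo12 == Target);
  return 0;
}
```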
|
|
|
|
|
|
|
|
|
|
bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
|
|
|
|
|
BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
|
|
|
|
|
if (TargetFunction)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
|
|
|
|
|
assert(Section && "cannot get section for referenced address");
|
|
|
|
|
if (!Section->isText())
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
bool Ret = false;
|
|
|
|
|
StringRef SectionContents = Section->getContents();
|
|
|
|
|
uint64_t Offset = Address - Section->getAddress();
|
|
|
|
|
const uint64_t MaxSize = SectionContents.size() - Offset;
|
|
|
|
|
const uint8_t *Bytes =
|
|
|
|
|
reinterpret_cast<const uint8_t *>(SectionContents.data());
|
|
|
|
|
ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
|
|
|
|
|
|
|
|
|
|
auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
|
|
|
|
|
MCInst &Instruction, uint64_t Offset,
|
|
|
|
|
uint64_t AbsoluteInstrAddr,
|
|
|
|
|
uint64_t TotalSize) -> bool {
|
|
|
|
|
MCInst *TargetHiBits, *TargetLowBits;
|
|
|
|
|
uint64_t TargetAddress, Count;
|
|
|
|
|
Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
|
|
|
|
|
AbsoluteInstrAddr, Instruction, TargetHiBits,
|
|
|
|
|
TargetLowBits, TargetAddress);
|
|
|
|
|
if (!Count)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (MatchOnly)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
// NOTE: the target symbol was created during disassembly, in
|
|
|
|
|
// handleExternalReference().
|
|
|
|
|
const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
|
|
|
|
|
BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
|
|
|
|
|
*Section, Address, TotalSize);
|
|
|
|
|
addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
|
|
|
|
|
TargetAddress);
|
|
|
|
|
MIB->addAnnotation(Instruction, "AArch64Veneer", true);
|
|
|
|
|
Veneer->addInstruction(Offset, std::move(Instruction));
|
|
|
|
|
--Count;
|
[bolt] Fix std::prev()-past-begin in veneer handling code
matchLinkerVeneer() returns 3 if `Instruction` and the last
two instructions in `[Instructions.begin, Instructions.end())`
match the pattern
ADRP x16, imm
ADD x16, x16, imm
BR x16
BinaryContext.cpp used to use
--Count;
for (auto It = std::prev(Instructions.end()); Count != 0;
It = std::prev(It), --Count) {
...use It...
}
to walk these instructions. The first `--Count` skips the
instruction that's in `Instruction` instead of in `Instructions`.
The loop then walks over `Instructions`.
However, on the last iteration, this calls `std::prev()` on an
iterator that points at the container's begin(), which can blow
up.
Instead, use rbegin(), which sidesteps this issue.
Fixes test/AArch64/veneer-gold.s on a macOS host.
With this, check-bolt passes on macOS.
Differential Revision: https://reviews.llvm.org/D138313
2022-11-18 14:28:59 -05:00
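A minimal standalone sketch of the fixed pattern (plain C++; the map and
its contents are hypothetical stand-ins for BOLT's InstrMapType, not BOLT
code):
  #include <cstdio>
  #include <map>
  int main() {
    std::map<unsigned, int> Instructions = {{0, 10}, {4, 20}, {8, 30}};
    unsigned Count = Instructions.size(); // visit all entries, back to front
    // rbegin() never steps before begin(), unlike std::prev() on begin().
    for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count)
      std::printf("offset %u -> %d\n", It->first, It->second);
    return 0;
  }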
|
|
|
for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
|
2022-07-07 00:01:33 +03:00
|
|
|
MIB->addAnnotation(It->second, "AArch64Veneer", true);
|
|
|
|
|
Veneer->addInstruction(It->first, std::move(It->second));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Veneer->getOrCreateLocalLabel(Address);
|
|
|
|
|
Veneer->setMaxSize(TotalSize);
|
|
|
|
|
Veneer->updateState(BinaryFunction::State::Disassembled);
|
2024-10-18 09:46:41 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << "\n");
|
2022-07-07 00:01:33 +03:00
|
|
|
return true;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
uint64_t Size = 0, TotalSize = 0;
|
|
|
|
|
BinaryFunction::InstrMapType VeneerInstructions;
|
|
|
|
|
for (Offset = 0; Offset < MaxSize; Offset += Size) {
|
|
|
|
|
MCInst Instruction;
|
|
|
|
|
const uint64_t AbsoluteInstrAddr = Address + Offset;
|
|
|
|
|
if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
|
|
|
|
|
AbsoluteInstrAddr, nulls()))
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
TotalSize += Size;
|
|
|
|
|
if (MIB->isBranch(Instruction)) {
|
|
|
|
|
Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
|
|
|
|
|
AbsoluteInstrAddr, TotalSize);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
VeneerInstructions.emplace(Offset, std::move(Instruction));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return Ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryContext::processInterproceduralReferences() {
|
|
|
|
|
for (const std::pair<BinaryFunction *, uint64_t> &It :
|
|
|
|
|
InterproceduralReferences) {
|
|
|
|
|
BinaryFunction &Function = *It.first;
|
|
|
|
|
uint64_t Address = It.second;
|
2024-05-21 20:22:12 -07:00
|
|
|
// Process interprocedural references from ignored functions in BAT mode
|
|
|
|
|
// (non-simple in non-relocation mode) to properly register entry points
|
|
|
|
|
if (!Address || (Function.isIgnored() && !HasBATSection))
|
2019-05-22 11:26:58 -07:00
|
|
|
continue;
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryFunction *TargetFunction =
|
|
|
|
|
getBinaryFunctionContainingAddress(Address);
|
2020-09-14 15:48:32 -07:00
|
|
|
if (&Function == TargetFunction)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (TargetFunction) {
|
2022-07-07 00:01:33 +03:00
|
|
|
if (TargetFunction->isFragment() &&
|
2024-07-24 07:15:10 -07:00
|
|
|
!areRelatedFragments(TargetFunction, &Function)) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs()
|
|
|
|
|
<< "BOLT-WARNING: interprocedural reference between unrelated "
|
|
|
|
|
"fragments: "
|
|
|
|
|
<< Function.getPrintName() << " and "
|
|
|
|
|
<< TargetFunction->getPrintName() << '\n';
|
2021-12-01 21:14:56 -08:00
|
|
|
}
|
2021-04-08 00:19:26 -07:00
|
|
|
if (uint64_t Offset = Address - TargetFunction->getAddress())
|
2020-11-06 10:57:47 -08:00
|
|
|
TargetFunction->addEntryPointAtOffset(Offset);
|
2019-05-22 11:26:58 -07:00
|
|
|
|
2020-09-14 15:48:32 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
2019-05-22 11:26:58 -07:00
|
|
|
|
2020-09-14 15:48:32 -07:00
|
|
|
// Check if address falls in function padding space - this could be
|
|
|
|
|
// unmarked data in code. In this case adjust the padding space size.
|
2021-04-08 00:19:26 -07:00
|
|
|
ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
|
2020-09-14 15:48:32 -07:00
|
|
|
assert(Section && "cannot get section for referenced address");
|
2019-05-22 11:26:58 -07:00
|
|
|
|
2020-09-14 15:48:32 -07:00
|
|
|
if (!Section->isText())
|
|
|
|
|
continue;
|
2019-05-22 11:26:58 -07:00
|
|
|
|
2020-09-14 15:48:32 -07:00
|
|
|
// PLT requires special handling and could be ignored in this context.
|
|
|
|
|
StringRef SectionName = Section->getName();
|
|
|
|
|
if (SectionName == ".plt" || SectionName == ".plt.got")
|
|
|
|
|
continue;
|
2019-05-22 11:26:58 -07:00
|
|
|
|
2022-07-07 00:01:33 +03:00
|
|
|
// Check if it is an AArch64 veneer written at Address.
|
|
|
|
|
if (isAArch64() && handleAArch64Veneer(Address))
|
|
|
|
|
continue;
|
|
|
|
|
|
2020-09-14 15:48:32 -07:00
|
|
|
if (opts::processAllFunctions()) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
|
|
|
|
|
<< "object in code at address 0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << " belonging to section "
|
|
|
|
|
<< SectionName << " in current mode\n";
|
2020-09-14 15:48:32 -07:00
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
TargetFunction = getBinaryFunctionContainingAddress(Address,
|
|
|
|
|
/*CheckPastEnd=*/false,
|
|
|
|
|
/*UseMaxSize=*/true);
|
2020-09-14 15:48:32 -07:00
|
|
|
// We are not going to overwrite non-simple functions, but for simple
|
|
|
|
|
// ones we adjust the padding size.
|
|
|
|
|
if (TargetFunction && TargetFunction->isSimple()) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs()
|
|
|
|
|
<< "BOLT-WARNING: function " << *TargetFunction
|
|
|
|
|
<< " has an object detected in a padding region at address 0x"
|
|
|
|
|
<< Twine::utohexstr(Address) << '\n';
|
2020-09-14 15:48:32 -07:00
|
|
|
TargetFunction->setMaxSize(TargetFunction->getSize());
|
2019-05-22 11:26:58 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-07 00:01:33 +03:00
|
|
|
InterproceduralReferences.clear();
|
2019-05-22 11:26:58 -07:00
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
void BinaryContext::postProcessSymbolTable() {
|
|
|
|
|
fixBinaryDataHoles();
|
|
|
|
|
bool Valid = true;
|
|
|
|
|
for (auto &Entry : BinaryDataMap) {
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryData *BD = Entry.second;
|
2023-12-13 23:34:49 -08:00
|
|
|
if ((BD->getName().starts_with("SYMBOLat") ||
|
|
|
|
|
BD->getName().starts_with("DATAat")) &&
|
2021-12-14 16:52:51 -08:00
|
|
|
!BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
|
2024-08-07 20:52:19 -07:00
|
|
|
BD->getSection()) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
|
|
|
|
|
<< "\n";
|
2017-11-14 20:05:11 -08:00
|
|
|
Valid = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert(Valid);
|
2022-05-17 14:30:00 -07:00
|
|
|
(void)Valid;
|
2018-06-06 03:17:32 -07:00
|
|
|
generateSymbolHashes();
|
2017-06-09 13:17:36 -07:00
|
|
|
}
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
|
2020-01-06 14:57:15 -08:00
|
|
|
BinaryFunction &ParentBF) {
|
2020-01-13 11:56:59 -08:00
|
|
|
assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
|
|
|
|
|
"cannot merge functions with multiple entry points");
|
|
|
|
|
|
2022-11-21 08:45:45 -05:00
|
|
|
std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
|
|
|
|
|
std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
|
2019-05-31 16:45:31 -07:00
|
|
|
SymbolToFunctionMapMutex, std::defer_lock);
|
|
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
const StringRef ChildName = ChildBF.getOneName();
|
2016-12-21 17:13:56 -08:00
|
|
|
|
2020-01-13 11:56:59 -08:00
|
|
|
// Move symbols over and update bookkeeping info.
|
2021-04-08 00:19:26 -07:00
|
|
|
for (MCSymbol *Symbol : ChildBF.getSymbols()) {
|
2020-01-13 11:56:59 -08:00
|
|
|
ParentBF.getSymbols().push_back(Symbol);
|
2019-05-31 16:45:31 -07:00
|
|
|
WriteSymbolMapLock.lock();
|
|
|
|
|
SymbolToFunctionMap[Symbol] = &ParentBF;
|
|
|
|
|
WriteSymbolMapLock.unlock();
|
2017-11-14 20:05:11 -08:00
|
|
|
// NB: there's no need to update BinaryDataMap and GlobalSymbols.
|
2016-12-21 17:13:56 -08:00
|
|
|
}
|
2020-01-13 11:56:59 -08:00
|
|
|
ChildBF.getSymbols().clear();
|
|
|
|
|
|
|
|
|
|
// Move other names the child function is known under.
|
2022-06-23 22:15:47 -07:00
|
|
|
llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
|
2020-01-13 11:56:59 -08:00
|
|
|
ChildBF.Aliases.clear();
|
2016-12-21 17:13:56 -08:00
|
|
|
|
2017-12-09 21:40:39 -08:00
|
|
|
if (HasRelocations) {
|
2020-04-04 20:12:38 -07:00
|
|
|
// Merge execution counts of ChildBF into those of ParentBF.
|
|
|
|
|
// Without relocations, we cannot reliably merge profiles as both functions
|
|
|
|
|
// continue to exist and either one can be executed.
|
|
|
|
|
ChildBF.mergeProfileDataInto(ParentBF);
|
|
|
|
|
|
2022-11-21 08:45:45 -05:00
|
|
|
std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
|
|
|
|
|
std::defer_lock);
|
|
|
|
|
std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
|
|
|
|
|
std::defer_lock);
|
2016-12-21 17:13:56 -08:00
|
|
|
// Remove ChildBF from the global set of functions in relocs mode.
|
2019-05-31 16:45:31 -07:00
|
|
|
ReadBfsLock.lock();
|
2019-04-03 15:52:01 -07:00
|
|
|
auto FI = BinaryFunctions.find(ChildBF.getAddress());
|
2019-05-31 16:45:31 -07:00
|
|
|
ReadBfsLock.unlock();
|
|
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
assert(FI != BinaryFunctions.end() && "function not found");
|
2016-12-21 17:13:56 -08:00
|
|
|
assert(&ChildBF == &FI->second && "function mismatch");
|
2019-05-31 16:45:31 -07:00
|
|
|
|
|
|
|
|
WriteBfsLock.lock();
|
2021-06-18 14:35:39 -07:00
|
|
|
ChildBF.clearDisasmState();
|
2019-04-03 15:52:01 -07:00
|
|
|
FI = BinaryFunctions.erase(FI);
|
2019-05-31 16:45:31 -07:00
|
|
|
WriteBfsLock.unlock();
|
|
|
|
|
|
2016-12-21 17:13:56 -08:00
|
|
|
} else {
|
|
|
|
|
// In non-relocation mode we keep the function, but rename it.
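// E.g. (hypothetical names): when "bar" is folded into "foo", all of the
// symbols of "bar" were rebound to "foo" above, and the body of "bar" is
// kept in place under the fresh name "__ICF_bar".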
|
2020-01-13 11:56:59 -08:00
|
|
|
std::string NewName = "__ICF_" + ChildName.str();
|
2019-05-31 16:45:31 -07:00
|
|
|
|
|
|
|
|
WriteCtxLock.lock();
|
2020-01-13 11:56:59 -08:00
|
|
|
ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
|
2019-05-31 16:45:31 -07:00
|
|
|
WriteCtxLock.unlock();
|
|
|
|
|
|
2020-04-04 20:12:38 -07:00
|
|
|
ChildBF.setFolded(&ParentBF);
|
2016-12-21 17:13:56 -08:00
|
|
|
}
|
2023-02-24 15:45:21 -08:00
|
|
|
|
|
|
|
|
ParentBF.setHasFunctionsFoldedInto();
|
2016-12-21 17:13:56 -08:00
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
void BinaryContext::fixBinaryDataHoles() {
|
2023-11-09 13:29:46 -06:00
|
|
|
assert(validateObjectNesting() && "object nesting inconsistency detected");
|
2017-11-14 20:05:11 -08:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
for (BinarySection &Section : allocatableSections()) {
|
2017-11-14 20:05:11 -08:00
|
|
|
std::vector<std::pair<uint64_t, uint64_t>> Holes;
|
|
|
|
|
|
|
|
|
|
auto isNotHole = [&Section](const binary_data_iterator &Itr) {
|
2021-04-08 00:19:26 -07:00
|
|
|
BinaryData *BD = Itr->second;
|
2021-12-14 16:52:51 -08:00
|
|
|
bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
|
2023-12-13 23:34:49 -08:00
|
|
|
(BD->getName().starts_with("SYMBOLat0x") ||
|
|
|
|
|
BD->getName().starts_with("DATAat0x") ||
|
|
|
|
|
BD->getName().starts_with("ANONYMOUS")));
|
2017-11-14 20:05:11 -08:00
|
|
|
return !isHole && BD->getSection() == Section && !BD->getParent();
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
auto BDStart = BinaryDataMap.begin();
|
|
|
|
|
auto BDEnd = BinaryDataMap.end();
|
|
|
|
|
auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
|
|
|
|
|
auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
|
|
|
|
|
|
|
|
|
|
uint64_t EndAddress = Section.getAddress();
|
|
|
|
|
|
|
|
|
|
while (Itr != End) {
|
2018-03-16 09:03:12 -07:00
|
|
|
if (Itr->second->getAddress() > EndAddress) {
|
2021-04-08 00:19:26 -07:00
|
|
|
uint64_t Gap = Itr->second->getAddress() - EndAddress;
|
2021-05-07 18:43:25 -07:00
|
|
|
Holes.emplace_back(EndAddress, Gap);
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
|
|
|
|
EndAddress = Itr->second->getEndAddress();
|
|
|
|
|
++Itr;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-20 11:07:46 -08:00
|
|
|
if (EndAddress < Section.getEndAddress())
|
2021-05-07 18:43:25 -07:00
|
|
|
Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
|
2017-11-14 20:05:11 -08:00
|
|
|
|
|
|
|
|
// If there is already a symbol at the start of the hole, grow that symbol
|
|
|
|
|
// to cover the rest. Otherwise, create a new symbol to cover the hole.
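// E.g. (hypothetical addresses): if covered objects end at 0x1000 and the
// next top-level symbol starts at 0x1040, the gap [0x1000, 0x1040) either
// grows a symbol that already starts at 0x1000 or becomes a new
// "HOLEat0x1000" entry of size 0x40.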
|
2021-04-08 00:19:26 -07:00
|
|
|
for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
|
|
|
|
|
BinaryData *BD = getBinaryDataAtAddress(Hole.first);
|
2017-11-14 20:05:11 -08:00
|
|
|
if (BD) {
|
|
|
|
|
// BD->getSection() can be != Section if there are sections that
|
|
|
|
|
// overlap. In this case it is probably safe to just skip the holes
|
|
|
|
|
// since the overlapping section will not(?) have any symbols in it.
|
|
|
|
|
if (BD->getSection() == Section)
|
|
|
|
|
setBinaryDataSize(Hole.first, Hole.second);
|
|
|
|
|
} else {
|
2018-09-21 12:00:20 -07:00
|
|
|
getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
|
2017-11-14 20:05:11 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-09 13:29:46 -06:00
|
|
|
assert(validateObjectNesting() && "object nesting inconsistency detected");
|
2017-11-14 20:05:11 -08:00
|
|
|
assert(validateHoles() && "top level hole detected in object map");
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
|
|
|
|
|
const BinarySection *CurrentSection = nullptr;
|
2017-11-14 20:05:11 -08:00
|
|
|
bool FirstSection = true;
|
|
|
|
|
|
|
|
|
|
for (auto &Entry : BinaryDataMap) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const BinaryData *BD = Entry.second;
|
|
|
|
|
const BinarySection &Section = BD->getSection();
|
2017-11-14 20:05:11 -08:00
|
|
|
if (FirstSection || Section != *CurrentSection) {
|
|
|
|
|
uint64_t Address, Size;
|
|
|
|
|
StringRef Name = Section.getName();
|
|
|
|
|
if (Section) {
|
|
|
|
|
Address = Section.getAddress();
|
|
|
|
|
Size = Section.getSize();
|
|
|
|
|
} else {
|
|
|
|
|
Address = BD->getAddress();
|
|
|
|
|
Size = BD->getSize();
|
|
|
|
|
}
|
|
|
|
|
OS << "BOLT-INFO: Section " << Name << ", "
|
|
|
|
|
<< "0x" + Twine::utohexstr(Address) << ":"
|
2021-12-14 16:52:51 -08:00
|
|
|
<< "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
|
2017-11-14 20:05:11 -08:00
|
|
|
CurrentSection = &Section;
|
|
|
|
|
FirstSection = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
OS << "BOLT-INFO: ";
|
2021-04-08 00:19:26 -07:00
|
|
|
const BinaryData *P = BD->getParent();
|
2017-11-14 20:05:11 -08:00
|
|
|
while (P) {
|
|
|
|
|
OS << " ";
|
|
|
|
|
P = P->getParent();
|
|
|
|
|
}
|
|
|
|
|
OS << *BD << "\n";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-04-21 15:47:49 -07:00
|
|
|
Expected<unsigned> BinaryContext::getDwarfFile(
|
|
|
|
|
StringRef Directory, StringRef FileName, unsigned FileNumber,
|
2022-12-04 21:36:08 +00:00
|
|
|
std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
|
2022-04-21 15:47:49 -07:00
|
|
|
unsigned CUID, unsigned DWARFVersion) {
|
2021-09-01 21:40:54 -07:00
|
|
|
DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
|
2022-04-21 15:47:49 -07:00
|
|
|
return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
|
|
|
|
|
FileNumber);
|
2021-09-01 21:40:54 -07:00
|
|
|
}
|
|
|
|
|
|
2016-09-02 11:58:53 -07:00
|
|
|
unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
|
|
|
|
|
const uint32_t SrcCUID,
|
|
|
|
|
unsigned FileIndex) {
|
2021-04-08 00:19:26 -07:00
|
|
|
DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
|
|
|
|
|
const DWARFDebugLine::LineTable *LineTable =
|
|
|
|
|
DwCtx->getLineTableForUnit(SrcUnit);
|
|
|
|
|
const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
|
|
|
|
|
LineTable->Prologue.FileNames;
|
2016-09-02 11:58:53 -07:00
|
|
|
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
|
|
|
|
|
// means empty dir.
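// E.g. (hypothetical DWARF v4 prologue): with include_directories[1] ==
// "/src" and file_names[1] == {Name: "a.c", DirIdx: 1}, FileIndex 1
// resolves to "/src/a.c", while DirIdx 0 would leave Dir empty.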
|
|
|
|
|
assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
|
|
|
|
|
"FileIndex out of range for the compilation unit.");
|
2018-05-04 10:10:41 -07:00
|
|
|
StringRef Dir = "";
|
|
|
|
|
if (FileNames[FileIndex - 1].DirIdx != 0) {
|
2022-12-05 00:09:22 +00:00
|
|
|
if (std::optional<const char *> DirName = dwarf::toString(
|
2018-05-04 10:10:41 -07:00
|
|
|
LineTable->Prologue
|
2020-12-01 16:29:39 -08:00
|
|
|
.IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
|
2018-05-04 10:10:41 -07:00
|
|
|
Dir = *DirName;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
StringRef FileName = "";
|
2022-12-05 00:09:22 +00:00
|
|
|
if (std::optional<const char *> FName =
|
2021-04-08 00:19:26 -07:00
|
|
|
dwarf::toString(FileNames[FileIndex - 1].Name))
|
2018-05-04 10:10:41 -07:00
|
|
|
FileName = *FName;
|
|
|
|
|
assert(FileName != "");
|
2022-04-21 15:47:49 -07:00
|
|
|
DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
|
2022-12-02 23:12:38 -08:00
|
|
|
return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
|
|
|
|
|
DestCUID, DstUnit->getVersion()));
|
2016-09-02 11:58:53 -07:00
|
|
|
}
|
|
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
|
2017-08-31 11:45:37 -07:00
|
|
|
std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
|
2023-02-02 12:02:02 -08:00
|
|
|
llvm::transform(llvm::make_second_range(BinaryFunctions),
|
|
|
|
|
SortedFunctions.begin(),
|
|
|
|
|
[](BinaryFunction &BF) { return &BF; });
|
2022-06-23 22:15:47 -07:00
|
|
|
|
2024-11-27 09:01:12 +08:00
|
|
|
llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
|
2017-08-31 11:45:37 -07:00
|
|
|
return SortedFunctions;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-09 16:06:27 -07:00
|
|
|
std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
|
|
|
|
|
std::vector<BinaryFunction *> AllFunctions;
|
|
|
|
|
AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
|
2023-02-02 12:02:02 -08:00
|
|
|
llvm::transform(llvm::make_second_range(BinaryFunctions),
|
|
|
|
|
std::back_inserter(AllFunctions),
|
|
|
|
|
[](BinaryFunction &BF) { return &BF; });
|
2022-06-23 22:15:47 -07:00
|
|
|
llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
|
2020-10-09 16:06:27 -07:00
|
|
|
|
|
|
|
|
return AllFunctions;
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 12:02:21 -08:00
|
|
|
std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
|
2021-04-01 11:43:00 -07:00
|
|
|
auto Iter = DWOCUs.find(DWOId);
|
|
|
|
|
if (Iter == DWOCUs.end())
|
2022-12-02 23:12:38 -08:00
|
|
|
return std::nullopt;
|
2021-04-01 11:43:00 -07:00
|
|
|
|
|
|
|
|
return Iter->second;
|
|
|
|
|
}
|
|
|
|
|
|
2022-06-11 11:58:10 -07:00
|
|
|
DWARFContext *BinaryContext::getDWOContext() const {
|
2021-06-18 15:57:34 -07:00
|
|
|
if (DWOCUs.empty())
|
|
|
|
|
return nullptr;
|
|
|
|
|
return &DWOCUs.begin()->second->getContext();
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-01 11:43:00 -07:00
|
|
|
/// Handles DWO sections that can either be in .o, .dwo or .dwp files.
|
|
|
|
|
void BinaryContext::preprocessDWODebugInfo() {
|
2021-06-16 09:52:03 -07:00
|
|
|
for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
|
|
|
|
|
DWARFUnit *const DwarfUnit = CU.get();
|
2022-12-05 00:09:22 +00:00
|
|
|
if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
|
2024-01-25 15:00:52 -08:00
|
|
|
std::string DWOName = dwarf::toString(
|
|
|
|
|
DwarfUnit->getUnitDIE().find(
|
|
|
|
|
{dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
|
|
|
|
|
"");
|
|
|
|
|
SmallString<16> AbsolutePath;
|
|
|
|
|
if (!opts::CompDirOverride.empty()) {
|
|
|
|
|
sys::path::append(AbsolutePath, opts::CompDirOverride);
|
|
|
|
|
sys::path::append(AbsolutePath, DWOName);
|
|
|
|
|
}
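// E.g. (hypothetical values): with opts::CompDirOverride == "/tmp/dwo" and
// a DW_AT_dwo_name of "a.dwo", AbsolutePath becomes "/tmp/dwo/a.dwo" and
// the split unit is looked up there instead of the recorded comp dir.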
|
|
|
|
|
DWARFUnit *DWOCU =
|
|
|
|
|
DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
|
2021-06-16 09:52:03 -07:00
|
|
|
if (!DWOCU->isDWOUnit()) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->outs()
|
|
|
|
|
<< "BOLT-WARNING: Debug Fission: DWO debug information for "
|
|
|
|
|
<< DWOName
|
|
|
|
|
<< " was not retrieved and won't be updated. Please check "
|
|
|
|
|
"relative path.\n";
|
2021-06-16 09:52:03 -07:00
|
|
|
continue;
|
2021-04-01 11:43:00 -07:00
|
|
|
}
|
2021-06-16 09:52:03 -07:00
|
|
|
DWOCUs[*DWOId] = DWOCU;
|
2021-04-01 11:43:00 -07:00
|
|
|
}
|
|
|
|
|
}
|
2023-01-27 15:49:01 -08:00
|
|
|
if (!DWOCUs.empty())
|
2024-02-12 14:53:53 -08:00
|
|
|
this->outs() << "BOLT-INFO: processing split DWARF\n";
|
2021-04-01 11:43:00 -07:00
|
|
|
}
|
|
|
|
|
|
2019-04-03 15:52:01 -07:00
|
|
|
void BinaryContext::preprocessDebugInfo() {
|
2020-10-12 21:04:42 -07:00
|
|
|
struct CURange {
|
|
|
|
|
uint64_t LowPC;
|
|
|
|
|
uint64_t HighPC;
|
|
|
|
|
DWARFUnit *Unit;
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
|
2020-10-12 21:04:42 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Build a map of address ranges to CUs, similar to .debug_aranges, and use
|
|
|
|
|
// it to assign CUs to functions.
|
|
|
|
|
std::vector<CURange> AllRanges;
|
2020-12-01 16:29:39 -08:00
|
|
|
AllRanges.reserve(DwCtx->getNumCompileUnits());
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
|
|
|
|
|
Expected<DWARFAddressRangesVector> RangesOrError =
|
|
|
|
|
CU->getUnitDIE().getAddressRanges();
|
2021-04-06 12:57:09 -07:00
|
|
|
if (!RangesOrError) {
|
|
|
|
|
consumeError(RangesOrError.takeError());
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2021-04-08 00:19:26 -07:00
|
|
|
for (DWARFAddressRange &Range : *RangesOrError) {
|
2020-10-12 21:04:42 -07:00
|
|
|
// Parts of the debug info could be invalidated due to corresponding code
|
|
|
|
|
// being removed from the binary by the linker. Hence we check if the
|
|
|
|
|
// address is a valid one.
|
|
|
|
|
if (containsAddress(Range.LowPC))
|
|
|
|
|
AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
|
|
|
|
|
}
|
2022-04-21 15:47:49 -07:00
|
|
|
|
|
|
|
|
ContainsDwarf5 |= CU->getVersion() >= 5;
|
|
|
|
|
ContainsDwarfLegacy |= CU->getVersion() < 5;
|
2020-10-12 21:04:42 -07:00
|
|
|
}
|
|
|
|
|
|
2022-06-23 22:15:47 -07:00
|
|
|
llvm::sort(AllRanges);
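// A self-contained sketch of the lookup performed in the loop below
// (hypothetical ranges; std::partition_point behaves like the llvm::
// wrapper): with ranges sorted by LowPC, the partition point is the first
// range whose HighPC lies above the address, so only its LowPC remains to
// be checked.
//
//   std::vector<std::pair<uint64_t, uint64_t>> Rs = {{0x10, 0x20},
//                                                    {0x30, 0x40}};
//   uint64_t Addr = 0x34;
//   auto It = std::partition_point(
//       Rs.begin(), Rs.end(),
//       [=](const auto &R) { return R.second <= Addr; });
//   // It -> {0x30, 0x40} and It->first <= Addr, so Addr falls in range 2.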
|
2020-10-12 21:04:42 -07:00
|
|
|
for (auto &KV : BinaryFunctions) {
|
|
|
|
|
const uint64_t FunctionAddress = KV.first;
|
|
|
|
|
BinaryFunction &Function = KV.second;
|
|
|
|
|
|
2022-06-23 22:15:47 -07:00
|
|
|
auto It = llvm::partition_point(
|
|
|
|
|
AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
|
|
|
|
|
if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
|
2020-10-12 21:04:42 -07:00
|
|
|
Function.setDWARFUnit(It->Unit);
|
|
|
|
|
}
|
|
|
|
|
|
2021-10-11 12:05:34 -07:00
|
|
|
// Discover units with debug info that needs to be updated.
|
|
|
|
|
for (const auto &KV : BinaryFunctions) {
|
|
|
|
|
const BinaryFunction &BF = KV.second;
|
|
|
|
|
if (shouldEmit(BF) && BF.getDWARFUnit())
|
|
|
|
|
ProcessedCUs.insert(BF.getDWARFUnit());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Clear debug info for functions from units that we are not going to process.
|
|
|
|
|
for (auto &KV : BinaryFunctions) {
|
|
|
|
|
BinaryFunction &BF = KV.second;
|
|
|
|
|
if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
|
|
|
|
|
BF.setDWARFUnit(nullptr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (opts::Verbosity >= 1) {
|
2024-02-12 14:53:53 -08:00
|
|
|
this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
|
|
|
|
|
<< DwCtx->getNumCompileUnits() << " CUs will be updated\n";
|
2021-10-11 12:05:34 -07:00
|
|
|
}
|
|
|
|
|
|
2022-05-05 14:57:14 -07:00
|
|
|
preprocessDWODebugInfo();
|
|
|
|
|
|
2020-10-12 21:04:42 -07:00
|
|
|
// Populate MCContext with DWARF files from all units.
|
2020-12-01 16:29:39 -08:00
|
|
|
StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
|
2021-04-08 00:19:26 -07:00
|
|
|
for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
|
2020-12-01 16:29:39 -08:00
|
|
|
const uint64_t CUID = CU->getOffset();
|
2022-04-21 15:47:49 -07:00
|
|
|
DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
|
|
|
|
|
BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
|
2021-10-11 12:05:34 -07:00
|
|
|
GlobalPrefix + "line_table_start" + Twine(CUID)));
|
|
|
|
|
|
|
|
|
|
if (!ProcessedCUs.count(CU.get()))
|
|
|
|
|
continue;
|
|
|
|
|
|
2020-10-12 21:04:42 -07:00
|
|
|
const DWARFDebugLine::LineTable *LineTable =
|
2021-10-11 12:05:34 -07:00
|
|
|
DwCtx->getLineTableForUnit(CU.get());
|
2021-04-08 00:19:26 -07:00
|
|
|
const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
|
|
|
|
|
LineTable->Prologue.FileNames;
|
2020-12-01 16:29:39 -08:00
|
|
|
|
2022-04-21 15:47:49 -07:00
|
|
|
uint16_t DwarfVersion = LineTable->Prologue.getVersion();
|
|
|
|
|
if (DwarfVersion >= 5) {
|
2022-12-04 21:36:08 +00:00
|
|
|
std::optional<MD5::MD5Result> Checksum;
|
2022-04-21 15:47:49 -07:00
|
|
|
if (LineTable->Prologue.ContentTypes.HasMD5)
|
|
|
|
|
Checksum = LineTable->Prologue.FileNames[0].Checksum;
|
2022-12-05 00:09:22 +00:00
|
|
|
std::optional<const char *> Name =
|
2022-05-05 14:57:14 -07:00
|
|
|
dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
|
2022-12-05 00:09:22 +00:00
|
|
|
if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
|
2022-05-05 14:57:14 -07:00
|
|
|
auto Iter = DWOCUs.find(*DWOID);
|
2025-02-06 10:01:12 -08:00
|
|
|
if (Iter == DWOCUs.end()) {
|
|
|
|
|
this->errs() << "BOLT-ERROR: DWO CU was not found for " << Name
|
|
|
|
|
<< '\n';
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
2022-05-05 14:57:14 -07:00
|
|
|
Name = dwarf::toString(
|
|
|
|
|
Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
|
|
|
|
|
}
|
|
|
|
|
BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
|
2022-12-02 23:12:38 -08:00
|
|
|
std::nullopt);
|
2022-04-21 15:47:49 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
BinaryLineTable.setDwarfVersion(DwarfVersion);
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
// Assign a unique label to every line table, one per CU.
|
2018-08-27 20:12:59 -07:00
|
|
|
// Make sure empty debug line tables are registered too.
|
|
|
|
|
if (FileNames.empty()) {
|
2022-12-02 23:12:38 -08:00
|
|
|
cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
|
|
|
|
|
CUID, DwarfVersion));
|
2018-08-27 20:12:59 -07:00
|
|
|
continue;
|
|
|
|
|
}
|
2022-04-21 15:47:49 -07:00
|
|
|
const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
|
2016-03-14 18:48:05 -07:00
|
|
|
for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
|
|
|
|
|
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
|
|
|
|
|
// means empty dir.
|
2018-05-04 10:10:41 -07:00
|
|
|
StringRef Dir = "";
|
2022-04-21 15:47:49 -07:00
|
|
|
if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
|
2022-12-05 00:09:22 +00:00
|
|
|
if (std::optional<const char *> DirName = dwarf::toString(
|
2020-12-01 16:29:39 -08:00
|
|
|
LineTable->Prologue
|
2022-04-21 15:47:49 -07:00
|
|
|
.IncludeDirectories[FileNames[I].DirIdx - Offset]))
|
2018-05-04 10:10:41 -07:00
|
|
|
Dir = *DirName;
|
|
|
|
|
StringRef FileName = "";
|
2022-12-05 00:09:22 +00:00
|
|
|
if (std::optional<const char *> FName =
|
|
|
|
|
dwarf::toString(FileNames[I].Name))
|
2018-05-04 10:10:41 -07:00
|
|
|
FileName = *FName;
|
|
|
|
|
assert(FileName != "");
|
2022-12-04 21:36:08 +00:00
|
|
|
std::optional<MD5::MD5Result> Checksum;
|
2022-04-21 15:47:49 -07:00
|
|
|
if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
|
|
|
|
|
Checksum = LineTable->Prologue.FileNames[I].Checksum;
|
2022-12-02 23:12:38 -08:00
|
|
|
cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
|
|
|
|
|
DwarfVersion));
|
2016-03-14 18:48:05 -07:00
|
|
|
}
|
|
|
|
|
}
|
2019-10-14 17:57:36 -07:00
|
|
|
}
|
2016-05-27 20:19:19 -07:00
|
|
|
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
|
2022-03-20 16:10:27 +03:00
|
|
|
if (Function.isPseudo())
|
|
|
|
|
return false;
|
|
|
|
|
|
2020-06-15 00:15:47 -07:00
|
|
|
if (opts::processAllFunctions())
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (Function.isIgnored())
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// In relocation mode we will emit non-simple functions with CFG.
|
|
|
|
|
// If the function does not have a CFG it should be marked as ignored.
|
|
|
|
|
return HasRelocations || Function.isSimple();
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-21 20:30:44 +08:00
|
|
|
void BinaryContext::dump(const MCInst &Inst) const {
|
|
|
|
|
if (LLVM_UNLIKELY(!InstPrinter)) {
|
|
|
|
|
dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
|
|
|
|
|
dbgs() << "\n";
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-01 16:52:54 -07:00
|
|
|
void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
|
|
|
|
|
uint32_t Operation = Inst.getOperation();
|
|
|
|
|
switch (Operation) {
|
|
|
|
|
case MCCFIInstruction::OpSameValue:
|
|
|
|
|
OS << "OpSameValue Reg" << Inst.getRegister();
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpRememberState:
|
|
|
|
|
OS << "OpRememberState";
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpRestoreState:
|
|
|
|
|
OS << "OpRestoreState";
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpOffset:
|
|
|
|
|
OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpDefCfaRegister:
|
|
|
|
|
OS << "OpDefCfaRegister Reg" << Inst.getRegister();
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpDefCfaOffset:
|
|
|
|
|
OS << "OpDefCfaOffset " << Inst.getOffset();
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpDefCfa:
|
|
|
|
|
OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpRelOffset:
|
2018-09-05 14:36:52 -07:00
|
|
|
OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
|
2017-05-01 16:52:54 -07:00
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpAdjustCfaOffset:
|
2018-09-05 14:36:52 -07:00
|
|
|
OS << "OfAdjustCfaOffset " << Inst.getOffset();
|
2017-05-01 16:52:54 -07:00
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpEscape:
|
|
|
|
|
OS << "OpEscape";
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpRestore:
|
2018-09-05 14:36:52 -07:00
|
|
|
OS << "OpRestore Reg" << Inst.getRegister();
|
2017-05-01 16:52:54 -07:00
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpUndefined:
|
2018-09-05 14:36:52 -07:00
|
|
|
OS << "OpUndefined Reg" << Inst.getRegister();
|
2017-05-01 16:52:54 -07:00
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpRegister:
|
2018-09-05 14:36:52 -07:00
|
|
|
OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
|
|
|
|
|
<< Inst.getRegister2();
|
2017-05-01 16:52:54 -07:00
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpWindowSave:
|
|
|
|
|
OS << "OpWindowSave";
|
|
|
|
|
break;
|
|
|
|
|
case MCCFIInstruction::OpGnuArgsSize:
|
|
|
|
|
OS << "OpGnuArgsSize";
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
OS << "Op#" << Operation;
|
|
|
|
|
break;
|
2016-07-23 08:01:53 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-31 11:50:59 -07:00
|
|
|
MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
|
2023-07-29 09:14:44 +02:00
|
|
|
// For AArch64 and RISC-V, the ABI defines mapping symbols that let us identify
|
|
|
|
|
// data in the code section (see IHI0056B). $x marks the start of code or
|
2023-11-09 13:29:46 -06:00
|
|
|
// the end of a data chunk inside code; $d marks the start of data.
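// For example, a text section with an embedded jump table may carry mapping
// symbols like (hypothetical addresses):
//   0x400100 $x    <- code starts
//   0x400140 $d    <- data (jump table) begins inside code
//   0x400180 $x.1  <- code resumes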
|
2024-04-15 13:11:29 -04:00
|
|
|
if (isX86() || ELFSymbolRef(Symbol).getSize())
|
2022-05-31 11:50:59 -07:00
|
|
|
return MarkerSymType::NONE;
|
|
|
|
|
|
|
|
|
|
Expected<StringRef> NameOrError = Symbol.getName();
|
|
|
|
|
Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
|
|
|
|
|
|
|
|
|
|
if (!TypeOrError || !NameOrError)
|
|
|
|
|
return MarkerSymType::NONE;
|
|
|
|
|
|
|
|
|
|
if (*TypeOrError != SymbolRef::ST_Unknown)
|
|
|
|
|
return MarkerSymType::NONE;
|
|
|
|
|
|
2023-12-13 23:34:49 -08:00
|
|
|
if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
|
2022-05-31 11:50:59 -07:00
|
|
|
return MarkerSymType::CODE;
|
|
|
|
|
|
2023-10-13 10:34:13 +00:00
|
|
|
// $x<ISA>
|
2023-12-13 23:34:49 -08:00
|
|
|
if (isRISCV() && NameOrError->starts_with("$x"))
|
2023-10-13 10:34:13 +00:00
|
|
|
return MarkerSymType::CODE;
|
|
|
|
|
|
2023-12-13 23:34:49 -08:00
|
|
|
if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
|
2022-05-31 11:50:59 -07:00
|
|
|
return MarkerSymType::DATA;
|
|
|
|
|
|
|
|
|
|
return MarkerSymType::NONE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
|
|
|
|
|
return getMarkerType(Symbol) != MarkerSymType::NONE;
|
|
|
|
|
}
|
|
|
|
|
|
2022-06-11 11:58:10 -07:00
|
|
|
static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
|
|
|
|
|
const BinaryFunction *Function,
|
|
|
|
|
DWARFContext *DwCtx) {
|
|
|
|
|
DebugLineTableRowRef RowRef =
|
|
|
|
|
DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
|
|
|
|
|
if (RowRef == DebugLineTableRowRef::NULL_ROW)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
const DWARFDebugLine::LineTable *LineTable;
|
|
|
|
|
if (Function && Function->getDWARFUnit() &&
|
|
|
|
|
Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
|
|
|
|
|
LineTable = Function->getDWARFLineTable();
|
|
|
|
|
} else {
|
|
|
|
|
LineTable = DwCtx->getLineTableForUnit(
|
|
|
|
|
DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
|
|
|
|
|
}
|
|
|
|
|
assert(LineTable && "line table expected for instruction with debug info");
|
|
|
|
|
|
|
|
|
|
const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
|
|
|
|
|
StringRef FileName = "";
|
2022-12-05 00:09:22 +00:00
|
|
|
if (std::optional<const char *> FName =
|
2022-06-11 11:58:10 -07:00
|
|
|
dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
|
|
|
|
|
FileName = *FName;
|
|
|
|
|
OS << " # debug line " << FileName << ":" << Row.Line;
|
|
|
|
|
if (Row.Column)
|
|
|
|
|
OS << ":" << Row.Column;
|
|
|
|
|
if (Row.Discriminator)
|
|
|
|
|
OS << " discriminator:" << Row.Discriminator;
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-05 22:41:40 -08:00
|
|
|
ArrayRef<uint8_t> BinaryContext::extractData(uint64_t Address,
|
|
|
|
|
uint64_t Size) const {
|
|
|
|
|
ArrayRef<uint8_t> Res;
|
|
|
|
|
|
|
|
|
|
const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
|
|
|
|
|
if (!Section || Section->isVirtual())
|
|
|
|
|
return Res;
|
|
|
|
|
|
|
|
|
|
if (!Section->containsRange(Address, Size))
|
|
|
|
|
return Res;
|
|
|
|
|
|
|
|
|
|
auto *Bytes =
|
|
|
|
|
reinterpret_cast<const uint8_t *>(Section->getContents().data());
|
|
|
|
|
return ArrayRef<uint8_t>(Bytes + Address - Section->getAddress(), Size);
|
|
|
|
|
}
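// For a 7-byte blob at offset 0x100 (hypothetical bytes), printData() below
// produces output along the lines of:
//   00000100:  .word  0x11223344
//   00000104:  .short 0x5566
//   00000106:  .byte  0x77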
|
|
|
|
|
|
|
|
|
|
void BinaryContext::printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
|
|
|
|
|
uint64_t Offset) const {
|
|
|
|
|
DataExtractor DE(Data, AsmInfo->isLittleEndian(),
|
|
|
|
|
AsmInfo->getCodePointerSize());
|
|
|
|
|
uint64_t DataOffset = 0;
|
|
|
|
|
while (DataOffset + 4 <= Data.size()) {
|
|
|
|
|
OS << format(" %08" PRIx64 ": \t.word\t0x", Offset + DataOffset);
|
|
|
|
|
const auto Word = DE.getUnsigned(&DataOffset, 4);
|
|
|
|
|
OS << Twine::utohexstr(Word) << '\n';
|
|
|
|
|
}
|
|
|
|
|
if (DataOffset + 2 <= Data.size()) {
|
|
|
|
|
OS << format(" %08" PRIx64 ": \t.short\t0x", Offset + DataOffset);
|
|
|
|
|
const auto Short = DE.getUnsigned(&DataOffset, 2);
|
|
|
|
|
OS << Twine::utohexstr(Short) << '\n';
|
|
|
|
|
}
|
|
|
|
|
if (DataOffset + 1 == Data.size()) {
|
|
|
|
|
OS << format(" %08" PRIx64 ": \t.byte\t0x%x\n", Offset + DataOffset,
|
|
|
|
|
Data[DataOffset]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
|
2016-07-23 08:01:53 -07:00
|
|
|
uint64_t Offset,
|
2021-12-14 16:52:51 -08:00
|
|
|
const BinaryFunction *Function,
|
|
|
|
|
bool PrintMCInst, bool PrintMemData,
|
2022-05-24 18:25:40 -07:00
|
|
|
bool PrintRelocations,
|
|
|
|
|
StringRef Endl) const {
|
2016-07-23 08:01:53 -07:00
|
|
|
OS << format(" %08" PRIx64 ": ", Offset);
|
2018-03-09 09:45:13 -08:00
|
|
|
if (MIB->isCFI(Instruction)) {
|
2016-07-23 08:01:53 -07:00
|
|
|
uint32_t Offset = Instruction.getOperand(0).getImm();
|
|
|
|
|
OS << "\t!CFI\t$" << Offset << "\t; ";
|
2016-08-22 14:24:09 -07:00
|
|
|
if (Function)
|
2017-05-01 16:52:54 -07:00
|
|
|
printCFI(OS, *Function->getCFIFor(Instruction));
|
2022-05-24 18:25:40 -07:00
|
|
|
OS << Endl;
|
2016-07-23 08:01:53 -07:00
|
|
|
return;
|
|
|
|
|
}
|
2024-03-21 14:05:21 -07:00
|
|
|
if (std::optional<uint32_t> DynamicID =
|
|
|
|
|
MIB->getDynamicBranchID(Instruction)) {
|
|
|
|
|
OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
|
|
|
|
|
<< " # ID: " << DynamicID;
|
|
|
|
|
} else {
|
2024-12-20 09:54:07 +01:00
|
|
|
// If there are annotations on the instruction, the MCInstPrinter will fail
|
|
|
|
|
// to print the preferred alias as it only does so when the number of
|
|
|
|
|
// operands is as expected. See
|
|
|
|
|
// https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142
|
|
|
|
|
// Therefore, create a temporary copy of the Inst from which the annotations
|
|
|
|
|
// are removed, and print that Inst.
|
|
|
|
|
MCInst InstNoAnnot = Instruction;
|
|
|
|
|
MIB->stripAnnotations(InstNoAnnot);
|
|
|
|
|
InstPrinter->printInst(&InstNoAnnot, 0, "", *STI, OS);
|
2024-03-21 14:05:21 -07:00
|
|
|
}
|
2018-03-09 09:45:13 -08:00
|
|
|
if (MIB->isCall(Instruction)) {
|
|
|
|
|
if (MIB->isTailCall(Instruction))
|
2016-07-23 08:01:53 -07:00
|
|
|
OS << " # TAILCALL ";
|
2018-03-09 09:45:13 -08:00
|
|
|
if (MIB->isInvoke(Instruction)) {
|
2022-12-06 14:15:54 -08:00
|
|
|
const std::optional<MCPlus::MCLandingPad> EHInfo =
|
|
|
|
|
MIB->getEHInfo(Instruction);
|
2019-01-31 11:23:02 -08:00
|
|
|
OS << " # handler: ";
|
|
|
|
|
if (EHInfo->first)
|
|
|
|
|
OS << *EHInfo->first;
|
|
|
|
|
else
|
|
|
|
|
OS << '0';
|
|
|
|
|
OS << "; action: " << EHInfo->second;
|
2021-04-08 00:19:26 -07:00
|
|
|
const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
|
2016-07-23 08:01:53 -07:00
|
|
|
if (GnuArgsSize >= 0)
|
|
|
|
|
OS << "; GNU_args_size = " << GnuArgsSize;
|
|
|
|
|
}
|
2019-06-28 09:21:27 -07:00
|
|
|
} else if (MIB->isIndirectBranch(Instruction)) {
|
2021-04-08 00:19:26 -07:00
|
|
|
if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
|
2016-09-16 15:54:32 -07:00
|
|
|
OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
|
2019-06-28 09:21:27 -07:00
|
|
|
} else {
|
|
|
|
|
OS << " # UNKNOWN CONTROL FLOW";
|
2016-09-14 16:45:40 -07:00
|
|
|
}
|
|
|
|
|
}
|
2022-12-06 14:15:54 -08:00
|
|
|
if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
|
2021-08-03 17:53:32 -07:00
|
|
|
OS << " # Offset: " << *Offset;
|
2023-11-13 14:33:39 -08:00
|
|
|
if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
|
|
|
|
|
OS << " # Size: " << *Size;
|
2024-02-27 18:44:28 -08:00
|
|
|
if (MCSymbol *Label = MIB->getInstLabel(Instruction))
|
2023-11-06 11:41:47 -08:00
|
|
|
OS << " # Label: " << *Label;
|
2016-07-23 08:01:53 -07:00
|
|
|
|
[BOLT][Refactoring] Isolate changes to MC layer
Summary:
Changes that we made to MCInst, MCOperand, MCExpr, etc. are now all
moved into tools/llvm-bolt. That required a change to the way we handle
annotations and any extra operands for MCInst.
Any MCPlus information is now attached via an extra operand of type
MCInst with an opcode ANNOTATION_LABEL. Since this operand is MCInst, we
attach extra info as operands to this instruction. For first-level
annotations use functions to access the information, such as
getConditionalTailCall() or getEHInfo(), etc. For the rest, optional or
second-class annotations, use a general named-annotation interface such
as getAnnotationAs<uint64_t>(Inst, "Count").
I did a test on an HHVM binary, and memory consumption went down a little
bit while the runtime remained the same.
(cherry picked from FBD7405412)
2018-03-19 18:32:12 -07:00
|
|
|
MIB->printAnnotations(Instruction, OS);
|
Indirect call promotion optimization.
Summary:
Perform indirect call promotion optimization in BOLT.
The code scans the instructions during CFG creation for all
indirect calls. Right now indirect tail calls are not handled
since the functions are marked not simple. The offsets of the
indirect calls are stored for later use by the ICP pass.
The indirect call promotion pass visits each indirect call and
examines the BranchData for each. If the most frequent targets
from that callsite exceed the specified threshold (default 90%),
the call is promoted. Otherwise, it is ignored. By default,
only one target is considered at each callsite.
When a candidate callsite is processed, we modify the callsite
to test for the most common call targets before calling through
the original generic call mechanism.
The CFG and layout are modified by ICP.
A few new command line options have been added:
-indirect-call-promotion
-indirect-call-promotion-threshold=<percentage>
-indirect-call-promotion-topn=<int>
The threshold is the minimum frequency of a call target needed
before ICP is triggered.
The topn option controls the number of targets to consider for
each callsite, e.g. ICP is triggered if topn=2 and the total
frequency of the top two call targets exceeds the threshold.
Example of ICP:
C++ code:
int B_count = 0;
int C_count = 0;
struct A { virtual void foo() = 0; }
struct B : public A { virtual void foo() { ++B_count; }; };
struct C : public A { virtual void foo() { ++C_count; }; };
A* a = ...
a->foo();
...
original:
400863: 49 8b 07 mov (%r15),%rax
400866: 4c 89 ff mov %r15,%rdi
400869: ff 10 callq *(%rax)
40086b: 41 83 e6 01 and $0x1,%r14d
40086f: 4d 89 e6 mov %r12,%r14
400872: 4c 0f 44 f5 cmove %rbp,%r14
400876: 4c 89 f7 mov %r14,%rdi
...
after ICP:
40085e: 49 8b 07 mov (%r15),%rax
400861: 4c 89 ff mov %r15,%rdi
400864: 49 ba e0 0b 40 00 00 movabs $0x400be0,%r10
40086b: 00 00 00
40086e: 4c 3b 10 cmp (%rax),%r10
400871: 75 29 jne 40089c <main+0x9c>
400873: 41 ff d2 callq *%r10
400876: 41 83 e6 01 and $0x1,%r14d
40087a: 4d 89 e6 mov %r12,%r14
40087d: 4c 0f 44 f5 cmove %rbp,%r14
400881: 4c 89 f7 mov %r14,%rdi
...
40089c: ff 10 callq *(%rax)
40089e: eb d6 jmp 400876 <main+0x76>
(cherry picked from FBD3612218)
2016-09-07 18:59:23 -07:00
|
|
|
|
2022-06-11 11:58:10 -07:00
|
|
|
if (opts::PrintDebugInfo)
|
|
|
|
|
printDebugInfo(OS, Instruction, Function, DwCtx.get());
|
2016-07-23 08:01:53 -07:00
|
|
|
|
2017-10-20 12:11:34 -07:00
|
|
|
if ((opts::PrintRelocations || PrintRelocations) && Function) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
|
2017-10-20 12:11:34 -07:00
|
|
|
Function->printRelocations(OS, Offset, Size);
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-24 18:25:40 -07:00
|
|
|
OS << Endl;
|
2016-07-23 08:01:53 -07:00
|
|
|
|
2017-10-20 12:11:34 -07:00
|
|
|
if (PrintMCInst) {
|
2016-07-23 08:01:53 -07:00
|
|
|
Instruction.dump_pretty(OS, InstPrinter.get());
|
2022-05-24 18:25:40 -07:00
|
|
|
OS << Endl;
|
2016-07-23 08:01:53 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 12:02:21 -08:00
|
|
|
std::optional<uint64_t>
|
2022-04-13 19:39:39 -07:00
|
|
|
BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
|
|
|
|
|
uint64_t FileOffset) const {
|
|
|
|
|
// Find a segment with a matching file offset.
|
|
|
|
|
for (auto &KV : SegmentMapInfo) {
|
|
|
|
|
const SegmentInfo &SegInfo = KV.second;
|
2024-09-23 15:14:51 +02:00
|
|
|
// Only consider executable segments.
|
|
|
|
|
if (!SegInfo.IsExecutable)
|
|
|
|
|
continue;
|
2023-11-16 15:05:06 +08:00
|
|
|
// FileOffset is obtained from a perf event,
|
|
|
|
|
// and it is equal to alignDown(SegInfo.FileOffset, pagesize).
|
|
|
|
|
// If the pagesize is not equal to SegInfo.Alignment,
|
|
|
|
|
// FileOffset and SegInfo.FileOffset should be aligned first,
|
|
|
|
|
// and only then compared for equality.
|
|
|
|
|
if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
|
|
|
|
|
alignDown(FileOffset, SegInfo.Alignment)) {
|
|
|
|
|
// The function's offset from base address in VAS is aligned by pagesize
|
|
|
|
|
// instead of SegInfo.Alignment. The pagesize cannot be obtained from perf
// events.
|
|
|
|
|
// However, the ELF specification says that SegInfo.FileOffset should equal
|
|
|
|
|
// to SegInfo.Address, modulo the pagesize.
|
|
|
|
|
// Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
|
|
|
|
|
|
|
|
|
|
// So alignDown(SegInfo.Address, pagesize) can be calculated by:
|
|
|
|
|
// alignDown(SegInfo.Address, pagesize)
|
|
|
|
|
// = SegInfo.Address - (SegInfo.Address % pagesize)
|
|
|
|
|
// = SegInfo.Address - (SegInfo.FileOffset % pagesize)
|
|
|
|
|
// = SegInfo.Address - SegInfo.FileOffset +
|
|
|
|
|
// alignDown(SegInfo.FileOffset, pagesize)
|
|
|
|
|
// = SegInfo.Address - SegInfo.FileOffset + FileOffset
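//
// Worked example (hypothetical numbers, 0x1000 pagesize): with
// SegInfo.Address = 0x201340 and SegInfo.FileOffset = 0x1340, perf reports
// FileOffset = alignDown(0x1340, 0x1000) = 0x1000, giving
//   alignDown(SegInfo.Address, pagesize)
//     = 0x201340 - 0x1340 + 0x1000 = 0x201000,
// so the inferred base address is MMapAddress - 0x201000.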
|
|
|
|
|
return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
|
2022-04-13 19:39:39 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-02 23:12:38 -08:00
|
|
|
return std::nullopt;
|
2022-04-13 19:39:39 -07:00
|
|
|
}
|
|
|
|
|
|
2021-06-30 14:38:50 -07:00
|
|
|
ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
|
2018-01-31 12:12:59 -08:00
|
|
|
auto SI = AddressToSection.upper_bound(Address);
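// E.g. (hypothetical layout): with sections at 0x1000 (size 0x100) and
// 0x2000 (size 0x80), upper_bound(0x1010) points at the 0x2000 entry;
// stepping back yields the 0x1000 section, and 0x1000 + 0x100 > 0x1010
// confirms the address lies within it.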
|
|
|
|
|
if (SI != AddressToSection.begin()) {
|
2018-01-23 15:10:24 -08:00
|
|
|
--SI;
|
2021-04-08 00:19:26 -07:00
|
|
|
uint64_t UpperBound = SI->first + SI->second->getSize();
|
2019-06-27 03:20:17 -07:00
|
|
|
if (!SI->second->getSize())
|
|
|
|
|
UpperBound += 1;
|
|
|
|
|
if (UpperBound > Address)
|
2018-01-31 12:12:59 -08:00
|
|
|
return *SI->second;
|
2016-07-21 12:45:35 -07:00
|
|
|
}
|
|
|
|
|
return std::make_error_code(std::errc::bad_address);
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-14 20:05:11 -08:00
|
|
|
ErrorOr<StringRef>
|
|
|
|
|
BinaryContext::getSectionNameForAddress(uint64_t Address) const {
|
2021-12-20 11:07:46 -08:00
|
|
|
if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
|
2017-11-14 20:05:11 -08:00
|
|
|
return Section->getName();
|
|
|
|
|
return std::make_error_code(std::errc::bad_address);
|
|
|
|
|
}
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
BinarySection &BinaryContext::registerSection(BinarySection *Section) {
|
|
|
|
|
auto Res = Sections.insert(Section);
|
2021-06-29 12:11:56 -07:00
|
|
|
(void)Res;
|
2018-01-31 12:12:59 -08:00
|
|
|
assert(Res.second && "can't register the same section twice.");
|
2020-07-06 14:39:44 -07:00
|
|
|
|
|
|
|
|
// Only register allocatable sections in the AddressToSection map.
|
2020-09-14 14:31:50 -07:00
|
|
|
if (Section->isAllocatable() && Section->getAddress())
|
2018-02-01 16:33:43 -08:00
|
|
|
AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
|
2020-12-01 16:29:39 -08:00
|
|
|
NameToSection.insert(
|
|
|
|
|
std::make_pair(std::string(Section->getName()), Section));
|
2022-09-22 12:05:12 -07:00
|
|
|
if (Section->hasSectionRef())
|
|
|
|
|
SectionRefToBinarySection.insert(
|
|
|
|
|
std::make_pair(Section->getSectionRef(), Section));
|
|
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
|
2018-02-01 16:33:43 -08:00
|
|
|
return *Section;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
BinarySection &BinaryContext::registerSection(SectionRef Section) {
|
2018-04-20 20:03:31 -07:00
|
|
|
return registerSection(new BinarySection(*this, Section));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
BinarySection &
|
2022-09-22 12:05:12 -07:00
|
|
|
BinaryContext::registerSection(const Twine &SectionName,
|
2018-04-20 20:03:31 -07:00
|
|
|
const BinarySection &OriginalSection) {
|
2021-12-14 16:52:51 -08:00
|
|
|
return registerSection(
|
|
|
|
|
new BinarySection(*this, SectionName, OriginalSection));
|
2018-02-01 16:33:43 -08:00
|
|
|
}
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
BinarySection &
|
2022-09-22 12:05:12 -07:00
|
|
|
BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
|
2021-12-14 16:52:51 -08:00
|
|
|
unsigned ELFFlags, uint8_t *Data,
|
|
|
|
|
uint64_t Size, unsigned Alignment) {
|
2018-02-01 16:33:43 -08:00
|
|
|
auto NamedSections = getSectionByName(Name);
|
|
|
|
|
if (NamedSections.begin() != NamedSections.end()) {
|
|
|
|
|
assert(std::next(NamedSections.begin()) == NamedSections.end() &&
|
|
|
|
|
"can only update unique sections");
|
2021-04-08 00:19:26 -07:00
|
|
|
BinarySection *Section = NamedSections.begin()->second;
|
2018-02-01 16:33:43 -08:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
|
2021-04-08 00:19:26 -07:00
|
|
|
const bool Flag = Section->isAllocatable();
|
2021-06-29 12:11:56 -07:00
|
|
|
(void)Flag;
|
2020-02-18 09:20:17 -08:00
|
|
|
Section->update(Data, Size, Alignment, ELFType, ELFFlags);
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << *Section << "\n");
|
2020-06-12 20:16:27 -07:00
|
|
|
// FIXME: Fix section flags/attributes for MachO.
|
|
|
|
|
if (isELF())
|
|
|
|
|
assert(Flag == Section->isAllocatable() &&
|
|
|
|
|
"can't change section allocation status");
|
2018-02-01 16:33:43 -08:00
|
|
|
return *Section;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
return registerSection(
|
|
|
|
|
new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
|
2018-02-01 16:33:43 -08:00
|
|
|
}
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
void BinaryContext::deregisterSectionName(const BinarySection &Section) {
|
|
|
|
|
auto NameRange = NameToSection.equal_range(Section.getName().str());
|
|
|
|
|
while (NameRange.first != NameRange.second) {
|
|
|
|
|
if (NameRange.first->second == &Section) {
|
|
|
|
|
NameToSection.erase(NameRange.first);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
++NameRange.first;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BinaryContext::deregisterUnusedSections() {
|
|
|
|
|
ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
|
|
|
|
|
for (auto SI = Sections.begin(); SI != Sections.end();) {
|
|
|
|
|
BinarySection *Section = *SI;
|
[BOLT] Move from RuntimeDyld to JITLink
RuntimeDyld has been deprecated in favor of JITLink. [1] This patch
replaces all uses of RuntimeDyld in BOLT with JITLink.
Care has been taken to minimize the impact on the code structure in
order to ease the inspection of this (rather large) changeset. Since
BOLT relied on the RuntimeDyld API in multiple places, this wasn't
always possible though and I'll explain the changes in code structure
first.
Design note: BOLT uses a JIT linker to perform what essentially is
static linking. No linked code is ever executed; the result of linking
is simply written back to an executable file. For this reason, I
restricted myself to the use of the core JITLink library and avoided ORC
as much as possible.
RuntimeDyld contains methods for loading objects (loadObject) and symbol
lookup (getSymbol). Since JITLink doesn't provide a class with a similar
interface, the BOLTLinker abstract class was added to implement it. It
was added to Core since both the Rewrite and RuntimeLibs libraries make
use of it. Wherever a RuntimeDyld object was used before, it was
replaced with a BOLTLinker object.
There is one major difference between the RuntimeDyld and BOLTLinker
interfaces: in JITLink, section allocation and the application of fixups
(relocation) happens in a single call (jitlink::link). That is, there is
no separate method like finalizeWithMemoryManagerLocking in RuntimeDyld.
BOLT used to remap sections between allocating (loadObject) and linking
them (finalizeWithMemoryManagerLocking). This doesn't work anymore with
JITLink. Instead, BOLTLinker::loadObject accepts a callback that is
called before fixups are applied which is used to remap sections.
The actual implementation of the BOLTLinker interface lives in the
JITLinkLinker class in the Rewrite library. It's the only part of the
BOLT code that should directly interact with the JITLink API.
For loading objects, JITLinkLinker first creates a LinkGraph
(jitlink::createLinkGraphFromObject) and then links it (jitlink::link).
For the latter, it uses a custom JITLinkContext with the following
properties:
- Use BOLT's ExecutableFileMemoryManager. This one was updated to
implement the JITLinkMemoryManager interface. Since BOLT never
executes code, its finalization step is a no-op.
- Pass config: don't use the default target passes since they modify
DWARF sections in a way that seems incompatible with BOLT. Also run a
custom pre-prune pass that makes sure sections without symbols are not
pruned by JITLink.
- Implement symbol lookup. This used to be implemented by
BOLTSymbolResolver.
- Call the section mapper callback before the final linking step.
- Copy symbol values when the LinkGraph is resolved. Symbols are stored
inside JITLinkLinker to ensure that later objects (i.e.,
instrumentation libraries) can find them. This functionality used to
be provided by RuntimeDyld but I did not find a way to use JITLink
directly for this.
Some more minor points of interest:
- BinarySection::SectionID: JITLink doesn't have something equivalent to
RuntimeDyld's Section IDs. Instead, sections can only be referred to
by name. Hence, SectionID was updated to a string.
- There seem to be no tests for Mach-O. I've tested a small hello-world
style binary but not more than that.
- On Mach-O, JITLink "normalizes" section names to include the segment
name. I had to parse the section name back from this manually which
feels slightly hacky.
[1] https://reviews.llvm.org/D145686#4222642
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D147544
2023-06-15 10:52:11 +02:00
|
|
|
// We check getOutputData() instead of getOutputSize() because sometimes
|
|
|
|
|
// zero-sized .text.cold sections are allocated.
|
|
|
|
|
if (Section->hasSectionRef() || Section->getOutputData() ||
|
2022-09-22 12:05:12 -07:00
|
|
|
(AbsSection && Section == &AbsSection.get())) {
|
|
|
|
|
++SI;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
|
|
|
|
|
<< '\n';);
|
|
|
|
|
deregisterSectionName(*Section);
|
|
|
|
|
SI = Sections.erase(SI);
|
|
|
|
|
delete Section;
|
|
|
|
|
}
|
|
|
|
|
}

bool BinaryContext::deregisterSection(BinarySection &Section) {
  BinarySection *SectionPtr = &Section;
  auto Itr = Sections.find(SectionPtr);
  if (Itr != Sections.end()) {
    auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
    while (Range.first != Range.second) {
      if (Range.first->second == SectionPtr) {
        AddressToSection.erase(Range.first);
        break;
      }
      ++Range.first;
    }

    deregisterSectionName(*SectionPtr);
    Sections.erase(Itr);
    delete SectionPtr;
    return true;
  }
  return false;
}

void BinaryContext::renameSection(BinarySection &Section,
                                  const Twine &NewName) {
  auto Itr = Sections.find(&Section);
  assert(Itr != Sections.end() && "Section must exist to be renamed.");
  Sections.erase(Itr);

  deregisterSectionName(Section);

  Section.Name = NewName.str();
  Section.setOutputName(Section.Name);

  NameToSection.insert(std::make_pair(Section.Name, &Section));

  // Reinsert with the new name.
  Sections.insert(&Section);
}
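
// Example (a sketch with placeholder names, not code from this file): rename
// an input section to move it out of the way before emitting its replacement,
// assuming `BC` is an initialized BinaryContext and `Sec` a registered
// section:
//
//   BC.renameSection(Sec, ".bolt.org" + Sec.getName());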

void BinaryContext::printSections(raw_ostream &OS) const {
  for (BinarySection *const &Section : Sections)
    OS << "BOLT-INFO: " << *Section << "\n";
}

BinarySection &BinaryContext::absoluteSection() {
  if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
    return *Section;
  return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
}

ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
                                                           size_t Size) const {
  const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return std::make_error_code(std::errc::bad_address);

  if (Section->isVirtual())
    return 0;

  DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
                   AsmInfo->getCodePointerSize());
  auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
  return DE.getUnsigned(&ValueOffset, Size);
}
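
// Example, assuming an initialized BinaryContext `BC` and a placeholder
// address `Addr` that falls inside a mapped section:
//
//   if (ErrorOr<uint64_t> Value = BC.getUnsignedValueAtAddress(Addr, 4))
//     outs() << "value: " << *Value << '\n';
//   // A failed section lookup yields std::errc::bad_address instead.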

ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
                                                        size_t Size) const {
  const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return std::make_error_code(std::errc::bad_address);

  if (Section->isVirtual())
    return 0;

  DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
                   AsmInfo->getCodePointerSize());
  auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
  return DE.getSigned(&ValueOffset, Size);
}

void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
                                  uint32_t Type, uint64_t Addend,
                                  uint64_t Value) {
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  assert(Section && "cannot find section for address");
  Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
                         Value);
}
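
// Example (placeholder names; the relocation type is target-specific),
// assuming an x86-64 input, a BinaryContext `BC`, and an MCSymbol `Sym`:
//
//   BC.addRelocation(FixupAddress, Sym, ELF::R_X86_64_64,
//                    /*Addend=*/0, /*Value=*/0);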

void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
                                         uint32_t Type, uint64_t Addend,
                                         uint64_t Value) {
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  assert(Section && "cannot find section for address");
  Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
                                Addend, Value);
}

bool BinaryContext::removeRelocationAt(uint64_t Address) {
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  assert(Section && "cannot find section for address");
  return Section->removeRelocationAt(Address - Section->getAddress());
}

const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
  ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return nullptr;

  return Section->getRelocationAt(Address - Section->getAddress());
}

const Relocation *
BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
  ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return nullptr;

  return Section->getDynamicRelocationAt(Address - Section->getAddress());
}

void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
                                             const uint64_t Address) {
  auto setImmovable = [&](BinaryData &BD) {
    BinaryData *Root = BD.getAtomicRoot();
    LLVM_DEBUG(if (Root->isMoveable()) {
      dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
             << "due to ambiguous relocation referencing 0x"
             << Twine::utohexstr(Address) << '\n';
    });
    Root->setIsMoveable(false);
  };

  if (Address == BD.getAddress()) {
    setImmovable(BD);

    // Set previous symbol as immovable.
    BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
    if (Prev && Prev->getEndAddress() == BD.getAddress())
      setImmovable(*Prev);
  }

  if (Address == BD.getEndAddress()) {
    setImmovable(BD);

    // Set next symbol as immovable.
    BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
    if (Next && Next->getAddress() == BD.getEndAddress())
      setImmovable(*Next);
  }
}
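
// Boundary example of the ambiguity handled above: if object A spans
// [0x100, 0x108) and object B starts at 0x108, a relocation against 0x108
// could refer to either the end of A or the start of B, so both neighboring
// atomic roots are pinned in place via setImmovable.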

BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
                                                    uint64_t *EntryDesc) {
  std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
  auto BFI = SymbolToFunctionMap.find(Symbol);
  if (BFI == SymbolToFunctionMap.end())
    return nullptr;

  BinaryFunction *BF = BFI->second;
  if (EntryDesc)
    *EntryDesc = BF->getEntryIDForSymbol(Symbol);

  return BF;
}
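
// Example, assuming placeholders `BC` and `Sym`; EntryDesc receives 0 for
// the main entry point and a non-zero ID for a secondary entry:
//
//   uint64_t EntryID = 0;
//   if (BinaryFunction *BF = BC.getFunctionForSymbol(Sym, &EntryID))
//     if (EntryID != 0)
//       ... `Sym` marks a secondary entry point of `BF` ...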

std::string
BinaryContext::generateBugReportMessage(StringRef Message,
                                        const BinaryFunction &Function) const {
  std::string Msg;
  raw_string_ostream SS(Msg);
  SS << "=======================================\n";
  SS << "BOLT is unable to proceed because it couldn't properly understand "
        "this function.\n";
  SS << "If you are running the most recent version of BOLT, you may "
        "want to report this and paste this dump.\nPlease check that no "
        "sensitive content is being shared in this dump.\n";
  SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
  ScopedPrinter SP(SS);
  SP.printBinaryBlock("Function contents", *Function.getData());
  SS << "\n";
  const_cast<BinaryFunction &>(Function).print(SS, "");
  SS << "ERROR: " << Message;
  SS << "\n=======================================\n";
  return Msg;
}

BinaryFunction *
BinaryContext::createInjectedBinaryFunction(const std::string &Name,
                                            bool IsSimple) {
  InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
  BinaryFunction *BF = InjectedBinaryFunctions.back();
  setSymbolToFunctionMap(BF->getSymbol(), BF);
  BF->CurrentState = BinaryFunction::State::CFG;
  return BF;
}

BinaryFunction *
BinaryContext::createInstructionPatch(uint64_t Address,
                                      const InstructionListType &Instructions,
                                      const Twine &Name) {
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  assert(Section && "cannot get section for patching");
  assert(Section->hasSectionRef() && Section->isText() &&
         "can only patch input file code sections");

  const uint64_t FileOffset =
      Section->getInputFileOffset() + Address - Section->getAddress();

  std::string PatchName = Name.str();
  if (PatchName.empty()) {
    // Assign a unique name to the patch.
    static uint64_t N = 0;
    PatchName = "__BP_" + std::to_string(N++);
  }

  BinaryFunction *PBF = createInjectedBinaryFunction(PatchName);
  PBF->setOutputAddress(Address);
  PBF->setFileOffset(FileOffset);
  PBF->setOriginSection(&Section.get());
  PBF->addBasicBlock()->addInstructions(Instructions);
  PBF->setIsPatch(true);

  // Don't create a symbol table entry if the name wasn't specified.
  if (Name.str().empty())
    PBF->setAnonymous(true);

  return PBF;
}
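
// Sketch of a call site (placeholder names; building the instructions is
// target-specific and typically done through MCPlusBuilder):
//
//   InstructionListType Instructions;
//   // ... populate Instructions, e.g. with a branch to relocated code ...
//   BinaryFunction *Patch =
//       BC.createInstructionPatch(InputAddress, Instructions, /*Name=*/"");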

std::pair<size_t, size_t>
BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
  // Use the original size for non-simple functions.
  if (!BF.isSimple() || BF.isIgnored())
    return std::make_pair(BF.getSize(), 0);

  // Adjust branch instructions to match the current layout.
  if (FixBranches)
    BF.fixBranches();

  // Create local MC context to isolate the effect of ephemeral code emission.
  IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
  MCContext *LocalCtx = MCEInstance.LocalCtx.get();
  MCAsmBackend *MAB =
      TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());

  SmallString<256> Code;
  raw_svector_ostream VecOS(Code);

  std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
  std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
      *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
      std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI));

  Streamer->initSections(false, *STI);

  MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
  Section->setHasInstructions(true);

  // Create symbols in the LocalCtx so that they get destroyed with it.
  MCSymbol *StartLabel = LocalCtx->createTempSymbol();
  MCSymbol *EndLabel = LocalCtx->createTempSymbol();

  Streamer->switchSection(Section);
  Streamer->emitLabel(StartLabel);
  emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
                   /*EmitCodeOnly=*/true);
  Streamer->emitLabel(EndLabel);

  using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
  SmallVector<LabelRange> SplitLabels;
  for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
    MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
    MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
    SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);

    MCSectionELF *const SplitSection = LocalCtx->getELFSection(
        BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
        ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
    SplitSection->setHasInstructions(true);
    Streamer->switchSection(SplitSection);

    Streamer->emitLabel(SplitStartLabel);
    emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
    Streamer->emitLabel(SplitEndLabel);
  }

  MCAssembler &Assembler =
      static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
  Assembler.layout();

  // Obtain fragment sizes.
  std::vector<uint64_t> FragmentSizes;
  // Main fragment size.
  const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
                           Assembler.getSymbolOffset(*StartLabel);
  FragmentSizes.push_back(HotSize);
  // Split fragment sizes.
  uint64_t ColdSize = 0;
  for (const auto &Labels : SplitLabels) {
    uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
                    Assembler.getSymbolOffset(*Labels.first);
    FragmentSizes.push_back(Size);
    ColdSize += Size;
  }

  // Populate new start and end offsets of each basic block.
  uint64_t FragmentIndex = 0;
  for (FunctionFragment &FF : BF.getLayout().fragments()) {
    BinaryBasicBlock *PrevBB = nullptr;
    for (BinaryBasicBlock *BB : FF) {
      const uint64_t BBStartOffset =
          Assembler.getSymbolOffset(*(BB->getLabel()));
      BB->setOutputStartAddress(BBStartOffset);
      if (PrevBB)
        PrevBB->setOutputEndAddress(BBStartOffset);
      PrevBB = BB;
    }
    if (PrevBB)
      PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
    FragmentIndex++;
  }

  // Clean up the effect of the code emission.
  for (const MCSymbol &Symbol : Assembler.symbols()) {
    MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
    MutableSymbol->setUndefined();
    MutableSymbol->setIsRegistered(false);
  }

  return std::make_pair(HotSize, ColdSize);
}
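
// Example, assuming `BC` and a simple BinaryFunction `BF`: the returned pair
// is (main fragment size, combined split fragment size) in bytes.
//
//   size_t HotSize, ColdSize;
//   std::tie(HotSize, ColdSize) =
//       BC.calculateEmittedSize(BF, /*FixBranches=*/true);
//   if (HotSize > BF.getMaxSize())
//     ... the optimized code will not fit into the original slot ...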

bool BinaryContext::validateInstructionEncoding(
    ArrayRef<uint8_t> InputSequence) const {
  MCInst Inst;
  uint64_t InstSize;
  DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
  assert(InstSize == InputSequence.size() &&
         "Disassembled instruction size does not match the sequence.");

  SmallString<256> Code;
  SmallVector<MCFixup, 4> Fixups;

  MCE->encodeInstruction(Inst, Code, Fixups, *STI);
  auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
  if (InputSequence != OutputSequence) {
    if (opts::Verbosity > 1) {
      this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
                   << " input: " << InputSequence << '\n'
                   << " output: " << OutputSequence << '\n';
    }
    return false;
  }

  return true;
}
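
// Example, assuming `Bytes` holds the raw bytes of exactly one disassembled
// instruction from the input file:
//
//   if (!BC.validateInstructionEncoding(Bytes))
//     ... the instruction does not round-trip through the encoder, so
//     ... re-emitting it could silently change the binary ...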

uint64_t BinaryContext::getHotThreshold() const {
  static uint64_t Threshold = 0;
  if (Threshold == 0) {
    Threshold = std::max(
        (uint64_t)opts::ExecutionCountThreshold,
        NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
  }
  return Threshold;
}
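
// Worked example of the computation above: with an ExecutionCountThreshold
// of 100, SumExecutionCount of 1,000,000, and 1,000 profiled functions, the
// profile-derived term is 1,000,000 / (2 * 1,000) = 500, so the threshold
// is max(100, 500) = 500.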

BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
    uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
  auto FI = BinaryFunctions.upper_bound(Address);
  if (FI == BinaryFunctions.begin())
    return nullptr;
  --FI;

  const uint64_t UsedSize =
      UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();

  if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
    return nullptr;

  return &FI->second;
}
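
// Example, assuming placeholders `BC` and `Addr`; UseMaxSize extends the
// lookup to the function's maximum occupied size rather than its exact size:
//
//   if (BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(
//           Addr, /*CheckPastEnd=*/false, /*UseMaxSize=*/true))
//     ... `Addr` lies within `BF` ...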

BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
  // First, try to find a function starting at the given address. If the
  // function was folded, this will get us the original folded function if it
  // wasn't removed from the list, e.g. in non-relocation mode.
  auto BFI = BinaryFunctions.find(Address);
  if (BFI != BinaryFunctions.end())
    return &BFI->second;

  // We might have folded the function matching the object at the given
  // address. In that case, we look for a function matching the symbol
  // registered at the original address. The new function (the one that the
  // original was folded into) will hold the symbol.
  if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
    uint64_t EntryID = 0;
    BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
    if (BF && EntryID == 0)
      return BF;
  }
  return nullptr;
}
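
// Note the contrast with getBinaryFunctionContainingAddress(): this lookup
// succeeds only for a function's entry address (including the primary entry
// of a folded function); interior addresses yield nullptr.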

/// Deregister the JumpTable registered at a given \p Address and delete it.
void BinaryContext::deleteJumpTable(uint64_t Address) {
  assert(JumpTables.count(Address) && "Must have a jump table at address");
  JumpTable *JT = JumpTables.at(Address);
  for (BinaryFunction *Parent : JT->Parents)
    Parent->JumpTables.erase(Address);
  JumpTables.erase(Address);
  delete JT;
}

DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
    const DWARFAddressRangesVector &InputRanges) const {
  DebugAddressRangesVector OutputRanges;

  for (const DWARFAddressRange Range : InputRanges) {
    auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
    while (BFI != BinaryFunctions.end()) {
      const BinaryFunction &Function = BFI->second;
      if (Function.getAddress() >= Range.HighPC)
        break;
      const DebugAddressRangesVector FunctionRanges =
          Function.getOutputAddressRanges();
      llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
      std::advance(BFI, 1);
    }
  }

  return OutputRanges;
}
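
// Example, assuming `InputRanges` holds a compile unit's pre-optimization
// address ranges:
//
//   DebugAddressRangesVector OutputRanges =
//       BC.translateModuleAddressRanges(InputRanges);
//   // OutputRanges now covers the post-BOLT addresses of every function
//   // that starts inside InputRanges.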

} // namespace bolt
} // namespace llvm
|