2021-12-21 10:21:41 -08:00
|
|
|
//===- bolt/Core/BinarySection.cpp - Section in a binary file -------------===//
|
2018-01-23 15:10:24 -08:00
|
|
|
//
|
2021-03-15 18:04:18 -07:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2018-01-23 15:10:24 -08:00
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
//
|
2021-12-21 10:21:41 -08:00
|
|
|
// This file implements the BinarySection class.
|
|
|
|
|
//
|
2018-01-23 15:10:24 -08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2021-10-08 11:47:10 -07:00
|
|
|
#include "bolt/Core/BinarySection.h"
|
|
|
|
|
#include "bolt/Core/BinaryContext.h"
|
|
|
|
|
#include "bolt/Utils/Utils.h"
|
2021-04-30 13:54:02 -07:00
|
|
|
#include "llvm/MC/MCStreamer.h"
|
2018-02-01 16:33:43 -08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
|
|
2019-11-19 14:47:49 -08:00
|
|
|
#define DEBUG_TYPE "bolt"
|
2018-01-23 15:10:24 -08:00
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
using namespace bolt;
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
namespace opts {
|
|
|
|
|
extern cl::opt<bool> PrintRelocations;
|
2019-11-19 14:47:49 -08:00
|
|
|
extern cl::opt<bool> HotData;
|
2021-12-14 16:52:51 -08:00
|
|
|
} // namespace opts
|
2018-02-01 16:33:43 -08:00
|
|
|
|
2022-10-04 17:12:02 -07:00
|
|
|
// Out-of-class definition of the static data member BinarySection::Count,
// initialized to zero.
// NOTE(review): presumably a running counter of section objects; confirm
// against the class declaration, which is not visible from this file.
uint64_t BinarySection::Count = 0;
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
/// Forward the ELF-format query to the binary context (BC) of this section.
bool BinarySection::isELF() const {
  return BC.isELF();
}
|
2020-02-11 17:54:48 -08:00
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
/// Forward the Mach-O-format query to the binary context (BC) of this section.
bool BinarySection::isMachO() const {
  return BC.isMachO();
}
|
2020-09-24 03:22:31 -07:00
|
|
|
|
2018-06-06 03:17:32 -07:00
|
|
|
uint64_t
|
|
|
|
|
BinarySection::hash(const BinaryData &BD,
|
|
|
|
|
std::map<const BinaryData *, uint64_t> &Cache) const {
|
|
|
|
|
auto Itr = Cache.find(&BD);
|
|
|
|
|
if (Itr != Cache.end())
|
|
|
|
|
return Itr->second;
|
|
|
|
|
|
2023-03-24 21:50:07 +03:00
|
|
|
hash_code Hash =
|
|
|
|
|
hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName()));
|
|
|
|
|
|
|
|
|
|
Cache[&BD] = Hash;
|
|
|
|
|
|
|
|
|
|
if (!containsRange(BD.getAddress(), BD.getSize()))
|
|
|
|
|
return Hash;
|
2018-06-06 03:17:32 -07:00
|
|
|
|
2021-04-08 00:19:26 -07:00
|
|
|
uint64_t Offset = BD.getAddress() - getAddress();
|
|
|
|
|
const uint64_t EndOffset = BD.getEndAddress() - getAddress();
|
2018-06-06 03:17:32 -07:00
|
|
|
auto Begin = Relocations.lower_bound(Relocation{Offset, 0, 0, 0, 0});
|
|
|
|
|
auto End = Relocations.upper_bound(Relocation{EndOffset, 0, 0, 0, 0});
|
2021-04-08 00:19:26 -07:00
|
|
|
const StringRef Contents = getContents();
|
2018-06-06 03:17:32 -07:00
|
|
|
|
|
|
|
|
while (Begin != End) {
|
2021-04-08 00:19:26 -07:00
|
|
|
const Relocation &Rel = *Begin++;
|
2018-06-06 03:17:32 -07:00
|
|
|
Hash = hash_combine(
|
2021-12-14 16:52:51 -08:00
|
|
|
Hash, hash_value(Contents.substr(Offset, Begin->Offset - Offset)));
|
2021-12-20 11:07:46 -08:00
|
|
|
if (BinaryData *RelBD = BC.getBinaryDataByName(Rel.Symbol->getName()))
|
2018-06-06 03:17:32 -07:00
|
|
|
Hash = hash_combine(Hash, hash(*RelBD, Cache));
|
|
|
|
|
Offset = Rel.Offset + Rel.getSize();
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
Hash = hash_combine(Hash,
|
|
|
|
|
hash_value(Contents.substr(Offset, EndOffset - Offset)));
|
2018-06-06 03:17:32 -07:00
|
|
|
|
|
|
|
|
Cache[&BD] = Hash;
|
|
|
|
|
|
|
|
|
|
return Hash;
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-22 12:05:12 -07:00
|
|
|
void BinarySection::emitAsData(MCStreamer &Streamer,
|
|
|
|
|
const Twine &SectionName) const {
|
2019-11-19 14:47:49 -08:00
|
|
|
StringRef SectionContents = getContents();
|
2021-04-08 00:19:26 -07:00
|
|
|
MCSectionELF *ELFSection =
|
|
|
|
|
BC.Ctx->getELFSection(SectionName, getELFType(), getELFFlags());
|
2019-11-19 14:47:49 -08:00
|
|
|
|
2022-06-10 22:50:55 -07:00
|
|
|
Streamer.switchSection(ELFSection);
|
2022-11-24 15:23:06 +00:00
|
|
|
Streamer.emitValueToAlignment(getAlign());
|
2019-11-19 14:47:49 -08:00
|
|
|
|
|
|
|
|
if (BC.HasRelocations && opts::HotData && isReordered())
|
2020-12-01 16:29:39 -08:00
|
|
|
Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_start"));
|
2019-11-19 14:47:49 -08:00
|
|
|
|
2020-12-01 16:29:39 -08:00
|
|
|
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting "
|
|
|
|
|
<< (isAllocatable() ? "" : "non-")
|
|
|
|
|
<< "allocatable data section " << SectionName << '\n');
|
2019-11-19 14:47:49 -08:00
|
|
|
|
|
|
|
|
if (!hasRelocations()) {
|
2020-12-01 16:29:39 -08:00
|
|
|
Streamer.emitBytes(SectionContents);
|
2019-11-19 14:47:49 -08:00
|
|
|
} else {
|
|
|
|
|
uint64_t SectionOffset = 0;
|
[BOLT] Implement composed relocations
BOLT currently assumes (and asserts) that no two relocations can share
the same offset. Although this is true in most cases, ELF has a feature
called (not sure if this is an official term) composed relocations [1]
where multiple relocations at the same offset are combined to produce a
single value.
For example, to support label subtraction (a - b) on RISC-V, two
relocations are emitted at the same offset:
- R_RISCV_ADD32 a + 0
- R_RISCV_SUB32 b + 0
which, when combined, will produce the value of (a - b).
To support this in BOLT, first, RelocationSetType in BinarySection is
changed to be a multiset in order to allow it to store multiple
relocations at the same offset.
Next, Relocation::emit() is changed to receive an iterator pair of
relocations. In most cases, these will point to a single relocation in
which case its behavior is unaltered by this patch. For composed
relocations, they should point to all relocations at the same offset and
the following happens:
- A new method Relocation::createExpr() is called for every relocation.
This method is essentially the same as the original emit() except that
it returns the MCExpr without emitting it.
- The MCExprs of relocations i and i+1 are combined using the opcode
returned by the new method Relocation::getComposeOpcodeFor().
- After combining all MCExprs, the last one is emitted.
Note that in the current patch, getComposeOpcodeFor() simply calls
llvm_unreachable() since none of the current targets use composed
relocations. This will change once the RISC-V target lands.
Finally, BinarySection::emitAsData() is updated to group relocations by
offset and emit them all at once.
Note that this means composed relocations are only supported in data
sections. Since this is the only place they seem to be used in RISC-V, I
believe it's reasonable to only support them there for now to avoid
further code complexity.
[1]: https://www.sco.com/developers/gabi/latest/ch4.reloc.html
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D146546
2023-06-19 16:51:43 +02:00
|
|
|
for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) {
|
|
|
|
|
auto RelocationOffset = RI->Offset;
|
|
|
|
|
assert(RelocationOffset < SectionContents.size() && "overflow detected");
|
|
|
|
|
|
|
|
|
|
if (SectionOffset < RelocationOffset) {
|
|
|
|
|
Streamer.emitBytes(SectionContents.substr(
|
|
|
|
|
SectionOffset, RelocationOffset - SectionOffset));
|
|
|
|
|
SectionOffset = RelocationOffset;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Get iterators to all relocations with the same offset. Usually, there
|
|
|
|
|
// is only one such relocation but there can be more for composed
|
|
|
|
|
// relocations.
|
|
|
|
|
auto ROI = RI;
|
|
|
|
|
auto ROE = Relocations.upper_bound(RelocationOffset);
|
|
|
|
|
|
|
|
|
|
// Start from the next offset on the next iteration.
|
|
|
|
|
RI = ROE;
|
|
|
|
|
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
// Skip undefined symbols.
|
[BOLT] Implement composed relocations
BOLT currently assumes (and asserts) that no two relocations can share
the same offset. Although this is true in most cases, ELF has a feature
called (not sure if this is an official term) composed relocations [1]
where multiple relocations at the same offset are combined to produce a
single value.
For example, to support label subtraction (a - b) on RISC-V, two
relocations are emitted at the same offset:
- R_RISCV_ADD32 a + 0
- R_RISCV_SUB32 b + 0
which, when combined, will produce the value of (a - b).
To support this in BOLT, first, RelocationSetType in BinarySection is
changed to be a multiset in order to allow it to store multiple
relocations at the same offset.
Next, Relocation::emit() is changed to receive an iterator pair of
relocations. In most cases, these will point to a single relocation in
which case its behavior is unaltered by this patch. For composed
relocations, they should point to all relocations at the same offset and
the following happens:
- A new method Relocation::createExpr() is called for every relocation.
This method is essentially the same as the original emit() except that
it returns the MCExpr without emitting it.
- The MCExprs of relocations i and i+1 are combined using the opcode
returned by the new method Relocation::getComposeOpcodeFor().
- After combining all MCExprs, the last one is emitted.
Note that in the current patch, getComposeOpcodeFor() simply calls
llvm_unreachable() since none of the current targets use composed
relocations. This will change once the RISC-V target lands.
Finally, BinarySection::emitAsData() is updated to group relocations by
offset and emit them all at once.
Note that this means composed relocations are only supported in data
sections. Since this is the only place they seem to be used in RISC-V, I
believe it's reasonable to only support them there for now to avoid
further code complexity.
[1]: https://www.sco.com/developers/gabi/latest/ch4.reloc.html
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D146546
2023-06-19 16:51:43 +02:00
|
|
|
auto HasUndefSym = [this](const auto &Relocation) {
|
|
|
|
|
return BC.UndefinedSymbols.count(Relocation.Symbol);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (std::any_of(ROI, ROE, HasUndefSym))
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
continue;
|
[BOLT] Implement composed relocations
BOLT currently assumes (and asserts) that no two relocations can share
the same offset. Although this is true in most cases, ELF has a feature
called (not sure if this is an official term) composed relocations [1]
where multiple relocations at the same offset are combined to produce a
single value.
For example, to support label subtraction (a - b) on RISC-V, two
relocations are emitted at the same offset:
- R_RISCV_ADD32 a + 0
- R_RISCV_SUB32 b + 0
which, when combined, will produce the value of (a - b).
To support this in BOLT, first, RelocationSetType in BinarySection is
changed to be a multiset in order to allow it to store multiple
relocations at the same offset.
Next, Relocation::emit() is changed to receive an iterator pair of
relocations. In most cases, these will point to a single relocation in
which case its behavior is unaltered by this patch. For composed
relocations, they should point to all relocations at the same offset and
the following happens:
- A new method Relocation::createExpr() is called for every relocation.
This method is essentially the same as the original emit() except that
it returns the MCExpr without emitting it.
- The MCExprs of relocations i and i+1 are combined using the opcode
returned by the new method Relocation::getComposeOpcodeFor().
- After combining all MCExprs, the last one is emitted.
Note that in the current patch, getComposeOpcodeFor() simply calls
llvm_unreachable() since none of the current targets use composed
relocations. This will change once the RISC-V target lands.
Finally, BinarySection::emitAsData() is updated to group relocations by
offset and emit them all at once.
Note that this means composed relocations are only supported in data
sections. Since this is the only place they seem to be used in RISC-V, I
believe it's reasonable to only support them there for now to avoid
further code complexity.
[1]: https://www.sco.com/developers/gabi/latest/ch4.reloc.html
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D146546
2023-06-19 16:51:43 +02:00
|
|
|
|
2023-06-19 20:02:47 -07:00
|
|
|
#ifndef NDEBUG
|
[BOLT] Implement composed relocations
BOLT currently assumes (and asserts) that no two relocations can share
the same offset. Although this is true in most cases, ELF has a feature
called (not sure if this is an official term) composed relocations [1]
where multiple relocations at the same offset are combined to produce a
single value.
For example, to support label subtraction (a - b) on RISC-V, two
relocations are emitted at the same offset:
- R_RISCV_ADD32 a + 0
- R_RISCV_SUB32 b + 0
which, when combined, will produce the value of (a - b).
To support this in BOLT, first, RelocationSetType in BinarySection is
changed to be a multiset in order to allow it to store multiple
relocations at the same offset.
Next, Relocation::emit() is changed to receive an iterator pair of
relocations. In most cases, these will point to a single relocation in
which case its behavior is unaltered by this patch. For composed
relocations, they should point to all relocations at the same offset and
the following happens:
- A new method Relocation::createExpr() is called for every relocation.
This method is essentially the same as the original emit() except that
it returns the MCExpr without emitting it.
- The MCExprs of relocations i and i+1 are combined using the opcode
returned by the new method Relocation::getComposeOpcodeFor().
- After combining all MCExprs, the last one is emitted.
Note that in the current patch, getComposeOpcodeFor() simply calls
llvm_unreachable() since none of the current targets use composed
relocations. This will change once the RISC-V target lands.
Finally, BinarySection::emitAsData() is updated to group relocations by
offset and emit them all at once.
Note that this means composed relocations are only supported in data
sections. Since this is the only place they seem to be used in RISC-V, I
believe it's reasonable to only support them there for now to avoid
further code complexity.
[1]: https://www.sco.com/developers/gabi/latest/ch4.reloc.html
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D146546
2023-06-19 16:51:43 +02:00
|
|
|
for (const auto &Relocation : make_range(ROI, ROE)) {
|
|
|
|
|
LLVM_DEBUG(
|
|
|
|
|
dbgs() << "BOLT-DEBUG: emitting relocation for symbol "
|
|
|
|
|
<< (Relocation.Symbol ? Relocation.Symbol->getName()
|
|
|
|
|
: StringRef("<none>"))
|
|
|
|
|
<< " at offset 0x" << Twine::utohexstr(Relocation.Offset)
|
|
|
|
|
<< " with size "
|
|
|
|
|
<< Relocation::getSizeForType(Relocation.Type) << '\n');
|
2019-11-19 14:47:49 -08:00
|
|
|
}
|
2023-06-19 20:02:47 -07:00
|
|
|
#endif
|
[BOLT] Implement composed relocations
BOLT currently assumes (and asserts) that no two relocations can share
the same offset. Although this is true in most cases, ELF has a feature
called (not sure if this is an official term) composed relocations [1]
where multiple relocations at the same offset are combined to produce a
single value.
For example, to support label subtraction (a - b) on RISC-V, two
relocations are emitted at the same offset:
- R_RISCV_ADD32 a + 0
- R_RISCV_SUB32 b + 0
which, when combined, will produce the value of (a - b).
To support this in BOLT, first, RelocationSetType in BinarySection is
changed to be a multiset in order to allow it to store multiple
relocations at the same offset.
Next, Relocation::emit() is changed to receive an iterator pair of
relocations. In most cases, these will point to a single relocation in
which case its behavior is unaltered by this patch. For composed
relocations, they should point to all relocations at the same offset and
the following happens:
- A new method Relocation::createExpr() is called for every relocation.
This method is essentially the same as the original emit() except that
it returns the MCExpr without emitting it.
- The MCExprs of relocations i and i+1 are combined using the opcode
returned by the new method Relocation::getComposeOpcodeFor().
- After combining all MCExprs, the last one is emitted.
Note that in the current patch, getComposeOpcodeFor() simply calls
llvm_unreachable() since none of the current targets use composed
relocations. This will change once the RISC-V target lands.
Finally, BinarySection::emitAsData() is updated to group relocations by
offset and emit them all at once.
Note that this means composed relocations are only supported in data
sections. Since this is the only place they seem to be used in RISC-V, I
believe it's reasonable to only support them there for now to avoid
further code complexity.
[1]: https://www.sco.com/developers/gabi/latest/ch4.reloc.html
Reviewed By: rafauler
Differential Revision: https://reviews.llvm.org/D146546
2023-06-19 16:51:43 +02:00
|
|
|
|
|
|
|
|
size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer);
|
2019-11-19 14:47:49 -08:00
|
|
|
SectionOffset += RelocationSize;
|
|
|
|
|
}
|
|
|
|
|
assert(SectionOffset <= SectionContents.size() && "overflow error");
|
2021-12-20 11:07:46 -08:00
|
|
|
if (SectionOffset < SectionContents.size())
|
2020-12-01 16:29:39 -08:00
|
|
|
Streamer.emitBytes(SectionContents.substr(SectionOffset));
|
2019-11-19 14:47:49 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (BC.HasRelocations && opts::HotData && isReordered())
|
2020-12-01 16:29:39 -08:00
|
|
|
Streamer.emitLabel(BC.Ctx->getOrCreateSymbol("__hot_data_end"));
|
2019-11-19 14:47:49 -08:00
|
|
|
}
|
|
|
|
|
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
/// Write all patches and pending relocations of this section into \p OS at
/// their file offsets, then drop the pending relocations.
///
/// \param OS       stream that is patched in place via pwrite().
/// \param Resolver callback returning the value of a relocation's symbol.
void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
                                            SymbolResolverFuncTy Resolver) {
  const bool NothingToWrite = PendingRelocations.empty() && Patches.empty();
  if (NothingToWrite)
    return;

  const uint64_t BaseAddress = getAddress();

  // Relocations are applied on top of the original section contents. For
  // allocatable sections that means the input file offset, because the output
  // file offset could change (e.g. for a new instance of .text). For
  // non-allocatable sections the output offset should always be valid.
  const uint64_t BaseFileOffset =
      isAllocatable() ? getInputFileOffset() : getOutputFileOffset();

  LLVM_DEBUG(
      dbgs() << "BOLT-DEBUG: flushing pending relocations for section "
             << getName() << '\n'
             << " address: 0x" << Twine::utohexstr(BaseAddress) << '\n'
             << " offset: 0x" << Twine::utohexstr(BaseFileOffset) << '\n');

  // Raw byte patches go first.
  for (BinaryPatch &Patch : Patches) {
    const uint64_t PatchFileOffset = BaseFileOffset + Patch.Offset;
    OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(), PatchFileOffset);
  }

  for (Relocation &Reloc : PendingRelocations) {
    const uint64_t TargetAddress = BaseAddress + Reloc.Offset;
    const uint64_t TargetFileOffset = BaseFileOffset + Reloc.Offset;

    // Addend plus the resolved symbol value (when there is a symbol), encoded
    // for this relocation type at the address being patched.
    const uint64_t SymbolValue = Reloc.Symbol ? Resolver(Reloc.Symbol) : 0;
    const uint64_t Value = Relocation::encodeValue(
        Reloc.Type, Reloc.Addend + SymbolValue, TargetAddress);

    // Only the first getSizeForType() bytes of Value are written.
    OS.pwrite(reinterpret_cast<const char *>(&Value),
              Relocation::getSizeForType(Reloc.Type), TargetFileOffset);

    LLVM_DEBUG(
        dbgs() << "BOLT-DEBUG: writing value 0x" << Twine::utohexstr(Value)
               << " of size " << Relocation::getSizeForType(Reloc.Type)
               << " at section offset 0x" << Twine::utohexstr(Reloc.Offset)
               << " address 0x" << Twine::utohexstr(TargetAddress)
               << " file offset 0x" << Twine::utohexstr(TargetFileOffset)
               << '\n');
  }

  clearList(PendingRelocations);
}
|
|
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
/// Release the buffers this section is responsible for freeing.
///
/// A reordered section frees the buffer returned by getData(). Otherwise the
/// output buffer is freed only for a non-allocatable section without a valid
/// section ID whose output contents are not the contents of the referenced
/// input section.
BinarySection::~BinarySection() {
  if (isReordered()) {
    delete[] getData();
    return;
  }

  // Allocatable sections and sections with a valid ID free nothing here.
  if (isAllocatable() || hasValidSectionID())
    return;

  // Output contents that still point at the input section's data must not be
  // freed.
  if (hasSectionRef() && OutputContents.data() == getContents(Section).data())
    return;

  delete[] getOutputData();
}
|
|
|
|
|
|
2021-12-14 16:52:51 -08:00
|
|
|
/// Drop every relocation stored in Relocations by handing it to clearList().
void BinarySection::clearRelocations() {
  clearList(Relocations);
}
|
[BOLT] Support for lite mode with relocations
Summary:
Add '-lite' support for relocations for improved processing time,
memory consumption, and more resilient processing of binaries with
embedded assembly code.
In lite relocation mode, BOLT will skip full processing of functions
without a profile. It will run scanExternalRefs() on such functions
to discover external references and to create internal relocations
to update references to optimized functions.
Note that we could have relied on the compiler/linker to provide
relocations for function references. However, there's no assurance
that all such references are reported. E.g., the compiler can resolve
inter-procedural references internally, leaving no relocations
for the linker.
The scan process takes about <10 seconds per 100MB of code on modern
hardware. It's a reasonable overhead to live with considering the
flexibility it provides.
If BOLT fails to scan or disassemble a function, .e.g., due to a data
object embedded in code, or an unsupported instruction, it enables a
patching mode to guarantee that the failed function will call
optimized/moved versions of functions. The patching happens at original
function entry points.
'-skip=<func1,func2,...>' option now can be used to skip processing of
arbitrary functions in the relocation mode.
With '-use-old-text' or '-strict' we require all functions to be
processed. As such, it is incompatible with '-lite' option,
and '-skip' option will only disable optimizations of listed
functions, not their disassembly and emission.
(cherry picked from FBD22040717)
2020-06-15 00:15:47 -07:00
|
|
|
|
2018-02-01 16:33:43 -08:00
|
|
|
/// Write a human-readable summary of this section to \p OS.
///
/// The summary has the form
///   <name>, 0x<address>, <size> (0x<output address>, <output size>),
///   data = <data>, output data = <output data>
/// followed by one " (...)" tag for each of the allocatable, virtual and TLS
/// properties that holds. When opts::PrintRelocations is set, every
/// relocation of the section is appended on its own line.
void BinarySection::print(raw_ostream &OS) const {
  // Name and input layout.
  OS << getName() << ", 0x" << Twine::utohexstr(getAddress()) << ", "
     << getSize();

  // Output layout, in parentheses.
  OS << " (0x" << Twine::utohexstr(getOutputAddress()) << ", "
     << getOutputSize() << ")";

  // Input and output data as returned by the accessors.
  OS << ", data = " << getData() << ", output data = " << getOutputData();

  // Property tags, emitted in a fixed order.
  if (isAllocatable()) {
    OS << " (allocatable)";
  }
  if (isVirtual()) {
    OS << " (virtual)";
  }
  if (isTLS()) {
    OS << " (tls)";
  }

  // NOTE(review): the separator literal below is reproduced exactly as it
  // appears in the reviewed text; confirm the amount of whitespace after the
  // newline against the checked-in file.
  if (opts::PrintRelocations) {
    for (const Relocation &Rel : relocations()) {
      OS << "\n " << Rel;
    }
  }
}
|
2018-04-20 20:03:31 -07:00
|
|
|
|
2021-05-12 23:29:04 -07:00
|
|
|
/// Compute the relocation set that corresponds to this section's contents
/// after its data objects have been assigned output offsets.
///
/// Every existing relocation is attributed to the atomic root of the
/// BinaryData object that contains the relocated address. Its new offset is
/// the root's output offset plus the relocation's displacement from the
/// root's original address.
///
/// Relocations are dropped when the owning object is a jump table, or when
/// the object has not been moved and \p Inplace is false.
///
/// \param Inplace when true, relocations for objects that were not moved are
///        kept as well.
/// \returns the remapped relocations; the section itself is not modified.
///
/// Pending relocations are not supported and must be empty (asserted).
BinarySection::RelocationSetType
BinarySection::reorderRelocations(bool Inplace) const {
  assert(PendingRelocations.empty() &&
         "reordering pending relocations not supported");
  RelocationSetType NewRelocations;
  for (const Relocation &Rel : relocations()) {
    const uint64_t RelAddr = Rel.Offset + getAddress();
    BinaryData *BD = BC.getBinaryDataContainingAddress(RelAddr);
    // Validate the lookup result *before* dereferencing it; previously the
    // only null check came after BD->getAtomicRoot() had already been called.
    assert(BD && "relocation address is not covered by any binary data");
    BD = BD->getAtomicRoot();
    assert(BD && "binary data object has no atomic root");

    // Skip objects that stay in place (unless reordering in place) and jump
    // tables.
    if ((!BD->isMoved() && !Inplace) || BD->isJumpTable())
      continue;

    Relocation NewRel(Rel);
    // Displacement of the relocation within its owning object.
    const uint64_t RelOffset = RelAddr - BD->getAddress();
    NewRel.Offset = BD->getOutputOffset() + RelOffset;
    assert(NewRel.Offset < getSize());
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel
                      << "\n");
    NewRelocations.emplace(std::move(NewRel));
  }
  return NewRelocations;
}
|
|
|
|
|
|
|
|
|
|
/// Rebuild the section contents so that the objects listed in \p Order are
/// placed at their output offsets, and remap the section's relocations to
/// match.
///
/// \param Order   objects to emit, in the order they are written. Each must
///                be atomic, moveable and not a jump table (asserted).
/// \param Inplace forwarded to reorderRelocations(); when true, every object
///                in \p Order must have been moved (asserted).
///
/// On return the section is marked as reordered, and Contents,
/// OutputContents and OutputSize all describe the newly built buffer.
void BinarySection::reorderContents(const std::vector<BinaryData *> &Order,
                                    bool Inplace) {
  IsReordered = true;

  // Remap relocations first, while the original layout is still in place.
  Relocations = reorderRelocations(Inplace);

  std::string Buffer;
  raw_string_ostream Stream(Buffer);
  const char *OldBytes = Contents.data();
  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: reorderContents for " << Name << "\n");

  for (BinaryData *BD : Order) {
    assert((BD->isMoved() || !Inplace) && !BD->isJumpTable());
    assert(BD->isAtomic() && BD->isMoveable());

    // Position of the object inside the original contents.
    const uint64_t SrcOffset = BD->getAddress() - getAddress();
    assert(SrcOffset < Contents.size());
    assert(SrcOffset == BD->getOffset());

    // Zero-fill the gap up to the object's output offset.
    const auto TargetOffset = BD->getOutputOffset();
    while (Stream.tell() < TargetOffset) {
      Stream.write(static_cast<unsigned char>(0));
    }

    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ "
                      << Stream.tell() << "\n");
    Stream.write(OldBytes + SrcOffset, BD->getOutputSize());
  }

  if (Relocations.empty()) {
    // With no relocations left, append a phony one at the end of the
    // reordered segment so that LLVM still recognizes and maps this section.
    MCSymbol *PhonySym = BC.registerNameAtAddress("Zero", 0, 0, 0);
    addRelocation(Stream.tell(), PhonySym, Relocation::getAbs64(), 0xdeadbeef);

    // Reserve the eight bytes that the phony relocation refers to.
    const uint64_t Filler = 0;
    Stream.write(reinterpret_cast<const char *>(&Filler), sizeof(Filler));
  }

  // Copy the assembled bytes and point both content views at the copy.
  auto *Copied = reinterpret_cast<char *>(copyByteArray(Stream.str()));
  OutputContents = StringRef(Copied, Stream.str().size());
  Contents = OutputContents;
  OutputSize = Contents.size();
}
|
2019-08-02 11:20:13 -07:00
|
|
|
|
|
|
|
|
/// Serialize one ELF note record and return its raw bytes.
///
/// The record is laid out as:
///   - 4 bytes: name size (length of \p NameStr plus the terminating NUL)
///   - 4 bytes: descriptor size (length of \p DescStr)
///   - 4 bytes: \p Type
///   - the name followed by a NUL byte, zero-padded to a multiple of 4 bytes
///   - the descriptor, zero-padded to a multiple of 4 bytes
///
/// The three header words are copied from memory as-is, i.e. in the byte
/// order of the host.
///
/// \param NameStr note name (without the terminating NUL).
/// \param DescStr note descriptor bytes.
/// \param Type    note type.
/// \returns the encoded note.
std::string BinarySection::encodeELFNote(StringRef NameStr, StringRef DescStr,
                                         uint32_t Type) {
  std::string Encoded;
  raw_string_ostream Stream(Encoded);

  // Emits one 32-bit header word exactly as it is stored in memory.
  auto WriteWord = [&Stream](uint32_t Word) {
    Stream.write(reinterpret_cast<const char *>(&Word), 4);
  };
  // Emits NUL bytes until a field of the given length reaches 4-byte
  // alignment. Generic so the length keeps the type the caller passes.
  auto PadToWord = [&Stream](auto Length) {
    const auto Limit = alignTo(Length, 4);
    for (uint64_t Pos = Length; Pos < Limit; ++Pos)
      Stream << '\0';
  };

  const uint32_t NameSz = NameStr.size() + 1; // Includes the trailing NUL.
  const uint32_t DescSz = DescStr.size();

  // Header.
  WriteWord(NameSz);
  WriteWord(DescSz);
  WriteWord(Type);

  // Name, NUL terminator and padding.
  Stream << NameStr << '\0';
  PadToWord(NameSz);

  // Descriptor and padding.
  Stream << DescStr;
  PadToWord(DescStr.size());

  return Stream.str();
}
|