2020-04-02 11:54:05 -07:00
|
|
|
//===- SymbolTable.cpp ----------------------------------------------------===//
|
|
|
|
|
//
|
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
|
|
#include "SymbolTable.h"
|
2021-07-15 12:54:42 -04:00
|
|
|
#include "ConcatOutputSection.h"
|
2020-12-13 19:31:33 -08:00
|
|
|
#include "Config.h"
|
2020-04-02 11:54:05 -07:00
|
|
|
#include "InputFiles.h"
|
[lld-macho] Associate compact unwind entries with function symbols
Compact unwind entries (CUEs) contain pointers to their respective
function symbols. However, during the link process, it's far more useful
to have pointers from the function symbol to the CUE than vice versa.
This diff adds that pointer in the form of `Defined::compactUnwind`.
In particular, when doing dead-stripping, we want to mark CUEs live when
their function symbol is live; and when doing ICF, we want to dedup
sections iff the symbols in that section have identical CUEs. In both
cases, we want to be able to locate the symbols within a given section,
as well as locate the CUEs belonging to those symbols. So this diff also
adds `InputSection::symbols`.
The ultimate goal of this refactor is to have ICF support dedup'ing
functions with unwind info, but that will be handled in subsequent
diffs. This diff focuses on simplifying `-dead_strip` --
`findFunctionsWithUnwindInfo` is no longer necessary, and
`Defined::isLive()` is now a lot simpler. Moreover, UnwindInfoSection no
longer has to check for dead CUEs -- we simply avoid adding them in the
first place.
Additionally, we now support stripping of dead LSDAs, which follows
quite naturally since `markLive()` can now reach them via the CUEs.
Reviewed By: #lld-macho, gkm
Differential Revision: https://reviews.llvm.org/D109944
2021-10-26 16:04:04 -04:00
|
|
|
#include "InputSection.h"
|
2020-04-02 11:54:05 -07:00
|
|
|
#include "Symbols.h"
|
2021-07-15 12:54:42 -04:00
|
|
|
#include "SyntheticSections.h"
|
2020-04-02 11:54:05 -07:00
|
|
|
#include "lld/Common/ErrorHandler.h"
|
|
|
|
|
#include "lld/Common/Memory.h"
|
2022-10-14 15:28:19 -04:00
|
|
|
#include "llvm/Demangle/Demangle.h"
|
2020-04-02 11:54:05 -07:00
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
using namespace lld;
|
|
|
|
|
using namespace lld::macho;
|
|
|
|
|
|
2021-03-10 16:45:18 -08:00
|
|
|
/// Looks \p cachedName up in symMap. Returns the symVector entry the map
/// points at, or nullptr when the map has no entry for that name.
Symbol *SymbolTable::find(CachedHashStringRef cachedName) {
  const auto entry = symMap.find(cachedName);
  return entry == symMap.end() ? nullptr : symVector[entry->second];
}
|
|
|
|
|
|
2021-04-15 21:14:29 -04:00
|
|
|
/// Returns the symbol slot registered under \p name, creating a fresh
/// SymbolUnion-backed slot if the name is not in symMap yet. The bool is true
/// exactly when a new slot was created.
///
/// The slot's isUsedInRegularObj flag is set when \p file is null or an
/// ObjFile; it is never cleared here.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
                                              const InputFile *file) {
  // Tentatively map the name to the index a brand-new symbol would receive;
  // the map leaves an existing entry untouched and tells us which case it was.
  auto [entry, isNew] = symMap.insert(
      {CachedHashStringRef(name), static_cast<int>(symVector.size())});

  Symbol *sym = nullptr;
  if (isNew) {
    // First time we see this name: allocate storage and register it.
    sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
    symVector.push_back(sym);
  } else {
    // Name already known: reuse the slot the map points at.
    sym = symVector[entry->second];
  }

  const bool fromRegularObj = !file || isa<ObjFile>(file);
  sym->isUsedInRegularObj |= fromRegularObj;
  return {sym, isNew};
}
|
|
|
|
|
|
2022-09-27 23:42:47 -07:00
|
|
|
namespace {
|
|
|
|
|
struct DuplicateSymbolDiag {
|
|
|
|
|
// Pair containing source location and source file
|
|
|
|
|
const std::pair<std::string, std::string> src1;
|
|
|
|
|
const std::pair<std::string, std::string> src2;
|
|
|
|
|
const Symbol *sym;
|
|
|
|
|
|
|
|
|
|
DuplicateSymbolDiag(const std::pair<std::string, std::string> src1,
|
|
|
|
|
const std::pair<std::string, std::string> src2,
|
|
|
|
|
const Symbol *sym)
|
|
|
|
|
: src1(src1), src2(src2), sym(sym) {}
|
|
|
|
|
};
|
|
|
|
|
SmallVector<DuplicateSymbolDiag> dupSymDiags;
|
|
|
|
|
} // namespace
|
|
|
|
|
|
[lld:MachO] Allow independent override of weak symbols aliased via .set (#167825)
Currently, if multiple external weak symbols are defined at the same
address in an object file (e.g., by using the .set assembler directive
to alias them to a single weak variable), ld64.lld treats them as a
single unit. When any one of these symbols is overridden by a strong
definition, all of the original weak symbols resolve to the strong
definition.
This patch changes the behavior in `transplantSymbolsAtOffset`. When a
weak symbol is being replaced by a strong one, only non-external (local)
symbols at the same offset are moved to the new symbol's section. Other
*external* symbols are no longer transplanted.
This allows each external weak symbol to be overridden independently.
This behavior is consistent with Apple's ld-classic, but diverges from
ld-prime in one case, as noted on
https://github.com/llvm/llvm-project/issues/167262 (this discrepancy has
recently been reported to Apple).
### Backward Compatibility
This change alters linker behavior for a specific scenario. The creation
of multiple external weak symbols aliased to the same address via
assembler directives is primarily an advanced technique. It's unlikely
that existing builds rely on the current behavior of all aliases being
overridden together.
If there are concerns, this could be put behind a linker option, but the
new default seems more correct, less surprising, and is consistent with
ld-classic.
### Testing
The new lit test `test/MachO/weak-alias-override.s` verifies this
behavior using llvm-nm.
Fixes #167262
2025-11-21 17:03:40 -08:00
|
|
|
// Move local symbols at \p fromOff in \p fromIsec into \p toIsec, unless that
|
|
|
|
|
// symbol is \p skip, in which case we just remove it.
|
2023-03-10 22:28:36 -05:00
|
|
|
static void transplantSymbolsAtOffset(InputSection *fromIsec,
|
|
|
|
|
InputSection *toIsec, Defined *skip,
|
|
|
|
|
uint64_t fromOff, uint64_t toOff) {
|
|
|
|
|
// Ensure the symbols will still be in address order after our insertions.
|
2024-12-22 21:50:15 +08:00
|
|
|
auto symSucceedsOff = [](uint64_t off, const Symbol *s) {
|
|
|
|
|
return cast<Defined>(s)->value > off;
|
|
|
|
|
};
|
|
|
|
|
assert(std::is_partitioned(toIsec->symbols.begin(), toIsec->symbols.end(),
|
|
|
|
|
[symSucceedsOff, toOff](const Symbol *s) {
|
|
|
|
|
return !symSucceedsOff(toOff, s);
|
|
|
|
|
}) &&
|
|
|
|
|
"Symbols in toIsec must be partitioned by toOff.");
|
|
|
|
|
auto insertIt = llvm::upper_bound(toIsec->symbols, toOff, symSucceedsOff);
|
2023-03-10 22:28:36 -05:00
|
|
|
llvm::erase_if(fromIsec->symbols, [&](Symbol *s) {
|
|
|
|
|
auto *d = cast<Defined>(s);
|
[lld:MachO] Allow independent override of weak symbols aliased via .set (#167825)
Currently, if multiple external weak symbols are defined at the same
address in an object file (e.g., by using the .set assembler directive
to alias them to a single weak variable), ld64.lld treats them as a
single unit. When any one of these symbols is overridden by a strong
definition, all of the original weak symbols resolve to the strong
definition.
This patch changes the behavior in `transplantSymbolsAtOffset`. When a
weak symbol is being replaced by a strong one, only non-external (local)
symbols at the same offset are moved to the new symbol's section. Other
*external* symbols are no longer transplanted.
This allows each external weak symbol to be overridden independently.
This behavior is consistent with Apple's ld-classic, but diverges from
ld-prime in one case, as noted on
https://github.com/llvm/llvm-project/issues/167262 (this discrepancy has
recently been reported to Apple).
### Backward Compatibility
This change alters linker behavior for a specific scenario. The creation
of multiple external weak symbols aliased to the same address via
assembler directives is primarily an advanced technique. It's unlikely
that existing builds rely on the current behavior of all aliases being
overridden together.
If there are concerns, this could be put behind a linker option, but the
new default seems more correct, less surprising, and is consistent with
ld-classic.
### Testing
The new lit test `test/MachO/weak-alias-override.s` verifies this
behavior using llvm-nm.
Fixes #167262
2025-11-21 17:03:40 -08:00
|
|
|
if (d == skip)
|
|
|
|
|
return true;
|
|
|
|
|
if (d->value != fromOff || d->isExternal())
|
2023-03-10 22:28:36 -05:00
|
|
|
return false;
|
[lld:MachO] Allow independent override of weak symbols aliased via .set (#167825)
Currently, if multiple external weak symbols are defined at the same
address in an object file (e.g., by using the .set assembler directive
to alias them to a single weak variable), ld64.lld treats them as a
single unit. When any one of these symbols is overridden by a strong
definition, all of the original weak symbols resolve to the strong
definition.
This patch changes the behavior in `transplantSymbolsAtOffset`. When a
weak symbol is being replaced by a strong one, only non-external (local)
symbols at the same offset are moved to the new symbol's section. Other
*external* symbols are no longer transplanted.
This allows each external weak symbol to be overridden independently.
This behavior is consistent with Apple's ld-classic, but diverges from
ld-prime in one case, as noted on
https://github.com/llvm/llvm-project/issues/167262 (this discrepancy has
recently been reported to Apple).
### Backward Compatibility
This change alters linker behavior for a specific scenario. The creation
of multiple external weak symbols aliased to the same address via
assembler directives is primarily an advanced technique. It's unlikely
that existing builds rely on the current behavior of all aliases being
overridden together.
If there are concerns, this could be put behind a linker option, but the
new default seems more correct, less surprising, and is consistent with
ld-classic.
### Testing
The new lit test `test/MachO/weak-alias-override.s` verifies this
behavior using llvm-nm.
Fixes #167262
2025-11-21 17:03:40 -08:00
|
|
|
|
|
|
|
|
// This repeated insertion will be quadratic unless insertIt is the end
|
|
|
|
|
// iterator. However, that is typically the case for files that have
|
|
|
|
|
// .subsections_via_symbols set.
|
|
|
|
|
insertIt = toIsec->symbols.insert(insertIt, d);
|
|
|
|
|
d->originalIsec = toIsec;
|
|
|
|
|
d->value = toOff;
|
|
|
|
|
// We don't want to have more than one unwindEntry at a given address, so
|
|
|
|
|
// drop the redundant ones. We can safely drop the unwindEntries of the
|
|
|
|
|
// symbols in fromIsec since we will be adding another unwindEntry as we
|
|
|
|
|
// finish parsing toIsec's file. (We can assume that toIsec has its own
|
|
|
|
|
// unwindEntry because of the ODR.)
|
|
|
|
|
d->originalUnwindEntry = nullptr;
|
2023-03-10 22:28:36 -05:00
|
|
|
return true;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
2021-03-12 17:26:12 -05:00
|
|
|
Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
|
2021-04-01 17:48:09 -07:00
|
|
|
InputSection *isec, uint64_t value,
|
|
|
|
|
uint64_t size, bool isWeakDef,
|
2023-05-15 02:00:29 -07:00
|
|
|
bool isPrivateExtern,
|
2021-11-08 19:50:34 -05:00
|
|
|
bool isReferencedDynamically, bool noDeadStrip,
|
|
|
|
|
bool isWeakDefCanBeHidden) {
|
2020-08-27 15:59:30 -07:00
|
|
|
bool overridesWeakDef = false;
|
2022-08-07 10:49:26 -04:00
|
|
|
auto [s, wasInserted] = insert(name, file);
|
2020-04-02 11:54:05 -07:00
|
|
|
|
2022-09-15 22:55:41 -04:00
|
|
|
assert(!file || !isa<BitcodeFile>(file) || !isec);
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-06 14:47:57 -04:00
|
|
|
|
2020-07-24 15:55:25 -07:00
|
|
|
if (!wasInserted) {
|
|
|
|
|
if (auto *defined = dyn_cast<Defined>(s)) {
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-17 13:30:18 -05:00
|
|
|
if (isWeakDef) {
|
2021-11-09 10:42:21 -05:00
|
|
|
// See further comment in createDefined() in InputFiles.cpp
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-06 14:47:57 -04:00
|
|
|
if (defined->isWeakDef()) {
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-17 13:30:18 -05:00
|
|
|
defined->privateExtern &= isPrivateExtern;
|
2021-11-08 19:50:34 -05:00
|
|
|
defined->weakDefCanBeHidden &= isWeakDefCanBeHidden;
|
2021-05-17 09:15:39 -04:00
|
|
|
defined->referencedDynamically |= isReferencedDynamically;
|
[lld/mac] Implement -dead_strip
Also adds support for live_support sections, no_dead_strip sections,
.no_dead_strip symbols.
Chromium Framework 345MB unstripped -> 250MB stripped
(vs 290MB unstripped -> 236M stripped with ld64).
Doing dead stripping is a bit faster than not, because so much less
data needs to be processed:
% ministat lld_*
x lld_nostrip.txt
+ lld_strip.txt
N Min Max Median Avg Stddev
x 10 3.929414 4.07692 4.0269079 4.0089678 0.044214794
+ 10 3.8129408 3.9025559 3.8670411 3.8642573 0.024779651
Difference at 95.0% confidence
-0.144711 +/- 0.0336749
-3.60967% +/- 0.839989%
(Student's t, pooled s = 0.0358398)
This interacts with many parts of the linker. I tried to add test coverage
for all added `isLive()` checks, so that some test will fail if any of them
is removed. I checked that the test expectations for the most part match
ld64's behavior (except for live-support-iterations.s, see the comment
in the test). Interacts with:
- debug info
- export tries
- import opcodes
- flags like -exported_symbol(s_list)
- -U / dynamic_lookup
- mod_init_funcs, mod_term_funcs
- weak symbol handling
- unwind info
- stubs
- map files
- -sectcreate
- undefined, dylib, common, defined (both absolute and normal) symbols
It's possible it interacts with more features I didn't think of,
of course.
I also did some manual testing:
- check-llvm check-clang check-lld work with lld with this patch
as host linker and -dead_strip enabled
- Chromium still starts
- Chromium's base_unittests still pass, including unwind tests
Implemenation-wise, this is InputSection-based, so it'll work for
object files with .subsections_via_symbols (which includes all
object files generated by clang). I first based this on the COFF
implementation, but later realized that things are more similar to ELF.
I think it'd be good to refactor MarkLive.cpp to look more like the ELF
part at some point, but I'd like to get a working state checked in first.
Mechanical parts:
- Rename canOmitFromOutput to wasCoalesced (no behavior change)
since it really is for weak coalesced symbols
- Add noDeadStrip to Defined, corresponding to N_NO_DEAD_STRIP
(`.no_dead_strip` in asm)
Fixes PR49276.
Differential Revision: https://reviews.llvm.org/D103324
2021-05-07 17:10:05 -04:00
|
|
|
defined->noDeadStrip |= noDeadStrip;
|
[lld/mac] Write every weak symbol only once in the output
Before this, if an inline function was defined in several input files,
lld would write each copy of the inline function the output. With this
patch, it only writes one copy.
Reduces the size of Chromium Framework from 378MB to 345MB (compared
to 290MB linked with ld64, which also does dead-stripping, which we
don't do yet), and makes linking it faster:
N Min Max Median Avg Stddev
x 10 3.9957051 4.3496981 4.1411121 4.156837 0.10092097
+ 10 3.908154 4.169318 3.9712729 3.9846753 0.075773012
Difference at 95.0% confidence
-0.172162 +/- 0.083847
-4.14165% +/- 2.01709%
(Student's t, pooled s = 0.0892373)
Implementation-wise, when merging two weak symbols, this sets a
"canOmitFromOutput" on the InputSection belonging to the weak symbol not put in
the symbol table. We then don't write InputSections that have this set, as long
as they are not referenced from other symbols. (This happens e.g. for object
files that don't set .subsections_via_symbols or that use .alt_entry.)
Some restrictions:
- not yet done for bitcode inputs
- no "comdat" handling (`kindNoneGroupSubordinate*` in ld64) --
Frame Descriptor Entries (FDEs), Language Specific Data Areas (LSDAs)
(that is, catch block unwind information) and Personality Routines
associated with weak functions still not stripped. This is wasteful,
but harmless.
- However, this does strip weaks from __unwind_info (which is needed for
correctness and not just for size)
- This nopes out on InputSections that are referenced form more than
one symbol (eg from .alt_entry) for now
Things that work based on symbols Just Work:
- map files (change in MapFile.cpp is no-op and not needed; I just
found it a bit more explicit)
- exports
Things that work with inputSections need to explicitly check if
an inputSection is written (e.g. unwind info).
This patch is useful in itself, but it's also likely also a useful foundation
for dead_strip.
I used to have a "canoncialRepresentative" pointer on InputSection instead of
just the bool, which would be handy for ICF too. But I ended up not needing it
for this patch, so I removed that again for now.
Differential Revision: https://reviews.llvm.org/D102076
2021-05-06 14:47:57 -04:00
|
|
|
}
|
2023-03-10 22:28:36 -05:00
|
|
|
if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) {
|
2021-11-09 10:42:21 -05:00
|
|
|
concatIsec->wasCoalesced = true;
|
2023-03-10 22:28:36 -05:00
|
|
|
// Any local symbols that alias the coalesced symbol should be moved
|
|
|
|
|
// into the prevailing section. Note that we have sorted the symbols
|
|
|
|
|
// in ObjFile::parseSymbols() such that extern weak symbols appear
|
|
|
|
|
// last, so we don't need to worry about subsequent symbols being
|
|
|
|
|
// added to an already-coalesced section.
|
2024-04-18 11:42:22 -07:00
|
|
|
if (defined->isec())
|
|
|
|
|
transplantSymbolsAtOffset(concatIsec, defined->isec(),
|
2023-03-10 22:28:36 -05:00
|
|
|
/*skip=*/nullptr, value, defined->value);
|
|
|
|
|
}
|
2021-03-12 17:26:12 -05:00
|
|
|
return defined;
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-17 13:30:18 -05:00
|
|
|
}
|
2021-11-09 10:42:21 -05:00
|
|
|
|
|
|
|
|
if (defined->isWeakDef()) {
|
|
|
|
|
if (auto concatIsec =
|
2024-04-18 11:42:22 -07:00
|
|
|
dyn_cast_or_null<ConcatInputSection>(defined->isec())) {
|
2021-11-09 10:42:21 -05:00
|
|
|
concatIsec->wasCoalesced = true;
|
2023-03-10 22:28:36 -05:00
|
|
|
if (isec)
|
|
|
|
|
transplantSymbolsAtOffset(concatIsec, isec, defined, defined->value,
|
|
|
|
|
value);
|
2021-11-09 10:42:21 -05:00
|
|
|
}
|
|
|
|
|
} else {
|
2022-09-27 23:42:47 -07:00
|
|
|
std::string srcLoc1 = defined->getSourceLocation();
|
|
|
|
|
std::string srcLoc2 = isec ? isec->getSourceLocation(value) : "";
|
|
|
|
|
std::string srcFile1 = toString(defined->getFile());
|
|
|
|
|
std::string srcFile2 = toString(file);
|
|
|
|
|
|
|
|
|
|
dupSymDiags.push_back({make_pair(srcLoc1, srcFile1),
|
|
|
|
|
make_pair(srcLoc2, srcFile2), defined});
|
2021-11-09 10:42:21 -05:00
|
|
|
}
|
|
|
|
|
|
2020-08-27 15:59:30 -07:00
|
|
|
} else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
|
|
|
|
|
overridesWeakDef = !isWeakDef && dysym->isWeakDef();
|
2021-05-31 22:12:35 -04:00
|
|
|
dysym->unreference();
|
2022-10-21 22:48:25 -04:00
|
|
|
} else if (auto *undef = dyn_cast<Undefined>(s)) {
|
2023-08-22 12:02:52 -07:00
|
|
|
if (undef->wasBitcodeSymbol) {
|
|
|
|
|
auto objFile = dyn_cast<ObjFile>(file);
|
|
|
|
|
if (!objFile) {
|
|
|
|
|
// The file must be a native object file, as opposed to potentially
|
|
|
|
|
// being another bitcode file. A situation arises when some symbols
|
|
|
|
|
// are defined thru `module asm` and thus they are not present in the
|
|
|
|
|
// bitcode's symbol table. Consider bitcode modules `A`, `B`, and `C`.
|
|
|
|
|
// LTO compiles only `A` and `C`, since there's no explicit symbol
|
|
|
|
|
// reference to `B` other than a symbol from `A` via `module asm`.
|
|
|
|
|
// After LTO is finished, the missing symbol now appears in the
|
|
|
|
|
// resulting object file for `A`, which prematurely resolves another
|
|
|
|
|
// prevailing symbol with `B` that hasn't been compiled, instead of
|
|
|
|
|
// the resulting object for `C`. Consequently, an incorrect
|
|
|
|
|
// relocation is generated for the prevailing symbol.
|
|
|
|
|
assert(isa<BitcodeFile>(file) && "Bitcode file is expected.");
|
|
|
|
|
std::string message =
|
|
|
|
|
"The pending prevailing symbol(" + name.str() +
|
|
|
|
|
") in the bitcode file(" + toString(undef->getFile()) +
|
|
|
|
|
") is overridden by a non-native object (from bitcode): " +
|
|
|
|
|
toString(file);
|
|
|
|
|
error(message);
|
|
|
|
|
} else if (!objFile->builtFromBitcode) {
|
|
|
|
|
// Ideally, this should be an object file compiled from a bitcode
|
|
|
|
|
// file. However, this might not hold true if a LC linker option is
|
|
|
|
|
// used. In case LTO internalizes a prevailing hidden weak symbol,
|
|
|
|
|
// there's a situation where an unresolved prevailing symbol might be
|
|
|
|
|
// linked with the corresponding one from a native library, which is
|
|
|
|
|
// loaded later after LTO. Although this could potentially result in
|
|
|
|
|
// an ODR violation, we choose to permit this scenario as a warning.
|
|
|
|
|
std::string message = "The pending prevailing symbol(" + name.str() +
|
|
|
|
|
") in the bitcode file(" +
|
|
|
|
|
toString(undef->getFile()) +
|
|
|
|
|
") is overridden by a post-processed native "
|
|
|
|
|
"object (from native archive): " +
|
|
|
|
|
toString(file);
|
|
|
|
|
warn(message);
|
|
|
|
|
} else {
|
|
|
|
|
// Preserve the original bitcode file name (instead of using the
|
|
|
|
|
// object file name).
|
|
|
|
|
file = undef->getFile();
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-07-24 15:55:25 -07:00
|
|
|
}
|
|
|
|
|
// Defined symbols take priority over other types of symbols, so in case
|
|
|
|
|
// of a name conflict, we fall through to the replaceSymbol() call below.
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 21:51:15 -04:00
|
|
|
// With -flat_namespace, all extern symbols in dylibs are interposable.
|
2025-03-28 20:05:50 +01:00
|
|
|
bool interposable = ((config->namespaceKind == NamespaceKind::flat &&
|
|
|
|
|
config->outputType != MachO::MH_EXECUTE) ||
|
|
|
|
|
config->interposable) &&
|
2022-03-14 21:51:15 -04:00
|
|
|
!isPrivateExtern;
|
2021-05-17 09:15:39 -04:00
|
|
|
Defined *defined = replaceSymbol<Defined>(
|
|
|
|
|
s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true,
|
2023-05-15 02:00:29 -07:00
|
|
|
isPrivateExtern, /*includeInSymtab=*/true, isReferencedDynamically,
|
|
|
|
|
noDeadStrip, overridesWeakDef, isWeakDefCanBeHidden, interposable);
|
2021-03-12 17:26:12 -05:00
|
|
|
return defined;
|
2020-04-02 11:54:05 -07:00
|
|
|
}
|
|
|
|
|
|
2022-09-15 22:55:41 -04:00
|
|
|
// Adds a defined symbol named `target`, attributed to `newFile`, that reuses
// the section, value, size and attribute flags of `src`. The alias is
// private-extern when either the caller requests it or `src` already is.
Defined *SymbolTable::aliasDefined(Defined *src, StringRef target,
                                   InputFile *newFile, bool makePrivateExtern) {
  const bool aliasIsPrivateExtern = src->privateExtern || makePrivateExtern;
  return addDefined(target, newFile, src->isec(), src->value, src->size,
                    src->isWeakDef(), aliasIsPrivateExtern,
                    src->referencedDynamically, src->noDeadStrip,
                    src->weakDefCanBeHidden);
}
|
|
|
|
|
|
2021-02-03 13:31:40 -05:00
|
|
|
// Registers a reference to `name` coming from `file` and returns the symbol
// table entry for it.
//
// A brand-new entry becomes an Undefined carrying the requested reference
// strength. For an existing entry the action depends on its kind: lazy
// symbols are fetched/extracted, dylib symbols get the reference recorded,
// and an existing Undefined keeps the larger of the old and new RefState.
// Any other kind of existing symbol is left untouched.
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
                                  bool isWeakRef) {
  auto [s, wasInserted] = insert(name, file);

  RefState refState = RefState::Strong;
  if (isWeakRef)
    refState = RefState::Weak;

  if (wasInserted) {
    replaceSymbol<Undefined>(s, name, file, refState,
                             /*wasBitcodeSymbol=*/false);
    return s;
  }

  if (auto *lazyArchive = dyn_cast<LazyArchive>(s)) {
    lazyArchive->fetchArchiveMember();
    return s;
  }

  if (isa<LazyObject>(s)) {
    extract(*s->getFile(), s->getName());
    return s;
  }

  if (auto *dylibSym = dyn_cast<DylibSymbol>(s)) {
    dylibSym->reference(refState);
    return s;
  }

  if (auto *existingUndef = dyn_cast<Undefined>(s))
    existingUndef->refState = std::max(existingUndef->refState, refState);

  return s;
}
|
|
|
|
|
|
2020-09-24 14:44:14 -07:00
|
|
|
Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-17 13:30:18 -05:00
|
|
|
uint32_t align, bool isPrivateExtern) {
|
2022-08-07 10:49:26 -04:00
|
|
|
auto [s, wasInserted] = insert(name, file);
|
2020-09-24 14:44:14 -07:00
|
|
|
|
|
|
|
|
if (!wasInserted) {
|
|
|
|
|
if (auto *common = dyn_cast<CommonSymbol>(s)) {
|
|
|
|
|
if (size < common->size)
|
|
|
|
|
return s;
|
2020-09-24 15:00:56 -07:00
|
|
|
} else if (isa<Defined>(s)) {
|
2020-09-24 14:44:14 -07:00
|
|
|
return s;
|
|
|
|
|
}
|
2020-09-24 15:00:56 -07:00
|
|
|
// Common symbols take priority over all non-Defined symbols, so in case of
|
|
|
|
|
// a name conflict, we fall through to the replaceSymbol() call below.
|
2020-09-24 14:44:14 -07:00
|
|
|
}
|
|
|
|
|
|
[lld/mac] Implement support for private extern symbols
Private extern symbols are used for things scoped to the linkage unit.
They cause duplicate symbol errors (so they're in the symbol table,
unlike TU-scoped truly local symbols), but they don't make it into the
export trie. They are created e.g. by compiling with
-fvisibility=hidden.
If two weak symbols have differing privateness, the combined symbol is
non-private external. (Example: inline functions and some TUs that
include the header defining it were built with
-fvisibility-inlines-hidden and some weren't).
A weak private external symbol implicitly has its "weak" dropped and
behaves like a regular strong private external symbol: Weak is an export
trie concept, and private symbols are not in the export trie.
If a weak and a strong symbol have different privateness, the strong
symbol wins.
If two common symbols have differing privateness, the larger symbol
wins. If they have the same size, the privateness of the symbol seen
later during the link wins (!) -- this is a bit lame, but it matches
ld64 and this behavior takes 2 lines less to implement than the less
surprising "result is non-private external), so match ld64.
(Example: `int a` in two .c files, both built with -fcommon,
one built with -fvisibility=hidden and one without.)
This also makes `__dyld_private` a true TU-local symbol, matching ld64.
To make this work, make the `const char*` StringRefZ ctor to correctly
set `size` (without this, writing the string table crashed when calling
getName() on the __dyld_private symbol).
Mention in CommonSymbol's comment that common symbols are now disabled
by default in clang.
Mention in -keep_private_externs's HelpText that the flag only has an
effect with `-r` (which we don't implement yet -- so this patch here
doesn't regress any behavior around -r + -keep_private_externs)). ld64
doesn't explicitly document it, but the commit text of
http://reviews.llvm.org/rL216146 does, and ld64's
OutputFile::buildSymbolTable() checks `_options.outputKind() ==
Options::kObjectFile` before calling `_options.keepPrivateExterns()`
(the only reference to that function).
Fixes PR48536.
Differential Revision: https://reviews.llvm.org/D93609
2020-12-17 13:30:18 -05:00
|
|
|
replaceSymbol<CommonSymbol>(s, name, file, size, align, isPrivateExtern);
|
2020-09-24 14:44:14 -07:00
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-12 19:50:09 -07:00
|
|
|
// Adds a symbol exported by the dylib `file` (or a dynamic-lookup symbol when
// `file` is null) and returns the symbol table entry.
//
// The reference state of an existing Undefined or DylibSymbol is carried over
// to the new DylibSymbol. An existing strong Defined is flagged as overriding
// a weak definition when the dylib symbol is weak. The entry is replaced when
// it is new, when it is Undefined, or when it is a DylibSymbol that is either
// weak while the new one is not, or a dynamic lookup while the new one is not.
Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
                              bool isTlv) {
  auto [s, wasInserted] = insert(name, file);

  RefState refState = RefState::Unreferenced;
  if (!wasInserted) {
    if (auto *existingDef = dyn_cast<Defined>(s)) {
      if (isWeakDef && !existingDef->isWeakDef())
        existingDef->overridesWeakDef = true;
    } else if (auto *existingUndef = dyn_cast<Undefined>(s)) {
      refState = existingUndef->refState;
    } else if (auto *existingDylibSym = dyn_cast<DylibSymbol>(s)) {
      refState = existingDylibSym->getRefState();
    }
  }

  const bool isDynamicLookup = file == nullptr;

  // Decide whether the new dylib symbol should take over the entry. The checks
  // are evaluated lazily, in the same order as a single short-circuit
  // expression would evaluate them.
  bool shouldReplace = wasInserted || isa<Undefined>(s);
  if (!shouldReplace && isa<DylibSymbol>(s)) {
    const bool strongReplacesWeak = !isWeakDef && s->isWeakDef();
    shouldReplace =
        strongReplacesWeak ||
        (!isDynamicLookup && cast<DylibSymbol>(s)->isDynamicLookup());
  }
  if (!shouldReplace)
    return s;

  if (auto *replacedDylibSym = dyn_cast<DylibSymbol>(s))
    replacedDylibSym->unreference();
  replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv);
  return s;
}
|
|
|
|
|
|
2021-02-25 19:56:31 -05:00
|
|
|
// Adds `name` as a dynamic-lookup symbol: a non-weak, non-TLV dylib symbol
// that has no DylibFile attached.
Symbol *SymbolTable::addDynamicLookup(StringRef name) {
  DylibFile *const noFile = nullptr;
  return addDylib(name, noFile, /*isWeakDef=*/false, /*isTlv=*/false);
}
|
|
|
|
|
|
2022-01-11 16:49:06 -08:00
|
|
|
// Registers the archive member symbol `sym` from `file` under `name` and
// returns the symbol table entry.
//
// A new entry becomes a LazyArchive symbol. If the name is currently
// Undefined, the member is fetched right away. If the name is a weak dylib
// symbol, the member is fetched when that symbol has been referenced and
// otherwise the entry is turned into a LazyArchive symbol. All other existing
// symbols are left unchanged.
Symbol *SymbolTable::addLazyArchive(StringRef name, ArchiveFile *file,
                                    const object::Archive::Symbol &sym) {
  auto [s, wasInserted] = insert(name, file);

  if (wasInserted) {
    replaceSymbol<LazyArchive>(s, file, sym);
    return s;
  }

  if (isa<Undefined>(s)) {
    file->fetch(sym);
    return s;
  }

  auto *dylibSym = dyn_cast<DylibSymbol>(s);
  if (!dylibSym || !dylibSym->isWeakDef())
    return s;

  if (dylibSym->getRefState() == RefState::Unreferenced)
    replaceSymbol<LazyArchive>(s, file, sym);
  else
    file->fetch(sym);
  return s;
}
|
|
|
|
|
|
2022-01-19 10:14:49 -08:00
|
|
|
// Registers `name` as lazily provided by the object `file` and returns the
// symbol table entry.
//
// A new entry becomes a LazyObject symbol. If the name is currently Undefined,
// the file is extracted right away. If the name is a weak dylib symbol, the
// file is extracted when that symbol has been referenced and otherwise the
// entry is turned into a LazyObject symbol. All other existing symbols are
// left unchanged.
Symbol *SymbolTable::addLazyObject(StringRef name, InputFile &file) {
  auto [s, wasInserted] = insert(name, &file);

  if (wasInserted) {
    replaceSymbol<LazyObject>(s, file, name);
    return s;
  }

  if (isa<Undefined>(s)) {
    extract(file, name);
    return s;
  }

  auto *dylibSym = dyn_cast<DylibSymbol>(s);
  if (!dylibSym || !dylibSym->isWeakDef())
    return s;

  if (dylibSym->getRefState() == RefState::Unreferenced)
    replaceSymbol<LazyObject>(s, file, name);
  else
    extract(file, name);
  return s;
}
|
|
|
|
|
|
2021-03-12 17:26:12 -05:00
|
|
|
// Adds a linker-synthesized defined symbol: no input file, zero size, not
// weak, and not marked no-dead-strip. `includeInSymtab` is applied to the
// resulting symbol after it has been added.
Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
                                   uint64_t value, bool isPrivateExtern,
                                   bool includeInSymtab,
                                   bool referencedDynamically) {
  // A section passed here must not belong to a file; see
  // makeSyntheticInputSection().
  assert(!(isec && isec->getFile()));

  Defined *synthetic =
      addDefined(name, /*file=*/nullptr, isec, value, /*size=*/0,
                 /*isWeakDef=*/false, isPrivateExtern, referencedDynamically,
                 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
  synthetic->includeInSymtab = includeInSymtab;
  return synthetic;
}
|
|
|
|
|
|
2021-07-15 12:54:42 -04:00
|
|
|
// Selects which edge of a section or segment a boundary symbol refers to.
enum class Boundary { Start, End };
|
|
|
|
|
|
2021-07-23 10:12:55 -04:00
|
|
|
// Creates the synthetic definition that stands in for the undefined boundary
// symbol `sym`. The symbol has no input section, is private-extern, is kept
// out of the symbol table and is given the value -1.
static Defined *createBoundarySymbol(const Undefined &sym) {
  const uint64_t initialValue = static_cast<uint64_t>(-1);
  return symtab->addSynthetic(sym.getName(), /*isec=*/nullptr, initialValue,
                              /*isPrivateExtern=*/true,
                              /*includeInSymtab=*/false,
                              /*referencedDynamically=*/false);
}
|
|
|
|
|
|
2021-07-15 12:54:42 -04:00
|
|
|
static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect,
|
|
|
|
|
Boundary which) {
|
2022-08-08 11:09:22 -04:00
|
|
|
auto [segName, sectName] = segSect.split('$');
|
2021-07-15 12:54:42 -04:00
|
|
|
|
|
|
|
|
// Attach the symbol to any InputSection that will end up in the right
|
|
|
|
|
// OutputSection -- it doesn't matter which one we pick.
|
|
|
|
|
// Don't bother looking through inputSections for a matching
|
|
|
|
|
// ConcatInputSection -- we need to create ConcatInputSection for
|
|
|
|
|
// non-existing sections anyways, and that codepath works even if we should
|
|
|
|
|
// already have a ConcatInputSection with the right name.
|
|
|
|
|
|
|
|
|
|
OutputSection *osec = nullptr;
|
|
|
|
|
// This looks for __TEXT,__cstring etc.
|
|
|
|
|
for (SyntheticSection *ssec : syntheticSections)
|
|
|
|
|
if (ssec->segname == segName && ssec->name == sectName) {
|
|
|
|
|
osec = ssec->isec->parent;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!osec) {
|
[lld-macho][nfc] Eliminate InputSection::Shared
Earlier in LLD's evolution, I tried to create the illusion that
subsections were indistinguishable from "top-level" sections. Thus, even
though the subsections shared many common field values, I hid those
common values away in a private Shared struct (see D105305). More
recently, however, @gkm added a public `Section` struct in D113241 that
served as an explicit way to store values that are common to an entire
set of subsections (aka InputSections). Now that we have another "common
value" struct, `Shared` has been rendered redundant. All its fields can
be moved into `Section` instead, and the pointer to `Shared` can be replaced
with a pointer to `Section`.
This `Section` pointer also has the advantage of letting us inspect other
subsections easily, simplifying the implementation of {D118798}.
P.S. I do think that having both `Section` and `InputSection` makes for
a slightly confusing naming scheme. I considered renaming `InputSection`
to `Subsection`, but that would break the symmetry with `OutputSection`.
It would also make us deviate from LLD-ELF's naming scheme.
This change is perf-neutral on my 3.2 GHz 16-Core Intel Xeon W machine:
base diff difference (95% CI)
sys_time 1.258 ± 0.031 1.248 ± 0.023 [ -1.6% .. +0.1%]
user_time 3.659 ± 0.047 3.658 ± 0.041 [ -0.5% .. +0.4%]
wall_time 4.640 ± 0.085 4.625 ± 0.063 [ -1.0% .. +0.3%]
samples 49 61
There's also no stat sig change in RSS (as measured by `time -l`):
base diff difference (95% CI)
time 998038627.097 ± 13567305.958 1003327715.556 ± 15210451.236 [ -0.2% .. +1.2%]
samples 31 36
Reviewed By: #lld-macho, oontvoo
Differential Revision: https://reviews.llvm.org/D118797
2022-02-03 19:53:29 -05:00
|
|
|
ConcatInputSection *isec = makeSyntheticInputSection(segName, sectName);
|
2021-07-15 12:54:42 -04:00
|
|
|
|
|
|
|
|
// This runs after markLive() and is only called for Undefineds that are
|
|
|
|
|
// live. Marking the isec live ensures an OutputSection is created that the
|
|
|
|
|
// start/end symbol can refer to.
|
|
|
|
|
assert(sym.isLive());
|
2024-03-27 17:27:51 -07:00
|
|
|
assert(isec->live);
|
2021-07-15 12:54:42 -04:00
|
|
|
|
|
|
|
|
// This runs after gatherInputSections(), so need to explicitly set parent
|
|
|
|
|
// and add to inputSections.
|
|
|
|
|
osec = isec->parent = ConcatOutputSection::getOrCreateForInput(isec);
|
|
|
|
|
inputSections.push_back(isec);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (which == Boundary::Start)
|
2021-07-23 10:12:55 -04:00
|
|
|
osec->sectionStartSymbols.push_back(createBoundarySymbol(sym));
|
2021-07-15 12:54:42 -04:00
|
|
|
else
|
2021-07-23 10:12:55 -04:00
|
|
|
osec->sectionEndSymbols.push_back(createBoundarySymbol(sym));
|
2021-07-15 12:54:42 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void handleSegmentBoundarySymbol(const Undefined &sym, StringRef segName,
|
|
|
|
|
Boundary which) {
|
2021-07-23 10:12:55 -04:00
|
|
|
OutputSegment *seg = getOrCreateOutputSegment(segName);
|
|
|
|
|
if (which == Boundary::Start)
|
|
|
|
|
seg->segmentStartSymbols.push_back(createBoundarySymbol(sym));
|
|
|
|
|
else
|
|
|
|
|
seg->segmentEndSymbols.push_back(createBoundarySymbol(sym));
|
2021-07-15 12:54:42 -04:00
|
|
|
}
|
|
|
|
|
|
2022-06-14 09:41:28 -04:00
|
|
|
// Try to find a definition for an undefined symbol.
|
|
|
|
|
// Returns true if a definition was found and no diagnostics are needed.
|
|
|
|
|
static bool recoverFromUndefinedSymbol(const Undefined &sym) {
|
2021-07-15 12:54:42 -04:00
|
|
|
// Handle start/end symbols.
|
|
|
|
|
StringRef name = sym.getName();
|
2022-06-14 09:41:28 -04:00
|
|
|
if (name.consume_front("section$start$")) {
|
|
|
|
|
handleSectionBoundarySymbol(sym, name, Boundary::Start);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
if (name.consume_front("section$end$")) {
|
|
|
|
|
handleSectionBoundarySymbol(sym, name, Boundary::End);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
if (name.consume_front("segment$start$")) {
|
|
|
|
|
handleSegmentBoundarySymbol(sym, name, Boundary::Start);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
if (name.consume_front("segment$end$")) {
|
|
|
|
|
handleSegmentBoundarySymbol(sym, name, Boundary::End);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2021-07-15 12:54:42 -04:00
|
|
|
|
2022-07-11 15:21:57 -04:00
|
|
|
// Leave dtrace symbols, since we will handle them when we do the relocation
|
2023-06-05 14:36:19 -07:00
|
|
|
if (name.starts_with("___dtrace_"))
|
2022-07-11 15:21:57 -04:00
|
|
|
return true;
|
|
|
|
|
|
2021-07-22 11:20:36 -04:00
|
|
|
// Handle -U.
|
|
|
|
|
if (config->explicitDynamicLookups.count(sym.getName())) {
|
|
|
|
|
symtab->addDynamicLookup(sym.getName());
|
2022-06-14 09:41:28 -04:00
|
|
|
return true;
|
2021-07-22 11:20:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Handle -undefined.
|
2022-06-14 09:41:28 -04:00
|
|
|
if (config->undefinedSymbolTreatment ==
|
|
|
|
|
UndefinedSymbolTreatment::dynamic_lookup ||
|
|
|
|
|
config->undefinedSymbolTreatment == UndefinedSymbolTreatment::suppress) {
|
2021-02-28 13:42:14 -05:00
|
|
|
symtab->addDynamicLookup(sym.getName());
|
2022-06-14 09:41:28 -04:00
|
|
|
return true;
|
2020-12-13 19:31:33 -08:00
|
|
|
}
|
2022-06-14 09:41:28 -04:00
|
|
|
|
|
|
|
|
// We do not return true here, as we still need to print diagnostics.
|
|
|
|
|
if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::warning)
|
|
|
|
|
symtab->addDynamicLookup(sym.getName());
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2022-06-15 19:17:07 -04:00
|
|
|
namespace {
|
|
|
|
|
struct UndefinedDiag {
|
|
|
|
|
struct SectionAndOffset {
|
|
|
|
|
const InputSection *isec;
|
|
|
|
|
uint64_t offset;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
std::vector<SectionAndOffset> codeReferences;
|
|
|
|
|
std::vector<std::string> otherReferences;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
MapVector<const Undefined *, UndefinedDiag> undefs;
|
2023-03-10 22:28:36 -05:00
|
|
|
} // namespace
|
2022-06-15 19:17:07 -04:00
|
|
|
|
2022-09-27 23:42:47 -07:00
|
|
|
void macho::reportPendingDuplicateSymbols() {
|
|
|
|
|
for (const auto &duplicate : dupSymDiags) {
|
|
|
|
|
if (!config->deadStripDuplicates || duplicate.sym->isLive()) {
|
|
|
|
|
std::string message =
|
|
|
|
|
"duplicate symbol: " + toString(*duplicate.sym) + "\n>>> defined in ";
|
|
|
|
|
if (!duplicate.src1.first.empty())
|
|
|
|
|
message += duplicate.src1.first + "\n>>> ";
|
|
|
|
|
message += duplicate.src1.second + "\n>>> defined in ";
|
|
|
|
|
if (!duplicate.src2.first.empty())
|
|
|
|
|
message += duplicate.src2.first + "\n>>> ";
|
|
|
|
|
error(message + duplicate.src2.second);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-11 20:40:26 -04:00
|
|
|
// Check whether the definition name def is a mangled function name that matches
|
|
|
|
|
// the reference name ref.
|
|
|
|
|
static bool canSuggestExternCForCXX(StringRef ref, StringRef def) {
|
|
|
|
|
llvm::ItaniumPartialDemangler d;
|
|
|
|
|
std::string name = def.str();
|
|
|
|
|
if (d.partialDemangle(name.c_str()))
|
|
|
|
|
return false;
|
|
|
|
|
char *buf = d.getFunctionName(nullptr, nullptr);
|
|
|
|
|
if (!buf)
|
|
|
|
|
return false;
|
|
|
|
|
bool ret = ref == buf;
|
|
|
|
|
free(buf);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns
|
|
|
|
|
// the suggested symbol, which is either in the symbol table, or in the same
|
|
|
|
|
// file of sym.
|
|
|
|
|
static const Symbol *getAlternativeSpelling(const Undefined &sym,
|
2023-04-05 01:48:34 -04:00
|
|
|
std::string &preHint,
|
|
|
|
|
std::string &postHint) {
|
2022-10-11 20:40:26 -04:00
|
|
|
DenseMap<StringRef, const Symbol *> map;
|
|
|
|
|
if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) {
|
|
|
|
|
// Build a map of local defined symbols.
|
|
|
|
|
for (const Symbol *s : sym.getFile()->symbols)
|
|
|
|
|
if (auto *defined = dyn_cast_or_null<Defined>(s))
|
|
|
|
|
if (!defined->isExternal())
|
|
|
|
|
map.try_emplace(s->getName(), s);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto suggest = [&](StringRef newName) -> const Symbol * {
|
|
|
|
|
// If defined locally.
|
|
|
|
|
if (const Symbol *s = map.lookup(newName))
|
|
|
|
|
return s;
|
|
|
|
|
|
|
|
|
|
// If in the symbol table and not undefined.
|
|
|
|
|
if (const Symbol *s = symtab->find(newName))
|
2025-05-13 17:34:42 +03:00
|
|
|
if (!isa<Undefined>(s))
|
2022-10-11 20:40:26 -04:00
|
|
|
return s;
|
|
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// This loop enumerates all strings of Levenshtein distance 1 as typo
|
|
|
|
|
// correction candidates and suggests the one that exists as a non-undefined
|
|
|
|
|
// symbol.
|
|
|
|
|
StringRef name = sym.getName();
|
|
|
|
|
for (size_t i = 0, e = name.size(); i != e + 1; ++i) {
|
|
|
|
|
// Insert a character before name[i].
|
|
|
|
|
std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str();
|
|
|
|
|
for (char c = '0'; c <= 'z'; ++c) {
|
|
|
|
|
newName[i] = c;
|
|
|
|
|
if (const Symbol *s = suggest(newName))
|
|
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
if (i == e)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
// Substitute name[i].
|
|
|
|
|
newName = std::string(name);
|
|
|
|
|
for (char c = '0'; c <= 'z'; ++c) {
|
|
|
|
|
newName[i] = c;
|
|
|
|
|
if (const Symbol *s = suggest(newName))
|
|
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Transpose name[i] and name[i+1]. This is of edit distance 2 but it is
|
|
|
|
|
// common.
|
|
|
|
|
if (i + 1 < e) {
|
|
|
|
|
newName[i] = name[i + 1];
|
|
|
|
|
newName[i + 1] = name[i];
|
|
|
|
|
if (const Symbol *s = suggest(newName))
|
|
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Delete name[i].
|
|
|
|
|
newName = (name.substr(0, i) + name.substr(i + 1)).str();
|
|
|
|
|
if (const Symbol *s = suggest(newName))
|
|
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Case mismatch, e.g. Foo vs FOO.
|
|
|
|
|
for (auto &it : map)
|
|
|
|
|
if (name.equals_insensitive(it.first))
|
|
|
|
|
return it.second;
|
|
|
|
|
for (Symbol *sym : symtab->getSymbols())
|
2025-05-13 17:34:42 +03:00
|
|
|
if (!isa<Undefined>(sym) && name.equals_insensitive(sym->getName()))
|
2022-10-11 20:40:26 -04:00
|
|
|
return sym;
|
|
|
|
|
|
|
|
|
|
// The reference may be a mangled name while the definition is not. Suggest a
|
|
|
|
|
// missing extern "C".
|
2023-06-05 14:36:19 -07:00
|
|
|
if (name.starts_with("__Z")) {
|
2022-10-11 20:40:26 -04:00
|
|
|
std::string buf = name.str();
|
|
|
|
|
llvm::ItaniumPartialDemangler d;
|
|
|
|
|
if (!d.partialDemangle(buf.c_str()))
|
|
|
|
|
if (char *buf = d.getFunctionName(nullptr, nullptr)) {
|
|
|
|
|
const Symbol *s = suggest((Twine("_") + buf).str());
|
|
|
|
|
free(buf);
|
|
|
|
|
if (s) {
|
2023-04-05 01:48:34 -04:00
|
|
|
preHint = ": extern \"C\" ";
|
2022-10-11 20:40:26 -04:00
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
2023-04-05 01:48:34 -04:00
|
|
|
StringRef nameWithoutUnderscore = name;
|
|
|
|
|
nameWithoutUnderscore.consume_front("_");
|
2022-10-11 20:40:26 -04:00
|
|
|
const Symbol *s = nullptr;
|
|
|
|
|
for (auto &it : map)
|
2023-04-05 01:48:34 -04:00
|
|
|
if (canSuggestExternCForCXX(nameWithoutUnderscore, it.first)) {
|
2022-10-11 20:40:26 -04:00
|
|
|
s = it.second;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (!s)
|
|
|
|
|
for (Symbol *sym : symtab->getSymbols())
|
2023-04-05 01:48:34 -04:00
|
|
|
if (canSuggestExternCForCXX(nameWithoutUnderscore, sym->getName())) {
|
2022-10-11 20:40:26 -04:00
|
|
|
s = sym;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (s) {
|
2023-04-05 01:48:34 -04:00
|
|
|
preHint = " to declare ";
|
|
|
|
|
postHint = " as extern \"C\"?";
|
2022-10-11 20:40:26 -04:00
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-02 09:20:22 -04:00
|
|
|
static void reportUndefinedSymbol(const Undefined &sym,
|
2022-10-11 20:40:26 -04:00
|
|
|
const UndefinedDiag &locations,
|
|
|
|
|
bool correctSpelling) {
|
2022-10-02 09:20:22 -04:00
|
|
|
std::string message = "undefined symbol";
|
|
|
|
|
if (config->archMultiple)
|
|
|
|
|
message += (" for arch " + getArchitectureName(config->arch())).str();
|
|
|
|
|
message += ": " + toString(sym);
|
|
|
|
|
|
|
|
|
|
const size_t maxUndefinedReferences = 3;
|
|
|
|
|
size_t i = 0;
|
|
|
|
|
for (const std::string &loc : locations.otherReferences) {
|
|
|
|
|
if (i >= maxUndefinedReferences)
|
|
|
|
|
break;
|
|
|
|
|
message += "\n>>> referenced by " + loc;
|
|
|
|
|
++i;
|
|
|
|
|
}
|
2022-06-15 19:17:07 -04:00
|
|
|
|
2022-10-02 09:20:22 -04:00
|
|
|
for (const UndefinedDiag::SectionAndOffset &loc : locations.codeReferences) {
|
|
|
|
|
if (i >= maxUndefinedReferences)
|
|
|
|
|
break;
|
|
|
|
|
message += "\n>>> referenced by ";
|
|
|
|
|
std::string src = loc.isec->getSourceLocation(loc.offset);
|
|
|
|
|
if (!src.empty())
|
|
|
|
|
message += src + "\n>>> ";
|
|
|
|
|
message += loc.isec->getLocation(loc.offset);
|
|
|
|
|
++i;
|
2022-06-15 19:17:07 -04:00
|
|
|
}
|
|
|
|
|
|
2022-10-02 09:20:22 -04:00
|
|
|
size_t totalReferences =
|
|
|
|
|
locations.otherReferences.size() + locations.codeReferences.size();
|
|
|
|
|
if (totalReferences > i)
|
|
|
|
|
message +=
|
|
|
|
|
("\n>>> referenced " + Twine(totalReferences - i) + " more times")
|
|
|
|
|
.str();
|
|
|
|
|
|
2022-10-11 20:40:26 -04:00
|
|
|
if (correctSpelling) {
|
2023-04-05 01:48:34 -04:00
|
|
|
std::string preHint = ": ", postHint;
|
2022-10-11 20:40:26 -04:00
|
|
|
if (const Symbol *corrected =
|
2023-04-05 01:48:34 -04:00
|
|
|
getAlternativeSpelling(sym, preHint, postHint)) {
|
2022-10-11 20:40:26 -04:00
|
|
|
message +=
|
2023-04-05 01:48:34 -04:00
|
|
|
"\n>>> did you mean" + preHint + toString(*corrected) + postHint;
|
2022-10-11 20:40:26 -04:00
|
|
|
if (corrected->getFile())
|
|
|
|
|
message += "\n>>> defined in: " + toString(corrected->getFile());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-02 09:20:22 -04:00
|
|
|
if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error)
|
|
|
|
|
error(message);
|
|
|
|
|
else if (config->undefinedSymbolTreatment ==
|
|
|
|
|
UndefinedSymbolTreatment::warning)
|
|
|
|
|
warn(message);
|
|
|
|
|
else
|
|
|
|
|
assert(false && "diagnostics make sense for -undefined error|warning only");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void macho::reportPendingUndefinedSymbols() {
|
2022-10-11 20:40:26 -04:00
|
|
|
// Enable spell corrector for the first 2 diagnostics.
|
|
|
|
|
for (const auto &[i, undef] : llvm::enumerate(undefs))
|
|
|
|
|
reportUndefinedSymbol(*undef.first, undef.second, i < 2);
|
2022-10-02 09:20:22 -04:00
|
|
|
|
2022-06-15 19:17:07 -04:00
|
|
|
// This function is called multiple times during execution. Clear the printed
|
|
|
|
|
// diagnostics to avoid printing the same things again the next time.
|
|
|
|
|
undefs.clear();
|
2022-06-14 09:41:28 -04:00
|
|
|
}
|
|
|
|
|
|
2022-06-15 19:17:07 -04:00
|
|
|
void macho::treatUndefinedSymbol(const Undefined &sym, StringRef source) {
|
2022-06-14 09:41:28 -04:00
|
|
|
if (recoverFromUndefinedSymbol(sym))
|
|
|
|
|
return;
|
2022-06-15 19:17:07 -04:00
|
|
|
|
|
|
|
|
undefs[&sym].otherReferences.push_back(source.str());
|
2022-06-14 09:41:28 -04:00
|
|
|
}
|
|
|
|
|
|
2022-06-15 19:17:07 -04:00
|
|
|
void macho::treatUndefinedSymbol(const Undefined &sym, const InputSection *isec,
|
|
|
|
|
uint64_t offset) {
|
2022-06-14 09:41:28 -04:00
|
|
|
if (recoverFromUndefinedSymbol(sym))
|
|
|
|
|
return;
|
|
|
|
|
|
2022-06-15 19:17:07 -04:00
|
|
|
undefs[&sym].codeReferences.push_back({isec, offset});
|
2020-12-13 19:31:33 -08:00
|
|
|
}
|
|
|
|
|
|
2022-01-10 19:39:14 -08:00
|
|
|
// Definition of macho::symtab, the owning pointer to the SymbolTable that the
// functions in this file reach through `symtab->`.
std::unique_ptr<SymbolTable> macho::symtab;
|