mirror of
https://github.com/intel/llvm.git
synced 2026-01-16 21:55:39 +08:00
[ELF] Fix unnecessary inclusion of unreferenced provide symbols
Previously, linker was unnecessarily including a PROVIDE symbol which was referenced by another unused PROVIDE symbol. For example, if a linker script contained the below code and 'not_used_sym' provide symbol is not included, then linker was still unnecessarily including 'foo' PROVIDE symbol because it was referenced by 'not_used_sym'. This commit fixes this behavior. PROVIDE(not_used_sym = foo) PROVIDE(foo = 0x1000) This commit fixes this behavior by using dfs-like algorithm to find all the symbols referenced in provide expressions of included provide symbols. This commit also fixes the issue of unused section not being garbage-collected if a symbol of the section is referenced by an unused PROVIDE symbol. Closes #74771 Closes #84730 Co-authored-by: Fangrui Song <i@maskray.me>
This commit is contained in:
committed by
Fangrui Song
parent
b6dfaf4c29
commit
ebb326a51f
@@ -2393,12 +2393,6 @@ static void readSymbolPartitionSection(InputSectionBase *s) {
|
||||
sym->partition = newPart.getNumber();
|
||||
}
|
||||
|
||||
static Symbol *addUnusedUndefined(StringRef name,
|
||||
uint8_t binding = STB_GLOBAL) {
|
||||
return symtab.addSymbol(
|
||||
Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
|
||||
}
|
||||
|
||||
static void markBuffersAsDontNeed(bool skipLinkedOutput) {
|
||||
// With --thinlto-index-only, all buffers are nearly unused from now on
|
||||
// (except symbol/section names used by infrequent passes). Mark input file
|
||||
@@ -2485,15 +2479,15 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
|
||||
continue;
|
||||
|
||||
Symbol *wrap =
|
||||
addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);
|
||||
symtab.addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);
|
||||
|
||||
// If __real_ is referenced, pull in the symbol if it is lazy. Do this after
|
||||
// processing __wrap_ as that may have referenced __real_.
|
||||
StringRef realName = saver().save("__real_" + name);
|
||||
if (symtab.find(realName))
|
||||
addUnusedUndefined(name, sym->binding);
|
||||
symtab.addUnusedUndefined(name, sym->binding);
|
||||
|
||||
Symbol *real = addUnusedUndefined(realName);
|
||||
Symbol *real = symtab.addUnusedUndefined(realName);
|
||||
v.push_back({sym, real, wrap});
|
||||
|
||||
// We want to tell LTO not to inline symbols to be overwritten
|
||||
@@ -2723,7 +2717,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
|
||||
// Handle -u/--undefined before input files. If both a.a and b.so define foo,
|
||||
// -u foo a.a b.so will extract a.a.
|
||||
for (StringRef name : config->undefined)
|
||||
addUnusedUndefined(name)->referenced = true;
|
||||
symtab.addUnusedUndefined(name)->referenced = true;
|
||||
|
||||
parseFiles(files, armCmseImpLib);
|
||||
|
||||
@@ -2736,13 +2730,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
|
||||
config->hasDynSymTab =
|
||||
!ctx.sharedFiles.empty() || config->isPic || config->exportDynamic;
|
||||
|
||||
// Some symbols (such as __ehdr_start) are defined lazily only when there
|
||||
// are undefined symbols for them, so we add these to trigger that logic.
|
||||
for (StringRef name : script->referencedSymbols) {
|
||||
Symbol *sym = addUnusedUndefined(name);
|
||||
sym->isUsedInRegularObj = true;
|
||||
sym->referenced = true;
|
||||
}
|
||||
script->addScriptReferencedSymbolsToSymTable();
|
||||
|
||||
// Prevent LTO from removing any definition referenced by -u.
|
||||
for (StringRef name : config->undefined)
|
||||
|
||||
@@ -198,15 +198,7 @@ static bool shouldDefineSym(SymbolAssignment *cmd) {
|
||||
if (cmd->name == ".")
|
||||
return false;
|
||||
|
||||
if (!cmd->provide)
|
||||
return true;
|
||||
|
||||
// If a symbol was in PROVIDE(), we need to define it only
|
||||
// when it is a referenced undefined symbol.
|
||||
Symbol *b = symtab.find(cmd->name);
|
||||
if (b && !b->isDefined() && !b->isCommon())
|
||||
return true;
|
||||
return false;
|
||||
return !cmd->provide || LinkerScript::shouldAddProvideSym(cmd->name);
|
||||
}
|
||||
|
||||
// Called by processSymbolAssignments() to assign definitions to
|
||||
@@ -1517,3 +1509,41 @@ void LinkerScript::checkFinalScriptConditions() const {
|
||||
checkMemoryRegion(lmaRegion, sec, sec->getLMA());
|
||||
}
|
||||
}
|
||||
|
||||
void LinkerScript::addScriptReferencedSymbolsToSymTable() {
|
||||
// Some symbols (such as __ehdr_start) are defined lazily only when there
|
||||
// are undefined symbols for them, so we add these to trigger that logic.
|
||||
auto reference = [](StringRef name) {
|
||||
Symbol *sym = symtab.addUnusedUndefined(name);
|
||||
sym->isUsedInRegularObj = true;
|
||||
sym->referenced = true;
|
||||
};
|
||||
for (StringRef name : referencedSymbols)
|
||||
reference(name);
|
||||
|
||||
// Keeps track of references from which PROVIDE symbols have been added to the
|
||||
// symbol table.
|
||||
DenseSet<StringRef> added;
|
||||
SmallVector<const SmallVector<StringRef, 0> *, 0> symRefsVec;
|
||||
for (const auto &[name, symRefs] : provideMap)
|
||||
if (LinkerScript::shouldAddProvideSym(name) && added.insert(name).second)
|
||||
symRefsVec.push_back(&symRefs);
|
||||
while (symRefsVec.size()) {
|
||||
for (StringRef name : *symRefsVec.pop_back_val()) {
|
||||
reference(name);
|
||||
// Prevent the symbol from being discarded by --gc-sections.
|
||||
script->referencedSymbols.push_back(name);
|
||||
auto it = script->provideMap.find(name);
|
||||
if (it != script->provideMap.end() &&
|
||||
LinkerScript::shouldAddProvideSym(name) &&
|
||||
added.insert(name).second) {
|
||||
symRefsVec.push_back(&it->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool LinkerScript::shouldAddProvideSym(StringRef symName) {
|
||||
Symbol *sym = symtab.find(symName);
|
||||
return sym && !sym->isDefined() && !sym->isCommon();
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include <cstddef>
|
||||
@@ -348,6 +349,18 @@ public:
|
||||
// Check backward location counter assignment and memory region/LMA overflows.
|
||||
void checkFinalScriptConditions() const;
|
||||
|
||||
// Add symbols that are referenced in the linker script to the symbol table.
|
||||
// Symbols referenced in a PROVIDE command are only added to the symbol table
|
||||
// if the PROVIDE command actually provides the symbol.
|
||||
// It also adds the symbols referenced by the used PROVIDE symbols to the
|
||||
// linker script referenced symbols list.
|
||||
void addScriptReferencedSymbolsToSymTable();
|
||||
|
||||
// Returns true if the PROVIDE symbol should be added to the link.
|
||||
// A PROVIDE symbol is added to the link only if it satisfies an
|
||||
// undefined reference.
|
||||
static bool shouldAddProvideSym(StringRef symName);
|
||||
|
||||
// SECTIONS command list.
|
||||
SmallVector<SectionCommand *, 0> sectionCommands;
|
||||
|
||||
@@ -379,6 +392,14 @@ public:
|
||||
|
||||
// Sections that will be warned/errored by --orphan-handling.
|
||||
SmallVector<const InputSectionBase *, 0> orphanSections;
|
||||
|
||||
// Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE
|
||||
// expression. For example, if the PROVIDE command is:
|
||||
//
|
||||
// PROVIDE(v = a + b + c);
|
||||
//
|
||||
// then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
|
||||
llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;
|
||||
};
|
||||
|
||||
LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "llvm/Support/TimeProfiler.h"
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
@@ -138,6 +139,10 @@ private:
|
||||
|
||||
// A set to detect an INCLUDE() cycle.
|
||||
StringSet<> seen;
|
||||
|
||||
// If we are currently parsing a PROVIDE|PROVIDE_HIDDEN command,
|
||||
// then this member is set to the PROVIDE symbol name.
|
||||
std::optional<llvm::StringRef> activeProvideSym;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
@@ -1055,6 +1060,9 @@ SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
|
||||
;
|
||||
return nullptr;
|
||||
}
|
||||
llvm::SaveAndRestore saveActiveProvideSym(activeProvideSym);
|
||||
if (provide)
|
||||
activeProvideSym = name;
|
||||
SymbolAssignment *cmd = readSymbolAssignment(name);
|
||||
cmd->provide = provide;
|
||||
cmd->hidden = hidden;
|
||||
@@ -1570,7 +1578,10 @@ Expr ScriptParser::readPrimary() {
|
||||
tok = unquote(tok);
|
||||
else if (!isValidSymbolName(tok))
|
||||
setError("malformed number: " + tok);
|
||||
script->referencedSymbols.push_back(tok);
|
||||
if (activeProvideSym)
|
||||
script->provideMap[*activeProvideSym].push_back(tok);
|
||||
else
|
||||
script->referencedSymbols.push_back(tok);
|
||||
return [=] { return script->getSymbolValue(tok, location); };
|
||||
}
|
||||
|
||||
|
||||
@@ -333,3 +333,7 @@ void SymbolTable::scanVersionScript() {
|
||||
// --dynamic-list.
|
||||
handleDynamicList();
|
||||
}
|
||||
|
||||
Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) {
|
||||
return addSymbol(Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
|
||||
}
|
||||
|
||||
@@ -57,6 +57,9 @@ public:
|
||||
|
||||
void handleDynamicList();
|
||||
|
||||
Symbol *addUnusedUndefined(StringRef name,
|
||||
uint8_t binding = llvm::ELF::STB_GLOBAL);
|
||||
|
||||
// Set of .so files to not link the same shared object file more than once.
|
||||
llvm::DenseMap<llvm::CachedHashStringRef, SharedFile *> soNames;
|
||||
|
||||
|
||||
60
lld/test/ELF/gc-sections-with-provide.s
Normal file
60
lld/test/ELF/gc-sections-with-provide.s
Normal file
@@ -0,0 +1,60 @@
|
||||
# REQUIRES: x86
|
||||
|
||||
# This test verifies that garbage-collection is correctly garbage collecting
|
||||
# unused sections when the symbol of the unused section is only referred by
|
||||
# an unused PROVIDE symbol.
|
||||
|
||||
# RUN: rm -rf %t && split-file %s %t && cd %t
|
||||
# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
|
||||
# RUN: ld.lld -o a_nogc a.o -T script.t
|
||||
# RUN: llvm-nm a_nogc | FileCheck -check-prefix=NOGC %s
|
||||
# RUN: ld.lld -o a_gc a.o --gc-sections --print-gc-sections -T script.t | FileCheck --check-prefix=GC_LINK %s
|
||||
# RUN: llvm-nm a_gc | FileCheck -check-prefix=GC %s
|
||||
|
||||
NOGC-NOT: another_unused
|
||||
NOGC: another_used
|
||||
NOGC: bar
|
||||
NOGC: baz
|
||||
NOGC: baz_ref
|
||||
NOGC: foo
|
||||
NOGC-NOT: unused
|
||||
NOGC: used
|
||||
|
||||
GC_LINK: removing unused section a.o:(.text.bar)
|
||||
|
||||
GC-NOT: another_unused
|
||||
GC: another_used
|
||||
GC-NOT: bar
|
||||
GC: baz
|
||||
GC: baz_ref
|
||||
GC: foo
|
||||
GC-NOT: unused
|
||||
GC: used
|
||||
|
||||
#--- a.s
|
||||
.global _start
|
||||
_start:
|
||||
call foo
|
||||
call used
|
||||
|
||||
.section .text.foo,"ax",@progbits
|
||||
foo:
|
||||
nop
|
||||
|
||||
.section .text.bar,"ax",@progbits
|
||||
.global bar
|
||||
bar:
|
||||
nop
|
||||
|
||||
.section .text.baz,"ax",@progbits
|
||||
.global baz
|
||||
baz:
|
||||
nop
|
||||
|
||||
|
||||
#--- script.t
|
||||
PROVIDE(unused = bar + used);
|
||||
PROVIDE(used = another_used);
|
||||
PROVIDE(baz_ref = baz);
|
||||
PROVIDE(another_used = baz_ref);
|
||||
PROVIDE(another_unused = unused + bar + 0x1);
|
||||
@@ -21,11 +21,31 @@
|
||||
# RUN: ld.lld -o chain -T chain.t a.o
|
||||
# RUN: llvm-nm chain | FileCheck %s
|
||||
|
||||
# CHECK: 0000000000001000 a f1
|
||||
# CHECK-NEXT: 0000000000001000 A f2
|
||||
# CHECK-NEXT: 0000000000001000 a g1
|
||||
# CHECK-NEXT: 0000000000001000 A g2
|
||||
# CHECK-NEXT: 0000000000001000 A newsym
|
||||
# CHECK-NOT: another_unused
|
||||
# CHECK: 0000000000007000 a f1
|
||||
# CHECK-NEXT: 0000000000007000 A f2
|
||||
# CHECK-NEXT: 0000000000007000 A f3
|
||||
# CHECK-NEXT: 0000000000007000 A f4
|
||||
# CHECK-NEXT: 0000000000006000 A f5
|
||||
# CHECK-NEXT: 0000000000003000 A f6
|
||||
# CHECK-NEXT: 0000000000001000 A f7
|
||||
# CHECK-NOT: g1
|
||||
# CHECK-NOT: g2
|
||||
# CHECK-NEXT: 0000000000007500 A newsym
|
||||
# CHECK: 0000000000002000 A u
|
||||
# CHECK-NOT: unused
|
||||
# CHECK-NEXT: 0000000000002000 A v
|
||||
# CHECK-NEXT: 0000000000002000 A w
|
||||
|
||||
|
||||
# RUN: ld.lld -o chain_with_cycle -T chain_with_cycle.t a.o
|
||||
# RUN: llvm-nm chain_with_cycle | FileCheck %s --check-prefix=CHAIN_WITH_CYCLE
|
||||
|
||||
# CHAIN_WITH_CYCLE: 000 A f1
|
||||
# CHAIN_WITH_CYCLE: 000 A f2
|
||||
# CHAIN_WITH_CYCLE: 000 A f3
|
||||
# CHAIN_WITH_CYCLE: 000 A f4
|
||||
# CHAIN_WITH_CYCLE: 000 A newsym
|
||||
|
||||
# RUN: not ld.lld -T chain2.t a.o 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
|
||||
# ERR-COUNT-3: error: chain2.t:1: symbol not found: undef
|
||||
@@ -40,13 +60,30 @@ patatino:
|
||||
movl newsym, %eax
|
||||
|
||||
#--- chain.t
|
||||
PROVIDE(f2 = 0x1000);
|
||||
PROVIDE(f7 = 0x1000);
|
||||
PROVIDE(f5 = f6 + 0x3000);
|
||||
PROVIDE(f6 = f7 + 0x2000);
|
||||
PROVIDE(f4 = f5 + 0x1000);
|
||||
PROVIDE(f3 = f4);
|
||||
PROVIDE(f2 = f3);
|
||||
PROVIDE_HIDDEN(f1 = f2);
|
||||
PROVIDE(newsym = f1);
|
||||
PROVIDE(newsym = f1 + 0x500);
|
||||
|
||||
u = v;
|
||||
PROVIDE(w = 0x2000);
|
||||
PROVIDE(v = w);
|
||||
|
||||
PROVIDE(g2 = 0x1000);
|
||||
PROVIDE_HIDDEN(g1 = g2);
|
||||
PROVIDE(unused = g1);
|
||||
PROVIDE_HIDDEN(another_unused = g1);
|
||||
|
||||
#--- chain_with_cycle.t
|
||||
PROVIDE(f1 = f2 + f3);
|
||||
PROVIDE(f2 = f3 + f4);
|
||||
PROVIDE(f3 = f4);
|
||||
PROVIDE(f4 = f1);
|
||||
PROVIDE(newsym = f1);
|
||||
|
||||
#--- chain2.t
|
||||
PROVIDE(f2 = undef);
|
||||
|
||||
Reference in New Issue
Block a user