[ELF] Move section assignment from initializeSymbols to postParse

https://discourse.llvm.org/t/parallel-input-file-parsing/60164

initializeSymbols currently sets Defined::section and handles non-prevailing
COMDAT groups. Move the code to the parallel postParse to reduce work from the
single-threading code path and make parallel section initialization infeasible.

Postpone reporting duplicate symbol errors so that the messages have the
section information. (`Defined::section` is assigned in postParse and another
thread may not have the information).

* duplicated-synthetic-sym.s: BinaryFile duplicate definition (very rare) now
  has no section information
* comdat-binding: `%t/w.o %t/g.o` leads to an undesired undefined symbol. This
  is not ideal but we report a diagnostic to inform that this is unsupported.
  (See release note)
* comdat-discarded-lazy.s: %tdef.o is unextracted. The new behavior (discarded
  section error) makes more sense

Depends on D120640

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D120626
This commit is contained in:
Fangrui Song
2022-03-14 14:13:41 -07:00
parent 0c3156bd43
commit c30e6447c0
13 changed files with 120 additions and 61 deletions

View File

@@ -30,6 +30,7 @@ namespace elf {
class InputFile;
class InputSectionBase;
class Symbol;
enum ELFKind : uint8_t {
ELFNoneKind,
@@ -349,6 +350,24 @@ struct Configuration {
// The only instance of Configuration struct.
extern std::unique_ptr<Configuration> config;
struct DuplicateSymbol {
const Symbol *sym;
const InputFile *file;
InputSectionBase *section;
uint64_t value;
};
struct Ctx {
// Duplicate symbol candidates.
SmallVector<DuplicateSymbol, 0> duplicates;
// Symbols in a non-prevailing COMDAT group which should be changed to an
// Undefined.
SmallVector<std::pair<Symbol *, unsigned>, 0> nonPrevailingSyms;
};
// The only instance of Ctx struct.
extern std::unique_ptr<Ctx> ctx;
// The first two elements of versionDefinitions represent VER_NDX_LOCAL and
// VER_NDX_GLOBAL. This helper returns other elements.
static inline ArrayRef<VersionDefinition> namedVersionDefs() {

View File

@@ -74,6 +74,7 @@ using namespace lld;
using namespace lld::elf;
std::unique_ptr<Configuration> elf::config;
std::unique_ptr<Ctx> elf::ctx;
std::unique_ptr<LinkerDriver> elf::driver;
static void setConfigs(opt::InputArgList &args);
@@ -117,6 +118,7 @@ bool elf::link(ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
"--error-limit=0 to see all errors)";
config = std::make_unique<Configuration>();
elf::ctx = std::make_unique<Ctx>();
driver = std::make_unique<LinkerDriver>();
script = std::make_unique<LinkerScript>();
symtab = std::make_unique<SymbolTable>();
@@ -2486,6 +2488,16 @@ void LinkerDriver::link(opt::InputArgList &args) {
parallelForEach(objectFiles, initializeLocalSymbols);
parallelForEach(objectFiles, postParseObjectFile);
parallelForEach(bitcodeFiles, [](BitcodeFile *file) { file->postParse(); });
for (auto &it : ctx->nonPrevailingSyms) {
Symbol &sym = *it.first;
sym.replace(Undefined{sym.file, sym.getName(), sym.binding, sym.stOther,
sym.type, it.second});
cast<Undefined>(sym).nonPrevailing = true;
}
ctx->nonPrevailingSyms.clear();
for (const DuplicateSymbol &d : ctx->duplicates)
reportDuplicate(*d.sym, d.file, d.section, d.value);
ctx->duplicates.clear();
// Return if there were name resolution errors.
if (errorCount())
@@ -2558,6 +2570,8 @@ void LinkerDriver::link(opt::InputArgList &args) {
auto newObjectFiles = makeArrayRef(objectFiles).slice(numObjsBeforeLTO);
parallelForEach(newObjectFiles, initializeLocalSymbols);
parallelForEach(newObjectFiles, postParseObjectFile);
for (const DuplicateSymbol &d : ctx->duplicates)
reportDuplicate(*d.sym, d.file, d.section, d.value);
// Handle --exclude-libs again because lto.tmp may reference additional
// libcalls symbols defined in an excluded archive. This may override

View File

@@ -1016,7 +1016,6 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
// its corresponding ELF symbol table.
template <class ELFT>
void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
ArrayRef<InputSectionBase *> sections(this->sections);
SymbolTable &symtab = *elf::symtab;
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
@@ -1044,13 +1043,6 @@ void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
continue;
}
if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
else if (secIdx >= SHN_LORESERVE)
secIdx = 0;
if (LLVM_UNLIKELY(secIdx >= sections.size()))
fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
InputSectionBase *sec = sections[secIdx];
uint8_t stOther = eSym.st_other;
uint8_t type = eSym.getType();
uint64_t value = eSym.st_value;
@@ -1068,30 +1060,11 @@ void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
continue;
}
// If a defined symbol is in a discarded section, handle it as if it
// were an undefined symbol. Such symbol doesn't comply with the
// standard, but in practice, a .eh_frame often directly refer
// COMDAT member sections, and if a comdat group is discarded, some
// defined symbol in a .eh_frame becomes dangling symbols.
if (sec == &InputSection::discarded) {
Undefined und{this, StringRef(), binding, stOther, type, secIdx};
// !LazyObjFile::lazy indicates that the file containing this object has
// not finished processing, i.e. this symbol is a result of a lazy symbol
// extract. We should demote the lazy symbol to an Undefined so that any
// relocations outside of the group to it will trigger a discarded section
// error.
if (sym->symbolKind == Symbol::LazyObjectKind && !sym->file->lazy)
sym->replace(und);
else
sym->resolve(und);
continue;
}
// Handle global defined symbols.
// Handle global defined symbols. Defined::section will be set in postParse.
if (binding == STB_GLOBAL || binding == STB_WEAK ||
binding == STB_GNU_UNIQUE) {
sym->resolve(
Defined{this, StringRef(), binding, stOther, type, value, size, sec});
sym->resolve(Defined{this, StringRef(), binding, stOther, type, value,
size, nullptr});
continue;
}
@@ -1156,10 +1129,12 @@ template <class ELFT> void ObjFile<ELFT>::initializeLocalSymbols() {
// Called after all ObjFile::parse is called for all ObjFiles. This checks
// duplicate symbols and may do symbol property merge in the future.
template <class ELFT> void ObjFile<ELFT>::postParse() {
static std::mutex mu;
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
const Elf_Sym &eSym = eSyms[i];
const Symbol &sym = *symbols[i];
Symbol &sym = *symbols[i];
uint32_t secIdx = eSym.st_shndx;
// st_value of STT_TLS represents the assigned offset, not the actual
// address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can
@@ -1170,23 +1145,41 @@ template <class ELFT> void ObjFile<ELFT>::postParse() {
errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " +
toString(sym.file) + "\n>>> in " + toString(this));
// !sym.file allows a symbol assignment redefines a symbol without an error.
if (sym.file == this || !sym.file || !sym.isDefined() ||
eSym.st_shndx == SHN_UNDEF || eSym.st_shndx == SHN_COMMON ||
eSym.getBinding() == STB_WEAK)
// Handle non-COMMON defined symbol below. !sym.file allows a symbol
// assignment to redefine a symbol without an error.
if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF ||
secIdx == SHN_COMMON)
continue;
uint32_t secIdx = eSym.st_shndx;
if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
secIdx = cantFail(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
else if (secIdx >= SHN_LORESERVE)
secIdx = 0;
if (sections[secIdx] == &InputSection::discarded)
if (LLVM_UNLIKELY(secIdx >= sections.size()))
fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
InputSectionBase *sec = sections[secIdx];
if (sec == &InputSection::discarded) {
if (sym.traced) {
printTraceSymbol(Undefined{this, sym.getName(), sym.binding,
sym.stOther, sym.type, secIdx},
sym.getName());
}
if (sym.file == this) {
std::lock_guard<std::mutex> lock(mu);
ctx->nonPrevailingSyms.emplace_back(&sym, secIdx);
}
continue;
// Allow absolute symbols with the same value for GNU ld compatibility.
if (!cast<Defined>(sym).section && !sections[secIdx] &&
cast<Defined>(sym).value == eSym.st_value)
}
if (sym.file == this) {
cast<Defined>(sym).section = sec;
continue;
reportDuplicate(sym, this, sections[secIdx], eSym.st_value);
}
if (eSym.getBinding() == STB_WEAK)
continue;
std::lock_guard<std::mutex> lock(mu);
ctx->duplicates.push_back({&sym, this, sec, eSym.st_value});
}
}

View File

@@ -545,9 +545,16 @@ static std::string maybeReportDiscarded(Undefined &sym) {
// If the discarded section is a COMDAT.
StringRef signature = file->getShtGroupSignature(objSections, elfSec);
if (const InputFile *prevailing =
symtab->comdatGroups.lookup(CachedHashStringRef(signature)))
symtab->comdatGroups.lookup(CachedHashStringRef(signature))) {
msg += "\n>>> section group signature: " + signature.str() +
"\n>>> prevailing definition is in " + toString(prevailing);
if (sym.nonPrevailing) {
msg += "\n>>> or the symbol in the prevailing group had STB_WEAK "
"binding and the symbol in a non-prevailing group had STB_GLOBAL "
"binding. Mixing groups with STB_WEAK and STB_GLOBAL binding "
"signature is not supported";
}
}
return msg;
}

View File

@@ -537,11 +537,20 @@ bool Symbol::shouldReplace(const Defined &other) const {
return !isGlobal() && other.isGlobal();
}
void elf::reportDuplicate(const Symbol &sym, InputFile *newFile,
void elf::reportDuplicate(const Symbol &sym, const InputFile *newFile,
InputSectionBase *errSec, uint64_t errOffset) {
if (config->allowMultipleDefinition)
return;
const Defined *d = cast<Defined>(&sym);
// A definition in a COMDAT may be reported as duplicate with a definition
// relative to .gnu.linkonce.t.__x86.get_pc_thunk.bx.
// .gnu.linkonce.t.__x86.get_pc_thunk.bx will be discarded, so there is
// actually no duplicate.
const Defined *d = dyn_cast<Defined>(&sym);
if (!d)
return;
// Allow absolute symbols with the same value for GNU ld compatibility.
if (!d->section && !errSec && errOffset && d->value == errOffset)
return;
if (!d->section || !errSec) {
error("duplicate symbol: " + toString(sym) + "\n>>> defined in " +
toString(sym.file) + "\n>>> defined in " + toString(newFile));

View File

@@ -377,6 +377,7 @@ public:
// The section index if in a discarded section, 0 otherwise.
uint32_t discardedSecIdx;
bool nonPrevailing = false;
};
class SharedSymbol : public Symbol {
@@ -558,7 +559,7 @@ template <typename... T> Defined *makeDefined(T &&...args) {
Defined(std::forward<T>(args)...);
}
void reportDuplicate(const Symbol &sym, InputFile *newFile,
void reportDuplicate(const Symbol &sym, const InputFile *newFile,
InputSectionBase *errSec, uint64_t errOffset);
void maybeWarnUnorderableSymbol(const Symbol *sym);
bool computeIsPreemptible(const Symbol &sym);

View File

@@ -35,6 +35,8 @@ Breaking changes
* The GNU ld incompatible ``--no-define-common`` has been removed.
* The obscure ``-dc``/``-dp`` options have been removed.
* ``-d`` is now ignored.
* If a prevailing COMDAT group defines STB_WEAK symbol, having a STB_GLOBAL symbol in a non-prevailing group is now rejected with a diagnostic.
(`D120626 <https://reviews.llvm.org/D120626>`_)
COFF Improvements
-----------------

View File

@@ -8,11 +8,22 @@
# RUN: ld.lld -e 0 %t/u.o %t/w.o -o %t/u
# RUN: llvm-readelf -s %t/u | FileCheck %s --check-prefix=UNIQUE
# RUN: ld.lld -e 0 %t/w.o %t/g.o -o %t/w
# RUN: llvm-readelf -s %t/w | FileCheck %s --check-prefix=WEAK
## We prefer STB_GLOBAL definition, then changing it to undefined since it is in
## in a non-prevailing COMDAT. Ideally this should be defined, but our behavior
## is fine because valid input cannot form this case.
# RUN: ld.lld -e 0 %t/w.o %t/g.o -o %t/und --noinhibit-exec 2>&1 | FileCheck %s --check-prefix=WARN
# RUN: llvm-readelf -s %t/und | FileCheck %s --check-prefix=UND
# WEAK: NOTYPE WEAK DEFAULT [[#]] _ZZ1fvE1x
# UNIQUE: OBJECT UNIQUE DEFAULT [[#]] _ZZ1fvE1x
# UND: NOTYPE GLOBAL DEFAULT UND _ZZ1fvE1x
# WARN: warning: relocation refers to a symbol in a discarded section: f()::x
# WARN-NEXT: >>> defined in {{.*}}g.o
# WARN-NEXT: >>> section group signature: _ZZ1fvE1x
# WARN-NEXT: >>> prevailing definition is in {{.*}}w.o
# WARN-NEXT: >>> or the symbol in the prevailing group had STB_WEAK binding and the symbol in a non-prevailing group had STB_GLOBAL binding. Mixing groups with STB_WEAK and STB_GLOBAL binding signature is not supported
# WARN-NEXT: >>> referenced by {{.*}}g.o:(.text+0x3)
#--- g.s
movq _ZZ1fvE1x@gotpcrel(%rip), %rax

View File

@@ -11,6 +11,7 @@
# CHECK-NEXT: >>> defined in {{.*}}3.o
# CHECK-NEXT: >>> section group signature: foo
# CHECK-NEXT: >>> prevailing definition is in {{.*}}2.o
# CHECK-NEXT: >>> or the symbol in the prevailing group {{.*}}
# CHECK-NEXT: >>> referenced by {{.*}}1.o:(.text+0x1)
# CHECK: error: relocation refers to a discarded section: .text.foo

View File

@@ -26,6 +26,7 @@
# AA-NEXT: >>> defined in {{.*}}aa.o
# AA-NEXT: >>> section group signature: foo
# AA-NEXT: >>> prevailing definition is in {{.*}}1.o
# AA-NEXT: >>> or the symbol in the prevailing group {{.*}}
# AA-NEXT: >>> referenced by {{.*}}aa.o:(.text+0x1)
## Test the case when the symbol causing a "discarded section" is ordered
@@ -45,14 +46,15 @@
# ZZ-NEXT: >>> defined in {{.*}}zz.o
# ZZ-NEXT: >>> section group signature: foo
# ZZ-NEXT: >>> prevailing definition is in {{.*}}1.o
# ZZ-NEXT: >>> or the symbol in the prevailing group {{.*}}
# ZZ-NEXT: >>> referenced by {{.*}}zz.o:(.text+0x1)
## Don't error if the symbol which would cause "discarded section"
## was inserted before %tzz.o
## The definition in %tdef.o is outside a group. Currently we give an error
## because %tdef.o is not extracted.
# RUN: echo '.globl zz; zz:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tdef.o
# RUN: ld.lld %t.o --start-lib %t1.o %tdef.o %tzz.o --end-lib -o /dev/null
# RUN: not ld.lld %t.o --start-lib %t1.o %tdef.o %tzz.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s
# RUN: rm -f %tdef.a && llvm-ar rc %tdef.a %tdef.o
# RUN: ld.lld %t.o --start-lib %t1.o %tdef.a %tzz.o --end-lib -o /dev/null
# RUN: not ld.lld %t.o --start-lib %t1.o %tdef.a %tzz.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s
.globl _start
_start:

View File

@@ -9,8 +9,8 @@
// RUN: not ld.lld %t.o --format binary file.bin -o /dev/null 2>&1 | FileCheck %s
// CHECK: duplicate symbol: _binary_file_bin_start
// CHECK-NEXT: defined at {{.*}}.o:(.text+0x0)
// CHECK-NEXT: defined at file.bin:(.data+0x0)
// CHECK-NEXT: defined in {{.*}}.o
// CHECK-NEXT: defined in <internal>
.globl _binary_file_bin_start
_binary_file_bin_start:

View File

@@ -1,14 +1,14 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
# RUN: echo '.section .foo,"ae"; .weak foo; foo:' | \
# RUN: echo '.section .foo,"aG",@progbits,zz,comdat; .weak foo; foo:' | \
# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck %s
# RUN: ld.lld %t.o %t1.o -o %t
# RUN: llvm-readelf -s %t | FileCheck %s
# Because foo defined in %t1.o is weak, it does not override global undefined
# in %t.o
# CHECK-NOT: discarded section
# CHECK: undefined symbol: foo
# CHECK: NOTYPE WEAK DEFAULT UND foo
.globl _start
_start:
jmp foo
.section .foo,"aG",@progbits,zz,comdat

View File

@@ -31,8 +31,8 @@ TRACE: lib.a(obj.o): lazy definition of foo
TRACE-NEXT: lib.a(obj.o): lazy definition of bar
TRACE-NEXT: lib.a(bc.bc): definition of foo
TRACE-NEXT: lib.a(bc.bc): reference to bar
TRACE-NEXT: lib.a(obj.o): reference to foo
TRACE-NEXT: lib.a(obj.o): definition of bar
TRACE-NEXT: lib.a(obj.o): reference to foo
TRACE-NEXT: <internal>: reference to foo
;; The definition of "foo" is visible outside the LTO result.
TRACE-NEXT: lto.tmp: definition of foo