[LLD][COFF] Check both mangled and demangled symbols before adding a lazy archive symbol to the symbol table on ARM64EC (#113284)

On ARM64EC, a function symbol may appear in both mangled and demangled
forms:
- ARM64EC archives contain only the mangled name, while the demangled
symbol is defined by the object file as an alias.
- x86_64 archives contain only the demangled name (the mangled name is
usually defined by an object referencing the symbol as an alias to a
guest exit thunk).
- ARM64EC import files contain both the mangled and demangled names for
thunks.

If more than one archive defines the same function, this could lead to
different libraries being used for the same function depending on how
they are referenced. Avoid this by checking if the paired symbol is
already defined before adding a symbol to the table.
This commit is contained in:
Jacek Caban
2024-10-23 13:10:07 +02:00
committed by GitHub
parent 7ab6d39a4d
commit 98bc5295ec
2 changed files with 62 additions and 0 deletions

View File

@@ -18,6 +18,7 @@
#include "lld/Common/Timer.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Parallel.h"
@@ -631,8 +632,47 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
return s;
}
// On ARM64EC, a function symbol may appear in both mangled and demangled forms:
// - ARM64EC archives contain only the mangled name, while the demangled symbol
//   is defined by the object file as an alias.
// - x86_64 archives contain only the demangled name (the mangled name is
//   usually defined by an object referencing the symbol as an alias to a guest
//   exit thunk).
// - ARM64EC import files contain both the mangled and demangled names for
//   thunks.
// If more than one archive defines the same function, this could lead
// to different libraries being used for the same function depending on how they
// are referenced. Avoid this by checking if the paired symbol is already
// defined before adding a symbol to the table.
//
// Returns true when the lazy symbol `name` coming from file `f` may be added
// to `symtab`, and false when its mangled/demangled counterpart already in the
// table should take precedence. `T` is the lazy symbol kind being added
// (LazyArchive or LazyObject).
template <typename T>
bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) {
  // Names carrying the "__imp_" prefix are never paired; always accept them.
  if (name.starts_with("__imp_"))
    return true;

  // Derive the counterpart name: mangle a demangled name, or demangle a
  // mangled one. Both helpers return std::nullopt when the conversion does not
  // apply, so neither result may be dereferenced unchecked.
  std::string pairName;
  if (std::optional<std::string> mangledName =
          getArm64ECMangledFunctionName(name))
    pairName = std::move(*mangledName);
  else if (std::optional<std::string> demangledName =
               getArm64ECDemangledFunctionName(name))
    pairName = std::move(*demangledName);
  else
    return true; // No counterpart name exists, so there is nothing to conflict with.

  Symbol *sym = symtab->find(pairName);
  // The counterpart is not in the table at all: nothing to conflict with.
  if (!sym)
    return true;
  // The counterpart is flagged pendingArchiveLoad: reject the new symbol.
  if (sym->pendingArchiveLoad)
    return false;
  // An undefined counterpart is acceptable unless it has a weak alias that is
  // not an anti-dependency.
  if (auto u = dyn_cast<Undefined>(sym))
    return !u->weakAlias || u->isAntiDep;
  // If the symbol is lazy, allow it only if it originates from the same
  // archive. Any other kind of symbol rejects the new lazy symbol.
  auto lazy = dyn_cast<T>(sym);
  return lazy && lazy->file == f;
}
void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
StringRef name = sym.getName();
if (isArm64EC(ctx.config.machine) &&
!checkLazyECPair<LazyArchive>(this, name, f))
return;
auto [s, wasInserted] = insert(name);
if (wasInserted) {
replaceSymbol<LazyArchive>(s, f, sym);
@@ -648,6 +688,8 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
assert(f->lazy);
if (isArm64EC(ctx.config.machine) && !checkLazyECPair<LazyObject>(this, n, f))
return;
auto [s, wasInserted] = insert(n, f);
if (wasInserted) {
replaceSymbol<LazyObject>(s, f, n);

View File

@@ -17,6 +17,7 @@ RUN: llvm-lib -machine:arm64ec -out:sym-arm64ec.lib sym-arm64ec.obj nsym-aarch64
RUN: llvm-lib -machine:amd64 -out:sym-x86_64.lib sym-x86_64.obj
RUN: llvm-lib -machine:arm64ec -out:func.lib func.obj
RUN: llvm-lib -machine:arm64ec -out:func-x86_64.lib func-x86_64.obj
RUN: llvm-lib -machine:arm64ec -out:func-imp.lib -def:func.def
Verify that a symbol can be referenced from ECSYMBOLS.
RUN: lld-link -machine:arm64ec -dll -noentry -out:test.dll symref-arm64ec.obj sym-arm64ec.lib loadconfig-arm64ec.obj
@@ -57,6 +58,15 @@ RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-2.dll func.lib ref-t
RUN: llvm-objdump -d ref-thunk-2.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test ref-thunk-2.dll | FileCheck -check-prefix=TESTSEC %s
Pass multiple libraries containing `func` with different manglings and ensure they don't conflict with each other.
RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-3.dll func.lib loadconfig-arm64ec.obj func-x86_64.lib func-imp.lib ref-thunk.obj
RUN: llvm-objdump -d ref-thunk-3.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test ref-thunk-3.dll | FileCheck -check-prefix=TESTSEC %s
RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-thunk-4.dll ref-thunk.obj func.lib loadconfig-arm64ec.obj func-x86_64.lib func-imp.lib
RUN: llvm-objdump -d ref-thunk-4.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test ref-thunk-4.dll | FileCheck -check-prefix=TESTSEC %s
Test linking against an x86_64 library (which uses a demangled function name).
RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-x86-1.dll ref-thunk.obj func-x86_64.lib loadconfig-arm64ec.obj
RUN: llvm-objdump -d ref-x86-1.dll | FileCheck -check-prefix=DISASM-X86 %s
@@ -80,6 +90,11 @@ RUN: lld-link -machine:arm64ec -dll -noentry -out:start-lib-1.dll ref-thunk.obj
RUN: llvm-objdump -d start-lib-1.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test start-lib-1.dll | FileCheck -check-prefix=TESTSEC %s
RUN: lld-link -machine:arm64ec -dll -noentry -out:start-lib-2.dll ref-thunk.obj -start-lib func.obj -end-lib loadconfig-arm64ec.obj \
RUN: -start-lib func-x86_64.obj -end-lib func-imp.lib
RUN: llvm-objdump -d start-lib-2.dll | FileCheck -check-prefix=DISASM %s
RUN: llvm-readobj --hex-dump=.test start-lib-2.dll | FileCheck -check-prefix=TESTSEC %s
#--- symref.s
.data
.rva sym
@@ -135,3 +150,8 @@ thunksym:
.globl func
func:
ret
#--- func.def
LIBRARY func.dll
EXPORTS
func