[lld] Add archive file support to Mach-O backend

With this change, basic archive files can be linked together. Input
section discovery has been refactored into a function since archive
files lazily resolve their symbols / the object files containing those
symbols.

Reviewed By: int3, smeenai

Differential Revision: https://reviews.llvm.org/D78342
This commit is contained in:
Kellie Medlin
2020-05-14 12:43:51 -07:00
committed by Jez Ng
parent 920ff806d4
commit 2b920ae78c
11 changed files with 195 additions and 0 deletions

View File

@@ -26,6 +26,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Archive.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -105,6 +106,16 @@ static void addFile(StringRef path) {
MemoryBufferRef mbref = *buffer;
switch (identify_magic(mbref.getBuffer())) {
case file_magic::archive: {
std::unique_ptr<object::Archive> file = CHECK(
object::Archive::create(mbref), path + ": failed to parse archive");
if (!file->isEmpty() && !file->hasSymbolTable())
error(path + ": archive has no index; run ranlib to add one");
inputFiles.push_back(make<ArchiveFile>(std::move(file)));
break;
}
case file_magic::macho_object:
inputFiles.push_back(make<ObjFile>(mbref));
break;

View File

@@ -302,6 +302,30 @@ DylibFile *DylibFile::createLibSystemMock() {
return file;
}
// Wraps the parsed archive `f` and registers every symbol the archive reports
// through symbols() with the global symbol table as a lazy symbol that points
// back at this file.
ArchiveFile::ArchiveFile(std::unique_ptr<llvm::object::Archive> &&f)
    : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {
  // `f` has been moved from at this point; use the `file` member instead.
  for (const object::Archive::Symbol &archiveSym : file->symbols()) {
    symtab->addLazy(archiveSym.getName(), this, archiveSym);
  }
}
// Loads the archive member that defines `sym` as an object file and appends
// that object's sections to this file's `sections`. A member that has already
// been fetched is not loaded a second time.
void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
  object::Archive::Child member =
      CHECK(sym.getMember(), toString(this) +
                                 ": could not get the member for symbol " +
                                 sym.getName());

  // Members are keyed by their child offset; if the insertion reports that
  // the offset was already present, this member was fetched earlier.
  const bool firstFetch = seen.insert(member.getChildOffset()).second;
  if (!firstFetch)
    return;

  MemoryBufferRef memberBuf =
      CHECK(member.getMemoryBufferRef(),
            toString(this) +
                ": could not get the buffer for the member defining symbol " +
                sym.getName());

  // Named `obj` rather than `file` so it does not shadow the archive member
  // variable of the same name.
  auto obj = make<ObjFile>(memberBuf);
  sections.insert(sections.end(), obj->sections.begin(), obj->sections.end());
}
// Returns "<internal>" or "baz.o".
std::string lld::toString(const InputFile *file) {
return file ? std::string(file->getName()) : "<internal>";

View File

@@ -28,6 +28,7 @@ public:
// Tag identifying the concrete kind of an input file. It is compared against
// kind() for type checks, e.g. ArchiveFile::classof() tests for ArchiveKind.
enum Kind {
ObjKind,
DylibKind,
// Archive (.a) inputs; passed to the InputFile constructor by ArchiveFile.
ArchiveKind,
};
virtual ~InputFile() = default;
@@ -81,6 +82,20 @@ public:
std::vector<DylibFile *> reexported;
};
// .a file
//
// Wraps a parsed llvm::object::Archive. fetch() loads the member that
// defines a given archive symbol when it is asked to.
class ArchiveFile : public InputFile {
public:
// Takes ownership of the parsed archive passed in.
explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
// Type test on an InputFile pointer: true when its kind() is ArchiveKind.
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
// Loads the archive member that defines `sym`. Requests that resolve to a
// member which was already fetched return without loading it again.
void fetch(const llvm::object::Archive::Symbol &sym);
private:
// The underlying archive, owned by this object.
std::unique_ptr<llvm::object::Archive> file;
// Keep track of children fetched from the archive by tracking
// which address offsets have been fetched already.
llvm::DenseSet<uint64_t> seen;
};
extern std::vector<InputFile *> inputFiles;
llvm::Optional<MemoryBufferRef> readFile(StringRef path);

View File

@@ -56,6 +56,8 @@ Symbol *SymbolTable::addUndefined(StringRef name) {
if (wasInserted)
replaceSymbol<Undefined>(s, name);
else if (LazySymbol *lazy = dyn_cast<LazySymbol>(s))
lazy->fetchArchiveMember();
return s;
}
@@ -69,4 +71,17 @@ Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file) {
return s;
}
// Records that archive `file` can provide `name` through archive symbol `sym`.
//
// If `name` is new to the table, it becomes a LazySymbol. If it is already
// present as an Undefined symbol, the defining archive member is fetched
// immediately. Any other existing symbol is left untouched.
// Returns the symbol table entry for `name`.
Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
                             const llvm::object::Archive::Symbol &sym) {
  Symbol *entry;
  bool isNew;
  std::tie(entry, isNew) = insert(name);

  if (isNew) {
    replaceSymbol<LazySymbol>(entry, file, sym);
    return entry;
  }

  // An earlier input already referenced this name without defining it.
  if (isa<Undefined>(entry))
    file->fetch(sym);
  return entry;
}
SymbolTable *macho::symtab;

View File

@@ -30,6 +30,9 @@ public:
Symbol *addDylib(StringRef name, DylibFile *file);
Symbol *addLazy(StringRef name, ArchiveFile *file,
const llvm::object::Archive::Symbol &sym);
ArrayRef<Symbol *> getSymbols() const { return symVector; }
Symbol *find(StringRef name);

View File

@@ -15,6 +15,8 @@ using namespace llvm;
using namespace lld;
using namespace lld::macho;
// Asks the archive this symbol came from to fetch the member for `sym`.
void LazySymbol::fetchArchiveMember() {
  file->fetch(sym);
}
// Returns a symbol for an error message.
std::string lld::toString(const Symbol &sym) {
if (Optional<std::string> s = demangleItanium(sym.getName()))

View File

@@ -35,6 +35,7 @@ public:
DefinedKind,
UndefinedKind,
DylibKind,
LazyKind,
};
Kind kind() const { return static_cast<Kind>(symbolKind); }
@@ -81,6 +82,20 @@ public:
uint32_t lazyBindOffset = UINT32_MAX;
};
// Symbol table entry for a name that an archive lists in its symbols (see
// SymbolTable::addLazy). fetchArchiveMember() asks that archive to load the
// member for the recorded archive symbol.
class LazySymbol : public Symbol {
public:
// `file` is the archive that listed the symbol; `sym` is the archive's own
// symbol record, which also supplies the symbol's name.
LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
: Symbol(LazyKind, sym.getName()), file(file), sym(sym) {}
// Type test on a Symbol pointer: true when its kind() is LazyKind.
static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
// Forwards to ArchiveFile::fetch() with the stored archive symbol.
void fetchArchiveMember();
private:
// Archive that listed this symbol.
ArchiveFile *file;
// Copy of the archive symbol record given to the constructor.
const llvm::object::Archive::Symbol sym;
};
inline uint64_t Symbol::getVA() const {
if (auto *d = dyn_cast<Defined>(this))
return d->isec->getVA() + d->value;
@@ -91,6 +106,7 @@ union SymbolUnion {
alignas(Defined) char a[sizeof(Defined)];
alignas(Undefined) char b[sizeof(Undefined)];
alignas(DylibSymbol) char c[sizeof(DylibSymbol)];
alignas(LazySymbol) char d[sizeof(LazySymbol)];
};
template <typename T, typename... ArgT>

35
lld/test/MachO/archive.s Normal file
View File

@@ -0,0 +1,35 @@
# REQUIRES: x86
## Verifies that symbols referenced by main.o are resolved from an archive,
## whichever of the two comes first on the command line.
# RUN: mkdir -p %t
## 2.o defines _boo and 3.o defines _bar; _main below calls both of them.
## 4.o defines _unused and marks _undefined as global without defining it;
## _main references neither.
# RUN: echo ".global _boo; _boo: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o
# RUN: echo ".global _bar; _bar: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o
# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
## Remove any previous archive first so the archive holds only these members.
# RUN: rm -f %t/test.a
# RUN: llvm-ar rcs %t/test.a %t/2.o %t/3.o %t/4.o
# RUN: lld -flavor darwinnew %t/main.o %t/test.a -o %t/test.out
## TODO: Run llvm-nm -p to validate symbol order
# RUN: llvm-nm %t/test.out | FileCheck %s
# CHECK: T _bar
# CHECK: T _boo
# CHECK: T _main
## Linking with the archive first in the command line shouldn't change anything
# RUN: lld -flavor darwinnew %t/test.a %t/main.o -o %t/test.out
# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
# ARCHIVE-FIRST: T _bar
# ARCHIVE-FIRST: T _boo
# ARCHIVE-FIRST: T _main
## The symbols of the unreferenced member 4.o must not show up as defined text
## symbols. This inspects the output of the archive-first link just above.
# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix VISIBLE
# VISIBLE-NOT: T _undefined
# VISIBLE-NOT: T _unused
.global _main
_main:
callq _boo
callq _bar
mov $0, %rax
ret

View File

@@ -0,0 +1,17 @@
# REQUIRES: x86
## An archive created without a symbol table (llvm-ar's S modifier) must be
## rejected with a diagnostic that tells the user to run ranlib.
# RUN: mkdir -p %t
# RUN: echo ".global _boo; _boo: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o
# RUN: echo ".global _bar; _bar: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o
# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
# RUN: rm -f %t/test.a
# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o
## Link the object assembled above (main.o) with the index-less archive, so
## the only expected failure is the missing archive index.
# RUN: not lld -flavor darwinnew %t/main.o %t/test.a -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
.global _main
_main:
mov $0, %rax
ret

View File

@@ -0,0 +1,11 @@
# REQUIRES: x86
## Builds a file that begins with the archive magic string but is followed by
## junk, and expects the link to fail with a "failed to parse archive" error
## that names the offending file.
# RUN: echo "!<arch>" > %t.a
# RUN: echo "foo" >> %t.a
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
# RUN: not lld -flavor darwinnew %t.o %t.a -o /dev/null 2>&1 | FileCheck -DFILE=%t.a %s
# CHECK: error: [[FILE]]: failed to parse archive: truncated or malformed archive (remaining size of archive too small for next archive member header at offset 8)
.global _main
_main:
ret

View File

@@ -0,0 +1,46 @@
# REQUIRES: x86
## Checks which definition of f is used when both a dylib and an archive can
## provide it, depending on command-line order and on which archive member
## gets loaded. _main below only calls g.
# RUN: mkdir -p %t
## g.o defines g (which calls f); f1.o and f2.o each define f; fg.o defines
## both f and g. Each object places its code in a distinctly named section so
## the checks can tell which object a symbol came from.
# RUN: echo ".global f, g; .section __TEXT,test_g; g: callq f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/g.o
# RUN: echo ".global f; .section __TEXT,test_f1; f: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f1.o
# RUN: echo ".global f; .section __TEXT,test_f2; f: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f2.o
# RUN: echo ".global f, g; .section __TEXT,test_fg; f: ret; g: callq f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/fg.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
# RUN: lld -flavor darwinnew -dylib -o %t/libf1.dylib %t/f1.o
# RUN: rm -f %t/libf2_g.a
# RUN: llvm-ar rcs %t/libf2_g.a %t/f2.o %t/g.o
# RUN: rm -f %t/libfg.a
# RUN: llvm-ar rcs %t/libfg.a %t/fg.o
## Dylib before archive: g comes from the archive member g.o, while f is
## expected to be bound lazily to libf1 rather than taken from f2.o.
# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libf2_g.a %t/test.o -o %t/test.out
# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix DYLIB-FIRST
# DYLIB-FIRST: SYMBOL TABLE:
# DYLIB-FIRST-DAG: __TEXT,test_g g
# DYLIB-FIRST: Lazy bind table:
# DYLIB-FIRST-NEXT: segment section address dylib symbol
# DYLIB-FIRST-NEXT: __DATA __la_symbol_ptr {{[0-9a-z]+}} libf1 f
## Archive before dylib: both f (from f2.o) and g (from g.o) are expected to
## come from the archive, leaving the lazy bind table empty.
# RUN: lld -flavor darwinnew %t/libf2_g.a %t/libf1.dylib %t/test.o -o %t/test.out
# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
# ARCHIVE-FIRST: SYMBOL TABLE:
# ARCHIVE-FIRST-DAG: __TEXT,test_f2 f
# ARCHIVE-FIRST-DAG: __TEXT,test_g g
# ARCHIVE-FIRST: Lazy bind table:
# ARCHIVE-FIRST-NEXT: segment section address dylib symbol
# ARCHIVE-FIRST-EMPTY:
## Dylib before an archive whose single member defines both f and g: loading
## that member for g is expected to supply f as well, so nothing is bound to
## the dylib.
# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libfg.a %t/test.o -o %t/test.out
# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-PRIORITY
# ARCHIVE-PRIORITY: SYMBOL TABLE:
# ARCHIVE-PRIORITY-DAG: __TEXT,test_fg f
# ARCHIVE-PRIORITY-DAG: __TEXT,test_fg g
# ARCHIVE-PRIORITY: Lazy bind table:
# ARCHIVE-PRIORITY-NEXT: segment section address dylib symbol
# ARCHIVE-PRIORITY-EMPTY:
.global g
.global _main
_main:
callq g
ret