Files
llvm/lld/COFF/SymbolTable.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1355 lines
45 KiB
C++
Raw Normal View History

//===- SymbolTable.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SymbolTable.h"
#include "COFFLinkerContext.h"
#include "Config.h"
#include "Driver.h"
#include "LTO.h"
#include "PDB.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Timer.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/COFFModuleDefinition.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
using namespace llvm;
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm::support;
namespace lld::coff {
// Removes at most one leading character from `s`.
//
// `chars` is a NUL-terminated set of candidate characters. If the first
// character of `s` is a member of that set, the result is `s` without that
// character; otherwise `s` is returned unchanged. An empty `s` is returned
// as is.
StringRef ltrim1(StringRef s, const char *chars) {
  if (s.empty())
    return s;
  // strchr() treats the terminating NUL of `chars` as part of the string, so
  // a leading '\0' in `s` would otherwise always count as a match.
  const char first = s[0];
  if (first != '\0' && strchr(chars, first))
    return s.substr(1);
  return s;
}
// Opens a diagnostic stream on `ctx`. The diagnostic is a warning when
// config.forceUnresolved is set and an error otherwise.
static COFFSyncStream errorOrWarn(COFFLinkerContext &ctx) {
  const DiagLevel level =
      ctx.config.forceUnresolved ? DiagLevel::Warn : DiagLevel::Err;
  return {ctx, level};
}
// Causes the file associated with a lazy symbol to be linked in.
static void forceLazy(Symbol *s) {
s->pendingArchiveLoad = true;
switch (s->kind()) {
case Symbol::Kind::LazyArchiveKind: {
auto *l = cast<LazyArchive>(s);
l->file->addMember(l->sym);
break;
}
case Symbol::Kind::LazyObjectKind: {
InputFile *file = cast<LazyObject>(s)->file;
// FIXME: Remove this once we resolve all defineds before all undefineds in
// ObjFile::initializeSymbols().
if (!file->lazy)
return;
file->lazy = false;
file->symtab.ctx.driver.addFile(file);
break;
}
[LLD] [COFF] Support linking directly against DLLs in MinGW mode GNU ld.bfd supports linking directly against DLLs without using an import library, and some projects have picked up on this habit. (There's no one single unsurmountable issue with using import libraries, but this is a regularly surfacing missing feature.) As long as one is linking by name (instead of by ordinal), the DLL export table contains most of the information needed. (One can inspect what section a symbol points at, to see if it's a function or data symbol. The practical implementation of this loops over all sections for each symbol, but as long as they're not very many, that should hopefully be tolerable performance wise.) One exception where the information in the DLL isn't entirely enough is on i386 with stdcall functions; depending on how they're done, the exported function name can be a plain undecorated name, while the import library would contain the full decorated symbol name. This issue is addressed separately in a different patch. This is implemented mimicing the structure of a regular import library, with one InputFile corresponding to the static archive that just adds lazy symbols, which then are fetched when they are needed. When such a symbol is fetched, we synthesize a coff_import_header structure in memory and create a regular ImportFile out of it. The implementation could be even smaller by just creating ImportFiles for every symbol available immediately, but that would have the drawback of actually ending up importing all symbols unless running with GC enabled (and mingw mode defaults to having it disabled for historical reasons). Differential Revision: https://reviews.llvm.org/D104530
2021-06-16 16:59:46 +03:00
case Symbol::Kind::LazyDLLSymbolKind: {
auto *l = cast<LazyDLLSymbol>(s);
l->file->makeImport(l->sym);
break;
}
default:
llvm_unreachable(
"symbol passed to forceLazy is not a LazyArchive or LazyObject");
}
}
// Looks through the symbols of sc's file for the DefinedRegular that belongs
// to chunk `sc` and has the largest value that does not exceed `addr`; this
// is generally the global variable or function whose definition contains
// `addr`. When several symbols share that value, the one visited last wins.
// Returns nullptr if no symbol qualifies.
static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
  DefinedRegular *best = nullptr;
  for (Symbol *s : sc->file->getSymbols()) {
    auto *def = dyn_cast_or_null<DefinedRegular>(s);
    // Skip anything that is not a regular definition with a non-null `data`
    // member owned by this very file and chunk.
    if (!def || !def->data)
      continue;
    if (def->file != sc->file || def->getChunk() != sc)
      continue;

    // Symbols located after the address cannot contain it.
    const auto value = def->getValue();
    if (value > addr)
      continue;

    // Keep the current pick unless this one is at least as close to `addr`.
    if (best && value < best->getValue())
      continue;
    best = def;
  }
  return best;
}
// Describes a reference that originates from a bitcode file. The result
// always holds exactly one string: a "referenced by" note naming the file,
// preceded by the module's source file name when that name is not empty.
static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
  std::string note = "\n>>> referenced by ";

  StringRef sourceName = file->obj->getSourceFileName();
  if (!sourceName.empty()) {
    note += sourceName.str();
    note += "\n>>> ";
  }
  note += toString(file);

  std::vector<std::string> notes;
  notes.push_back(std::move(note));
  return notes;
}
static std::optional<std::pair<StringRef, uint32_t>>
getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
std::optional<DILineInfo> optionalLineInfo =
c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
if (!optionalLineInfo)
return std::nullopt;
const DILineInfo &lineInfo = *optionalLineInfo;
if (lineInfo.FileName == DILineInfo::BadString)
return std::nullopt;
return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
}
static std::optional<std::pair<StringRef, uint32_t>>
getFileLine(const SectionChunk *c, uint32_t addr) {
// MinGW can optionally use codeview, even if the default is dwarf.
std::optional<std::pair<StringRef, uint32_t>> fileLine =
getFileLineCodeView(c, addr);
// If codeview didn't yield any result, check dwarf in MinGW mode.
if (!fileLine && c->file->symtab.ctx.config.mingw)
fileLine = getFileLineDwarf(c, addr);
return fileLine;
}
// Describes every reference that `file` makes to the symbol with table index
// `symIndex`.
//
// Each relocation in a section chunk of `file` that targets `symIndex` counts
// as one location. For at most `maxStrings` of them a description is built:
// source file and line when debug info provides them, followed by the object
// file and, if known, the symbol containing the reference. If no relocation
// matches at all (and maxStrings is non-zero), a single string naming just
// the file is returned with a count of 1.
//
// Returns the descriptions together with the total number of locations found.
static std::pair<std::vector<std::string>, size_t>
getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
  struct Location {
    Symbol *sym;
    std::pair<StringRef, uint32_t> fileLine;
  };
  std::vector<Location> found;
  size_t total = 0;

  for (Chunk *chunk : file->getChunks()) {
    auto *sc = dyn_cast<SectionChunk>(chunk);
    if (!sc)
      continue;
    for (const coff_relocation &rel : sc->getRelocs()) {
      if (rel.SymbolTableIndex != symIndex)
        continue;
      ++total;
      // Past the cap we keep counting but stop collecting details.
      if (found.size() >= maxStrings)
        continue;

      std::optional<std::pair<StringRef, uint32_t>> fileLine =
          getFileLine(sc, rel.VirtualAddress);
      Symbol *sym = getSymbol(sc, rel.VirtualAddress);
      // A location with neither line info nor an enclosing symbol is counted
      // but not described.
      if (!fileLine && !sym)
        continue;
      found.push_back(
          {sym, fileLine ? *fileLine : std::pair<StringRef, uint32_t>("", 0)});
    }
  }

  if (maxStrings == 0)
    return std::make_pair(std::vector<std::string>(), total);
  if (total == 0)
    return std::make_pair(
        std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);

  std::vector<std::string> notes;
  notes.reserve(found.size());
  for (const Location &loc : found) {
    // Stream straight into the freshly appended element.
    std::string &text = notes.emplace_back();
    llvm::raw_string_ostream os(text);
    os << "\n>>> referenced by ";
    if (!loc.fileLine.first.empty())
      os << loc.fileLine.first << ":" << loc.fileLine.second
         << "\n>>>               ";
    os << toString(file);
    if (loc.sym)
      os << ":(" << toString(file->symtab.ctx, *loc.sym) << ')';
  }
  return std::make_pair(std::move(notes), total);
}
// Convenience overload: describes the references `file` makes to the symbol
// at `symIndex`, with the description cap set to SIZE_MAX, and drops the
// location count.
std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
  auto described = getSymbolLocations(file, symIndex, SIZE_MAX);
  return std::move(described.first);
}
// Dispatches on the dynamic type of `file`. Object files are forwarded to the
// ObjFile overload; for bitcode files the descriptions are trimmed to at most
// `maxStrings` entries while the reported count keeps the untrimmed size.
// Any other file type is a programming error.
static std::pair<std::vector<std::string>, size_t>
getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
  if (auto *objFile = dyn_cast<ObjFile>(file))
    return getSymbolLocations(objFile, symIndex, maxStrings);

  if (auto *bitcode = dyn_cast<BitcodeFile>(file)) {
    std::vector<std::string> notes = getSymbolLocations(bitcode);
    const size_t total = notes.size();
    if (total > maxStrings)
      notes.resize(maxStrings);
    return std::make_pair(std::move(notes), total);
  }

  llvm_unreachable("unsupported file type passed to getSymbolLocations");
  return std::make_pair(std::vector<std::string>(), (size_t)0);
}
// For an undefined symbol, stores all files referencing it and the index of
// the undefined symbol in each file.
struct UndefinedDiag {
// The undefined symbol this diagnostic is about.
Symbol *sym;
// One referencing input file together with the symbol's index in that file;
// the index is what the reference lookup matches relocations against.
struct File {
InputFile *file;
uint32_t symIndex;
};
// Every file known to reference `sym`.
std::vector<File> files;
};
// Emits one "undefined symbol" diagnostic for `undefDiag` (an error, or a
// warning when unresolved symbols are forced). Up to three referencing
// locations are listed across all files; any further ones are summarised in
// a trailing "referenced N more times" line.
static void reportUndefinedSymbol(COFFLinkerContext &ctx,
                                  const UndefinedDiag &undefDiag) {
  auto diag = errorOrWarn(ctx);
  diag << "undefined symbol: " << undefDiag.sym;

  const size_t maxUndefReferences = 3;
  size_t shown = 0;
  size_t seen = 0;
  for (const UndefinedDiag::File &ref : undefDiag.files) {
    // Only request as many descriptions as are still left in the budget; the
    // returned count still covers every location in this file.
    const size_t budget = maxUndefReferences - shown;
    std::pair<std::vector<std::string>, size_t> located =
        getSymbolLocations(ref.file, ref.symIndex, budget);
    seen += located.second;
    shown += located.first.size();
    for (const std::string &line : located.first)
      diag << line;
  }

  if (shown < seen)
    diag << "\n>>> referenced " << seen - shown << " more times";
}
void SymbolTable::loadMinGWSymbols() {
[COFF] Support MinGW automatic dllimport of data Normally, in order to reference exported data symbols from a different DLL, the declarations need to have the dllimport attribute, in order to use the __imp_<var> symbol (which contains an address to the actual variable) instead of the variable itself directly. This isn't an issue in the same way for functions, since any reference to the function without the dllimport attribute will end up as a reference to a thunk which loads the actual target function from the import address table (IAT). GNU ld, in MinGW environments, supports automatically importing data symbols from DLLs, even if the references didn't have the appropriate dllimport attribute. Since the PE/COFF format doesn't support the kind of relocations that this would require, the MinGW's CRT startup code has an custom framework of their own for manually fixing the missing relocations once module is loaded and the target addresses in the IAT are known. For this to work, the linker (originall in GNU ld) creates a list of remaining references needing fixup, which the runtime processes on startup before handing over control to user code. While this feature is rather controversial, it's one of the main features allowing unix style libraries to be used on windows without any extra porting effort. Some sort of automatic fixing of data imports is also necessary for the itanium C++ ABI on windows (as clang implements it right now) for importing vtable pointers in certain cases, see D43184 for some discussion on that. The runtime pseudo relocation handler supports 8/16/32/64 bit addresses, either PC relative references (like IMAGE_REL_*_REL32*) or absolute references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32, IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a relocation against the corresponding IAT slot. 
For the absolute references, a normal base relocation is created, to update the embedded address in case the image is loaded at a different address. The list of runtime pseudo relocations contains the RVA of the imported symbol (the IAT slot), the RVA of the location the relocation should be applied to, and a size of the memory location. When the relocations are fixed at runtime, the difference between the actual IAT slot value and the IAT slot address is added to the reference, doing the right thing for both absolute and relative references. With this patch alone, things work fine for i386 binaries, and mostly for x86_64 binaries, with feature parity with GNU ld. Despite this, there are a few gotchas: - References to data from within code works fine on both x86 architectures, since their relocations consist of plain 32 or 64 bit absolute/relative references. On ARM and AArch64, references to data doesn't consist of a plain 32 or 64 bit embedded address or offset in the code. On ARMNT, it's usually a MOVW+MOVT instruction pair represented by a IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR instruction pair with an even more complex encoding, storing a PC relative address (with a range of +/- 4 GB). This could theoretically be remedied by extending the runtime pseudo relocation handler with new relocation types, to support these instruction encodings. This isn't an issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64. - For x86_64, if references in code are encoded as 32 bit PC relative offsets, the runtime relocation will fail if the target turns out to be out of range for a 32 bit offset. - Fixing up the relocations at runtime requires making sections writable if necessary, with the VirtualProtect function. In Windows Store/UWP apps, this function is forbidden. These limitations are addressed by a few later patches in lld and llvm. 
Differential Revision: https://reviews.llvm.org/D50917 llvm-svn: 340726
2018-08-27 08:43:31 +00:00
for (auto &i : symMap) {
Symbol *sym = i.second;
auto *undef = dyn_cast<Undefined>(sym);
if (!undef)
continue;
if (undef->getWeakAlias())
continue;
[COFF] Support MinGW automatic dllimport of data Normally, in order to reference exported data symbols from a different DLL, the declarations need to have the dllimport attribute, in order to use the __imp_<var> symbol (which contains an address to the actual variable) instead of the variable itself directly. This isn't an issue in the same way for functions, since any reference to the function without the dllimport attribute will end up as a reference to a thunk which loads the actual target function from the import address table (IAT). GNU ld, in MinGW environments, supports automatically importing data symbols from DLLs, even if the references didn't have the appropriate dllimport attribute. Since the PE/COFF format doesn't support the kind of relocations that this would require, the MinGW's CRT startup code has an custom framework of their own for manually fixing the missing relocations once module is loaded and the target addresses in the IAT are known. For this to work, the linker (originall in GNU ld) creates a list of remaining references needing fixup, which the runtime processes on startup before handing over control to user code. While this feature is rather controversial, it's one of the main features allowing unix style libraries to be used on windows without any extra porting effort. Some sort of automatic fixing of data imports is also necessary for the itanium C++ ABI on windows (as clang implements it right now) for importing vtable pointers in certain cases, see D43184 for some discussion on that. The runtime pseudo relocation handler supports 8/16/32/64 bit addresses, either PC relative references (like IMAGE_REL_*_REL32*) or absolute references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32, IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a relocation against the corresponding IAT slot. 
For the absolute references, a normal base relocation is created, to update the embedded address in case the image is loaded at a different address. The list of runtime pseudo relocations contains the RVA of the imported symbol (the IAT slot), the RVA of the location the relocation should be applied to, and a size of the memory location. When the relocations are fixed at runtime, the difference between the actual IAT slot value and the IAT slot address is added to the reference, doing the right thing for both absolute and relative references. With this patch alone, things work fine for i386 binaries, and mostly for x86_64 binaries, with feature parity with GNU ld. Despite this, there are a few gotchas: - References to data from within code works fine on both x86 architectures, since their relocations consist of plain 32 or 64 bit absolute/relative references. On ARM and AArch64, references to data doesn't consist of a plain 32 or 64 bit embedded address or offset in the code. On ARMNT, it's usually a MOVW+MOVT instruction pair represented by a IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR instruction pair with an even more complex encoding, storing a PC relative address (with a range of +/- 4 GB). This could theoretically be remedied by extending the runtime pseudo relocation handler with new relocation types, to support these instruction encodings. This isn't an issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64. - For x86_64, if references in code are encoded as 32 bit PC relative offsets, the runtime relocation will fail if the target turns out to be out of range for a 32 bit offset. - Fixing up the relocations at runtime requires making sections writable if necessary, with the VirtualProtect function. In Windows Store/UWP apps, this function is forbidden. These limitations are addressed by a few later patches in lld and llvm. 
Differential Revision: https://reviews.llvm.org/D50917 llvm-svn: 340726
2018-08-27 08:43:31 +00:00
StringRef name = undef->getName();
if (machine == I386 && ctx.config.stdcallFixup) {
// Check if we can resolve an undefined decorated symbol by finding
2021-10-12 17:03:22 -04:00
// the intended target as an undecorated symbol (only with a leading
// underscore).
StringRef origName = name;
StringRef baseName = name;
// Trim down stdcall/fastcall/vectorcall symbols to the base name.
baseName = ltrim1(baseName, "_@");
baseName = baseName.substr(0, baseName.find('@'));
// Add a leading underscore, as it would be in cdecl form.
std::string newName = ("_" + baseName).str();
Symbol *l;
if (newName != origName && (l = find(newName)) != nullptr) {
// If we found a symbol and it is lazy; load it.
if (l->isLazy() && !l->pendingArchiveLoad) {
2024-12-04 09:04:40 -08:00
Log(ctx) << "Loading lazy " << l->getName() << " from "
<< l->getFile()->getName() << " for stdcall fixup";
forceLazy(l);
}
// If it's lazy or already defined, hook it up as weak alias.
if (l->isLazy() || isa<Defined>(l)) {
if (ctx.config.warnStdcallFixup)
Warn(ctx) << "Resolving " << origName << " by linking to "
<< newName;
else
2024-12-04 09:04:40 -08:00
Log(ctx) << "Resolving " << origName << " by linking to "
<< newName;
undef->setWeakAlias(l);
continue;
}
}
}
[COFF] Support MinGW automatic dllimport of data Normally, in order to reference exported data symbols from a different DLL, the declarations need to have the dllimport attribute, in order to use the __imp_<var> symbol (which contains an address to the actual variable) instead of the variable itself directly. This isn't an issue in the same way for functions, since any reference to the function without the dllimport attribute will end up as a reference to a thunk which loads the actual target function from the import address table (IAT). GNU ld, in MinGW environments, supports automatically importing data symbols from DLLs, even if the references didn't have the appropriate dllimport attribute. Since the PE/COFF format doesn't support the kind of relocations that this would require, the MinGW's CRT startup code has an custom framework of their own for manually fixing the missing relocations once module is loaded and the target addresses in the IAT are known. For this to work, the linker (originall in GNU ld) creates a list of remaining references needing fixup, which the runtime processes on startup before handing over control to user code. While this feature is rather controversial, it's one of the main features allowing unix style libraries to be used on windows without any extra porting effort. Some sort of automatic fixing of data imports is also necessary for the itanium C++ ABI on windows (as clang implements it right now) for importing vtable pointers in certain cases, see D43184 for some discussion on that. The runtime pseudo relocation handler supports 8/16/32/64 bit addresses, either PC relative references (like IMAGE_REL_*_REL32*) or absolute references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32, IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a relocation against the corresponding IAT slot. 
For the absolute references, a normal base relocation is created, to update the embedded address in case the image is loaded at a different address. The list of runtime pseudo relocations contains the RVA of the imported symbol (the IAT slot), the RVA of the location the relocation should be applied to, and a size of the memory location. When the relocations are fixed at runtime, the difference between the actual IAT slot value and the IAT slot address is added to the reference, doing the right thing for both absolute and relative references. With this patch alone, things work fine for i386 binaries, and mostly for x86_64 binaries, with feature parity with GNU ld. Despite this, there are a few gotchas: - References to data from within code works fine on both x86 architectures, since their relocations consist of plain 32 or 64 bit absolute/relative references. On ARM and AArch64, references to data doesn't consist of a plain 32 or 64 bit embedded address or offset in the code. On ARMNT, it's usually a MOVW+MOVT instruction pair represented by a IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR instruction pair with an even more complex encoding, storing a PC relative address (with a range of +/- 4 GB). This could theoretically be remedied by extending the runtime pseudo relocation handler with new relocation types, to support these instruction encodings. This isn't an issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64. - For x86_64, if references in code are encoded as 32 bit PC relative offsets, the runtime relocation will fail if the target turns out to be out of range for a 32 bit offset. - Fixing up the relocations at runtime requires making sections writable if necessary, with the VirtualProtect function. In Windows Store/UWP apps, this function is forbidden. These limitations are addressed by a few later patches in lld and llvm. 
Differential Revision: https://reviews.llvm.org/D50917 llvm-svn: 340726
2018-08-27 08:43:31 +00:00
if (ctx.config.autoImport) {
if (name.starts_with("__imp_"))
continue;
// If we have an undefined symbol, but we have a lazy symbol we could
// load, load it.
Symbol *l = find(("__imp_" + name).str());
if (!l || l->pendingArchiveLoad || !l->isLazy())
continue;
2024-12-04 09:04:40 -08:00
Log(ctx) << "Loading lazy " << l->getName() << " from "
<< l->getFile()->getName() << " for automatic import";
forceLazy(l);
}
[COFF] Support MinGW automatic dllimport of data Normally, in order to reference exported data symbols from a different DLL, the declarations need to have the dllimport attribute, in order to use the __imp_<var> symbol (which contains an address to the actual variable) instead of the variable itself directly. This isn't an issue in the same way for functions, since any reference to the function without the dllimport attribute will end up as a reference to a thunk which loads the actual target function from the import address table (IAT). GNU ld, in MinGW environments, supports automatically importing data symbols from DLLs, even if the references didn't have the appropriate dllimport attribute. Since the PE/COFF format doesn't support the kind of relocations that this would require, the MinGW's CRT startup code has an custom framework of their own for manually fixing the missing relocations once module is loaded and the target addresses in the IAT are known. For this to work, the linker (originall in GNU ld) creates a list of remaining references needing fixup, which the runtime processes on startup before handing over control to user code. While this feature is rather controversial, it's one of the main features allowing unix style libraries to be used on windows without any extra porting effort. Some sort of automatic fixing of data imports is also necessary for the itanium C++ ABI on windows (as clang implements it right now) for importing vtable pointers in certain cases, see D43184 for some discussion on that. The runtime pseudo relocation handler supports 8/16/32/64 bit addresses, either PC relative references (like IMAGE_REL_*_REL32*) or absolute references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32, IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a relocation against the corresponding IAT slot. 
For the absolute references, a normal base relocation is created, to update the embedded address in case the image is loaded at a different address. The list of runtime pseudo relocations contains the RVA of the imported symbol (the IAT slot), the RVA of the location the relocation should be applied to, and a size of the memory location. When the relocations are fixed at runtime, the difference between the actual IAT slot value and the IAT slot address is added to the reference, doing the right thing for both absolute and relative references. With this patch alone, things work fine for i386 binaries, and mostly for x86_64 binaries, with feature parity with GNU ld. Despite this, there are a few gotchas: - References to data from within code works fine on both x86 architectures, since their relocations consist of plain 32 or 64 bit absolute/relative references. On ARM and AArch64, references to data doesn't consist of a plain 32 or 64 bit embedded address or offset in the code. On ARMNT, it's usually a MOVW+MOVT instruction pair represented by a IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR instruction pair with an even more complex encoding, storing a PC relative address (with a range of +/- 4 GB). This could theoretically be remedied by extending the runtime pseudo relocation handler with new relocation types, to support these instruction encodings. This isn't an issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64. - For x86_64, if references in code are encoded as 32 bit PC relative offsets, the runtime relocation will fail if the target turns out to be out of range for a 32 bit offset. - Fixing up the relocations at runtime requires making sections writable if necessary, with the VirtualProtect function. In Windows Store/UWP apps, this function is forbidden. These limitations are addressed by a few later patches in lld and llvm. 
Differential Revision: https://reviews.llvm.org/D50917 llvm-svn: 340726
2018-08-27 08:43:31 +00:00
}
}
// Looks up the "__imp_"-prefixed counterpart of `name`. Returns it when it
// exists and is a Defined symbol; returns nullptr otherwise, including when
// `name` itself already starts with "__imp_".
Defined *SymbolTable::impSymbol(StringRef name) {
  if (name.starts_with("__imp_"))
    return nullptr;
  Symbol *imp = find(("__imp_" + name).str());
  return dyn_cast_or_null<Defined>(imp);
}
bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
Defined *imp = impSymbol(name);
[COFF] Support MinGW automatic dllimport of data Normally, in order to reference exported data symbols from a different DLL, the declarations need to have the dllimport attribute, in order to use the __imp_<var> symbol (which contains an address to the actual variable) instead of the variable itself directly. This isn't an issue in the same way for functions, since any reference to the function without the dllimport attribute will end up as a reference to a thunk which loads the actual target function from the import address table (IAT). GNU ld, in MinGW environments, supports automatically importing data symbols from DLLs, even if the references didn't have the appropriate dllimport attribute. Since the PE/COFF format doesn't support the kind of relocations that this would require, the MinGW's CRT startup code has an custom framework of their own for manually fixing the missing relocations once module is loaded and the target addresses in the IAT are known. For this to work, the linker (originall in GNU ld) creates a list of remaining references needing fixup, which the runtime processes on startup before handing over control to user code. While this feature is rather controversial, it's one of the main features allowing unix style libraries to be used on windows without any extra porting effort. Some sort of automatic fixing of data imports is also necessary for the itanium C++ ABI on windows (as clang implements it right now) for importing vtable pointers in certain cases, see D43184 for some discussion on that. The runtime pseudo relocation handler supports 8/16/32/64 bit addresses, either PC relative references (like IMAGE_REL_*_REL32*) or absolute references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32, IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a relocation against the corresponding IAT slot. 
For the absolute references, a normal base relocation is created, to update the embedded address in case the image is loaded at a different address. The list of runtime pseudo relocations contains the RVA of the imported symbol (the IAT slot), the RVA of the location the relocation should be applied to, and a size of the memory location. When the relocations are fixed at runtime, the difference between the actual IAT slot value and the IAT slot address is added to the reference, doing the right thing for both absolute and relative references. With this patch alone, things work fine for i386 binaries, and mostly for x86_64 binaries, with feature parity with GNU ld. Despite this, there are a few gotchas: - References to data from within code works fine on both x86 architectures, since their relocations consist of plain 32 or 64 bit absolute/relative references. On ARM and AArch64, references to data doesn't consist of a plain 32 or 64 bit embedded address or offset in the code. On ARMNT, it's usually a MOVW+MOVT instruction pair represented by a IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR instruction pair with an even more complex encoding, storing a PC relative address (with a range of +/- 4 GB). This could theoretically be remedied by extending the runtime pseudo relocation handler with new relocation types, to support these instruction encodings. This isn't an issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64. - For x86_64, if references in code are encoded as 32 bit PC relative offsets, the runtime relocation will fail if the target turns out to be out of range for a 32 bit offset. - Fixing up the relocations at runtime requires making sections writable if necessary, with the VirtualProtect function. In Windows Store/UWP apps, this function is forbidden. These limitations are addressed by a few later patches in lld and llvm. 
Differential Revision: https://reviews.llvm.org/D50917 llvm-svn: 340726
2018-08-27 08:43:31 +00:00
if (!imp)
return false;
// Replace the reference directly to a variable with a reference
// to the import address table instead. This obviously isn't right,
// but we mark the symbol as isRuntimePseudoReloc, and a later pass
[COFF] Support MinGW automatic dllimport of data Normally, in order to reference exported data symbols from a different DLL, the declarations need to have the dllimport attribute, in order to use the __imp_<var> symbol (which contains an address to the actual variable) instead of the variable itself directly. This isn't an issue in the same way for functions, since any reference to the function without the dllimport attribute will end up as a reference to a thunk which loads the actual target function from the import address table (IAT). GNU ld, in MinGW environments, supports automatically importing data symbols from DLLs, even if the references didn't have the appropriate dllimport attribute. Since the PE/COFF format doesn't support the kind of relocations that this would require, the MinGW's CRT startup code has a custom framework of its own for manually fixing the missing relocations once the module is loaded and the target addresses in the IAT are known. For this to work, the linker (originally in GNU ld) creates a list of remaining references needing fixup, which the runtime processes on startup before handing over control to user code. While this feature is rather controversial, it's one of the main features allowing unix style libraries to be used on windows without any extra porting effort. Some sort of automatic fixing of data imports is also necessary for the itanium C++ ABI on windows (as clang implements it right now) for importing vtable pointers in certain cases, see D43184 for some discussion on that. The runtime pseudo relocation handler supports 8/16/32/64 bit addresses, either PC relative references (like IMAGE_REL_*_REL32*) or absolute references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32, IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a relocation against the corresponding IAT slot. 
For the absolute references, a normal base relocation is created, to update the embedded address in case the image is loaded at a different address. The list of runtime pseudo relocations contains the RVA of the imported symbol (the IAT slot), the RVA of the location the relocation should be applied to, and a size of the memory location. When the relocations are fixed at runtime, the difference between the actual IAT slot value and the IAT slot address is added to the reference, doing the right thing for both absolute and relative references. With this patch alone, things work fine for i386 binaries, and mostly for x86_64 binaries, with feature parity with GNU ld. Despite this, there are a few gotchas: - References to data from within code works fine on both x86 architectures, since their relocations consist of plain 32 or 64 bit absolute/relative references. On ARM and AArch64, references to data doesn't consist of a plain 32 or 64 bit embedded address or offset in the code. On ARMNT, it's usually a MOVW+MOVT instruction pair represented by a IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR instruction pair with an even more complex encoding, storing a PC relative address (with a range of +/- 4 GB). This could theoretically be remedied by extending the runtime pseudo relocation handler with new relocation types, to support these instruction encodings. This isn't an issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64. - For x86_64, if references in code are encoded as 32 bit PC relative offsets, the runtime relocation will fail if the target turns out to be out of range for a 32 bit offset. - Fixing up the relocations at runtime requires making sections writable if necessary, with the VirtualProtect function. In Windows Store/UWP apps, this function is forbidden. These limitations are addressed by a few later patches in lld and llvm. 
Differential Revision: https://reviews.llvm.org/D50917 llvm-svn: 340726
2018-08-27 08:43:31 +00:00
// will add runtime pseudo relocations for every relocation against
// this Symbol. The runtime pseudo relocation framework expects the
// reference itself to point at the IAT entry.
size_t impSize = 0;
if (isa<DefinedImportData>(imp)) {
2024-12-04 09:04:40 -08:00
Log(ctx) << "Automatically importing " << name << " from "
<< cast<DefinedImportData>(imp)->getDLLName();
impSize = sizeof(DefinedImportData);
} else if (isa<DefinedRegular>(imp)) {
2024-12-04 09:04:40 -08:00
Log(ctx) << "Automatically importing " << name << " from "
<< toString(cast<DefinedRegular>(imp)->file);
impSize = sizeof(DefinedRegular);
} else {
Warn(ctx) << "unable to automatically import " << name << " from "
<< imp->getName() << " from " << cast<DefinedRegular>(imp)->file
<< "; unexpected symbol type";
return false;
}
sym->replaceKeepingName(imp, impSize);
sym->isRuntimePseudoReloc = true;
// There may exist symbols named .refptr.<name> which only consist
// of a single pointer to <name>. If it turns out <name> is
// automatically imported, we don't need to keep the .refptr.<name>
// pointer at all, but redirect all accesses to it to the IAT entry
// for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
DefinedRegular *refptr =
dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) {
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
2024-12-04 09:04:40 -08:00
Log(ctx) << "Replacing .refptr." << name << " with " << imp->getName();
refptr->getChunk()->live = false;
refptr->replaceKeepingName(imp, impSize);
}
}
[COFF] Support MinGW automatic dllimport of data Normally, in order to reference exported data symbols from a different DLL, the declarations need to have the dllimport attribute, in order to use the __imp_<var> symbol (which contains an address to the actual variable) instead of the variable itself directly. This isn't an issue in the same way for functions, since any reference to the function without the dllimport attribute will end up as a reference to a thunk which loads the actual target function from the import address table (IAT). GNU ld, in MinGW environments, supports automatically importing data symbols from DLLs, even if the references didn't have the appropriate dllimport attribute. Since the PE/COFF format doesn't support the kind of relocations that this would require, the MinGW's CRT startup code has a custom framework of its own for manually fixing the missing relocations once the module is loaded and the target addresses in the IAT are known. For this to work, the linker (originally in GNU ld) creates a list of remaining references needing fixup, which the runtime processes on startup before handing over control to user code. While this feature is rather controversial, it's one of the main features allowing unix style libraries to be used on windows without any extra porting effort. Some sort of automatic fixing of data imports is also necessary for the itanium C++ ABI on windows (as clang implements it right now) for importing vtable pointers in certain cases, see D43184 for some discussion on that. The runtime pseudo relocation handler supports 8/16/32/64 bit addresses, either PC relative references (like IMAGE_REL_*_REL32*) or absolute references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32, IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a relocation against the corresponding IAT slot. 
For the absolute references, a normal base relocation is created, to update the embedded address in case the image is loaded at a different address. The list of runtime pseudo relocations contains the RVA of the imported symbol (the IAT slot), the RVA of the location the relocation should be applied to, and a size of the memory location. When the relocations are fixed at runtime, the difference between the actual IAT slot value and the IAT slot address is added to the reference, doing the right thing for both absolute and relative references. With this patch alone, things work fine for i386 binaries, and mostly for x86_64 binaries, with feature parity with GNU ld. Despite this, there are a few gotchas: - References to data from within code works fine on both x86 architectures, since their relocations consist of plain 32 or 64 bit absolute/relative references. On ARM and AArch64, references to data doesn't consist of a plain 32 or 64 bit embedded address or offset in the code. On ARMNT, it's usually a MOVW+MOVT instruction pair represented by a IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR instruction pair with an even more complex encoding, storing a PC relative address (with a range of +/- 4 GB). This could theoretically be remedied by extending the runtime pseudo relocation handler with new relocation types, to support these instruction encodings. This isn't an issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64. - For x86_64, if references in code are encoded as 32 bit PC relative offsets, the runtime relocation will fail if the target turns out to be out of range for a 32 bit offset. - Fixing up the relocations at runtime requires making sections writable if necessary, with the VirtualProtect function. In Windows Store/UWP apps, this function is forbidden. These limitations are addressed by a few later patches in lld and llvm. 
Differential Revision: https://reviews.llvm.org/D50917 llvm-svn: 340726
2018-08-27 08:43:31 +00:00
return true;
}
/// Helper function for reportUnresolvable and resolveRemainingUndefines.
/// This function emits an "undefined symbol" diagnostic for each symbol in
/// undefs. If localImports is not nullptr, it also emits a "locally
/// defined symbol imported" diagnostic for symbols in localImports.
/// objFiles and bitcodeFiles (if not nullptr) are used to report where
/// undefined symbols are referenced.
void SymbolTable::reportProblemSymbols(
const SmallPtrSetImpl<Symbol *> &undefs,
const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
// Return early if there is nothing to report (which should be
// the common case).
if (undefs.empty() && (!localImports || localImports->empty()))
return;
for (Symbol *b : ctx.config.gcroot) {
if (undefs.count(b))
2024-12-05 20:56:19 -08:00
errorOrWarn(ctx) << "<root>: undefined symbol: " << b;
if (localImports)
if (Symbol *imp = localImports->lookup(b))
2024-12-05 20:56:19 -08:00
Warn(ctx) << "<root>: locally defined symbol imported: " << imp
<< " (defined in " << toString(imp->getFile())
<< ") [LNK4217]";
}
std::vector<UndefinedDiag> undefDiags;
DenseMap<Symbol *, int> firstDiag;
auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
uint32_t symIndex = (uint32_t)-1;
for (Symbol *sym : symbols) {
++symIndex;
if (!sym)
continue;
if (undefs.count(sym)) {
auto [it, inserted] = firstDiag.try_emplace(sym, undefDiags.size());
if (inserted)
undefDiags.push_back({sym, {{file, symIndex}}});
else
undefDiags[it->second].files.push_back({file, symIndex});
}
if (localImports)
if (Symbol *imp = localImports->lookup(sym))
2024-12-05 20:56:19 -08:00
Warn(ctx) << file << ": locally defined symbol imported: " << imp
<< " (defined in " << imp->getFile() << ") [LNK4217]";
}
};
for (ObjFile *file : ctx.objFileInstances)
processFile(file, file->getSymbols());
if (needBitcodeFiles)
for (BitcodeFile *file : bitcodeFileInstances)
processFile(file, file->getSymbols());
for (const UndefinedDiag &undefDiag : undefDiags)
reportUndefinedSymbol(ctx, undefDiag);
}
void SymbolTable::reportUnresolvable() {
SmallPtrSet<Symbol *, 8> undefs;
for (auto &i : symMap) {
Symbol *sym = i.second;
auto *undef = dyn_cast<Undefined>(sym);
if (!undef || sym->deferUndefined)
continue;
if (undef->getWeakAlias())
continue;
StringRef name = undef->getName();
if (name.starts_with("__imp_")) {
Symbol *imp = find(name.substr(strlen("__imp_")));
[LTO] [LLD] Don't alias the __imp_func and func symbol resolutions (#71376) Commit b963c0b658cc54b370832df4f5a3d63fd69da334 fixed LTO compilation of cases where one translation unit is calling a function with the dllimport attribute, and another translation unit provides this function locally within the same linked module (i.e. not actually dllimported); see https://github.com/llvm/llvm-project/issues/37453 or https://bugs.llvm.org/show_bug.cgi?id=38105 for full context. This was fixed by aliasing their GlobalResolution structs, for the `__imp_` prefixed and non prefixed symbols. I believe this fix to be wrong. This patch reverts that fix, and fixes the same issue differently, within LLD instead. The fix assumed that one can treat the `__imp_` prefixed and unprefixed symbols as equal, referencing SVN r240620 (d766653534e0cff702e42a43b44d3057b6094fea). However that referenced commit had mistaken how this logic works, which was corrected later in SVN r240622 (88e0f9206b4dccb56dee931adab08f89ff80525a); those symbols aren't direct aliases for each other - but if there's a need for the `__imp_` prefixed one and the other one exists, the `__imp_` prefixed one is created, as a pointer to the other one. However this fix only works if both translation units are compiled as LTO; if the caller is compiled as a regular object file and the callee is compiled as LTO, the fix fails, as the LTO compilation doesn't know that the unprefixed symbol is needed. The only level that knows of the potential relationship between the `__imp_` prefixed and unprefixed symbol, across regular and bitcode object files, is LLD itself. 
Therefore, revert the original fix from b963c0b658cc54b370832df4f5a3d63fd69da334, and fix the issue differently - when concluding that we can fulfill an undefined symbol starting with `__imp_`, mark the corresponding non prefixed symbol as used in a regular object for the LTO compilation, to make sure that this non prefixed symbol exists after the LTO compilation, to let LLD do the fixup of the local import. Extend the testcase to test a regular object file calling an LTO object file, which previously failed. This change also fixes another issue; an object file can provide both unprefixed and prefixed versions of the same symbol, like this: void importedFunc(void) { } void (*__imp_importedFunc)(void) = importedFunc; That allows the function to be called both with and without dllimport markings. (The concept of automatically resolving a reference to `__imp_func` to a locally defined `func` only is done in MSVC style linkers, but not in GNU ld, therefore MinGW mode code often uses this construct.) Previously, the aliasing of global resolutions at the LTO level would trigger a failed assert with "Multiple prevailing defs are not allowed" for this case, as both `importedFunc` and `__imp_importedFunc` could be prevailing. Add a case to the existing LLD test case lto-imp-prefix.ll to test this as well. This change (together with previous change in 3ab6209a3f93bdbeec8e9b9fcc00a9a4980915ff) completes LLD to work with mingw-w64-crt files (the base glue code for a mingw-w64 toolchain) built with LTO.
2023-11-21 15:06:00 +02:00
if (Defined *def = dyn_cast_or_null<Defined>(imp)) {
def->isUsedInRegularObj = true;
continue;
[LTO] [LLD] Don't alias the __imp_func and func symbol resolutions (#71376) Commit b963c0b658cc54b370832df4f5a3d63fd69da334 fixed LTO compilation of cases where one translation unit is calling a function with the dllimport attribute, and another translation unit provides this function locally within the same linked module (i.e. not actually dllimported); see https://github.com/llvm/llvm-project/issues/37453 or https://bugs.llvm.org/show_bug.cgi?id=38105 for full context. This was fixed by aliasing their GlobalResolution structs, for the `__imp_` prefixed and non prefixed symbols. I believe this fix to be wrong. This patch reverts that fix, and fixes the same issue differently, within LLD instead. The fix assumed that one can treat the `__imp_` prefixed and unprefixed symbols as equal, referencing SVN r240620 (d766653534e0cff702e42a43b44d3057b6094fea). However that referenced commit had mistaken how this logic works, which was corrected later in SVN r240622 (88e0f9206b4dccb56dee931adab08f89ff80525a); those symbols aren't direct aliases for each other - but if there's a need for the `__imp_` prefixed one and the other one exists, the `__imp_` prefixed one is created, as a pointer to the other one. However this fix only works if both translation units are compiled as LTO; if the caller is compiled as a regular object file and the callee is compiled as LTO, the fix fails, as the LTO compilation doesn't know that the unprefixed symbol is needed. The only level that knows of the potential relationship between the `__imp_` prefixed and unprefixed symbol, across regular and bitcode object files, is LLD itself. 
Therefore, revert the original fix from b963c0b658cc54b370832df4f5a3d63fd69da334, and fix the issue differently - when concluding that we can fulfill an undefined symbol starting with `__imp_`, mark the corresponding non prefixed symbol as used in a regular object for the LTO compilation, to make sure that this non prefixed symbol exists after the LTO compilation, to let LLD do the fixup of the local import. Extend the testcase to test a regular object file calling an LTO object file, which previously failed. This change also fixes another issue; an object file can provide both unprefixed and prefixed versions of the same symbol, like this: void importedFunc(void) { } void (*__imp_importedFunc)(void) = importedFunc; That allows the function to be called both with and without dllimport markings. (The concept of automatically resolving a reference to `__imp_func` to a locally defined `func` only is done in MSVC style linkers, but not in GNU ld, therefore MinGW mode code often uses this construct.) Previously, the aliasing of global resolutions at the LTO level would trigger a failed assert with "Multiple prevailing defs are not allowed" for this case, as both `importedFunc` and `__imp_importedFunc` could be prevailing. Add a case to the existing LLD test case lto-imp-prefix.ll to test this as well. This change (together with previous change in 3ab6209a3f93bdbeec8e9b9fcc00a9a4980915ff) completes LLD to work with mingw-w64-crt files (the base glue code for a mingw-w64 toolchain) built with LTO.
2023-11-21 15:06:00 +02:00
}
}
if (name.contains("_PchSym_"))
continue;
if (ctx.config.autoImport && impSymbol(name))
continue;
undefs.insert(sym);
}
reportProblemSymbols(undefs, /*localImports=*/nullptr, true);
}
bool SymbolTable::resolveRemainingUndefines() {
llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
SmallPtrSet<Symbol *, 8> undefs;
DenseMap<Symbol *, Symbol *> localImports;
bool foundLazy = false;
for (auto &i : symMap) {
Symbol *sym = i.second;
auto *undef = dyn_cast<Undefined>(sym);
if (!undef)
continue;
if (!sym->isUsedInRegularObj)
continue;
StringRef name = undef->getName();
// A weak alias may have been resolved, so check for that.
if (undef->resolveWeakAlias())
continue;
// If we can resolve a symbol by removing __imp_ prefix, do that.
// This odd rule is for compatibility with MSVC linker.
if (name.starts_with("__imp_")) {
auto findLocalSym = [&](StringRef n) {
Symbol *sym = find(n);
if (auto undef = dyn_cast_or_null<Undefined>(sym)) {
// The unprefixed symbol might come later in symMap, so handle it now
// if needed.
if (!undef->resolveWeakAlias())
sym = nullptr;
}
return sym;
};
StringRef impName = name.substr(strlen("__imp_"));
Symbol *imp = findLocalSym(impName);
if (!imp && isEC()) {
// Try to use the mangled symbol on ARM64EC.
std::optional<std::string> mangledName =
getArm64ECMangledFunctionName(impName);
if (mangledName)
imp = findLocalSym(*mangledName);
if (!imp && impName.consume_front("aux_")) {
// If it's a __imp_aux_ symbol, try skipping the aux_ prefix.
imp = findLocalSym(impName);
if (!imp && (mangledName = getArm64ECMangledFunctionName(impName)))
imp = findLocalSym(*mangledName);
}
}
if (imp && imp->isLazy()) {
forceLazy(imp);
foundLazy = true;
continue;
}
if (imp && isa<Defined>(imp)) {
auto *d = cast<Defined>(imp);
replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
localImports[sym] = d;
continue;
}
}
// We don't want to report missing Microsoft precompiled headers symbols.
// A proper message will be emitted instead in PDBLinker::aquirePrecompObj
if (name.contains("_PchSym_"))
continue;
if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
[COFF] Support MinGW automatic dllimport of data Normally, in order to reference exported data symbols from a different DLL, the declarations need to have the dllimport attribute, in order to use the __imp_<var> symbol (which contains an address to the actual variable) instead of the variable itself directly. This isn't an issue in the same way for functions, since any reference to the function without the dllimport attribute will end up as a reference to a thunk which loads the actual target function from the import address table (IAT). GNU ld, in MinGW environments, supports automatically importing data symbols from DLLs, even if the references didn't have the appropriate dllimport attribute. Since the PE/COFF format doesn't support the kind of relocations that this would require, the MinGW's CRT startup code has an custom framework of their own for manually fixing the missing relocations once module is loaded and the target addresses in the IAT are known. For this to work, the linker (originall in GNU ld) creates a list of remaining references needing fixup, which the runtime processes on startup before handing over control to user code. While this feature is rather controversial, it's one of the main features allowing unix style libraries to be used on windows without any extra porting effort. Some sort of automatic fixing of data imports is also necessary for the itanium C++ ABI on windows (as clang implements it right now) for importing vtable pointers in certain cases, see D43184 for some discussion on that. The runtime pseudo relocation handler supports 8/16/32/64 bit addresses, either PC relative references (like IMAGE_REL_*_REL32*) or absolute references (IMAGE_REL_AMD64_ADDR32, IMAGE_REL_AMD64_ADDR32, IMAGE_REL_I386_DIR32). On linking, the relocation is handled as a relocation against the corresponding IAT slot. 
For the absolute references, a normal base relocation is created, to update the embedded address in case the image is loaded at a different address. The list of runtime pseudo relocations contains the RVA of the imported symbol (the IAT slot), the RVA of the location the relocation should be applied to, and a size of the memory location. When the relocations are fixed at runtime, the difference between the actual IAT slot value and the IAT slot address is added to the reference, doing the right thing for both absolute and relative references. With this patch alone, things work fine for i386 binaries, and mostly for x86_64 binaries, with feature parity with GNU ld. Despite this, there are a few gotchas: - References to data from within code works fine on both x86 architectures, since their relocations consist of plain 32 or 64 bit absolute/relative references. On ARM and AArch64, references to data doesn't consist of a plain 32 or 64 bit embedded address or offset in the code. On ARMNT, it's usually a MOVW+MOVT instruction pair represented by a IMAGE_REL_ARM_MOV32T relocation, each instruction containing 16 bit of the target address), on AArch64, it's usually an ADRP+ADD/LDR/STR instruction pair with an even more complex encoding, storing a PC relative address (with a range of +/- 4 GB). This could theoretically be remedied by extending the runtime pseudo relocation handler with new relocation types, to support these instruction encodings. This isn't an issue for GCC/GNU ld since they don't support windows on ARMNT/AArch64. - For x86_64, if references in code are encoded as 32 bit PC relative offsets, the runtime relocation will fail if the target turns out to be out of range for a 32 bit offset. - Fixing up the relocations at runtime requires making sections writable if necessary, with the VirtualProtect function. In Windows Store/UWP apps, this function is forbidden. These limitations are addressed by a few later patches in lld and llvm. 
Differential Revision: https://reviews.llvm.org/D50917 llvm-svn: 340726
2018-08-27 08:43:31 +00:00
continue;
// Remaining undefined symbols are not fatal if /force is specified.
// They are replaced with dummy defined symbols.
if (ctx.config.forceUnresolved)
replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
undefs.insert(sym);
}
reportProblemSymbols(
undefs, ctx.config.warnLocallyDefinedImported ? &localImports : nullptr,
false);
return foundLazy;
}
/// Returns the symbol registered under \p name, creating a fresh placeholder
/// if there is none yet. The bool is true only when a new symbol was created.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
  Symbol *&slot = symMap[CachedHashStringRef(name)];
  if (slot)
    return {slot, false};

  // First time this name is seen: allocate storage and set the initial flags.
  slot = reinterpret_cast<Symbol *>(make<SymbolUnion>());
  slot->isUsedInRegularObj = false;
  slot->pendingArchiveLoad = false;
  slot->canInline = true;

  // In EC mode, newly created "EXP+" symbols are also collected in expSymbols.
  if (isEC() && name.starts_with("EXP+"))
    expSymbols.push_back(slot);

  return {slot, true};
}
/// Same as insert(name), but additionally marks the symbol as used in a
/// regular object unless \p file is a BitcodeFile. A null \p file counts as
/// a regular (non-bitcode) origin.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
  std::pair<Symbol *, bool> result = insert(name);
  const bool fromBitcode = file && isa<BitcodeFile>(file);
  if (!fromBitcode)
    result.first->isUsedInRegularObj = true;
  return result;
}
void SymbolTable::initializeLoadConfig() {
auto sym =
dyn_cast_or_null<DefinedRegular>(findUnderscore("_load_config_used"));
if (!sym) {
if (isEC()) {
Warn(ctx) << "EC version of '_load_config_used' is missing";
return;
}
if (ctx.hybridSymtab) {
Warn(ctx) << "native version of '_load_config_used' is missing for "
"ARM64X target";
return;
}
if (ctx.config.guardCF != GuardCFLevel::Off)
Warn(ctx)
<< "Control Flow Guard is enabled but '_load_config_used' is missing";
if (ctx.config.dependentLoadFlags)
Warn(ctx) << "_load_config_used not found, /dependentloadflag will have "
"no effect";
return;
}
SectionChunk *sc = sym->getChunk();
if (!sc->hasData) {
Err(ctx) << "_load_config_used points to uninitialized data";
return;
}
uint64_t offsetInChunk = sym->getValue();
if (offsetInChunk + 4 > sc->getSize()) {
Err(ctx) << "_load_config_used section chunk is too small";
return;
}
ArrayRef<uint8_t> secContents = sc->getContents();
loadConfigSize =
*reinterpret_cast<const ulittle32_t *>(&secContents[offsetInChunk]);
if (offsetInChunk + loadConfigSize > sc->getSize()) {
Err(ctx) << "_load_config_used specifies a size larger than its containing "
"section chunk";
return;
}
uint32_t expectedAlign = ctx.config.is64() ? 8 : 4;
if (sc->getAlignment() < expectedAlign)
Warn(ctx) << "'_load_config_used' is misaligned (expected alignment to be "
<< expectedAlign << " bytes, got " << sc->getAlignment()
<< " instead)";
else if (!isAligned(Align(expectedAlign), offsetInChunk))
Warn(ctx) << "'_load_config_used' is misaligned (section offset is 0x"
<< Twine::utohexstr(sym->getValue()) << " not aligned to "
<< expectedAlign << " bytes)";
loadConfigSym = sym;
}
// Records a (from, to) pair in entryThunks. The recorded pairs are consumed
// later by initializeECThunks(), which installs `to` as the entry thunk of the
// chunk that defines `from`.
void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
entryThunks.push_back({from, to});
}
// Associates `from` with the exit thunk symbol `to` in exitThunks, assigning
// over whatever was stored for `from` before. initializeECThunks() looks the
// entries up by import thunk symbol.
void SymbolTable::addExitThunk(Symbol *from, Symbol *to) {
exitThunks[from] = to;
}
void SymbolTable::initializeECThunks() {
if (!isArm64EC(ctx.config.machine))
return;
for (auto it : entryThunks) {
auto *to = dyn_cast<Defined>(it.second);
if (!to)
continue;
auto *from = dyn_cast<DefinedRegular>(it.first);
// We need to be able to add padding to the function and fill it with an
// offset to its entry thunks. To ensure that padding the function is
// feasible, functions are required to be COMDAT symbols with no offset.
if (!from || !from->getChunk()->isCOMDAT() ||
cast<DefinedRegular>(from)->getValue()) {
2024-12-05 19:44:26 -08:00
Err(ctx) << "non COMDAT symbol '" << from->getName() << "' in hybrid map";
continue;
}
from->getChunk()->setEntryThunk(to);
}
for (ImportFile *file : ctx.importFileInstances) {
if (!file->impchkThunk)
continue;
Symbol *sym = exitThunks.lookup(file->thunkSym);
if (!sym)
sym = exitThunks.lookup(file->impECSym);
file->impchkThunk->exitThunk = dyn_cast_or_null<Defined>(sym);
}
// On ARM64EC, the __imp_ symbol references the auxiliary IAT, while the
// __imp_aux_ symbol references the regular IAT. However, x86_64 code expects
// both to reference the regular IAT, so adjust the symbol if necessary.
parallelForEach(ctx.objFileInstances, [&](ObjFile *file) {
if (file->getMachineType() != AMD64)
return;
for (auto &sym : file->getMutableSymbols()) {
auto impSym = dyn_cast_or_null<DefinedImportData>(sym);
if (impSym && impSym->file->impchkThunk && sym == impSym->file->impECSym)
sym = impSym->file->impSym;
}
});
}
// Registers an undefined reference to `name` coming from file `f`.
//
// A freshly inserted entry becomes an Undefined symbol. An existing lazy
// symbol is either overwritten with an Undefined symbol (when `overrideLazy`
// is set) or forced to load via forceLazy(). Any other existing symbol is
// returned untouched.
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
                                  bool overrideLazy) {
  auto [sym, isNew] = insert(name, f);
  if (isNew) {
    replaceSymbol<Undefined>(sym, name);
    return sym;
  }

  if (sym->isLazy()) {
    if (overrideLazy)
      replaceSymbol<Undefined>(sym, name);
    else
      forceLazy(sym);
  }
  return sym;
}
// Adds `name` as an undefined symbol and registers it as a GC root (once).
//
// When `aliasEC` is set and this is an EC symbol table, the mangled and
// demangled forms of the name are additionally bound to each other through a
// weak alias, unless the relevant symbol already has one. Returns the symbol
// for `name`.
Symbol *SymbolTable::addGCRoot(StringRef name, bool aliasEC) {
  Symbol *root = addUndefined(name);
  if (!root->isGCRoot) {
    root->isGCRoot = true;
    ctx.config.gcroot.push_back(root);
  }

  if (!aliasEC || !isEC())
    return root;

  // On ARM64EC, a symbol may be defined in either its mangled or demangled form
  // (or both). Define an anti-dependency symbol that binds both forms, similar
  // to how compiler-generated code references external functions.
  if (std::optional<std::string> mangled =
          getArm64ECMangledFunctionName(name)) {
    // `name` is the demangled form: point it at the mangled symbol.
    auto *undef = dyn_cast<Undefined>(root);
    if (undef && !undef->weakAlias) {
      Symbol *target = addUndefined(saver().save(*mangled));
      undef->setWeakAlias(target, true);
    }
    return root;
  }

  if (std::optional<std::string> demangled =
          getArm64ECDemangledFunctionName(name)) {
    // `name` is the mangled form: point the demangled symbol at it.
    Symbol *demangledSym = addUndefined(saver().save(*demangled));
    auto *undef = dyn_cast<Undefined>(demangledSym);
    if (undef && !undef->weakAlias)
      undef->setWeakAlias(root, true);
  }
  return root;
}
// On ARM64EC, a function symbol may appear in both mangled and demangled forms:
// - ARM64EC archives contain only the mangled name, while the demangled symbol
//   is defined by the object file as an alias.
// - x86_64 archives contain only the demangled name (the mangled name is
//   usually defined by an object referencing the symbol as an alias to a guess
//   exit thunk).
// - ARM64EC import files contain both the mangled and demangled names for
//   thunks.
// If more than one archive defines the same function, this could lead
// to different libraries being used for the same function depending on how they
// are referenced. Avoid this by checking if the paired symbol is already
// defined before adding a symbol to the table.
//
// Returns true when the lazy symbol `name` from file `f` may be added, false
// when its paired form already claims the function. `T` is the lazy symbol
// class (LazyArchive or LazyObject) used to compare the originating file.
template <typename T>
bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) {
  // Import symbols are never paired.
  if (name.starts_with("__imp_"))
    return true;

  // Work out the other spelling of the name: try mangling first, then
  // demangling. Names that have neither form have no pair to check.
  std::optional<std::string> pairName = getArm64ECMangledFunctionName(name);
  if (!pairName)
    pairName = getArm64ECDemangledFunctionName(name);
  if (!pairName)
    return true;

  Symbol *pairSym = symtab->find(*pairName);
  if (!pairSym)
    return true;
  if (pairSym->pendingArchiveLoad)
    return false;

  if (auto *undef = dyn_cast<Undefined>(pairSym))
    return !undef->weakAlias || undef->isAntiDep;

  // If the symbol is lazy, allow it only if it originates from the same
  // archive.
  auto *lazyPair = dyn_cast<T>(pairSym);
  return lazyPair && lazyPair->file == f;
}
// Offers the archive symbol `sym` from archive `f` to the symbol table.
//
// A name not seen before is recorded as a LazyArchive symbol. If the name is
// already an Undefined symbol that is still waiting for a definition, the
// archive member is pulled in right away. Everything else is left alone.
void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
  StringRef name = sym.getName();
  if (isEC() && !checkLazyECPair<LazyArchive>(this, name, f))
    return;

  auto [entry, isNew] = insert(name);
  if (isNew) {
    replaceSymbol<LazyArchive>(entry, f, sym);
    return;
  }

  // Only an undefined symbol can trigger a load. One that has a weak alias is
  // skipped unless it is an EC alias, and a load that is already pending is
  // not repeated.
  auto *undef = dyn_cast<Undefined>(entry);
  const bool wantsMember = undef &&
                           (!undef->weakAlias || undef->isECAlias(machine)) &&
                           !entry->pendingArchiveLoad;
  if (!wantsMember)
    return;

  entry->pendingArchiveLoad = true;
  f->addMember(sym);
}
// Offers symbol `n` from the lazy object file `f` to the symbol table.
//
// A name not seen before is recorded as a LazyObject symbol. If the name is
// already an Undefined symbol that is still waiting for a definition, `f` is
// un-lazied and handed to the driver. Everything else is left alone.
void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
  assert(f->lazy);
  if (isEC() && !checkLazyECPair<LazyObject>(this, n, f))
    return;

  auto [entry, isNew] = insert(n, f);
  if (isNew) {
    replaceSymbol<LazyObject>(entry, f, n);
    return;
  }

  // Only an undefined symbol can trigger a load. One that has a weak alias is
  // skipped unless it is an EC alias, and a load that is already pending is
  // not repeated.
  auto *undef = dyn_cast<Undefined>(entry);
  const bool wantsFile = undef &&
                         (!undef->weakAlias || undef->isECAlias(machine)) &&
                         !entry->pendingArchiveLoad;
  if (!wantsFile)
    return;

  entry->pendingArchiveLoad = true;
  f->lazy = false;
  ctx.driver.addFile(f);
}
[LLD] [COFF] Support linking directly against DLLs in MinGW mode GNU ld.bfd supports linking directly against DLLs without using an import library, and some projects have picked up on this habit. (There's no one single insurmountable issue with using import libraries, but this is a regularly surfacing missing feature.) As long as one is linking by name (instead of by ordinal), the DLL export table contains most of the information needed. (One can inspect what section a symbol points at, to see if it's a function or data symbol. The practical implementation of this loops over all sections for each symbol, but as long as they're not very many, that should hopefully be tolerable performance wise.) One exception where the information in the DLL isn't entirely enough is on i386 with stdcall functions; depending on how they're done, the exported function name can be a plain undecorated name, while the import library would contain the full decorated symbol name. This issue is addressed separately in a different patch. This is implemented mimicking the structure of a regular import library, with one InputFile corresponding to the static archive that just adds lazy symbols, which then are fetched when they are needed. When such a symbol is fetched, we synthesize a coff_import_header structure in memory and create a regular ImportFile out of it. The implementation could be even smaller by just creating ImportFiles for every symbol available immediately, but that would have the drawback of actually ending up importing all symbols unless running with GC enabled (and mingw mode defaults to having it disabled for historical reasons). Differential Revision: https://reviews.llvm.org/D104530
2021-06-16 16:59:46 +03:00
void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
StringRef n) {
auto [s, wasInserted] = insert(n);
[LLD] [COFF] Support linking directly against DLLs in MinGW mode GNU ld.bfd supports linking directly against DLLs without using an import library, and some projects have picked up on this habit. (There's no one single unsurmountable issue with using import libraries, but this is a regularly surfacing missing feature.) As long as one is linking by name (instead of by ordinal), the DLL export table contains most of the information needed. (One can inspect what section a symbol points at, to see if it's a function or data symbol. The practical implementation of this loops over all sections for each symbol, but as long as they're not very many, that should hopefully be tolerable performance wise.) One exception where the information in the DLL isn't entirely enough is on i386 with stdcall functions; depending on how they're done, the exported function name can be a plain undecorated name, while the import library would contain the full decorated symbol name. This issue is addressed separately in a different patch. This is implemented mimicing the structure of a regular import library, with one InputFile corresponding to the static archive that just adds lazy symbols, which then are fetched when they are needed. When such a symbol is fetched, we synthesize a coff_import_header structure in memory and create a regular ImportFile out of it. The implementation could be even smaller by just creating ImportFiles for every symbol available immediately, but that would have the drawback of actually ending up importing all symbols unless running with GC enabled (and mingw mode defaults to having it disabled for historical reasons). Differential Revision: https://reviews.llvm.org/D104530
2021-06-16 16:59:46 +03:00
if (wasInserted) {
replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
return;
}
auto *u = dyn_cast<Undefined>(s);
if (!u || u->weakAlias || s->pendingArchiveLoad)
return;
s->pendingArchiveLoad = true;
f->makeImport(sym);
}
// Builds the "defined at" note for a symbol that comes from a bitcode file.
// The module's source file name is included when it is not empty, followed by
// the printable name of the file itself.
static std::string getSourceLocationBitcode(BitcodeFile *file) {
  std::string loc = "\n>>> defined at ";
  StringRef srcName = file->obj->getSourceFileName();
  if (!srcName.empty()) {
    loc.append(srcName.data(), srcName.size());
    loc += "\n>>> ";
  }
  loc += toString(file);
  return loc;
}
// Builds the "defined at" note for a symbol that comes from an object file.
//
// The file:line pair is taken from the section chunk at `offset` when a chunk
// is given; otherwise, or when that lookup yields nothing, the file is asked
// for the location of the variable `name`. The note always ends with the
// printable name of the object file.
static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
                                        uint32_t offset, StringRef name) {
  std::optional<std::pair<StringRef, uint32_t>> where;
  if (sc)
    where = getFileLine(sc, offset);
  if (!where)
    where = file->getVariableLocation(name);

  std::string note = "\n>>> defined at ";
  if (where) {
    note += where->first.str();
    note += ":";
    note += std::to_string(where->second);
    note += "\n>>> ";
  }
  note += toString(file);
  return note;
}
// Builds the "defined at" note for a symbol defined in `file`, dispatching on
// the kind of input file. Returns an empty string when there is no file.
static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
                                     uint32_t offset, StringRef name) {
  if (file == nullptr)
    return std::string();
  if (isa<ObjFile>(file))
    return getSourceLocationObj(cast<ObjFile>(file), sc, offset, name);
  if (isa<BitcodeFile>(file))
    return getSourceLocationBitcode(cast<BitcodeFile>(file));
  // Any other kind of input: only the file itself can be named.
  return "\n>>> defined at " + toString(file);
}
// Construct and print an error message in the form of:
//
//   lld-link: error: duplicate symbol: foo
//   >>> defined at bar.c:30
//   >>>            bar.o
//   >>> defined at baz.c:563
//   >>>            baz.o
//
// The diagnostic is a warning instead of an error when forceMultiple is set.
// The first location describes the existing definition, the second one the
// new definition given by `newFile`, `newSc` and `newSectionOffset`.
void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
                                  SectionChunk *newSc,
                                  uint32_t newSectionOffset) {
  const DiagLevel level =
      ctx.config.forceMultiple ? DiagLevel::Warn : DiagLevel::Err;
  COFFSyncStream diag(ctx, level);
  diag << "duplicate symbol: " << existing;

  // A precise location (chunk and offset) is only used for the existing
  // definition when it is a regular symbol that lives in an object file.
  auto *regular = dyn_cast<DefinedRegular>(existing);
  const bool hasObjLocation = regular && isa<ObjFile>(regular->getFile());
  if (hasObjLocation)
    diag << getSourceLocation(regular->getFile(), regular->getChunk(),
                              regular->getValue(), existing->getName());
  else
    diag << getSourceLocation(existing->getFile(), nullptr, 0, "");

  diag << getSourceLocation(newFile, newSc, newSectionOffset,
                            existing->getName());
}
// Defines `n` as an absolute symbol described by the COFF symbol `sym`.
//
// New, undefined and lazy entries are replaced by the absolute definition.
// An existing absolute symbol is a duplicate only when its address differs
// from the new value; any other existing symbol is a duplicate unless it is a
// DefinedCOFF.
Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
  auto [entry, isNew] = insert(n, nullptr);
  entry->isUsedInRegularObj = true;

  if (isNew || isa<Undefined>(entry) || entry->isLazy()) {
    replaceSymbol<DefinedAbsolute>(entry, ctx, n, sym);
    return entry;
  }

  if (auto *absolute = dyn_cast<DefinedAbsolute>(entry)) {
    if (absolute->getVA() != sym.getValue())
      reportDuplicate(entry, nullptr);
    return entry;
  }

  if (!isa<DefinedCOFF>(entry))
    reportDuplicate(entry, nullptr);
  return entry;
}
// Defines `n` as an absolute symbol located at address `va`.
//
// New, undefined and lazy entries are replaced by the absolute definition.
// An existing absolute symbol is a duplicate only when its address differs
// from `va`; any other existing symbol is a duplicate unless it is a
// DefinedCOFF.
Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
  auto [entry, isNew] = insert(n, nullptr);
  entry->isUsedInRegularObj = true;

  if (isNew || isa<Undefined>(entry) || entry->isLazy()) {
    replaceSymbol<DefinedAbsolute>(entry, ctx, n, va);
    return entry;
  }

  if (auto *absolute = dyn_cast<DefinedAbsolute>(entry)) {
    if (absolute->getVA() != va)
      reportDuplicate(entry, nullptr);
    return entry;
  }

  if (!isa<DefinedCOFF>(entry))
    reportDuplicate(entry, nullptr);
  return entry;
}
// Defines `n` as a synthetic symbol backed by chunk `c`.
//
// New, undefined and lazy entries are replaced by the synthetic definition.
// Any other existing symbol is reported as a duplicate unless it is a
// DefinedCOFF, in which case it is silently kept.
Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
  auto [entry, isNew] = insert(n, nullptr);
  entry->isUsedInRegularObj = true;

  if (isNew || isa<Undefined>(entry) || entry->isLazy()) {
    replaceSymbol<DefinedSynthetic>(entry, n, c);
    return entry;
  }

  if (!isa<DefinedCOFF>(entry))
    reportDuplicate(entry, nullptr);
  return entry;
}
// Adds a regular (non-COMDAT) definition of `n` from file `f`.
//
// The definition is installed when the entry is new, is not already a
// DefinedRegular, or is weak. Otherwise the existing definition is kept, and a
// duplicate is reported unless the incoming definition is weak.
Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
                                const coff_symbol_generic *sym, SectionChunk *c,
                                uint32_t sectionOffset, bool isWeak) {
  auto [entry, isNew] = insert(n, f);
  const bool replaceable =
      isNew || !isa<DefinedRegular>(entry) || entry->isWeak;
  if (replaceable) {
    replaceSymbol<DefinedRegular>(entry, f, n, /*IsCOMDAT*/ false,
                                  /*IsExternal*/ true, sym, c, isWeak);
    return entry;
  }

  if (!isWeak)
    reportDuplicate(entry, f, c, sectionOffset);
  return entry;
}
2019-01-30 02:17:27 +00:00
// Adds a COMDAT definition of `n` from file `f`.
//
// Returns the resulting DefinedRegular together with a flag that is true when
// this call installed the definition, and false when an earlier
// DefinedRegular was kept. Keeping an earlier definition that is not itself a
// COMDAT is reported as a duplicate.
std::pair<DefinedRegular *, bool>
SymbolTable::addComdat(InputFile *f, StringRef n,
                       const coff_symbol_generic *sym) {
  auto [entry, isNew] = insert(n, f);
  const bool installed = isNew || !isa<DefinedRegular>(entry);
  if (installed)
    replaceSymbol<DefinedRegular>(entry, f, n, /*IsCOMDAT*/ true,
                                  /*IsExternal*/ true, sym, nullptr);

  auto *comdat = cast<DefinedRegular>(entry);
  if (!installed && !comdat->isCOMDAT)
    reportDuplicate(entry, f);
  return {comdat, installed};
}
// Adds a common symbol `n` of `size` bytes from file `f`.
//
// The common definition is installed when the entry is new or is not a
// DefinedCOFF. An existing common symbol is replaced only by a strictly larger
// one. Any other existing DefinedCOFF is kept as is.
Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
                               const coff_symbol_generic *sym, CommonChunk *c) {
  auto [entry, isNew] = insert(n, f);

  bool shouldReplace = isNew || !isa<DefinedCOFF>(entry);
  if (!shouldReplace) {
    if (auto *common = dyn_cast<DefinedCommon>(entry))
      shouldReplace = size > common->getSize();
  }

  if (shouldReplace)
    replaceSymbol<DefinedCommon>(entry, f, n, size, sym, c);
  return entry;
}
// Defines the import data symbol `n` for import file `f`.
//
// Succeeds only when the entry is new, undefined or lazy, and then returns the
// newly created DefinedImportData. Otherwise a duplicate is reported and
// nullptr is returned.
DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f,
                                              Chunk *&location) {
  auto [entry, isNew] = insert(n, nullptr);
  entry->isUsedInRegularObj = true;

  const bool replaceable = isNew || isa<Undefined>(entry) || entry->isLazy();
  if (!replaceable) {
    reportDuplicate(entry, f);
    return nullptr;
  }

  replaceSymbol<DefinedImportData>(entry, n, f, location);
  return cast<DefinedImportData>(entry);
}
// Defines the import thunk symbol `name` for import data `id`, backed by
// `chunk`.
//
// Succeeds only when the entry is new, undefined or lazy, and then returns the
// newly created thunk symbol. Otherwise a duplicate is reported against the
// import's file and nullptr is returned.
Defined *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
                                     ImportThunkChunk *chunk) {
  auto [entry, isNew] = insert(name, nullptr);
  entry->isUsedInRegularObj = true;

  const bool replaceable = isNew || isa<Undefined>(entry) || entry->isLazy();
  if (!replaceable) {
    reportDuplicate(entry, id->file);
    return nullptr;
  }

  replaceSymbol<DefinedImportThunk>(entry, ctx, name, id, chunk);
  return cast<Defined>(entry);
}
// Handles a library call named `name`: when the symbol currently exists only
// as a lazy symbol (archive member or lazy object) whose backing buffer is
// bitcode, an undefined reference to it is added. Symbols that are absent or
// of any other kind are ignored.
void SymbolTable::addLibcall(StringRef name) {
  Symbol *sym = findUnderscore(name);
  if (!sym)
    return;

  bool definedInBitcode = false;
  if (auto *archiveSym = dyn_cast<LazyArchive>(sym))
    definedInBitcode = isBitcode(archiveSym->getMemberBuffer());
  else if (auto *objectSym = dyn_cast<LazyObject>(sym))
    definedInBitcode = isBitcode(objectSym->file->mb);

  if (definedInBitcode)
    addUndefined(sym->getName());
}
// Returns whatever symMap.lookup() yields for exactly `name`.
Symbol *SymbolTable::find(StringRef name) const {
  const CachedHashStringRef key(name);
  return symMap.lookup(key);
}
// Like find(), but on I386 the lookup is done for the name with a leading
// underscore prepended.
Symbol *SymbolTable::findUnderscore(StringRef name) const {
  if (machine != I386)
    return find(name);
  const std::string decorated = ("_" + name).str();
  return find(decorated);
}
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
// Return all symbols that start with Prefix, possibly ignoring the first
// character of Prefix or the first character symbol.
std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
std::vector<Symbol *> syms;
for (auto pair : symMap) {
StringRef name = pair.first.val();
if (name.starts_with(prefix) || name.starts_with(prefix.drop_front()) ||
name.drop_front().starts_with(prefix) ||
name.drop_front().starts_with(prefix.drop_front())) {
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
syms.push_back(pair.second);
}
}
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
return syms;
}
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
Symbol *SymbolTable::findMangle(StringRef name) {
if (Symbol *sym = find(name)) {
if (auto *u = dyn_cast<Undefined>(sym)) {
// We're specifically looking for weak aliases that ultimately resolve to
// defined symbols, hence the call to getWeakAlias() instead of just using
// the weakAlias member variable. This matches link.exe's behavior.
if (Symbol *weakAlias = u->getWeakAlias())
return weakAlias;
} else {
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
return sym;
}
}
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
// Efficient fuzzy string lookup is impossible with a hash table, so iterate
// the symbol table once and collect all possibly matching symbols into this
// vector. Then compare each possibly matching symbol with each possible
// mangling.
std::vector<Symbol *> syms = getSymsWithPrefix(name);
auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
std::string prefix = t.str();
for (auto *s : syms)
if (s->getName().starts_with(prefix))
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
return s;
return nullptr;
};
// For non-x86, just look for C++ functions.
if (machine != I386)
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
return findByPrefix("?" + name + "@@Y");
if (!name.starts_with("_"))
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
return nullptr;
// Search for x86 stdcall function.
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
if (Symbol *s = findByPrefix(name + "@"))
return s;
// Search for x86 fastcall function.
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
return s;
// Search for x86 vectorcall function.
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
return s;
// Search for x86 C++ non-member function.
[COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD makes the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar", consider "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849
2019-06-07 22:05:12 +00:00
return findByPrefix("?" + name.substr(1) + "@@Y");
}
// Applies mangle() to `sym`, looks the result up through findMangle(), and
// reports whether that lookup produced a symbol that is not an Undefined.
bool SymbolTable::findUnderscoreMangle(StringRef sym) {
  Symbol *found = findMangle(mangle(sym));
  if (!found)
    return false;
  return !isa<Undefined>(found);
}
// Returns the machine-level spelling of `sym`: on I386 a "_" is prepended
// (the new string is stored through saver()), on every other machine the
// name is returned untouched. The machine type must already be known.
StringRef SymbolTable::mangle(StringRef sym) {
  assert(machine != IMAGE_FILE_MACHINE_UNKNOWN);
  if (machine != I386)
    return sym;
  return saver().save("_" + sym);
}
// If `s` is still undefined and findMangle() locates a similar, mangled
// symbol, turns `s` into a weak alias of that symbol and returns the mangled
// name. Returns "" when `s` is not an Undefined or no mangled match exists.
StringRef SymbolTable::mangleMaybe(Symbol *s) {
  // Anything other than an Undefined has already been resolved under its
  // plain name, so there is nothing to do for it.
  if (auto *plain = dyn_cast<Undefined>(s)) {
    // Look for a decorated variant of the plain name in the symbol table.
    if (Symbol *decorated = findMangle(plain->getName())) {
      Log(ctx) << plain->getName() << " aliased to " << decorated->getName();
      plain->setWeakAlias(addUndefined(decorated->getName()));
      return decorated->getName();
    }
  }
  return "";
}
// Windows specific -- picks the default entry point symbol.
//
// Windows executables have four possible CRT entry points, one for each
// flavour of user-defined "main" (main, wmain, WinMain, wWinMain). The entry
// point is chosen from the subsystem and from which of those user functions
// are present. The returned name has been passed through mangle().
StringRef SymbolTable::findDefaultEntry() {
  assert(ctx.config.subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
         "must handle /subsystem before calling this");

  const bool isGui = ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI;

  // MinGW never selects the wide-character startup routines here.
  if (ctx.config.mingw)
    return mangle(isGui ? "WinMainCRTStartup" : "mainCRTStartup");

  if (isGui) {
    const bool haveWide = findUnderscoreMangle("wWinMain");
    if (haveWide && !findUnderscoreMangle("WinMain"))
      return mangle("wWinMainCRTStartup");
    // Both flavours exist: the narrow one wins, but tell the user.
    if (haveWide)
      Warn(ctx) << "found both wWinMain and WinMain; using latter";
    return mangle("WinMainCRTStartup");
  }

  const bool haveWide = findUnderscoreMangle("wmain");
  if (haveWide && !findUnderscoreMangle("main"))
    return mangle("wmainCRTStartup");
  // Both flavours exist: the narrow one wins, but tell the user.
  if (haveWide)
    Warn(ctx) << "found both wmain and main; using latter";
  return mangle("mainCRTStartup");
}
// Guesses the subsystem when none was given explicitly.
//
// DLLs are always GUI and MinGW links are always console. Otherwise the
// choice follows which user entry functions exist: main/wmain select console
// (with a warning if a WinMain flavour is present as well), WinMain/wWinMain
// select GUI, and IMAGE_SUBSYSTEM_UNKNOWN is returned if none is found.
WindowsSubsystem SymbolTable::inferSubsystem() {
  if (ctx.config.dll)
    return IMAGE_SUBSYSTEM_WINDOWS_GUI;
  if (ctx.config.mingw)
    return IMAGE_SUBSYSTEM_WINDOWS_CUI;

  // Note that link.exe infers the subsystem from the presence of these
  // functions even if /entry: or /nodefaultlib are passed which causes them
  // to not be called.
  const bool haveMain = findUnderscoreMangle("main");
  const bool haveWMain = findUnderscoreMangle("wmain");
  const bool haveWinMain = findUnderscoreMangle("WinMain");
  const bool haveWWinMain = findUnderscoreMangle("wWinMain");

  const bool consoleEntry = haveMain || haveWMain;
  const bool guiEntry = haveWinMain || haveWWinMain;

  if (!consoleEntry)
    return guiEntry ? IMAGE_SUBSYSTEM_WINDOWS_GUI : IMAGE_SUBSYSTEM_UNKNOWN;

  // Entry functions of both kinds are present; console takes priority.
  if (guiEntry) {
    Warn(ctx) << "found " << (haveMain ? "main" : "wmain") << " and "
              << (haveWinMain ? "WinMain" : "wWinMain")
              << "; defaulting to /subsystem:console";
  }
  return IMAGE_SUBSYSTEM_WINDOWS_CUI;
}
// Registers every symbol whose name matches the glob pattern `arg` as a GC
// root. A malformed pattern is reported as an "/includeglob:" error and
// nothing is added.
void SymbolTable::addUndefinedGlob(StringRef arg) {
  Expected<GlobPattern> glob = GlobPattern::create(arg);
  if (!glob) {
    Err(ctx) << "/includeglob: " << toString(glob.takeError());
    return;
  }

  // Gather the matches first and add the roots afterwards, so addGCRoot() is
  // never called from inside the forEachSymbol() walk.
  SmallVector<Symbol *, 0> matches;
  forEachSymbol([&](Symbol *candidate) {
    if (!glob->match(candidate->getName()))
      return;
    matches.push_back(candidate);
  });

  for (Symbol *match : matches)
    addGCRoot(match->getName());
}
// Strips stdcall/fastcall decoration from `sym`, yielding an unsuffixed name
// with or without a leading underscore. (MinGW specific.)
//
// Everything from the first '@' found at index 1 or later is dropped. If the
// name starts with '@' (fastcall), that character is removed as well. When
// `prefix` is true the result carries a leading "_": it is always added for
// fastcall names, and added for other names only if not already present.
// An empty `sym` is returned unchanged.
static StringRef killAt(StringRef sym, bool prefix) {
  if (sym.empty())
    return sym;

  // Cut the trailing "@<n>" suffix. The search starts at index 1 so that a
  // leading fastcall '@' is not mistaken for the suffix.
  StringRef stem = sym.substr(0, sym.find('@', 1));

  const bool isFastcall = stem.starts_with("@");
  if (isFastcall)
    stem = stem.substr(1);

  if (prefix && (isFastcall || !stem.starts_with("_")))
    return saver().save("_" + stem);
  return stem;
}
// Maps an ExportSource to the human-readable label used in duplicate-export
// diagnostics. Any value other than the three handled here is unreachable.
static StringRef exportSourceName(ExportSource s) {
  if (s == ExportSource::Directives)
    return "source file (directives)";
  if (s == ExportSource::Export)
    return "/export";
  if (s == ExportSource::ModuleDefinition)
    return "/def";
  llvm_unreachable("unknown ExportSource");
}
// Performs error checking on all /export arguments.
// It also sets ordinals.
void SymbolTable::fixupExports() {
llvm::TimeTraceScope timeScope("Fixup exports");
// Symbol ordinals must be unique.
std::set<uint16_t> ords;
for (Export &e : exports) {
if (e.ordinal == 0)
continue;
if (!ords.insert(e.ordinal).second)
Fatal(ctx) << "duplicate export ordinal: " << e.name;
}
for (Export &e : exports) {
if (!e.exportAs.empty()) {
e.exportName = e.exportAs;
continue;
}
StringRef sym =
!e.forwardTo.empty() || e.extName.empty() ? e.name : e.extName;
if (machine == I386 && sym.starts_with("_")) {
// In MSVC mode, a fully decorated stdcall function is exported
// as-is with the leading underscore (with type IMPORT_NAME).
// In MinGW mode, a decorated stdcall function gets the underscore
// removed, just like normal cdecl functions.
if (ctx.config.mingw || !sym.contains('@')) {
e.exportName = sym.substr(1);
continue;
}
}
if (isEC() && !e.data && !e.constant) {
if (std::optional<std::string> demangledName =
getArm64ECDemangledFunctionName(sym)) {
e.exportName = saver().save(*demangledName);
continue;
}
}
e.exportName = sym;
}
if (ctx.config.killAt && machine == I386) {
for (Export &e : exports) {
e.name = killAt(e.name, true);
e.exportName = killAt(e.exportName, false);
e.extName = killAt(e.extName, true);
e.symbolName = killAt(e.symbolName, true);
}
}
// Uniquefy by name.
DenseMap<StringRef, std::pair<Export *, unsigned>> map(exports.size());
std::vector<Export> v;
for (Export &e : exports) {
auto pair = map.insert(std::make_pair(e.exportName, std::make_pair(&e, 0)));
bool inserted = pair.second;
if (inserted) {
pair.first->second.second = v.size();
v.push_back(e);
continue;
}
Export *existing = pair.first->second.first;
if (e == *existing || e.name != existing->name)
continue;
// If the existing export comes from .OBJ directives, we are allowed to
// overwrite it with /DEF: or /EXPORT without any warning, as MSVC link.exe
// does.
if (existing->source == ExportSource::Directives) {
*existing = e;
v[pair.first->second.second] = e;
continue;
}
if (existing->source == e.source) {
Warn(ctx) << "duplicate " << exportSourceName(existing->source)
<< " option: " << e.name;
} else {
Warn(ctx) << "duplicate export: " << e.name << " first seen in "
<< exportSourceName(existing->source) << ", now in "
<< exportSourceName(e.source);
}
}
exports = std::move(v);
// Sort by name.
llvm::sort(exports, [](const Export &a, const Export &b) {
return a.exportName < b.exportName;
});
}
// Gives every export whose ordinal is still 0 a fresh ordinal, counting up
// from the largest ordinal already in use. Fails fatally if the resulting
// maximum no longer fits in 16 bits.
void SymbolTable::assignExportOrdinals() {
  // Find the largest ordinal that is already taken.
  uint32_t highest = 0;
  for (Export &e : exports)
    if (e.ordinal > highest)
      highest = e.ordinal;

  // Hand out the following values to the exports without an ordinal.
  for (Export &e : exports) {
    if (e.ordinal != 0)
      continue;
    e.ordinal = ++highest;
  }

  if (highest > std::numeric_limits<uint16_t>::max())
    Fatal(ctx) << "too many exported symbols (got " << highest << ", max "
               << Twine(std::numeric_limits<uint16_t>::max()) << ")";
}
// Reads the module-definition file at `path` and applies it to the link:
// the output file name (only if none is set yet), the import name, any
// non-zero image base / stack / heap / version values, and one Export entry
// per export listed in the file.
void SymbolTable::parseModuleDefs(StringRef path) {
  llvm::TimeTraceScope timeScope("Parse def file");

  std::unique_ptr<MemoryBuffer> buffer =
      CHECK(MemoryBuffer::getFile(path, /*IsText=*/false,
                                  /*RequiresNullTerminator=*/false,
                                  /*IsVolatile=*/true),
            "could not open " + path);
  COFFModuleDefinition def = check(parseCOFFModuleDefinition(
      buffer->getMemBufferRef(), machine, ctx.config.mingw));

  // Include in /reproduce: output if applicable.
  ctx.driver.takeBuffer(std::move(buffer));

  if (ctx.config.outputFile.empty())
    ctx.config.outputFile = std::string(saver().save(def.OutputFile));
  ctx.config.importName = std::string(saver().save(def.ImportName));

  // A zero value in the definition leaves the current setting alone.
  auto applyIfSet = [](auto &setting, auto value) {
    if (value)
      setting = value;
  };
  applyIfSet(ctx.config.imageBase, def.ImageBase);
  applyIfSet(ctx.config.stackReserve, def.StackReserve);
  applyIfSet(ctx.config.stackCommit, def.StackCommit);
  applyIfSet(ctx.config.heapReserve, def.HeapReserve);
  applyIfSet(ctx.config.heapCommit, def.HeapCommit);
  applyIfSet(ctx.config.majorImageVersion, def.MajorImageVersion);
  applyIfSet(ctx.config.minorImageVersion, def.MinorImageVersion);
  applyIfSet(ctx.config.majorOSVersion, def.MajorOSVersion);
  applyIfSet(ctx.config.minorOSVersion, def.MinorOSVersion);

  for (const COFFShortExport &parsed : def.Exports) {
    Export entry;

    // Renamed exports are parsed and set as "ExtName = Name". If Name has
    // the form "OtherDll.Func", it shouldn't be a normal exported
    // function but a forward to another DLL instead. This is supported
    // by both MS and GNU linkers.
    const bool isForward = !parsed.ExtName.empty() &&
                           parsed.ExtName != parsed.Name &&
                           StringRef(parsed.Name).contains('.');
    if (isForward) {
      entry.name = saver().save(parsed.ExtName);
      entry.forwardTo = saver().save(parsed.Name);
    } else {
      entry.name = saver().save(parsed.Name);
      entry.extName = saver().save(parsed.ExtName);
    }

    entry.exportAs = saver().save(parsed.ExportAs);
    entry.importName = saver().save(parsed.ImportName);
    entry.ordinal = parsed.Ordinal;
    entry.noname = parsed.Noname;
    entry.data = parsed.Data;
    entry.isPrivate = parsed.Private;
    entry.constant = parsed.Constant;
    entry.source = ExportSource::ModuleDefinition;
    exports.push_back(entry);
  }
}
// Records an /alternatename argument of the form "<from>=<to>".
//
// Both halves must be non-empty. Repeating an identical mapping is accepted;
// mapping the same <from> to a different <to> is a fatal conflict.
void SymbolTable::parseAlternateName(StringRef s) {
  const std::pair<StringRef, StringRef> halves = s.split('=');
  const StringRef from = halves.first;
  const StringRef to = halves.second;
  if (from.empty() || to.empty())
    Fatal(ctx) << "/alternatename: invalid argument: " << s;

  auto existing = alternateNames.find(from);
  const bool conflicting =
      existing != alternateNames.end() && existing->second != to;
  if (conflicting)
    Fatal(ctx) << "/alternatename: conflicts: " << s;

  // The lookup result doubles as the insertion hint.
  alternateNames.insert(existing, std::make_pair(from, to));
}
// Convenience overload: forwards `name` to the three-argument addUndefined()
// with a null second argument and `false` as the third.
Symbol *SymbolTable::addUndefined(StringRef name) {
  Symbol *sym = addUndefined(name, nullptr, false);
  return sym;
}
void SymbolTable::compileBitcodeFiles() {
if (bitcodeFileInstances.empty())
return;
llvm::TimeTraceScope timeScope("Compile bitcode");
ScopedTimer t(ctx.ltoTimer);
lto.reset(new BitcodeCompiler(ctx));
for (BitcodeFile *f : bitcodeFileInstances)
lto->add(*f);
for (InputFile *newObj : lto->compile()) {
ObjFile *obj = cast<ObjFile>(newObj);
obj->parse();
ctx.objFileInstances.push_back(obj);
}
}
} // namespace lld::coff