[ELF] --save-temps --lto-emit-asm: derive ELF/asm file names from bitcode file names

Port COFF's https://reviews.llvm.org/D78221 and
https://reviews.llvm.org/D137217 to ELF. For the in-process ThinLTO
link, `ld.lld --save-temps a.o d/b.o -o out` will create
ELF relocatable files `out.lto.a.o`/`d/out.lto.b.o` instead of
`out1.lto.o`/`out2.lto.o`. Deriving the LTO-generated relocatable file
name from bitcode file names helps debugging.

The relocatable file name from the first regular LTO partition does not
change: `out.lto.o`. The second, if present due to `--lto-partitions=`,
changes from `out1.lto.o` to `out.lto.1.o`.

For an archive member, e.g. `d/a.a(coll.o at 8)`,
the relocatable file is `d/out.lto.a.a(coll.o at 8).o`.

`--lto-emit-asm` file names are changed similarly. `--lto-emit-asm -o
out` now creates `out.lto.s` instead of `out`, therefore the
`--lto-emit-asm -o -` idiom no longer works. However, I think this new
behavior (which matches COFF) is better since keeping or removing
`--lto-emit-asm` will dump different files, instead of overwriting the
`-o` output file from an executable/shared object to an assembly file.

Reviewers: rnk, igorkudrin, xur-llvm, teresajohnson, ZequanWu

Reviewed By: teresajohnson

Pull Request: https://github.com/llvm/llvm-project/pull/78835
This commit is contained in:
Fangrui Song
2024-01-23 11:38:15 -08:00
committed by GitHub
parent f6ced3579a
commit f7669ba3d9
14 changed files with 116 additions and 80 deletions

View File

@@ -12,6 +12,7 @@
#include "SymbolTable.h"
#include "Symbols.h"
#include "lld/Common/Args.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Filesystem.h"
#include "lld/Common/Strings.h"
@@ -28,6 +29,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <algorithm>
#include <cstddef>
#include <memory>
@@ -305,6 +307,7 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
unsigned maxTasks = ltoObj->getMaxTasks();
buf.resize(maxTasks);
files.resize(maxTasks);
filenames.resize(maxTasks);
// The --thinlto-cache-dir option specifies the path to a directory in which
// to cache native object files for ThinLTO incremental builds. If a path was
@@ -315,13 +318,15 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
[&](size_t task, const Twine &moduleName,
std::unique_ptr<MemoryBuffer> mb) {
files[task] = std::move(mb);
filenames[task] = moduleName.str();
}));
if (!ctx.bitcodeFiles.empty())
checkError(ltoObj->run(
[&](size_t task, const Twine &moduleName) {
buf[task].first = moduleName.str();
return std::make_unique<CachedFileStream>(
std::make_unique<raw_svector_ostream>(buf[task]));
std::make_unique<raw_svector_ostream>(buf[task].second));
},
cache));
@@ -340,7 +345,7 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
if (config->thinLTOIndexOnly) {
if (!config->ltoObjPath.empty())
saveBuffer(buf[0], config->ltoObjPath);
saveBuffer(buf[0].second, config->ltoObjPath);
// ThinLTO with index only option is required to generate only the index
// files. After that, we exit from linker and ThinLTO backend runs in a
@@ -354,32 +359,57 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
pruneCache(config->thinLTOCacheDir, config->thinLTOCachePolicy, files);
if (!config->ltoObjPath.empty()) {
saveBuffer(buf[0], config->ltoObjPath);
saveBuffer(buf[0].second, config->ltoObjPath);
for (unsigned i = 1; i != maxTasks; ++i)
saveBuffer(buf[i], config->ltoObjPath + Twine(i));
}
if (config->saveTempsArgs.contains("prelink")) {
if (!buf[0].empty())
saveBuffer(buf[0], config->outputFile + ".lto.o");
for (unsigned i = 1; i != maxTasks; ++i)
saveBuffer(buf[i], config->outputFile + Twine(i) + ".lto.o");
}
if (config->ltoEmitAsm) {
saveBuffer(buf[0], config->outputFile);
for (unsigned i = 1; i != maxTasks; ++i)
saveBuffer(buf[i], config->outputFile + Twine(i));
return {};
saveBuffer(buf[i].second, config->ltoObjPath + Twine(i));
}
bool savePrelink = config->saveTempsArgs.contains("prelink");
std::vector<InputFile *> ret;
for (unsigned i = 0; i != maxTasks; ++i)
if (!buf[i].empty())
ret.push_back(createObjFile(MemoryBufferRef(buf[i], "lto.tmp")));
const char *ext = config->ltoEmitAsm ? ".s" : ".o";
for (unsigned i = 0; i != maxTasks; ++i) {
StringRef bitcodeFilePath;
StringRef objBuf;
if (files[i]) {
// When files[i] is not null, we get the native relocatable file from the
// cache. filenames[i] contains the original BitcodeFile's identifier.
objBuf = files[i]->getBuffer();
bitcodeFilePath = filenames[i];
} else {
// Get the native relocatable file after in-process LTO compilation.
objBuf = buf[i].second;
bitcodeFilePath = buf[i].first;
}
if (objBuf.empty())
continue;
for (std::unique_ptr<MemoryBuffer> &file : files)
if (file)
ret.push_back(createObjFile(*file));
// If the input bitcode file is path/to/x.o and -o specifies a.out, the
// corresponding native relocatable file path will look like:
// path/to/a.out.lto.x.o.
StringRef ltoObjName;
if (bitcodeFilePath == "ld-temp.o") {
ltoObjName =
saver().save(Twine(config->outputFile) + ".lto" +
(i == 0 ? Twine("") : Twine('.') + Twine(i)) + ext);
} else {
StringRef directory = sys::path::parent_path(bitcodeFilePath);
// For an archive member, which has an identifier like "d/a.a(coll.o at
// 8)" (see BitcodeFile::BitcodeFile), use the filename; otherwise, use
// the stem (d/a.o => a).
StringRef baseName = bitcodeFilePath.ends_with(")")
? sys::path::filename(bitcodeFilePath)
: sys::path::stem(bitcodeFilePath);
StringRef outputFileBaseName = sys::path::filename(config->outputFile);
SmallString<256> path;
sys::path::append(path, directory,
outputFileBaseName + ".lto." + baseName + ext);
sys::path::remove_dots(path, true);
ltoObjName = saver().save(path.str());
}
if (savePrelink || config->ltoEmitAsm)
saveBuffer(buf[i].second, ltoObjName);
if (!config->ltoEmitAsm)
ret.push_back(createObjFile(MemoryBufferRef(objBuf, ltoObjName)));
}
return ret;
}

View File

@@ -46,8 +46,10 @@ public:
private:
std::unique_ptr<llvm::lto::LTO> ltoObj;
std::vector<SmallString<0>> buf;
// An array of (module name, native relocatable file content) pairs.
SmallVector<std::pair<std::string, SmallString<0>>, 0> buf;
std::vector<std::unique_ptr<MemoryBuffer>> files;
SmallVector<std::string, 0> filenames;
llvm::DenseSet<StringRef> usedStartStop;
std::unique_ptr<llvm::raw_fd_ostream> indexFile;
llvm::DenseSet<StringRef> thinIndices;

View File

@@ -61,10 +61,12 @@
# RUN: ld.lld --no-fortran-common -o 11 main.o --start-lib 1.o strong_data_only.o --end-lib
# RUN: llvm-readobj --syms 11 | FileCheck --check-prefix=NFC %s
# RUN: ld.lld -o - main.o 4.a --fortran-common --lto-emit-asm | FileCheck --check-prefix=ASM %s
# RUN: ld.lld -o out main.o 4.a --fortran-common --lto-emit-asm
# RUN: FileCheck --check-prefix=ASM %s < out.lto.s
# RUN: ld.lld -o - main.o --start-lib 1.bc 2.bc --end-lib --fortran-common --lto-emit-asm | \
# RUN: FileCheck --check-prefix=ASM %s
# RUN: rm out.lto.s
# RUN: ld.lld -o out main.o --start-lib 1.bc 2.bc --end-lib --fortran-common --lto-emit-asm
# RUN: FileCheck --check-prefix=ASM %s < out.lto.s
## COMMON overrides weak. Don't extract 3.bc which provides a weak definition.
# RUN: ld.lld -o /dev/null main.o --start-lib 1.bc 3.bc --end-lib -y block | FileCheck --check-prefix=LTO_WEAK %s

View File

@@ -53,10 +53,10 @@
; RUN: rm -fr cache && mkdir cache
; RUN: ld.lld --thinlto-cache-dir=cache --save-temps -o out b.bc a.bc -M | FileCheck %s --check-prefix=MAP
; RUN: ls out1.lto.o a.bc.0.preopt.bc b.bc.0.preopt.bc
; RUN: ls out.lto.a.o a.bc.0.preopt.bc b.bc.0.preopt.bc
; MAP: llvmcache-{{.*}}:(.text)
; MAP: llvmcache-{{.*}}:(.text)
; MAP: out.lto.b.o:(.text)
; MAP: out.lto.a.o:(.text)
;; Check that mllvm options participate in the cache key
; RUN: rm -rf cache && mkdir cache

View File

@@ -35,8 +35,8 @@ TRACE-NEXT: lib.a(obj.o): definition of bar
TRACE-NEXT: lib.a(obj.o): reference to foo
TRACE-NEXT: <internal>: reference to foo
;; The definition of "foo" is visible outside the LTO result.
TRACE-NEXT: lto.tmp: definition of foo
TRACE-NEXT: lto.tmp: reference to bar
TRACE-NEXT: {{.*}}.lto.o: definition of foo
TRACE-NEXT: {{.*}}.lto.o: reference to bar
;--- start.s
.global _start, baz

View File

@@ -1,13 +1,14 @@
; REQUIRES: x86
; RUN: rm -rf %t && mkdir %t && cd %t
; RUN: llvm-as %s -o a.bc
; RUN: ld.lld --lto-emit-asm -shared a.bc -o - | FileCheck %s
; RUN: ld.lld --plugin-opt=emit-asm --plugin-opt=lto-partitions=2 -shared a.bc -o out.s
; RUN: cat out.s out.s1 | FileCheck %s
; RUN: ld.lld --lto-emit-asm -shared a.bc -o out 2>&1 | count 0
; RUN: FileCheck %s < out.lto.s
; RUN: ld.lld --plugin-opt=emit-asm --plugin-opt=lto-partitions=2 -shared a.bc -o out
; RUN: cat out.lto.s out.lto.1.s | FileCheck %s
; RUN: ld.lld --lto-emit-asm --save-temps -shared a.bc -o out.s
; RUN: FileCheck --input-file out.s %s
; RUN: llvm-dis out.s.0.4.opt.bc -o - | FileCheck --check-prefix=OPT %s
; RUN: ld.lld --lto-emit-asm --save-temps -shared a.bc -o out
; RUN: FileCheck --input-file out.lto.s %s
; RUN: llvm-dis out.0.4.opt.bc -o - | FileCheck --check-prefix=OPT %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

View File

@@ -7,7 +7,7 @@
; RUN: llvm-readelf --dyn-syms %t.so | FileCheck %s
; TRACE: {{.*}}/b.a(b.o): lazy definition of __divti3
; TRACE-NEXT: lto.tmp: reference to __divti3
; TRACE-NEXT: {{.*}}.lto.o: reference to __divti3
; TRACE-NEXT: {{.*}}/b.a(b.o): definition of __divti3
; CHECK: Symbol table '.dynsym' contains 2 entries:

View File

@@ -67,12 +67,12 @@
;; Ensure lld emits empty combined module if specific obj-path.
; RUN: mkdir obj
; RUN: ld.lld --plugin-opt=obj-path=objpath.o -shared 1.bc d/2.bc -o obj/out --save-temps
; RUN: ls obj/out.lto.o obj/out1.lto.o obj/out2.lto.o
; RUN: ls obj/out.lto.o out.lto.1.o d/out.lto.2.o
;; Ensure lld does not emit empty combined module by default.
; RUN: rm -fr obj && mkdir obj
; RUN: ld.lld -shared 1.bc d/2.bc -o obj/out --save-temps
; RUN: ls obj/out*.lto.* | count 2
; RUN: not test -e obj/out.lto.o
; EMPTY: file format elf64-x86-64
; EMPTY-NOT: {{.}}

View File

@@ -4,7 +4,7 @@
; RUN: ld.lld --lto-partitions=2 -save-temps -o out a.bc -e foo --lto-O0
; RUN: llvm-readobj --symbols --dyn-syms out | FileCheck %s
; RUN: llvm-nm out.lto.o | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-nm out1.lto.o | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-nm out.lto.1.o | FileCheck --check-prefix=CHECK1 %s
; CHECK: Symbols [
; CHECK-NEXT: Symbol {

View File

@@ -3,7 +3,7 @@
; RUN: llvm-as -o a.bc %s
; RUN: ld.lld --lto-partitions=2 -save-temps -o out a.bc -shared
; RUN: llvm-nm out.lto.o | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-nm out1.lto.o | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-nm out.lto.1.o | FileCheck --check-prefix=CHECK1 %s
; RUN: not ld.lld --lto-partitions=0 a.bc -o /dev/null 2>&1 | FileCheck --check-prefix=INVALID %s
; INVALID: --lto-partitions: number of threads must be > 0

View File

@@ -1,6 +1,7 @@
; REQUIRES: x86
; RUN: opt < %s -passes=pseudo-probe -function-sections -o %t.o
; RUN: ld.lld %t.o -shared --lto-emit-asm -o - | FileCheck %s
; RUN: ld.lld %t.o -shared --lto-emit-asm -o %t
; RUN: FileCheck %s < %t.lto.s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-scei-ps4"

View File

@@ -14,14 +14,14 @@
;; Create the .all dir with save-temps saving everything, this will be used to compare
;; with the output from individualized save-temps later
; RUN: ld.lld main.o thin1.o --save-temps -o %t/all/a.out
; RUN: mv *.o.* %t/all
; RUN: mv a.out.lto.* *.o.*.bc %t/all
;; Sanity check that everything got moved
; RUN: ls | count 2
;; Check precedence if both --save-temps and --save-temps= are present
; RUN: ld.lld main.o thin1.o --save-temps=preopt --save-temps --save-temps=\opt -o %t/all2/a.out
; RUN: cmp %t/all2/a.out %t/all/a.out
; RUN: mv *.o.* %t/all2
; RUN: mv a.out.lto.* *.o.* %t/all2
; RUN: ls | count 2
; RUN: diff -r %t/all %t/all2
@@ -83,8 +83,8 @@
;; Check prelink
; RUN: ld.lld main.o thin1.o --save-temps=prelink
; RUN: cmp %t/all/a.out a.out && rm -f a.out
; RUN: cp *.lto.o %t/subset2
; RUN: mv *.lto.o %t/all3
; RUN: cp a.out.lto.*.o %t/subset2
; RUN: mv a.out.lto.*.o %t/all3
; RUN: ls | count 2
;; Check resolution
@@ -104,7 +104,7 @@
; RUN: cmp %t/all/a.out a.out && rm -f a.out
; RUN: mv *.0.preopt.* %t/subset
; RUN: mv *.4.opt* %t/subset
; RUN: mv *.lto.o %t/subset
; RUN: mv a.out.lto.*.o %t/subset
; RUN: ls | count 2
; RUN: diff -r %t/subset2 %t/subset

View File

@@ -9,21 +9,21 @@
; RUN: llvm-ar rcS d/a.a d/coll.o e/coll.o
; RUN: ld.lld b.bc d/a.a -o out --save-temps
; RUN: llvm-nm out | FileCheck %s
; RUN: llvm-nm out2.lto.o | FileCheck %s --check-prefix=MOD2
; RUN: llvm-nm out3.lto.o | FileCheck %s --check-prefix=MOD3
;; d/out.lto.a.a(coll.o at 8).o out.lto.a.a(coll.o at 1916).o
; RUN: llvm-nm d/out.lto.a.a*at*.o | FileCheck %s --check-prefix=MOD2
; Check we handle this case correctly even in presence of --whole-archive.
; RUN: rm out1.lto.o out2.lto.o out3.lto.o
; RUN: rm d/out.lto.a.a*at*.o
; RUN: ld.lld b.bc --whole-archive d/a.a -o out --save-temps
; RUN: llvm-nm out | FileCheck %s
; RUN: ls out1.lto.o out2.lto.o out3.lto.o
; RUN: llvm-nm d/out.lto.a.a*at*.o | FileCheck %s --check-prefix=MOD2
; CHECK: T _start
; CHECK: T blah
; CHECK: T foo
; MOD2: T foo
; MOD3: T blah
; MOD2-DAG: T foo
; MOD2-DAG: T blah
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-scei-ps4"

View File

@@ -7,56 +7,56 @@
; RUN: opt -module-summary %p/Inputs/thinlto.ll -o d/b.o
; First force single-threaded mode
; RUN: rm -f e/out1.lto.o e/out2.lto.o
; RUN: rm -f out.lto.a.o d/out.lto.b.o
; RUN: ld.lld -save-temps --thinlto-jobs=1 -shared a.o d/b.o -o e/out
; RUN: llvm-nm e/out1.lto.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm e/out2.lto.o | FileCheck %s --check-prefix=NM2
; RUN: llvm-nm out.lto.a.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm d/out.lto.b.o | FileCheck %s --check-prefix=NM2
; Next force multi-threaded mode
; RUN: rm -f e/out1.lto.o e/out2.lto.o
; RUN: rm -f out.lto.a.o d/out.lto.b.o
; RUN: ld.lld -save-temps --thinlto-jobs=2 -shared a.o d/b.o -o e/out
; RUN: llvm-nm e/out1.lto.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm e/out2.lto.o | FileCheck %s --check-prefix=NM2
; RUN: llvm-nm out.lto.a.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm d/out.lto.b.o | FileCheck %s --check-prefix=NM2
;; --plugin-opt=jobs= is an alias.
; RUN: rm -f e/out1.lto.o e/out2.lto.o
; RUN: rm -f out.lto.a.o d/out.lto.b.o
; RUN: ld.lld -save-temps --plugin-opt=jobs=2 -shared a.o d/b.o -o e/out
; RUN: llvm-nm e/out1.lto.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm e/out2.lto.o | FileCheck %s --check-prefix=NM2
; RUN: llvm-nm out.lto.a.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm d/out.lto.b.o | FileCheck %s --check-prefix=NM2
;; --thinlto-jobs= defaults to --threads=.
; RUN: rm -f e/out1.lto.o e/out2.lto.o
; RUN: rm -f out.lto.a.o d/out.lto.b.o
; RUN: ld.lld -save-temps --threads=2 -shared a.o d/b.o -o e/out
; RUN: llvm-nm e/out1.lto.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm e/out2.lto.o | FileCheck %s --check-prefix=NM2
; RUN: llvm-nm out.lto.a.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm d/out.lto.b.o | FileCheck %s --check-prefix=NM2
;; --thinlto-jobs= overrides --threads=.
; RUN: rm -f e/out1.lto.o e/out2.lto.o
; RUN: rm -f out.lto.a.o d/out.lto.b.o
; RUN: ld.lld -save-temps --threads=1 --plugin-opt=jobs=2 -shared a.o d/b.o -o e/out
; RUN: llvm-nm e/out1.lto.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm e/out2.lto.o | FileCheck %s --check-prefix=NM2
; RUN: llvm-nm out.lto.a.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm d/out.lto.b.o | FileCheck %s --check-prefix=NM2
; Test with all threads, on all cores, on all CPU sockets
; RUN: rm -f e/out1.lto.o e/out2.lto.o
; RUN: rm -f out.lto.a.o d/out.lto.b.o
; RUN: ld.lld -save-temps --thinlto-jobs=all -shared a.o d/b.o -o e/out
; RUN: llvm-nm e/out1.lto.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm e/out2.lto.o | FileCheck %s --check-prefix=NM2
; RUN: llvm-nm out.lto.a.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm d/out.lto.b.o | FileCheck %s --check-prefix=NM2
; Test with many more threads than the system has
; RUN: rm -f e/out1.lto.o e/out2.lto.o
; RUN: rm -f out.lto.a.o d/out.lto.b.o
; RUN: ld.lld -save-temps --thinlto-jobs=100 -shared a.o d/b.o -o e/out
; RUN: llvm-nm e/out1.lto.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm e/out2.lto.o | FileCheck %s --check-prefix=NM2
; RUN: llvm-nm out.lto.a.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm d/out.lto.b.o | FileCheck %s --check-prefix=NM2
; Test with a bad value
; RUN: rm -f e/out1.lto.o e/out2.lto.o
; RUN: rm -f out.lto.a.o d/out.lto.b.o
; RUN: not ld.lld -save-temps --thinlto-jobs=foo -shared a.o d/b.o -o e/out 2>&1 | FileCheck %s --check-prefix=BAD-JOBS
; BAD-JOBS: error: --thinlto-jobs: invalid job count: foo
; Then check without --thinlto-jobs (which currently defaults to heavyweight_hardware_concurrency, meaning one thread per hardware core -- not SMT)
; RUN: ld.lld -shared -save-temps a.o d/b.o -o e/out
; RUN: llvm-nm e/out1.lto.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm e/out2.lto.o | FileCheck %s --check-prefix=NM2
; RUN: llvm-nm out.lto.a.o | FileCheck %s --check-prefix=NM1
; RUN: llvm-nm d/out.lto.b.o | FileCheck %s --check-prefix=NM2
; Check that -save-temps is usable with thin archives
; RUN: mkdir dir