[LLD][COFF] Deduplicate common chunks when linking COFF files. (#162553)

This fixes [issue
162148](https://github.com/llvm/llvm-project/issues/162148).

Common symbols are intended to have only a single version of the data
present in the final executable. The MSVC linker is able to successfully
deduplicate these chunks. If an application has a large number of
translation units that each contain a large block of common data (this
is possible, for example, with Fortran code), then failing to
deduplicate these chunks can make the data size so large that the
resulting executable fails to load.

The logic in this patch doesn't catch all of the potential cases for
deduplication, but it should catch the most common ones.
This commit is contained in:
Joshua Cranmer
2025-10-24 16:07:49 -04:00
committed by GitHub
parent 251edd1228
commit 0b01b96864
5 changed files with 48 additions and 1 deletions

View File

@@ -777,7 +777,7 @@ uint32_t SectionChunk::getSectionNumber() const {
return s.getIndex() + 1;
}
CommonChunk::CommonChunk(const COFFSymbolRef s) : sym(s) {
CommonChunk::CommonChunk(const COFFSymbolRef s) : live(false), sym(s) {
// The value of a common symbol is its size. Align all common symbols smaller
// than 32 bytes naturally, i.e. round the size up to the next power of two.
// This is what MSVC link.exe does.

View File

@@ -522,6 +522,8 @@ public:
uint32_t getOutputCharacteristics() const override;
StringRef getSectionName() const override { return ".bss"; }
// True once a common symbol has been constructed with this chunk as its
// data. Initialized to false by the CommonChunk constructor; the writer's
// createSections() skips any CommonChunk for which this is still false, so
// chunks that no symbol refers to take up no space in the output.
bool live;
private:
const COFFSymbolRef sym;
};

View File

@@ -236,6 +236,8 @@ public:
CommonChunk *c = nullptr)
: DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) {
this->isExternal = true;
if (c)
c->live = true;
}
static bool classof(const Symbol *s) {

View File

@@ -1114,6 +1114,10 @@ void Writer::createSections() {
sc->printDiscardedMessage();
continue;
}
if (auto *cc = dyn_cast<CommonChunk>(c)) {
if (!cc->live)
continue;
}
StringRef name = c->getSectionName();
if (shouldStripSectionSuffix(sc, name, ctx.config.mingw))
name = name.split('$').first;

View File

@@ -0,0 +1,39 @@
; Regression test: a common symbol that is defined in two object files must
; only occupy storage once in the linked image.
; REQUIRES: x86
; Split this file into t1.ll and t2.ll, compile each to a COFF object, link
; both objects into one executable and dump its section headers.
; RUN: rm -rf %t.dir
; RUN: split-file %s %t.dir
; RUN: llc %t.dir/t1.ll -o %t.dir/t1.obj --filetype=obj
; RUN: llc %t.dir/t2.ll -o %t.dir/t2.obj --filetype=obj
; RUN: lld-link %t.dir/t1.obj %t.dir/t2.obj -entry:main -out:%t.dir/out.exe
; RUN: llvm-readobj --section-headers %t.dir/out.exe | FileCheck %s
; Make sure that the data section contains just one copy of @a, not two.
; @a is 4096 bytes, so a single copy gives a virtual size of exactly 0x1000.
; NOTE(review): the linker names common chunks ".bss"; this test presumably
; relies on .bss being merged into .data in the output - confirm.
; CHECK: Name: .data
; CHECK-NEXT: VirtualSize: 0x1000
;--- t1.ll
; First translation unit: defines the common symbol @a and a function that
; reads from it.
target triple = "x86_64-pc-windows-msvc"
@a = common global [4096 x i8] zeroinitializer
define i32 @usea() {
%ref_common = load i32, ptr @a
ret i32 %ref_common
}
;--- t2.ll
; Second translation unit: defines the same common symbol @a with the same
; size, a second reader, and the entry point that calls both readers.
target triple = "x86_64-pc-windows-msvc"
@a = common global [4096 x i8] zeroinitializer
define i32 @useb() {
%ref_common = load i32, ptr @a
ret i32 %ref_common
}
declare i32 @usea()
define dso_local i32 @main() local_unnamed_addr {
entry:
%a = tail call i32 @usea()
%b = tail call i32 @useb()
%add = add nsw i32 %a, %b
ret i32 %add
}