From 0b01b96864983c4b150776b869a3d048b0d50e2c Mon Sep 17 00:00:00 2001 From: Joshua Cranmer Date: Fri, 24 Oct 2025 16:07:49 -0400 Subject: [PATCH] [LLD][COFF] Deduplicate common chunks when linking COFF files. (#162553) This fixes [issue 162148](https://github.com/llvm/llvm-project/issues/162148). Common symbols are intended to have only a single version of the data present in the final executable. The MSVC linker is able to successfully deduplicate these chunks. If you have an application with a large number of translation units with a large block of common data (this is possible, for example, with Fortran code), then failing to deduplicate these chunks can make the data size so large that the resulting executable fails to load. The logic in this patch doesn't catch all of the potential cases for deduplication, but it should catch the most common ones. --- lld/COFF/Chunks.cpp | 2 +- lld/COFF/Chunks.h | 2 ++ lld/COFF/Symbols.h | 2 ++ lld/COFF/Writer.cpp | 4 ++++ lld/test/COFF/common-dedup.ll | 39 +++++++++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 lld/test/COFF/common-dedup.ll diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index cb5cba5c414a..548d87bdaefe 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -777,7 +777,7 @@ uint32_t SectionChunk::getSectionNumber() const { return s.getIndex() + 1; } -CommonChunk::CommonChunk(const COFFSymbolRef s) : sym(s) { +CommonChunk::CommonChunk(const COFFSymbolRef s) : live(false), sym(s) { // The value of a common symbol is its size. Align all common symbols smaller // than 32 bytes naturally, i.e. round the size up to the next power of two. // This is what MSVC link.exe does. 
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 6d88f5ec7377..cf8857dc5730 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -522,6 +522,8 @@ public: uint32_t getOutputCharacteristics() const override; StringRef getSectionName() const override { return ".bss"; } + bool live; + private: const COFFSymbolRef sym; }; diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index c86ded860876..9f077ddb2bb7 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -236,6 +236,8 @@ public: CommonChunk *c = nullptr) : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) { this->isExternal = true; + if (c) + c->live = true; } static bool classof(const Symbol *s) { diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 258a82e371f3..3ac26681541b 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1114,6 +1114,10 @@ void Writer::createSections() { sc->printDiscardedMessage(); continue; } + if (auto *cc = dyn_cast<CommonChunk>(c)) { + if (!cc->live) + continue; + } StringRef name = c->getSectionName(); if (shouldStripSectionSuffix(sc, name, ctx.config.mingw)) name = name.split('$').first; diff --git a/lld/test/COFF/common-dedup.ll b/lld/test/COFF/common-dedup.ll new file mode 100644 index 000000000000..cec3a7065d54 --- /dev/null +++ b/lld/test/COFF/common-dedup.ll @@ -0,0 +1,39 @@ +; REQUIRES: x86 +; RUN: rm -rf %t.dir +; RUN: split-file %s %t.dir +; RUN: llc %t.dir/t1.ll -o %t.dir/t1.obj --filetype=obj +; RUN: llc %t.dir/t2.ll -o %t.dir/t2.obj --filetype=obj +; RUN: lld-link %t.dir/t1.obj %t.dir/t2.obj -entry:main -out:%t.dir/out.exe +; RUN: llvm-readobj --section-headers %t.dir/out.exe | FileCheck %s + +; Make sure that the data section contains just one copy of @a, not two.
+; CHECK: Name: .data +; CHECK-NEXT: VirtualSize: 0x1000 + +;--- t1.ll +target triple = "x86_64-pc-windows-msvc" +@a = common global [4096 x i8] zeroinitializer + +define i32 @usea() { + %ref_common = load i32, ptr @a + ret i32 %ref_common +} + +;--- t2.ll +target triple = "x86_64-pc-windows-msvc" +@a = common global [4096 x i8] zeroinitializer + +define i32 @useb() { + %ref_common = load i32, ptr @a + ret i32 %ref_common +} + +declare i32 @usea() + +define dso_local i32 @main() local_unnamed_addr { +entry: + %a = tail call i32 @usea() + %b = tail call i32 @useb() + %add = add nsw i32 %a, %b + ret i32 %add +}