From 73d01a80cec397cb4e66ebb88a1204866b31ca59 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 12 Nov 2020 08:53:11 -0800 Subject: [PATCH] [ELF] Sort by input order within an input section description According to https://sourceware.org/binutils/docs/ld/Input-Section-Basics.html#Input-Section-Basics for `*(.a .b)`, the order should match the input order: * for `ld 1.o 2.o`, sections from 1.o precede sections from 2.o * within a file, `.a` and `.b` appear in the section header table order This patch implements the behavior. The interaction with `SORT*` and --sort-section is: Matched sections are ordered by radix sort with the keys being `(SORT*, --sort-section, input order)`, where `SORT*` (if present) is most significant. > Note, multiple `SORT*` within an input section description has undocumented and > confusing behaviors in GNU ld: > https://sourceware.org/pipermail/binutils/2020-November/114083.html > Therefore multiple `SORT*` is not the focus for this patch but > this patch still strives to have an explainable behavior. As an example, we partition `SORT(a.*) b.* c.* SORT(d.*)`, into `SORT(a.*) | b.* c.* | SORT(d.*)` and perform sorting within groups. Sections matched by patterns between two `SORT*` are sorted by input order. If --sort-alignment is given, they are sorted by --sort-alignment, breaking tie by input order. This patch also allows a section to be matched by multiple patterns, previously duplicated sections could occupy more space in the output and had erroneous zero bytes. The patch is in preparation for support for `*(SORT_BY_INIT_PRIORITY(.init_array.* .ctors.*)) *(.init_array .ctors)`, which will allow LLD to mix .ctors*/.init_array* like GNU ld (gold's --ctors-in-init-array) PR44698 and PR48096 Reviewed By: grimar, psmith Differential Revision: https://reviews.llvm.org/D91127 --- lld/ELF/LinkerScript.cpp | 53 ++++++++++++++++---- lld/test/ELF/linkerscript/exclude-multiple.s | 15 +++--- lld/test/ELF/linkerscript/sort2.s | 13 ++++- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 8ae95fbe1e94..a03e21c55503 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -409,15 +409,16 @@ static void sortSections(MutableArrayRef vec, // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. // 4. If no SORT command is given, sort according to --sort-section. static void sortInputSections(MutableArrayRef vec, - const SectionPattern &pat) { - if (pat.sortOuter == SortSectionPolicy::None) + SortSectionPolicy outer, + SortSectionPolicy inner) { + if (outer == SortSectionPolicy::None) return; - if (pat.sortInner == SortSectionPolicy::Default) + if (inner == SortSectionPolicy::Default) sortSections(vec, config->sortSection); else - sortSections(vec, pat.sortInner); - sortSections(vec, pat.sortOuter); + sortSections(vec, inner); + sortSections(vec, outer); } // Compute and remember which sections the InputSectionDescription matches. @@ -425,13 +426,27 @@ std::vector LinkerScript::computeInputSections(const InputSectionDescription *cmd, ArrayRef sections) { std::vector ret; + std::vector indexes; + DenseSet seen; + auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) { + llvm::sort(MutableArrayRef(indexes).slice(begin, end - begin)); + for (size_t i = begin; i != end; ++i) + ret[i] = sections[indexes[i]]; + sortInputSections( + MutableArrayRef(ret).slice(begin, end - begin), + config->sortSection, SortSectionPolicy::None); + }; // Collects all sections that satisfy constraints of Cmd. + size_t sizeAfterPrevSort = 0; for (const SectionPattern &pat : cmd->sectionPatterns) { - size_t sizeBefore = ret.size(); + size_t sizeBeforeCurrPat = ret.size(); - for (InputSectionBase *sec : sections) { - if (!sec->isLive() || sec->parent) + for (size_t i = 0, e = sections.size(); i != e; ++i) { + // Skip if the section is dead or has been matched by a previous input + // section description or a previous pattern. + InputSectionBase *sec = sections[i]; + if (!sec->isLive() || sec->parent || seen.contains(i)) continue; // For -emit-relocs we have to ignore entries like @@ -453,11 +468,31 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd, continue; ret.push_back(sec); + indexes.push_back(i); + seen.insert(i); } + if (pat.sortOuter == SortSectionPolicy::Default) + continue; + + // Matched sections are ordered by radix sort with the keys being (SORT*, + // --sort-section, input order), where SORT* (if present) is most + // significant. + // + // Matched sections between the previous SORT* and this SORT* are sorted by + // (--sort-alignment, input order). + sortByPositionThenCommandLine(sizeAfterPrevSort, sizeBeforeCurrPat); + // Matched sections by this SORT* pattern are sorted using all 3 keys. + // ret[sizeBeforeCurrPat,ret.size()) are already in the input order, so we + // just sort by sortOuter and sortInner. sortInputSections( - MutableArrayRef(ret).slice(sizeBefore), pat); + MutableArrayRef(ret).slice(sizeBeforeCurrPat), + pat.sortOuter, pat.sortInner); + sizeAfterPrevSort = ret.size(); } + // Matched sections after the last SORT* are sorted by (--sort-alignment, + // input order). + sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size()); return ret; } diff --git a/lld/test/ELF/linkerscript/exclude-multiple.s b/lld/test/ELF/linkerscript/exclude-multiple.s index e8e4348c947f..a4e09c0f4ec0 100644 --- a/lld/test/ELF/linkerscript/exclude-multiple.s +++ b/lld/test/ELF/linkerscript/exclude-multiple.s @@ -8,10 +8,12 @@ # RUN: ld.lld -script %t1.script %tfile1.o %tfile2.o %tfile3.o -o %t1.o # RUN: llvm-objdump -s %t1.o | FileCheck %s +## Sections from %tfile1 precede sections from %tfile2 and %tfile3. +## In each file, the sections are added in the original order. # CHECK: Contents of section .foo: -# CHECK-NEXT: 01000000 00000000 04000000 00000000 -# CHECK-NEXT: 07000000 00000000 05000000 00000000 -# CHECK-NEXT: 08000000 00000000 03000000 00000000 +# CHECK-NEXT: 03000000 00000000 01000000 00000000 +# CHECK-NEXT: 04000000 00000000 05000000 00000000 +# CHECK-NEXT: 07000000 00000000 08000000 00000000 # CHECK-NEXT: 09000000 00000000 # CHECK-NEXT: Contents of section .foo.2: # CHECK-NEXT: 02000000 00000000 @@ -27,11 +29,12 @@ # RUN: not ld.lld -script %t3.script %tfile1.o %tfile2.o %tfile3.o -o /dev/null 2>&1 | \ # RUN: FileCheck %s --check-prefix=ERR -.section .foo.1,"a" - .quad 1 - .section .foo.2,"a" .quad 2 +## %tfile1.o(.foo.3) precedes %tfile.o(.foo.1) in the output section. .section .foo.3,"a" .quad 3 + +.section .foo.1,"a" + .quad 1 diff --git a/lld/test/ELF/linkerscript/sort2.s b/lld/test/ELF/linkerscript/sort2.s index 732f96d39433..87969f023126 100644 --- a/lld/test/ELF/linkerscript/sort2.s +++ b/lld/test/ELF/linkerscript/sort2.s @@ -5,9 +5,11 @@ # RUN: ld.lld -o %t1 --script %t1.script %tfile1.o # RUN: llvm-readelf -x .abc %t1 | FileCheck %s -## FIXME Some input sections are duplicated in .abc and their second occurrences are zeros. +## Sections matched by patterns between two SORT are sorted separately by input order. +## Note, GNU ld has a strange behavior with more than one SORT* https://sourceware.org/pipermail/binutils/2020-November/114083.html +## In the absence of SORT, our multi-pattern behavior matches GNU ld. # CHECK: Hex dump of section '.abc' -# CHECK-NEXT: 0x00000000 01020306 05040000 00070908 0b0c0a +# CHECK-NEXT: 0x00000000 01020306 05040708 090b0c0a # RUN: echo "SECTIONS { \ # RUN: .abc : { *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .a* SORT(.bar.*) .b*) } \ @@ -15,6 +17,13 @@ # RUN: ld.lld -o %t2 --script %t2.script %tfile1.o # RUN: llvm-readelf -x .abc %t2 | FileCheck %s +## Non-SORT patterns are sorted by --sort-section, breaking tie by input order. +# RUN: ld.lld -o %t4 --script %t1.script --sort-section=name %tfile1.o +# RUN: llvm-readelf -x .abc %t4 | FileCheck %s --check-prefix=CHECK2 + +# CHECK2: Hex dump of section '.abc' +# CHECK2-NEXT: 0x00000000 01020304 05060708 090a0b0c + .text .globl _start _start: