From 61f94f2768e6b74beb91b96b03630bbf72b9bfaa Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Fri, 23 Dec 2022 14:50:58 -0500 Subject: [PATCH] [lld-macho] Only fold private-label aliases that do not have flags This will enable us to re-land {D139069}. The issue with the original diff was that we were folding all private-label symbols. We were not merging the symbol flags during this folding; instead we just made all references to the folded symbol point to its aliasee. This caused some flags to be incorrectly discarded. This surfaced as code that was incorrectly stripped due to LLD dropping the `.no_dead_strip` flag. This diff fixes things by only folding flag-less private-label aliases. Most (maybe all) of the `ltmp` symbols that are generated by the MC aarch64 backend are flag-less, so this conservative folding behavior does the job. Reviewed By: #lld-macho, thakis Differential Revision: https://reviews.llvm.org/D140606 --- lld/MachO/InputFiles.cpp | 25 +++++++---- ...-private-label.s => private-label-alias.s} | 42 ++++++++++++++++--- 2 files changed, 52 insertions(+), 15 deletions(-) rename lld/test/MachO/{weak-def-alias-private-label.s => private-label-alias.s} (73%) diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index c903c98cc015..0dda9c787011 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -845,10 +845,15 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, // We populate subsections by repeatedly splitting the last (highest // address) subsection. llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { - // Put private-label symbols after other symbols at the same address. - if (nList[lhs].n_value == nList[rhs].n_value) - return !isPrivateLabel(getSymName(nList[lhs])) && - isPrivateLabel(getSymName(nList[rhs])); + // Put private-label symbols that have no flags after other symbols at the + // same address. + StringRef lhsName = getSymName(nList[lhs]); + StringRef rhsName = getSymName(nList[rhs]); + if (nList[lhs].n_value == nList[rhs].n_value) { + if (isPrivateLabel(lhsName) && isPrivateLabel(rhsName)) + return nList[lhs].n_desc > nList[rhs].n_desc; + return !isPrivateLabel(lhsName) && isPrivateLabel(rhsName); + } return nList[lhs].n_value < nList[rhs].n_value; }); for (size_t j = 0; j < symbolIndices.size(); ++j) { @@ -874,11 +879,13 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, if (!subsectionsViaSymbols || symbolOffset == 0 || sym.n_desc & N_ALT_ENTRY || !isa(isec)) { isec->hasAltEntry = symbolOffset != 0; - // If we have an private-label symbol that's an alias, just reuse the - // aliased symbol. Our sorting step above ensures that any such symbols - // will appear after the non-private-label ones. See - // weak-def-alias-ignored.s for the motivation behind this. - if (symbolOffset == 0 && isPrivateLabel(name) && j != 0) + // If we have an private-label symbol that's an alias, and that alias + // doesn't have any flags of its own, then we can just reuse the aliased + // symbol. Our sorting step above ensures that any such symbols will + // appear after the non-private-label ones. See weak-def-alias-ignored.s + // for the motivation behind this. + if (symbolOffset == 0 && isPrivateLabel(name) && j != 0 && + sym.n_desc == 0) symbols[symIndex] = symbols[symbolIndices[j - 1]]; else symbols[symIndex] = createDefined(sym, name, isec, symbolOffset, diff --git a/lld/test/MachO/weak-def-alias-private-label.s b/lld/test/MachO/private-label-alias.s similarity index 73% rename from lld/test/MachO/weak-def-alias-private-label.s rename to lld/test/MachO/private-label-alias.s index 32067b77867b..c5eb6277206c 100644 --- a/lld/test/MachO/weak-def-alias-private-label.s +++ b/lld/test/MachO/private-label-alias.s @@ -1,10 +1,10 @@ # REQUIRES: x86 -## This test checks that when we coalesce weak definitions, any private-label -## aliases to those weak defs don't cause the coalesced data to be retained. -## This test explicitly creates those private-label symbols, but it was actually -## motivated by MC's aarch64 backend which automatically creates them when -## emitting object files. I've chosen to explicitly create them here since we -## can then reference those symbols for a more complete test. +## This test checks that when we coalesce weak definitions, any flag-less +## private-label aliases to those weak defs don't cause the coalesced data to be +## retained. This test explicitly creates those private-label symbols, but it +## was actually motivated by MC's aarch64 backend which automatically creates +## them when emitting object files. I've chosen to explicitly create them here +## since we can then reference those symbols for a more complete test. ## ## Not retaining the data matters for more than just size -- we have a use case ## that depends on proper data coalescing to emit a valid file format. @@ -25,8 +25,11 @@ # RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-then-private.s -o %t/weak-then-private.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/private-then-weak.s -o %t/private-then-weak.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-dead-strip.s -o %t/no-dead-strip.o # RUN: %lld -dylib %t/weak-then-private.o %t/private-then-weak.o -o %t/test1 # RUN: %lld -dylib %t/private-then-weak.o %t/weak-then-private.o -o %t/test2 +# RUN: %lld -dead_strip %t/no-dead-strip.o -o %t/no-dead-strip + # RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test1 | FileCheck %s # RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test2 | FileCheck %s @@ -48,6 +51,14 @@ # CHECK-NEXT: __DATA __data 0x00001008 pointer 0 _weak # CHECK-NEXT: __DATA __data 0x00001010 pointer 0 _weak +## Verify that we don't drop any flags that private-label aliases have (such as +## .no_dead_strip). This is a regression test. We previously had subsections +## that were mistakenly stripped. + +# RUN: llvm-objdump --macho --section-headers %t/no-dead-strip | FileCheck %s \ +# RUN: --check-prefix=NO-DEAD-STRIP +# NO-DEAD-STRIP: __data 00000010 + #--- weak-then-private.s .globl _weak .weak_definition _weak @@ -73,3 +84,22 @@ _ref: .quad l_ignored .subsections_via_symbols + +#--- no-dead-strip.s +.globl _main + +_main: + ret + +.data +.no_dead_strip l_foo, l_bar + +_foo: +l_foo: + .quad 0x123 + +l_bar: +_bar: + .quad 0x123 + +.subsections_via_symbols