2020-04-02 11:54:05 -07:00
|
|
|
//===- OutputSegment.cpp --------------------------------------------------===//
|
|
|
|
|
//
|
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
|
|
#include "OutputSegment.h"
|
2021-05-25 14:57:16 -04:00
|
|
|
#include "ConcatOutputSection.h"
|
2020-04-27 12:50:59 -07:00
|
|
|
#include "InputSection.h"
|
2024-07-23 11:02:55 -04:00
|
|
|
#include "Sections.h"
|
2021-07-23 10:12:55 -04:00
|
|
|
#include "Symbols.h"
|
2020-05-01 16:29:06 -07:00
|
|
|
#include "SyntheticSections.h"
|
2020-04-27 12:50:59 -07:00
|
|
|
|
2020-04-02 11:54:05 -07:00
|
|
|
#include "lld/Common/Memory.h"
|
2021-05-25 14:57:18 -04:00
|
|
|
#include "llvm/ADT/StringSwitch.h"
|
2020-04-27 12:50:59 -07:00
|
|
|
#include "llvm/BinaryFormat/MachO.h"
|
2020-04-02 11:54:05 -07:00
|
|
|
|
|
|
|
|
using namespace llvm;
|
2020-04-27 12:50:59 -07:00
|
|
|
using namespace llvm::MachO;
|
2020-04-02 11:54:05 -07:00
|
|
|
using namespace lld;
|
|
|
|
|
using namespace lld::macho;
|
|
|
|
|
|
2020-04-27 12:50:59 -07:00
|
|
|
/// Returns the initial VM protection bits for the segment called \p name.
///
/// An entry in config->segmentProtections with a matching name takes
/// precedence (the first match wins). Otherwise the built-in defaults apply:
/// read+execute for the text segment, no access for the page-zero segment,
/// read-only for the link-edit segment, and read+write for everything else.
static uint32_t initProt(StringRef name) {
  // Explicitly configured protections override the defaults below.
  for (const SegmentProtection &segprot : config->segmentProtections) {
    if (segprot.name == name)
      return segprot.initProt;
  }

  if (name == segment_names::pageZero)
    return 0;
  if (name == segment_names::text)
    return VM_PROT_READ | VM_PROT_EXECUTE;
  if (name == segment_names::linkEdit)
    return VM_PROT_READ;

  // Any other segment is readable and writable.
  return VM_PROT_READ | VM_PROT_WRITE;
}
|
|
|
|
|
|
|
|
|
|
/// Returns the maximum VM protection bits for the segment called \p name.
///
/// An entry in config->segmentProtections with a matching name takes
/// precedence (the first match wins). Without one, the maximum protection is
/// the same as the initial protection computed by initProt().
static uint32_t maxProt(StringRef name) {
  assert(config->arch() != AK_i386 &&
         "TODO: i386 has different maxProt requirements");

  for (const SegmentProtection &segprot : config->segmentProtections) {
    if (segprot.name == name)
      return segprot.maxProt;
  }

  return initProt(name);
}
|
|
|
|
|
|
2022-08-31 12:32:21 +02:00
|
|
|
/// Returns the segment flags for the segment called \p name: SG_READ_ONLY for
/// the data-const segment, and no flags for every other segment.
static uint32_t flags(StringRef name) {
  // If we ever implement shared cache output support, SG_READ_ONLY should not
  // be used for dylibs that can be placed in it.
  if (name == segment_names::dataConst)
    return SG_READ_ONLY;
  return 0;
}
|
|
|
|
|
|
2020-05-01 16:29:06 -07:00
|
|
|
size_t OutputSegment::numNonHiddenSections() const {
|
|
|
|
|
size_t count = 0;
|
2021-03-16 21:34:28 -07:00
|
|
|
for (const OutputSection *osec : sections)
|
[lld-macho] Refactor segment/section creation, sorting, and merging
Summary:
There were a few issues with the previous setup:
1. The section sorting comparator used a declarative map of section names to
determine the correct order, but it turns out we need to match on more than
just names -- in particular, an upcoming diff will sort based on whether the
S_ZERO_FILL flag is set. This diff changes the sorter to a more imperative but
flexible form.
2. We were sorting OutputSections stored in a MapVector, which left the
MapVector in an inconsistent state -- the wrong keys map to the wrong values!
In practice, we weren't doing key lookups (only container iteration) after the
sort, so this was fine, but it was still a dubious state of affairs. This diff
copies the OutputSections to a vector before sorting them.
3. We were adding unneeded OutputSections to OutputSegments and then filtering
them out later, which meant that we had to remember whether an OutputSegment
was in a pre- or post-filtered state. This diff only adds the sections to the
segments if they are needed.
In addition to those major changes, two minor ones worth noting:
1. I renamed all OutputSection variable names to `osec`, to parallel `isec`.
Previously we were using some inconsistent combination of `osec`, `os`, and
`section`.
2. I added a check (and a test) for InputSections with names that clashed with
those of our synthetic OutputSections.
Reviewers: #lld-macho
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81887
2020-06-15 00:03:24 -07:00
|
|
|
count += (!osec->isHidden() ? 1 : 0);
|
2020-05-01 16:29:06 -07:00
|
|
|
return count;
|
|
|
|
|
}
|
|
|
|
|
|
[lld-macho] Refactor segment/section creation, sorting, and merging
Summary:
There were a few issues with the previous setup:
1. The section sorting comparator used a declarative map of section names to
determine the correct order, but it turns out we need to match on more than
just names -- in particular, an upcoming diff will sort based on whether the
S_ZERO_FILL flag is set. This diff changes the sorter to a more imperative but
flexible form.
2. We were sorting OutputSections stored in a MapVector, which left the
MapVector in an inconsistent state -- the wrong keys map to the wrong values!
In practice, we weren't doing key lookups (only container iteration) after the
sort, so this was fine, but it was still a dubious state of affairs. This diff
copies the OutputSections to a vector before sorting them.
3. We were adding unneeded OutputSections to OutputSegments and then filtering
them out later, which meant that we had to remember whether an OutputSegment
was in a pre- or post-filtered state. This diff only adds the sections to the
segments if they are needed.
In addition to those major changes, two minor ones worth noting:
1. I renamed all OutputSection variable names to `osec`, to parallel `isec`.
Previously we were using some inconsistent combination of `osec`, `os`, and
`section`.
2. I added a check (and a test) for InputSections with names that clashed with
those of our synthetic OutputSections.
Reviewers: #lld-macho
Subscribers: llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81887
2020-06-15 00:03:24 -07:00
|
|
|
/// Appends \p osec to this segment.
///
/// Besides recording the section in `sections`, this:
///  - lowers the segment's inputOrder to the section's inputOrder if that is
///    smaller, so the segment sorts by its earliest section;
///  - points the section's `parent` back at this segment;
///  - applies every entry of config->sectionAlignments whose segment name and
///    section name both match; if several match, the last one wins.
void OutputSegment::addOutputSection(OutputSection *osec) {
  inputOrder = std::min(inputOrder, osec->inputOrder);

  osec->parent = this;
  sections.push_back(osec);

  for (const SectionAlign &sectAlign : config->sectionAlignments)
    if (sectAlign.segName == name && sectAlign.sectName == osec->name)
      osec->align = sectAlign.align;
}
|
|
|
|
|
|
2021-05-25 14:57:18 -04:00
|
|
|
/// Builds a strict "less than" comparator over values of type \p T from a
/// key function: `a` orders before `b` exactly when ord(a) < ord(b).
/// The key function is copied into the returned closure.
template <typename T, typename F> static auto compareByOrder(F ord) {
  return [ord](T lhs, T rhs) { return ord(lhs) < ord(rhs); };
}
|
|
|
|
|
|
|
|
|
|
/// Returns the sort key of \p seg among all output segments; lower keys come
/// first. Well-known segments get fixed keys; any other segment is keyed by
/// its inputOrder.
static int segmentOrder(OutputSegment *seg) {
  constexpr int last = std::numeric_limits<int>::max();
  const StringRef segname = seg->name;

  if (segname == segment_names::pageZero)
    return -4;
  if (segname == segment_names::text)
    return -3;
  if (segname == segment_names::dataConst)
    return -2;
  if (segname == segment_names::data)
    return -1;
  if (segname == segment_names::llvm)
    return last - 1;
  // Make sure __LINKEDIT is the last segment (i.e. all its hidden
  // sections must be ordered after other sections).
  if (segname == segment_names::linkEdit)
    return last;

  return seg->inputOrder;
}
|
|
|
|
|
|
|
|
|
|
/// Returns the sort key of \p osec within its parent segment; lower keys are
/// placed first. The key depends on the parent segment's name, the section's
/// name, and the section's type flags. Sections without a special rule are
/// keyed by their inputOrder.
static int sectionOrder(OutputSection *osec) {
  constexpr int last = std::numeric_limits<int>::max();
  const StringRef segname = osec->parent->name;

  // Sections are uniquely identified by their segment + section name.
  if (segname == segment_names::text) {
    // Prioritize specific section ordering based on our knowledge. This
    // ensures that certain sections are placed in a particular order, even if
    // they are also categorized as code sections. This explicit ordering
    // takes precedence over the general code section ordering.
    //
    // `__text` needs to precede the other code sections since it's expected
    // to be the largest. This means in effect that it will be the section
    // that determines whether we need thunks or not.
    const int knownPriority =
        StringSwitch<int>(osec->name)
            .Case(section_names::header, -7)
            .Case(section_names::text, -6)
            .Case(section_names::stubs, -4)
            .Case(section_names::stubHelper, -3)
            .Case(section_names::objcStubs, -2)
            .Case(section_names::initOffsets, -1)
            .Case(section_names::unwindInfo, last - 1)
            .Case(section_names::ehFrame, last)
            .Default(0);
    if (knownPriority != 0)
      return knownPriority;

    // Ensure all code sections are contiguous with `__text` for thunk
    // calculations.
    if (sections::isCodeSection(osec->name, segment_names::text, osec->flags))
      return -5;

    return osec->inputOrder;
  }

  if (segname == segment_names::data || segname == segment_names::dataConst) {
    // For each thread spawned, dyld will initialize its TLVs by copying the
    // address range from the start of the first thread-local data section to
    // the end of the last one. We therefore arrange these sections
    // contiguously to minimize the amount of memory used. Additionally, since
    // zerofill sections must be at the end of their segments, and since TLV
    // data sections can be zerofills, we end up putting all TLV data sections
    // at the end of the segment.
    switch (sectionType(osec->flags)) {
    case S_THREAD_LOCAL_VARIABLE_POINTERS:
      return last - 3;
    case S_THREAD_LOCAL_REGULAR:
      return last - 2;
    case S_THREAD_LOCAL_ZEROFILL:
      return last - 1;
    case S_ZEROFILL:
      return last;
    default:
      return StringSwitch<int>(osec->name)
          .Case(section_names::got, -3)
          .Case(section_names::lazySymbolPtr, -2)
          .Case(section_names::const_, -1)
          .Default(osec->inputOrder);
    }
  }

  if (segname == segment_names::linkEdit) {
    return StringSwitch<int>(osec->name)
        .Case(section_names::chainFixups, -11)
        .Case(section_names::rebase, -10)
        .Case(section_names::binding, -9)
        .Case(section_names::weakBinding, -8)
        .Case(section_names::lazyBinding, -7)
        .Case(section_names::export_, -6)
        .Case(section_names::functionStarts, -5)
        .Case(section_names::dataInCode, -4)
        .Case(section_names::symbolTable, -3)
        .Case(section_names::indirectSymbolTable, -2)
        .Case(section_names::stringTable, -1)
        .Case(section_names::codeSignature, last)
        .Default(osec->inputOrder);
  }

  // ZeroFill sections must always be at the end of their segments:
  // dyld checks if a segment's file size is smaller than its in-memory
  // size to detect if a segment has zerofill sections, and if so it maps
  // the missing tail as zerofill.
  if (sectionType(osec->flags) == S_ZEROFILL)
    return last;

  return osec->inputOrder;
}
|
|
|
|
|
|
|
|
|
|
void OutputSegment::sortOutputSections() {
|
2021-06-28 15:29:16 -04:00
|
|
|
// Must be stable_sort() to keep special sections such as
|
|
|
|
|
// S_THREAD_LOCAL_REGULAR in input order.
|
|
|
|
|
llvm::stable_sort(sections, compareByOrder<OutputSection *>(sectionOrder));
|
2021-05-25 14:57:18 -04:00
|
|
|
}
|
|
|
|
|
|
2021-07-23 10:12:55 -04:00
|
|
|
void OutputSegment::assignAddressesToStartEndSymbols() {
|
|
|
|
|
for (Defined *d : segmentStartSymbols)
|
|
|
|
|
d->value = addr;
|
|
|
|
|
for (Defined *d : segmentEndSymbols)
|
|
|
|
|
d->value = addr + vmSize;
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-25 14:57:18 -04:00
|
|
|
void macho::sortOutputSegments() {
|
2021-07-23 16:54:19 -04:00
|
|
|
llvm::stable_sort(outputSegments,
|
|
|
|
|
compareByOrder<OutputSegment *>(segmentOrder));
|
2021-05-25 14:57:18 -04:00
|
|
|
}
|
|
|
|
|
|
2021-01-09 11:58:19 -05:00
|
|
|
// Lookup table from segment name to the OutputSegment created for it. Filled
// by getOrCreateOutputSegment() and emptied by resetOutputSegments().
static DenseMap<StringRef, OutputSegment *> nameToOutputSegment;
|
2020-04-02 11:54:05 -07:00
|
|
|
// Every OutputSegment created through getOrCreateOutputSegment(), in creation
// order until sortOutputSegments() reorders it.
std::vector<OutputSegment *> macho::outputSegments;
|
|
|
|
|
|
2021-10-30 16:35:30 -07:00
|
|
|
/// Empties both pieces of file-level segment state: the name lookup table and
/// the global outputSegments list.
void macho::resetOutputSegments() {
  nameToOutputSegment.clear();
  outputSegments.clear();
}
|
|
|
|
|
|
2021-07-25 10:09:37 -04:00
|
|
|
/// Returns the replacement recorded for \p name in config->segmentRenameMap,
/// or \p name itself when the map has no entry for it.
static StringRef maybeRenameSegment(StringRef name) {
  if (auto renamed = config->segmentRenameMap.find(name);
      renamed != config->segmentRenameMap.end()) {
    return renamed->second;
  }
  return name;
}
|
|
|
|
|
|
2020-04-27 12:50:59 -07:00
|
|
|
/// Returns the output segment registered under \p name, creating it on first
/// use.
///
/// The name is first passed through maybeRenameSegment(), and the resulting
/// name is the lookup key. A newly created segment gets that name, its
/// maximum and initial protections, and its flags, and is appended to the
/// global outputSegments list.
OutputSegment *macho::getOrCreateOutputSegment(StringRef name) {
  name = maybeRenameSegment(name);

  OutputSegment *&slot = nameToOutputSegment[name];
  if (!slot) {
    OutputSegment *seg = make<OutputSegment>();
    seg->name = name;
    seg->maxProt = maxProt(name);
    seg->initProt = initProt(name);
    seg->flags = flags(name);

    slot = seg;
    outputSegments.push_back(seg);
  }
  return slot;
}
|