Files
llvm/lld/MachO/BPSectionOrderer.cpp
Ellis Hoag 60ab8c8967 [lld][macho] Use reloc length correctly in hash computation (#165287)
`Reloc::length` actually encodes the log2 of the length. Thanks @int3
for pointing this out in
https://github.com/llvm/llvm-project/issues/160894#issuecomment-3416250179.

This code computes hashes of relocations. With the correct length, the
hashes should more accurately represent the relocation. In my testing of
some large binaries, the compressed size change is negligable.
2025-10-27 14:48:04 -07:00

144 lines
5.2 KiB
C++

//===- BPSectionOrderer.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "BPSectionOrderer.h"
#include "InputSection.h"
#include "Relocations.h"
#include "Symbols.h"
#include "lld/Common/BPSectionOrdererBase.inc"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/xxhash.h"
#define DEBUG_TYPE "bp-section-orderer"
using namespace llvm;
using namespace lld::macho;
namespace {
struct BPOrdererMachO;
}
template <> struct lld::BPOrdererTraits<struct BPOrdererMachO> {
using Section = macho::InputSection;
using Defined = macho::Defined;
};
namespace {
struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
static uint64_t getSize(const Section &sec) { return sec.getSize(); }
static bool isCodeSection(const Section &sec) {
return macho::isCodeSection(&sec);
}
static ArrayRef<Defined *> getSymbols(const Section &sec) {
return sec.symbols;
}
// Linkage names can be prefixed with "_" or "l_" on Mach-O. See
// Mangler::getNameWithPrefix() for details.
std::optional<StringRef> static getResolvedLinkageName(llvm::StringRef name) {
if (name.consume_front("_") || name.consume_front("l_"))
return name;
return {};
}
static void
getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes,
const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
constexpr unsigned windowSize = 4;
// Calculate content hashes: k-mers and the last k-1 bytes.
ArrayRef<uint8_t> data = sec.data;
if (data.size() >= windowSize)
for (size_t i = 0; i <= data.size() - windowSize; ++i)
hashes.push_back(llvm::support::endian::read32le(data.data() + i));
for (uint8_t byte : data.take_back(windowSize - 1))
hashes.push_back(byte);
// Calculate relocation hashes
for (const auto &r : sec.relocs) {
uint32_t relocLength = 1 << r.length;
if (r.referent.isNull() || r.offset + relocLength > data.size())
continue;
uint64_t relocHash = getRelocHash(r, sectionToIdx);
uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
for (uint32_t i = start; i < r.offset + relocLength; i++) {
auto window = data.drop_front(i).take_front(windowSize);
hashes.push_back(xxh3_64bits(window) ^ relocHash);
}
}
llvm::sort(hashes);
hashes.erase(llvm::unique(hashes), hashes.end());
}
static llvm::StringRef getSymName(const Defined &sym) {
return sym.getName();
}
static uint64_t getSymValue(const Defined &sym) { return sym.value; }
static uint64_t getSymSize(const Defined &sym) { return sym.size; }
private:
static uint64_t
getRelocHash(const macho::Reloc &reloc,
const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
auto *isec = reloc.getReferentInputSection();
std::optional<uint64_t> sectionIdx;
if (auto it = sectionToIdx.find(isec); it != sectionToIdx.end())
sectionIdx = it->second;
uint64_t kind = -1, value = 0;
if (isec)
kind = uint64_t(isec->kind());
if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
kind = (kind << 8) | uint8_t(sym->kind());
if (auto *d = llvm::dyn_cast<Defined>(sym))
value = d->value;
}
return llvm::stable_hash_combine(kind, sectionIdx.value_or(0), value,
reloc.addend);
}
};
} // namespace
DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
bool compressionSortStartupFunctions, bool verbose) {
// Collect candidate sections and associated symbols.
SmallVector<InputSection *> sections;
DenseMap<CachedHashStringRef, std::set<unsigned>> rootSymbolToSectionIdxs;
for (const auto *file : inputFiles) {
for (auto *sec : file->sections) {
for (auto &subsec : sec->subsections) {
auto *isec = subsec.isec;
if (!isec || isec->data.empty() || !isec->data.data())
continue;
// ConcatInputSections are entirely live or dead, so the offset is
// irrelevant.
if (isa<ConcatInputSection>(isec) && !isec->isLive(0))
continue;
size_t idx = sections.size();
sections.emplace_back(isec);
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
auto rootName = lld::utils::getRootSymbol(sym->getName());
rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
if (auto linkageName =
BPOrdererMachO::getResolvedLinkageName(rootName))
rootSymbolToSectionIdxs[CachedHashStringRef(*linkageName)].insert(
idx);
}
}
}
}
return BPOrdererMachO().computeOrder(profilePath, forFunctionCompression,
forDataCompression,
compressionSortStartupFunctions, verbose,
sections, rootSymbolToSectionIdxs);
}