[lld-macho,BalancedPartition] Simplify relocation hash and avoid xxHash

xxHash64 is inferior to xxh3 and its use is discouraged, so we try to
avoid it in lld.

Switch to read32le for content hash and xxh3/stable_hash_combine for
relocation hash. Remove the intermediate std::string for relocation
hash.

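Change the tail hashing scheme to hash each of the last k-1 bytes
individually instead of hashing truncated trailing windows. This helps
group sections whose tails contain the same bytes in a different order,
such as 0102 and 0201, together. The benefit is negligible, though.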

Pull Request: https://github.com/llvm/llvm-project/pull/121729
This commit is contained in:
Fangrui Song
2025-01-16 09:31:42 -08:00
committed by GitHub
parent 94fee13d42
commit 60e4d24963
2 changed files with 20 additions and 27 deletions

View File

@@ -19,7 +19,10 @@
#include "Symbols.h"
#include "lld/Common/BPSectionOrdererBase.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/xxhash.h"
namespace lld::macho {
@@ -90,23 +93,24 @@ public:
&sectionToIdx) const override {
constexpr unsigned windowSize = 4;
// Calculate content hashes
size_t dataSize = isec->data.size();
for (size_t i = 0; i < dataSize; i++) {
auto window = isec->data.drop_front(i).take_front(windowSize);
hashes.push_back(xxHash64(window));
}
// Calculate content hashes: k-mers and the last k-1 bytes.
ArrayRef<uint8_t> data = isec->data;
if (data.size() >= windowSize)
for (size_t i = 0; i <= data.size() - windowSize; ++i)
hashes.push_back(llvm::support::endian::read32le(data.data() + i));
for (uint8_t byte : data.take_back(windowSize - 1))
hashes.push_back(byte);
// Calculate relocation hashes
for (const auto &r : isec->relocs) {
if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
if (r.length == 0 || r.referent.isNull() || r.offset >= data.size())
continue;
uint64_t relocHash = getRelocHash(r, sectionToIdx);
uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
for (uint32_t i = start; i < r.offset + r.length; i++) {
auto window = isec->data.drop_front(i).take_front(windowSize);
hashes.push_back(xxHash64(window) + relocHash);
auto window = data.drop_front(i).take_front(windowSize);
hashes.push_back(xxh3_64bits(window) ^ relocHash);
}
}
@@ -124,19 +128,17 @@ private:
std::optional<uint64_t> sectionIdx;
if (auto it = sectionToIdx.find(isec); it != sectionToIdx.end())
sectionIdx = it->second;
std::string kind;
uint64_t kind = -1, value = 0;
if (isec)
kind = ("Section " + Twine(isec->kind())).str();
kind = uint64_t(isec->kind());
if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
kind += (" Symbol " + Twine(sym->kind())).str();
if (auto *d = llvm::dyn_cast<Defined>(sym)) {
return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0),
d->value, reloc.addend);
}
kind = (kind << 8) | uint8_t(sym->kind());
if (auto *d = llvm::dyn_cast<Defined>(sym))
value = d->value;
}
return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0), 0,
reloc.addend);
return llvm::stable_hash_combine(kind, sectionIdx.value_or(0), value,
reloc.addend);
}
};

View File

@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/xxhash.h"
#include <memory>
#include <optional>
@@ -56,14 +55,6 @@ public:
return P1;
}
static uint64_t getRelocHash(llvm::StringRef kind, uint64_t sectionIdx,
uint64_t offset, uint64_t addend) {
return llvm::xxHash64((kind + ": " + llvm::Twine::utohexstr(sectionIdx) +
" + " + llvm::Twine::utohexstr(offset) + " + " +
llvm::Twine::utohexstr(addend))
.str());
}
/// Reorders sections using balanced partitioning algorithm based on profile
/// data.
static llvm::DenseMap<const BPSectionBase *, int>