Files
llvm/bolt/lib/Core/HashUtilities.cpp
spupyrev 3e3a926be8 [BOLT][NFC] Add hash computation for basic blocks
Extending yaml profile format with block hashes, which are used for stale
profile matching. To avoid duplication of the code, created a new class with a
collection of utilities for computing hashes.

Reviewed By: Amir

Differential Revision: https://reviews.llvm.org/D144306
2023-05-02 14:03:47 -07:00

136 lines
3.9 KiB
C++

//===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Computation of hash values over BinaryFunction and BinaryBasicBlock.
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
namespace llvm {
namespace bolt {
/// Fold a 64-bit hash into 16 bits.
///
/// The result is the XOR of the four 16-bit lanes of \p Hash, so every input
/// bit influences exactly one output bit.
///
/// \param Hash the 64-bit value to compress.
/// \returns the 16-bit XOR-fold of \p Hash.
uint16_t hash_64_to_16(const uint64_t Hash) {
  // First fold the upper 32 bits onto the lower 32 bits, then fold the
  // resulting upper 16 bits onto the lower 16 bits. After both steps the low
  // lane holds lane0 ^ lane1 ^ lane2 ^ lane3.
  uint64_t Folded = Hash ^ (Hash >> 32);
  Folded ^= Folded >> 16;
  return static_cast<uint16_t>(Folded & 0xFFFF);
}
/// Encode an integer as a byte string suitable for appending to a hash input.
///
/// Bytes are emitted least-significant first and emission stops once the
/// remaining value is zero, so high-order zero bytes are not emitted. Zero
/// itself is encoded as a single 0 byte.
///
/// \param Value the integer to encode.
/// \returns a string of 1 to 8 bytes holding the encoding of \p Value.
std::string hashInteger(uint64_t Value) {
  std::string Bytes;
  // do/while always emits at least one byte, which covers Value == 0.
  do {
    const uint8_t LowByte = static_cast<uint8_t>(Value & 0xff);
    Bytes.push_back(static_cast<char>(LowByte));
    Value >>= 8;
  } while (Value != 0);
  return Bytes;
}
/// Produce the hash-input string for a symbol reference.
///
/// The result is empty when the symbol maps to a function, when its value
/// cannot be obtained from \p BC, or when that value lies inside a jump table.
/// Otherwise it is the hashInteger() encoding of the symbol's value.
///
/// \param BC the binary context used to resolve the symbol.
/// \param Symbol the symbol being referenced.
/// \returns the encoded symbol value, or an empty string if it is ignored.
std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
  // Ignore function references.
  if (BC.getFunctionForSymbol(&Symbol))
    return std::string();

  // A symbol whose value is unavailable contributes nothing.
  llvm::ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(Symbol);
  if (!SymbolValue)
    return std::string();

  // Ignore jump table references.
  if (BC.getJumpTableContainingAddress(*SymbolValue))
    return std::string();

  return hashInteger(*SymbolValue);
}
/// Produce the hash-input string for an MC expression by recursing over it.
///
/// - Constant: the hashInteger() encoding of the constant.
/// - SymbolRef: the hashSymbol() encoding of the referenced symbol.
/// - Unary: the encoded opcode followed by the encoded sub-expression.
/// - Binary: the encoded LHS, then the encoded opcode, then the encoded RHS.
/// - Target: an empty string.
///
/// \param BC the binary context used to resolve symbols.
/// \param Expr the expression to encode.
/// \returns the concatenated encoding of \p Expr.
std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
  switch (Expr.getKind()) {
  case MCExpr::Constant:
    return hashInteger(cast<MCConstantExpr>(Expr).getValue());

  case MCExpr::SymbolRef:
    return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());

  case MCExpr::Unary: {
    const auto &Unary = cast<MCUnaryExpr>(Expr);
    // Opcode first, operand second.
    std::string Encoded = hashInteger(Unary.getOpcode());
    Encoded += hashExpr(BC, *Unary.getSubExpr());
    return Encoded;
  }

  case MCExpr::Binary: {
    const auto &Binary = cast<MCBinaryExpr>(Expr);
    // In-order: left operand, opcode, right operand.
    std::string Encoded = hashExpr(BC, *Binary.getLHS());
    Encoded += hashInteger(Binary.getOpcode());
    Encoded += hashExpr(BC, *Binary.getRHS());
    return Encoded;
  }

  case MCExpr::Target:
    // Target-specific expressions contribute nothing to the hash.
    return std::string();
  }

  llvm_unreachable("invalid expression kind");
}
/// Produce the hash-input string for a single instruction operand.
///
/// Immediates and registers are encoded with hashInteger(), expressions with
/// hashExpr(). Any other operand kind yields an empty string.
///
/// \param BC the binary context, forwarded to hashExpr().
/// \param Operand the operand to encode.
/// \returns the encoding of \p Operand, possibly empty.
std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
  std::string Encoded;
  if (Operand.isImm())
    Encoded = hashInteger(Operand.getImm());
  else if (Operand.isReg())
    Encoded = hashInteger(Operand.getReg());
  else if (Operand.isExpr())
    Encoded = hashExpr(BC, *Operand.getExpr());
  return Encoded;
}
/// Build the hash-input string for a basic block.
///
/// The string is the concatenation, over the block's instructions, of each
/// instruction's encoded opcode followed by whatever \p OperandHashFunc
/// returns for each of its prime operands. Pseudo instructions and
/// unconditional branches are skipped. This function only builds the string;
/// any hashing of it is left to the caller.
///
/// \param BC the binary context providing the instruction analysis (MIB).
/// \param BB the basic block to encode.
/// \param OperandHashFunc callback producing the encoding of one operand.
/// \returns the concatenated encoding of \p BB.
std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
                      OperandHashFuncTy OperandHashFunc) {
  const bool IsX86 = BC.isX86();
  std::string Encoded;

  for (const MCInst &Inst : BB) {
    // Skip pseudo instructions. Also ignore unconditional jumps since we
    // check CFG consistency by processing basic blocks in order and do not
    // rely on branches to be in-sync with CFG. Note that we still use
    // condition code of conditional jumps.
    if (BC.MIB->isPseudo(Inst) || BC.MIB->isUnconditionalBranch(Inst))
      continue;

    unsigned Opcode = Inst.getOpcode();
    // On x86, conditional branches are mapped to their short-branch opcode
    // before being encoded.
    if (IsX86 && BC.MIB->isConditionalBranch(Inst))
      Opcode = BC.MIB->getShortBranchOpcode(Opcode);

    // Emit the opcode least-significant byte first, stopping once the
    // remainder is zero; do/while emits a single 0 byte for opcode 0.
    do {
      const uint8_t LowByte = Opcode & 0xff;
      Encoded.push_back(LowByte);
      Opcode >>= 8;
    } while (Opcode != 0);

    for (const MCOperand &Op : MCPlus::primeOperands(Inst))
      Encoded += OperandHashFunc(Op);
  }

  return Encoded;
}
} // namespace bolt
} // namespace llvm