Files
llvm/bolt/src/BinaryContext.cpp
laith sakka 744a2417dd Run findSubprograms in preprocessDebugInfo in parallel
Summary:
While reading debug info, the function findSubprograms
runs on each compilation unit. This diff parallelizes that loop,
reducing its runtime duration by 70%.

(cherry picked from FBD16362867)
2019-07-17 20:54:53 -07:00

1775 lines
62 KiB
C++

//===--- BinaryContext.cpp - Interface for machine-level context ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "BinaryContext.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include "ParallelUtilities.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include <iterator>
using namespace llvm;
using namespace bolt;
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"
// Command-line options used by this file. The `extern` declarations refer to
// options that are defined outside of this file.
namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::opt<bool> AggregateOnly;
extern cl::opt<bool> StrictMode;
extern cl::opt<unsigned> Verbosity;
// -no-huge-pages: read by the BinaryContext constructor to select the page
// alignment. Not declared static, unlike some of the options below.
cl::opt<bool>
NoHugePages("no-huge-pages",
cl::desc("use regular size pages for code alignment"),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltCategory));
// -print-debug-info: file-local (static) hidden option.
static cl::opt<bool>
PrintDebugInfo("print-debug-info",
cl::desc("print debug info when printing functions"),
cl::Hidden,
cl::ZeroOrMore,
cl::cat(BoltCategory));
// -print-relocations: hidden option, not declared static.
cl::opt<bool>
PrintRelocations("print-relocations",
cl::desc("print relocations when printing functions/objects"),
cl::Hidden,
cl::ZeroOrMore,
cl::cat(BoltCategory));
// -print-mem-data: file-local (static) hidden option.
static cl::opt<bool>
PrintMemData("print-mem-data",
cl::desc("print memory data annotations when printing functions"),
cl::Hidden,
cl::ZeroOrMore,
cl::cat(BoltCategory));
} // namespace opts
/// Build a context from the MC-layer and DWARF objects describing the input
/// binary.
///
/// Every object passed as a std::unique_ptr is moved into the corresponding
/// member. \p TheTarget is stored as a plain pointer and \p DR as a reference.
/// The constructor also records the target architecture in Relocation::Arch
/// and picks the page alignment according to the -no-huge-pages option.
BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
                             std::unique_ptr<DWARFContext> DwCtx,
                             std::unique_ptr<Triple> TheTriple,
                             const Target *TheTarget,
                             std::string TripleName,
                             std::unique_ptr<MCCodeEmitter> MCE,
                             std::unique_ptr<MCObjectFileInfo> MOFI,
                             std::unique_ptr<const MCAsmInfo> AsmInfo,
                             std::unique_ptr<const MCInstrInfo> MII,
                             std::unique_ptr<const MCSubtargetInfo> STI,
                             std::unique_ptr<MCInstPrinter> InstPrinter,
                             std::unique_ptr<const MCInstrAnalysis> MIA,
                             std::unique_ptr<MCPlusBuilder> MIB,
                             std::unique_ptr<const MCRegisterInfo> MRI,
                             std::unique_ptr<MCDisassembler> DisAsm,
                             DataReader &DR)
    : Ctx(std::move(Ctx)),
      DwCtx(std::move(DwCtx)),
      TheTriple(std::move(TheTriple)),
      TheTarget(TheTarget),
      // The parameter is taken by value, so move it instead of copying again.
      TripleName(std::move(TripleName)),
      MCE(std::move(MCE)),
      MOFI(std::move(MOFI)),
      AsmInfo(std::move(AsmInfo)),
      MII(std::move(MII)),
      STI(std::move(STI)),
      InstPrinter(std::move(InstPrinter)),
      MIA(std::move(MIA)),
      MIB(std::move(MIB)),
      MRI(std::move(MRI)),
      DisAsm(std::move(DisAsm)),
      DR(DR) {
  // The parameter `TheTriple` has been moved from; use the member instead.
  Relocation::Arch = this->TheTriple->getArch();
  PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
}
/// Free the objects the context holds through raw pointers: sections,
/// injected functions and jump tables, followed by all binary data.
BinaryContext::~BinaryContext() {
  for (auto *OwnedSection : Sections)
    delete OwnedSection;

  for (auto *Injected : InjectedBinaryFunctions)
    delete Injected;

  // JumpTables maps an ID to the table; only the mapped pointer is deleted.
  for (auto &IdAndTable : JumpTables)
    delete IdAndTable.second;

  clearBinaryData();
}
/// Create an object writer that emits into \p OS. The assembler backend is
/// created on first use and cached in MAB for later calls.
std::unique_ptr<MCObjectWriter>
BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
  if (!MAB)
    MAB.reset(TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()));

  return MAB->createObjectWriter(OS);
}
/// Check that every object lying inside the range of another object of the
/// same section has that enclosing object recorded as its Parent. Prints a
/// warning for each violation and returns false if any was found.
bool BinaryContext::validateObjectNesting() const {
  bool NestingIsValid = true;
  const auto MapEnd = BinaryDataMap.end();

  auto Outer = BinaryDataMap.begin();
  while (Outer != MapEnd) {
    auto *Container = Outer->second;

    // Walk the run of entries that follow Outer and fall inside its range.
    auto Inner = std::next(Outer);
    for (; Inner != MapEnd; ++Inner) {
      auto *Candidate = Inner->second;
      const bool IsNested =
          Container->getSection() == Candidate->getSection() &&
          Container->containsRange(Candidate->getAddress(),
                                   Candidate->getSize());
      if (!IsNested)
        break;

      if (Candidate->Parent != Container) {
        errs() << "BOLT-WARNING: object nesting incorrect for:\n"
               << "BOLT-WARNING: " << *Container << "\n"
               << "BOLT-WARNING: " << *Candidate << "\n";
        NestingIsValid = false;
      }
    }

    // Resume from the first entry that was not nested inside Outer.
    Outer = Inner;
  }

  return NestingIsValid;
}
bool BinaryContext::validateHoles() const {
bool Valid = true;
for (auto &Section : sections()) {
for (const auto &Rel : Section.relocations()) {
auto RelAddr = Rel.Offset + Section.getAddress();
auto *BD = getBinaryDataContainingAddress(RelAddr);
if (!BD) {
errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
<< " 0x" << Twine::utohexstr(RelAddr) << " in "
<< Section.getName() << "\n";
Valid = false;
} else if (!BD->getAtomicRoot()) {
errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
<< "address 0x" << Twine::utohexstr(RelAddr) << " in "
<< Section.getName() << "\n";
Valid = false;
}
}
}
return Valid;
}
/// Recompute Parent links around the map entry \p GAI after it was inserted
/// or changed.
///
/// First the entry's own parent is derived from the preceding entry and that
/// entry's chain of parents; then, if the entry has a non-zero size, following
/// entries that fall inside the enclosing range are re-parented.
void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
const auto Address = GAI->second->getAddress();
const auto Size = GAI->second->getSize();
// Set the parent of the entry at Itr to NewParent, then give the same new
// parent to the directly following entries that shared the old (non-null)
// parent.
auto fixParents =
[&](BinaryDataMapType::iterator Itr, BinaryData *NewParent) {
auto *OldParent = Itr->second->Parent;
Itr->second->Parent = NewParent;
++Itr;
while (Itr != BinaryDataMap.end() && OldParent &&
Itr->second->Parent == OldParent) {
Itr->second->Parent = NewParent;
++Itr;
}
};
// Check if the previous symbol contains the newly added symbol.
if (GAI != BinaryDataMap.begin()) {
auto *Prev = std::prev(GAI)->second;
// Visit the previous entry and then each of its ancestors in turn. Every
// visited object either becomes the parent (same section and containing
// range) or resets the parent to nullptr, so the last object visited decides
// the final value.
while (Prev) {
if (Prev->getSection() == GAI->second->getSection() &&
Prev->containsRange(Address, Size)) {
fixParents(GAI, Prev);
} else {
fixParents(GAI, nullptr);
}
Prev = Prev->Parent;
}
}
// Check if the newly added symbol contains any subsequent symbols.
if (Size != 0) {
// Use the entry's parent as the enclosing object when it has one.
auto *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
auto Itr = std::next(GAI);
while (Itr != BinaryDataMap.end() &&
BD->containsRange(Itr->second->getAddress(),
Itr->second->getSize())) {
Itr->second->Parent = BD;
++Itr;
}
}
}
/// Return the range of map entries that directly follow \p BD and for which
/// \p BD is an ancestor. The range starts at the entry after the one found at
/// BD's address and stops at the first entry BD is not an ancestor of.
iterator_range<BinaryContext::binary_data_iterator>
BinaryContext::getSubBinaryData(BinaryData *BD) {
  auto First = std::next(BinaryDataMap.find(BD->getAddress()));

  auto Last = First;
  for (; Last != BinaryDataMap.end(); ++Last) {
    if (!BD->isAncestorOf(Last->second))
      break;
  }

  return make_range(First, Last);
}
/// Turn a reference from \p BF to \p Address into a (symbol, addend) pair.
///
/// The checks are tried in this order:
///  - on AArch64, constant island accesses (own island, then a proxy for an
///    island found through AddressToConstantIslandMap);
///  - addresses in a text section: an address inside \p BF other than its
///    start becomes a new entry point; one outside \p BF is recorded in
///    InterproceduralReferences and processing continues;
///  - in strict mode, a PC-relative reference to memory classified as a
///    possible PIC jump table creates (or reuses) a jump table;
///  - an existing BinaryData object containing the address, with the offset
///    into that object as the addend;
///  - otherwise a new "DATAat" global symbol with a zero addend.
std::pair<const MCSymbol *, uint64_t>
BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
bool IsPCRel) {
uint64_t Addend{0};
if (isAArch64()) {
// Check if this is an access to a constant island and create bookkeeping
// to keep track of it and emit it later as part of this function.
if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
return std::make_pair(IslandSym, Addend);
// Detect custom code written in assembly that refers to arbitrary
// constant islands from other functions. Write this reference so we
// can pull this constant island and emit it as part of this function
// too.
auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
if (IslandIter != AddressToConstantIslandMap.end()) {
if (auto *IslandSym =
IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
/// Make this function depend on IslandIter->second because we have
/// a reference to its constant island. When emitting this function,
/// we will also emit IslandIter->second's constants. This only
/// happens in custom AArch64 assembly code.
BF.IslandDependency.insert(IslandIter->second);
BF.ProxyIslandSymbols[IslandSym] = IslandIter->second;
return std::make_pair(IslandSym, Addend);
}
}
}
// Note that the address does not necessarily have to reside inside
// a section, it could be an absolute address too.
auto Section = getSectionForAddress(Address);
if (Section && Section->isText()) {
if (BF.containsAddress(Address, /*UseMaxSize=*/ isAArch64())) {
// A reference to the function's own start address is not handled here and
// falls through to the generic lookup below.
if (Address != BF.getAddress()) {
// The address could potentially escape. Mark it as another entry
// point into the function.
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: potentially escaped address 0x"
<< Twine::utohexstr(Address) << " in function "
<< BF << '\n';
}
BF.HasInternalLabelReference = true;
return std::make_pair(
BF.addEntryPointAtOffset(Address - BF.getAddress()),
Addend);
}
} else {
// Code address outside of BF: remember it; it is resolved later by
// processInterproceduralReferences().
InterproceduralReferences.insert(std::make_pair(&BF, Address));
}
}
const auto MemType = analyzeMemoryAt(Address, BF);
// FIXME: this is too permissive in creating jump tables. This is a random
// memory access we did not necessarily match against an indirect jump. Only
// do this for strict mode, for now. We should revisit this and come up with a
// better heuristic.
if (opts::StrictMode &&
MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
const MCSymbol *Symbol =
getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
return std::make_pair(Symbol, Addend);
}
// Prefer an existing object that covers the address; the addend is the
// offset of the address inside that object.
if (auto *BD = getBinaryDataContainingAddress(Address)) {
return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
}
// TODO: use DWARF info to get size/alignment here?
auto *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName());
return std::make_pair(TargetSymbol, Addend);
}
/// Classify the memory at \p Address as a possible jump table of \p BF.
///
/// Returns UNKNOWN on non-x86 targets, when no section contains the address,
/// and for virtual or text sections. Otherwise the contents are checked first
/// as a PIC jump table and then as a regular one; UNKNOWN is returned when
/// neither check succeeds.
MemoryContentsType
BinaryContext::analyzeMemoryAt(uint64_t Address, BinaryFunction &BF) {
if (!isX86())
return MemoryContentsType::UNKNOWN;
auto Section = getSectionForAddress(Address);
if (!Section) {
// No section - possibly an absolute address. Since we don't allow
// internal function addresses to escape the function scope - we
// consider it a tail call.
if (opts::Verbosity > 1) {
errs() << "BOLT-WARNING: no section for address 0x"
<< Twine::utohexstr(Address) << " referenced from function "
<< BF << '\n';
}
return MemoryContentsType::UNKNOWN;
}
if (Section->isVirtual()) {
// The contents are filled at runtime.
return MemoryContentsType::UNKNOWN;
}
// No support for jump tables in code yet.
if (Section->isText())
return MemoryContentsType::UNKNOWN;
// Returns true when the first entry read at JTAddress that is not the
// "one past the end of the function" value points inside BF (but not at its
// start). Entries are 4 bytes for PIC tables and pointer-sized otherwise.
auto couldBeJumpTable = [&](const uint64_t JTAddress,
JumpTable::JumpTableType Type) {
const auto EntrySize =
Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
auto ValueAddress = JTAddress;
// Scan up to the end of the section, or up to the end of the sized data
// object that starts at JTAddress when there is one.
auto UpperBound = Section->getEndAddress();
const auto *JumpTableBD = getBinaryDataAtAddress(JTAddress);
if (JumpTableBD && JumpTableBD->getSize()) {
UpperBound = JumpTableBD->getEndAddress();
assert(UpperBound <= Section->getEndAddress() &&
"data object cannot cross a section boundary");
}
while (ValueAddress <= UpperBound - EntrySize) {
DEBUG(dbgs() << "BOLT-DEBUG: analyzing memory at 0x"
<< Twine::utohexstr(ValueAddress));
uint64_t Value;
// PIC entries are signed offsets relative to the table address; regular
// entries are absolute pointers. The read results are dereferenced without
// checking for a failed read.
if (Type == JumpTable::JTT_PIC) {
Value = JTAddress + *getSignedValueAtAddress(ValueAddress, EntrySize);
} else {
Value = *getPointerAtAddress(ValueAddress);
}
DEBUG(dbgs() << ", which contains value 0x"
<< Twine::utohexstr(Value) << '\n');
ValueAddress += EntrySize;
// We assume that a jump table cannot have function start as an entry.
if (BF.containsAddress(Value) && Value != BF.getAddress())
return true;
// Potentially a jump table can contain __builtin_unreachable() entry
// pointing just right after the function. In this case we have to check
// another entry. Otherwise the entry is outside of this function scope
// and it's not a jump table.
if (Value == BF.getAddress() + BF.getSize())
continue;
return false;
}
return false;
};
// Start with checking for PIC jump table. We expect non-PIC jump tables
// to have high 32 bits set to 0.
if (couldBeJumpTable(Address, JumpTable::JTT_PIC))
return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
if (couldBeJumpTable(Address, JumpTable::JTT_NORMAL))
return MemoryContentsType::POSSIBLE_JUMP_TABLE;
return MemoryContentsType::UNKNOWN;
}
/// Fill in OffsetEntries for every jump table registered in JumpTables by
/// reading the table contents from the input binary.
///
/// Reading of a table stops at the first entry that does not point at an
/// instruction inside the parent function. In strict mode each entry must also
/// be backed by a relocation. PCRelocation is cleared at the end.
void BinaryContext::populateJumpTables() {
for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
++JTI) {
auto *JT = JTI->second;
auto &BF = *JT->Parent;
DEBUG(dbgs() << "BOLT-DEBUG: populating jump table "
<< JT->getName() << '\n');
// The upper bound is defined by containing object, section limits, and
// the next jump table in memory.
auto UpperBound = JT->getSection().getEndAddress();
const auto *JumpTableBD = getBinaryDataAtAddress(JT->getAddress());
if (JumpTableBD && JumpTableBD->getSize()) {
assert(JumpTableBD->getEndAddress() <= UpperBound &&
"data object cannot cross a section boundary");
UpperBound = JumpTableBD->getEndAddress();
}
auto NextJTI = std::next(JTI);
if (NextJTI != JTE) {
assert (UpperBound != JT->getAddress());
UpperBound = std::min(NextJTI->second->getAddress(), UpperBound);
}
// Read entries one by one until the upper bound or the first entry that
// cannot belong to this table.
for (auto EntryAddress = JT->getAddress();
EntryAddress <= UpperBound - JT->EntrySize;
EntryAddress += JT->EntrySize) {
uint64_t Value;
// PIC entries are signed offsets from the table address; regular entries
// are absolute pointers.
if (JT->Type == JumpTable::JTT_PIC) {
Value = JT->getAddress() +
*getSignedValueAtAddress(EntryAddress, JT->EntrySize);
} else {
Value = *getPointerAtAddress(EntryAddress);
}
// __builtin_unreachable() case.
if (Value == BF.getAddress() + BF.getSize()) {
JT->OffsetEntries.emplace_back(Value - BF.getAddress());
BF.IgnoredBranches.emplace_back(Value - BF.getAddress(), BF.getSize());
continue;
}
// We assume that a jump table cannot have function start as an entry.
if (!BF.containsAddress(Value) || Value == BF.getAddress())
break;
// Check there's an instruction at this offset.
if (!BF.getInstructionAtOffset(Value - BF.getAddress()))
break;
BF.registerReferencedOffset(Value - BF.getAddress());
JT->OffsetEntries.emplace_back(Value - BF.getAddress());
}
// Dump diagnostics before the assertion below fires. Note this output is
// not wrapped in DEBUG().
if (JT->OffsetEntries.size() <= 1) {
dbgs() << "JT with size " << JT->OffsetEntries.size() << " detected in "
<< BF << '\n';
JT->print(dbgs());
if (NextJTI != JTE) {
dbgs() << "next jump table at 0x"
<< Twine::utohexstr(NextJTI->second->getAddress())
<< " belongs to function " << *NextJTI->second->Parent << '\n';
NextJTI->second->print(dbgs());
}
}
assert(JT->OffsetEntries.size() > 1 &&
"expected more than one jump table entry");
// Check there are relocations against JT entries.
if (opts::StrictMode) {
for (auto Address = JT->getAddress();
Address < JT->getAddress() + JT->getSize();
Address += JT->EntrySize) {
if (JT->Type == JumpTable::JTT_PIC) {
// Claim the PC-relative relocation so that leftovers can be detected once
// all tables are processed.
assert(PCRelocation.count(Address) && "no matching relocation");
PCRelocation.erase(PCRelocation.find(Address));
} else {
assert(getRelocationAt(Address) && "missing relocation");
}
}
}
}
assert((!opts::StrictMode || !PCRelocation.size()) &&
"unclaimed PC-relative relocations left in data\n");
clearList(PCRelocation);
}
/// Return the symbol of the object registered at \p Address. If there is
/// none, register a new object named "<Prefix>0x<hex address>" with the given
/// \p Size, \p Alignment and \p Flags and return its symbol.
MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
                                                 Twine Prefix,
                                                 uint64_t Size,
                                                 uint16_t Alignment,
                                                 unsigned Flags) {
  const auto Existing = BinaryDataMap.find(Address);
  if (Existing == BinaryDataMap.end()) {
    const std::string NewName =
        (Prefix + "0x" + Twine::utohexstr(Address)).str();
    assert(!GlobalSymbols.count(NewName) && "created name is not unique");
    return registerNameAtAddress(NewName, Address, Size, Alignment, Flags);
  }

  // An object already lives here: a non-zero requested size has to agree.
  assert(Existing->second->getSize() == Size || !Size);
  return Existing->second->getSymbol();
}
/// Create a BinaryFunction at \p Address, store it in BinaryFunctions and
/// register its name at that address. The registered object size is
/// \p SymbolSize when it is non-zero and \p Size otherwise. Returns a pointer
/// to the function stored in the map.
BinaryFunction *BinaryContext::createBinaryFunction(
    const std::string &Name, BinarySection &Section, uint64_t Address,
    uint64_t Size, bool IsSimple, uint64_t SymbolSize, uint16_t Alignment) {
  auto Inserted = BinaryFunctions.emplace(
      Address, BinaryFunction(Name, Section, Address, Size, *this, IsSimple));
  assert(Inserted.second == true && "unexpected duplicate function");

  BinaryFunction &NewFunction = Inserted.first->second;

  const uint64_t RegisteredSize = SymbolSize ? SymbolSize : Size;
  registerNameAtAddress(Name, Address, RegisteredSize, Alignment);

  setSymbolToFunctionMap(NewFunction.getSymbol(), &NewFunction);
  return &NewFunction;
}
/// Return the label of the jump table at \p Address, creating the table for
/// \p Function when no registered jump table contains that address.
///
/// When a table already contains the address, it must have the same \p Type,
/// belong to \p Function and start exactly at \p Address (checked by
/// assertions); its first label is returned. Otherwise a new JumpTable is
/// allocated and recorded both in the context and in \p Function.
const MCSymbol *
BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
                                    JumpTable::JumpTableType Type) {
  if (auto *JT = getJumpTableContainingAddress(Address)) {
    assert(JT->Type == Type && "jump table types have to match");
    assert(JT->Parent == &Function &&
           "cannot re-use jump table of a different function");
    assert(Address == JT->getAddress() && "unexpected non-empty jump table");
    return JT->getFirstLabel();
  }

  // PIC tables hold 4-byte entries; regular tables hold code pointers.
  const auto EntrySize =
      Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();

  // Re-use the existing symbol if possible.
  MCSymbol *JTLabel{nullptr};
  if (auto *Object = getBinaryDataAtAddress(Address)) {
    if (!isInternalSymbolName(Object->getSymbol()->getName()))
      JTLabel = Object->getSymbol();
  }

  // No usable symbol at this address: generate a name and register it.
  if (!JTLabel) {
    const auto JumpTableName = generateJumpTableName(Function, Address);
    JTLabel = Ctx->getOrCreateSymbol(JumpTableName);
    registerNameAtAddress(JTLabel->getName(), Address, 0, EntrySize);
  }

  // The message used to end with the character 'n' instead of a newline.
  DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "
               << JTLabel->getName()
               << " in function " << Function << '\n');

  auto *JT = new JumpTable(JTLabel->getName(),
                           Address,
                           EntrySize,
                           Type,
                           {},
                           JumpTable::LabelMapType{{0, JTLabel}},
                           Function,
                           *getSectionForAddress(Address));
  JumpTables.emplace(Address, JT);

  // Duplicate the entry for the parent function for easy access.
  Function.JumpTables.emplace(Address, JT);

  return JTLabel;
}
/// Create a copy of \p JT for \p Function under a fresh temporary label.
///
/// \p OldLabel must be one of JT's labels; the new label is placed at the
/// same offset. Entries and counts are copied from \p JT. Returns the ID the
/// copy is registered under together with the new label.
std::pair<uint64_t, const MCSymbol *>
BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
                                  const MCSymbol *OldLabel) {
  // Locate the offset that OldLabel is attached to.
  unsigned LabelOffset = 0;
  bool LabelFound = false;
  for (auto OffsetAndLabel : JT->Labels) {
    if (OffsetAndLabel.second == OldLabel) {
      LabelOffset = OffsetAndLabel.first;
      LabelFound = true;
      break;
    }
  }
  assert(LabelFound && "Label not found");

  auto *NewLabel = Ctx->createTempSymbol("duplicatedJT", true);
  auto *Copy = new JumpTable(NewLabel->getName(),
                             JT->getAddress(),
                             JT->EntrySize,
                             JT->Type,
                             {},
                             JumpTable::LabelMapType{{LabelOffset, NewLabel}},
                             Function,
                             *getSectionForAddress(JT->getAddress()));
  Copy->Entries = JT->Entries;
  Copy->Counts = JT->Counts;

  // Invert the running counter to differentiate the ID from those of regular
  // jump tables, whose IDs are their addresses in the input binary memory
  // space.
  const uint64_t JumpTableID =
      ~static_cast<uint64_t>(++DuplicatedJumpTables);

  JumpTables.emplace(JumpTableID, Copy);
  Function.JumpTables.emplace(JumpTableID, Copy);
  return std::make_pair(JumpTableID, NewLabel);
}
/// Build the name for a jump table label at \p Address in \p BF.
///
/// If \p Address falls inside one of BF's jump tables and a label already
/// exists at that offset, that label's name is returned. Otherwise the name
/// is "JUMP_TABLE/<first name of BF>.<id>", followed by ".<offset>" when the
/// offset into the table is non-zero. A new ID is assigned and recorded in
/// JumpTableIds when the address is not inside an existing table.
std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
                                                 uint64_t Address) {
  size_t TableId;
  uint64_t OffsetInTable = 0;

  const auto *Containing = BF.getJumpTableContainingAddress(Address);
  if (!Containing) {
    TableId = JumpTableIds[Address] = BF.JumpTables.size();
  } else {
    OffsetInTable = Address - Containing->getAddress();
    auto LabelIt = Containing->Labels.find(OffsetInTable);
    if (LabelIt != Containing->Labels.end())
      return LabelIt->second->getName();
    TableId = JumpTableIds.at(Containing->getAddress());
  }

  std::string Result =
      "JUMP_TABLE/" + BF.Names[0] + "." + std::to_string(TableId);
  if (OffsetInTable)
    Result += "." + std::to_string(OffsetInTable);
  return Result;
}
/// Register \p Name at \p Address and return its symbol.
///
/// When no object exists at the address yet, a new BinaryData is allocated
/// with the given \p Size, \p Alignment (1 is used when 0 is passed) and
/// \p Flags, in the section containing the address or in the absolute section
/// when no section contains it. Otherwise the existing object is reused.
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name,
                                               uint64_t Address,
                                               uint64_t Size,
                                               uint16_t Alignment,
                                               unsigned Flags) {
  auto MaybeSection = getSectionForAddress(Address);
  auto &OwningSection =
      MaybeSection ? MaybeSection.get() : absoluteSection();

  auto Existing = BinaryDataMap.find(Address);
  BinaryData *Data =
      Existing != BinaryDataMap.end()
          ? Existing->second
          : new BinaryData(Name,
                           Address,
                           Size,
                           Alignment ? Alignment : 1,
                           OwningSection,
                           Flags);

  return registerNameAtAddress(Name, Address, Data);
}
/// Register \p Name for the object \p BD at \p Address and return the
/// MCSymbol created (or found) for the name.
///
/// Four cases are handled:
///  - nothing is registered at the address: \p BD is inserted;
///  - a different object is registered: it is merged into \p BD, deleted and
///    replaced by \p BD in the map;
///  - \p BD itself is registered but lacks the name: the name is appended;
///  - \p BD itself is registered and already has the name: nothing to do.
/// The symbol is appended to the object's symbol list only in the first and
/// third cases.
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name,
                                               uint64_t Address,
                                               BinaryData *BD) {
  auto Slot = BinaryDataMap.find(Address);
  if (Slot == BinaryDataMap.end()) {
    Slot = BinaryDataMap.emplace(Address, BD).first;
    GlobalSymbols[Name] = BD;
    updateObjectNesting(Slot);
  } else if (BD != Slot->second) {
    // Note: this could be a source of bugs if client code holds
    // on to BinaryData*'s in data structures for any length of time.
    auto *Replaced = Slot->second;
    BD->merge(Replaced);
    delete Replaced;
    Slot->second = BD;
    for (auto &MergedName : BD->names())
      GlobalSymbols[MergedName] = BD;
    updateObjectNesting(Slot);
    // Do not record the symbol on the object below.
    BD = nullptr;
  } else if (!BD->hasName(Name)) {
    // BD is the registered object; just add the new alias.
    BD->Names.push_back(Name);
    GlobalSymbols[Name] = BD;
  } else {
    BD = nullptr;
  }

  // Register the name with MCContext.
  auto *Symbol = Ctx->getOrCreateSymbol(Name);
  if (!BD)
    return Symbol;

  BD->Symbols.push_back(Symbol);
  assert(BD->Symbols.size() == BD->Names.size() &&
         "there should be a 1:1 mapping between names and symbols");
  return Symbol;
}
/// Find the BinaryData object that covers \p Address.
///
/// With \p IncludeEnd set, an object whose end address equals \p Address also
/// counts as a match. With \p BestFit set, the search moves on from the first
/// matching entry to following map entries for as long as the next entry also
/// matches, and returns the last of them. Returns nullptr when neither the
/// candidate entry nor any of its parents matches.
const BinaryData *
BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address,
bool IncludeEnd,
bool BestFit) const {
auto NI = BinaryDataMap.lower_bound(Address);
auto End = BinaryDataMap.end();
// Pick the candidate entry: keep NI when it starts exactly at Address and
// IncludeEnd is off; otherwise step back to the preceding entry. The
// post-decrement in the second operand runs whenever the first operand is
// false, also when NI is already begin() (the body is skipped in that case).
if ((NI != End && Address == NI->first && !IncludeEnd) ||
(NI-- != BinaryDataMap.begin())) {
if (NI->second->containsAddress(Address) ||
(IncludeEnd && NI->second->getEndAddress() == Address)) {
while (BestFit &&
std::next(NI) != End &&
(std::next(NI)->second->containsAddress(Address) ||
(IncludeEnd && std::next(NI)->second->getEndAddress() == Address))) {
++NI;
}
return NI->second;
}
// If this is a sub-symbol, see if a parent data contains the address.
auto *BD = NI->second->getParent();
while (BD) {
// NOTE(review): the IncludeEnd test below looks at NI->second, not BD. The
// same test on NI->second was already false above, so this operand appears
// never to be true here; it may have been meant to use BD->getEndAddress()
// - confirm.
if (BD->containsAddress(Address) ||
(IncludeEnd && NI->second->getEndAddress() == Address))
return BD;
BD = BD->getParent();
}
}
return nullptr;
}
bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
auto NI = BinaryDataMap.find(Address);
assert(NI != BinaryDataMap.end());
if (NI == BinaryDataMap.end())
return false;
// TODO: it's possible that a jump table starts at the same address
// as a larger blob of private data. When we set the size of the
// jump table, it might be smaller than the total blob size. In this
// case we just leave the original size since (currently) it won't really
// affect anything. See T26915981.
assert((!NI->second->Size || NI->second->Size == Size ||
(NI->second->isJumpTable() && NI->second->Size > Size)) &&
"can't change the size of a symbol that has already had its "
"size set");
if (!NI->second->Size) {
NI->second->Size = Size;
updateObjectNesting(NI);
return true;
}
return false;
}
/// Give objects whose primary name is an internal (generated) name a name
/// derived from a hash computed by their section.
///
/// For each such object: if it has a non-internal alias, that alias is moved
/// to the front instead. Otherwise, for non-empty objects, a name of the form
/// "<prefix>_<hex hash>" is inserted as the first name unless that name is
/// already taken. Collisions on objects that do not look like padding are
/// counted and reported.
void BinaryContext::generateSymbolHashes() {
// An object is treated as padding when it is named "HOLEat..." or when no
// byte other than 0 is found in its contents.
auto isPadding = [](const BinaryData &BD) {
auto Contents = BD.getSection().getContents();
auto SymData = Contents.substr(BD.getOffset(), BD.getSize());
return (BD.getName().startswith("HOLEat") ||
SymData.find_first_not_of(0) == StringRef::npos);
};
uint64_t NumCollisions = 0;
for (auto &Entry : BinaryDataMap) {
auto &BD = *Entry.second;
auto Name = BD.getName();
// Only objects whose current primary name is internal are processed.
if (!isInternalSymbolName(Name))
continue;
// First check if a non-anonymous alias exists and move it to the front.
if (BD.getNames().size() > 1) {
auto Itr = std::find_if(BD.Names.begin(),
BD.Names.end(),
[&](const StringRef Name) {
return !isInternalSymbolName(Name);
});
if (Itr != BD.Names.end()) {
assert(BD.Names.size() == BD.Symbols.size() &&
"there should be a 1:1 mapping between names and symbols");
// Swap the name and the symbol at the same index so both lists stay
// aligned.
auto Idx = std::distance(BD.Names.begin(), Itr);
std::swap(BD.Names[0], *Itr);
std::swap(BD.Symbols[0], BD.Symbols[Idx]);
continue;
}
}
// We have to skip 0 size symbols since they will all collide.
if (BD.getSize() == 0) {
continue;
}
const auto Hash = BD.getSection().hash(BD);
// Keep the part of the name that precedes "0x" and append the hash.
const auto Idx = Name.find("0x");
std::string NewName = (Twine(Name.substr(0, Idx)) +
"_" + Twine::utohexstr(Hash)).str();
if (getBinaryDataByName(NewName)) {
// Ignore collisions for symbols that appear to be padding
// (i.e. all zeros or a "hole")
if (!isPadding(BD)) {
if (opts::Verbosity) {
errs() << "BOLT-WARNING: collision detected when hashing " << BD
<< " with new name (" << NewName << "), skipping.\n";
}
++NumCollisions;
}
continue;
}
// Make the hashed name the primary name and register it globally.
BD.Names.insert(BD.Names.begin(), NewName);
BD.Symbols.insert(BD.Symbols.begin(),
Ctx->getOrCreateSymbol(NewName));
assert(BD.Names.size() == BD.Symbols.size() &&
"there should be a 1:1 mapping between names and symbols");
GlobalSymbols[NewName] = &BD;
}
if (NumCollisions) {
errs() << "BOLT-WARNING: " << NumCollisions
<< " collisions detected while hashing binary objects";
if (!opts::Verbosity)
errs() << ". Use -v=1 to see the list.";
errs() << '\n';
}
}
/// Resolve the (function, address) pairs collected in
/// InterproceduralReferences and then clear the list.
///
/// For an address inside another function: a fragment is linked to the
/// referencing function as its parent; otherwise a reference to anything but
/// the function start adds an entry point. For a non-zero address inside no
/// function, text-section addresses other than PLT are treated as an object in
/// code: this is fatal in relocation mode, and otherwise the max size of a
/// simple function whose padding contains the address is reduced.
void BinaryContext::processInterproceduralReferences() {
for (auto &Pair : InterproceduralReferences) {
auto *FromBF = Pair.first;
auto Addr = Pair.second;
auto *ContainingFunction = getBinaryFunctionContainingAddress(Addr);
// References within the same function need no processing here.
if (FromBF == ContainingFunction)
continue;
if (ContainingFunction) {
// Only a parent function (or a sibling) can reach its fragment.
if (ContainingFunction->IsFragment) {
assert(!FromBF->IsFragment &&
"only one cold fragment is supported at this time");
ContainingFunction->setParentFunction(FromBF);
FromBF->addFragment(ContainingFunction);
// Outside of relocation mode both functions are marked non-simple.
if (!HasRelocations) {
ContainingFunction->setSimple(false);
FromBF->setSimple(false);
}
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: marking " << *ContainingFunction
<< " as a fragment of " << *FromBF << '\n';
}
continue;
}
// A reference into the middle of another function becomes an entry point.
if (ContainingFunction->getAddress() != Addr) {
ContainingFunction->addEntryPoint(Addr);
if (!HasRelocations) {
if (opts::Verbosity >= 1) {
errs() << "BOLT-WARNING: Function " << *ContainingFunction
<< " has internal BBs that are target of a reference "
<< "located in another function. Skipping the function.\n";
}
ContainingFunction->setSimple(false);
}
}
} else if (Addr) {
// Check if address falls in function padding space - this could be
// unmarked data in code. In this case adjust the padding space size.
auto Section = getSectionForAddress(Addr);
assert(Section && "cannot get section for referenced address");
if (!Section->isText())
continue;
// PLT requires special handling and could be ignored in this context.
StringRef SectionName = Section->getName();
if (SectionName == ".plt" || SectionName == ".plt.got")
continue;
if (HasRelocations) {
errs() << "BOLT-ERROR: cannot process binaries with unmarked "
<< "object in code at address 0x"
<< Twine::utohexstr(Addr) << " belonging to section "
<< SectionName << " in relocation mode.\n";
exit(1);
}
// Look the address up again, this time with UseMaxSize set.
ContainingFunction =
getBinaryFunctionContainingAddress(Addr,
/*CheckPastEnd=*/false,
/*UseMaxSize=*/true);
// We are not going to overwrite non-simple functions, but for simple
// ones - adjust the padding size.
if (ContainingFunction && ContainingFunction->isSimple()) {
errs() << "BOLT-WARNING: function " << *ContainingFunction
<< " has an object detected in a padding region at address 0x"
<< Twine::utohexstr(Addr) << '\n';
ContainingFunction->setMaxSize(Addr -
ContainingFunction->getAddress());
}
}
}
InterproceduralReferences.clear();
}
/// Finalize the symbol table: fill holes between objects, warn about (and
/// assert on) zero-sized top-level "SYMBOLat"/"DATAat" objects that live in a
/// section, then assign memory profile data and generate hashed names.
void BinaryContext::postProcessSymbolTable() {
  fixBinaryDataHoles();

  bool AllSized = true;
  for (auto &AddressAndData : BinaryDataMap) {
    auto *Data = AddressAndData.second;

    const bool HasGeneratedName = Data->getName().startswith("SYMBOLat") ||
                                  Data->getName().startswith("DATAat");
    if (!HasGeneratedName)
      continue;

    // Only parentless, zero-sized, non-absolute objects in a section count.
    if (Data->getParent() || Data->getSize() || Data->isAbsolute() ||
        !Data->getSection())
      continue;

    errs() << "BOLT-WARNING: zero-sized top level symbol: " << *Data << "\n";
    AllSized = false;
  }
  assert(AllSized);

  assignMemData();
  generateSymbolHashes();
}
/// Fold \p ChildBF into \p ParentBF: the parent takes over the child's names,
/// the child's symbols are redirected to the parent and the child's profile is
/// merged into the parent's.
///
/// In relocation mode the child is then erased from BinaryFunctions; otherwise
/// it is kept, renamed with an "__ICF_" prefix and marked as folded. All locks
/// are created unlocked (std::defer_lock) and held only around the individual
/// accesses they guard.
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
BinaryFunction &ParentBF) {
std::shared_lock<std::shared_timed_mutex> ReadCtxLock(CtxMutex,
std::defer_lock);
std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
std::defer_lock);
std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
SymbolToFunctionMapMutex, std::defer_lock);
// Copy name list.
ParentBF.addNewNames(ChildBF.getNames());
// Update internal bookkeeping info.
for (auto &Name : ChildBF.getNames()) {
ReadCtxLock.lock();
// Calls to functions are handled via symbols, and we keep the lookup table
// that we need to update.
auto *Symbol = Ctx->lookupSymbol(Name);
ReadCtxLock.unlock();
assert(Symbol && "symbol cannot be NULL at this point");
WriteSymbolMapLock.lock();
SymbolToFunctionMap[Symbol] = &ParentBF;
WriteSymbolMapLock.unlock();
// NB: there's no need to update BinaryDataMap and GlobalSymbols.
}
// Merge execution counts of ChildBF into those of ParentBF.
ChildBF.mergeProfileDataInto(ParentBF);
if (HasRelocations) {
std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
std::defer_lock);
std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
std::defer_lock);
// Remove ChildBF from the global set of functions in relocs mode.
// NOTE(review): the lookup and the erase happen under two separate lock
// acquisitions, so the map is unlocked in between - confirm this is safe for
// the callers.
ReadBfsLock.lock();
auto FI = BinaryFunctions.find(ChildBF.getAddress());
ReadBfsLock.unlock();
assert(FI != BinaryFunctions.end() && "function not found");
assert(&ChildBF == &FI->second && "function mismatch");
// ChildBF is the map element itself (asserted above), so the erase below
// removes the object ChildBF refers to.
WriteBfsLock.lock();
FI = BinaryFunctions.erase(FI);
WriteBfsLock.unlock();
} else {
// In non-relocation mode we keep the function, but rename it.
std::string NewName = "__ICF_" + ChildBF.getSymbol()->getName().str();
ChildBF.Names.clear();
ChildBF.Names.push_back(NewName);
WriteCtxLock.lock();
ChildBF.OutputSymbol = Ctx->getOrCreateSymbol(NewName);
WriteCtxLock.unlock();
ChildBF.setFolded();
}
}
/// Make sure every allocatable section is fully covered by top-level objects.
///
/// For each section the gaps between consecutive top-level objects (and up to
/// the section end) are collected. Each gap is then covered either by setting
/// the size of the object that already starts there or by creating a new
/// "HOLEat" object. Nesting is validated before and after, and hole coverage
/// afterwards.
void BinaryContext::fixBinaryDataHoles() {
assert(validateObjectNesting() && "object nesting inconsitency detected");
for (auto &Section : allocatableSections()) {
// (start address, length) of every uncovered range found in this section.
std::vector<std::pair<uint64_t, uint64_t>> Holes;
// Filter predicate: accept top-level objects of this section, except
// zero-sized generated placeholders (SYMBOLat0x/DATAat0x/ANONYMOUS names).
auto isNotHole = [&Section](const binary_data_iterator &Itr) {
auto *BD = Itr->second;
bool isHole = (!BD->getParent() &&
!BD->getSize() &&
BD->isObject() &&
(BD->getName().startswith("SYMBOLat0x") ||
BD->getName().startswith("DATAat0x") ||
BD->getName().startswith("ANONYMOUS")));
return !isHole && BD->getSection() == Section && !BD->getParent();
};
auto BDStart = BinaryDataMap.begin();
auto BDEnd = BinaryDataMap.end();
auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
// EndAddress tracks the end of the last accepted object; anything between
// it and the start of the next accepted object is a hole.
uint64_t EndAddress = Section.getAddress();
while (Itr != End) {
if (Itr->second->getAddress() > EndAddress) {
auto Gap = Itr->second->getAddress() - EndAddress;
Holes.push_back(std::make_pair(EndAddress, Gap));
}
EndAddress = Itr->second->getEndAddress();
++Itr;
}
// Trailing gap between the last object and the end of the section.
if (EndAddress < Section.getEndAddress()) {
Holes.push_back(std::make_pair(EndAddress,
Section.getEndAddress() - EndAddress));
}
// If there is already a symbol at the start of the hole, grow that symbol
// to cover the rest. Otherwise, create a new symbol to cover the hole.
for (auto &Hole : Holes) {
auto *BD = getBinaryDataAtAddress(Hole.first);
if (BD) {
// BD->getSection() can be != Section if there are sections that
// overlap. In this case it is probably safe to just skip the holes
// since the overlapping section will not(?) have any symbols in it.
if (BD->getSection() == Section)
setBinaryDataSize(Hole.first, Hole.second);
} else {
getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
}
}
}
assert(validateObjectNesting() && "object nesting inconsitency detected");
assert(validateHoles() && "top level hole detected in object map");
}
/// Print every object in BinaryDataMap to \p OS. A section header line is
/// printed whenever the section differs from that of the previous object, and
/// each object is prefixed once per ancestor it has.
void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {
  // Section of the previously printed object; null before the first one.
  const BinarySection *PrevSection = nullptr;

  for (auto &AddressAndData : BinaryDataMap) {
    const auto *Data = AddressAndData.second;
    const auto &Section = Data->getSection();

    if (!PrevSection || Section != *PrevSection) {
      StringRef SectionName = Section.getName();
      uint64_t Address, Size;
      if (!Section) {
        // No valid section: fall back to the object's own address and size.
        Address = Data->getAddress();
        Size = Data->getSize();
      } else {
        Address = Section.getAddress();
        Size = Section.getSize();
      }
      OS << "BOLT-INFO: Section " << SectionName << ", "
         << "0x" + Twine::utohexstr(Address) << ":"
         << "0x" + Twine::utohexstr(Address + Size) << "/"
         << Size << "\n";
      PrevSection = &Section;
    }

    OS << "BOLT-INFO: ";
    for (auto *Ancestor = Data->getParent(); Ancestor;
         Ancestor = Ancestor->getParent())
      OS << " ";
    OS << *Data << "\n";
  }
}
void BinaryContext::assignMemData() {
auto getAddress = [&](const MemInfo &MI) -> uint64_t {
if (!MI.Addr.IsSymbol)
return MI.Addr.Offset;
if (auto *BD = getBinaryDataByName(MI.Addr.Name))
return BD->getAddress() + MI.Addr.Offset;
return 0;
};
// Map of sections (or heap/stack) to count/size.
std::map<StringRef, uint64_t> Counts;
std::map<StringRef, uint64_t> JumpTableCounts;
uint64_t TotalCount = 0;
for (auto &Entry : DR.getAllFuncsMemData()) {
for (auto &MI : Entry.second.Data) {
const auto Addr = getAddress(MI);
auto *BD = getBinaryDataContainingAddress(Addr);
if (BD) {
BD->getAtomicRoot()->addMemData(MI);
Counts[BD->getSectionName()] += MI.Count;
if (BD->getAtomicRoot()->isJumpTable()) {
JumpTableCounts[BD->getSectionName()] += MI.Count;
}
} else {
Counts["Heap/stack"] += MI.Count;
}
TotalCount += MI.Count;
}
}
if (!Counts.empty()) {
outs() << "BOLT-INFO: Memory stats breakdown:\n";
for (auto &Entry : Counts) {
const auto Section = Entry.first;
const auto Count = Entry.second;
outs() << "BOLT-INFO: " << Section << " = " << Count
<< format(" (%.1f%%)\n", 100.0*Count/TotalCount);
if (JumpTableCounts.count(Section) != 0) {
const auto JTCount = JumpTableCounts[Section];
outs() << "BOLT-INFO: jump tables = " << JTCount
<< format(" (%.1f%%)\n", 100.0*JTCount/Count);
}
}
outs() << "BOLT-INFO: Total memory events: " << TotalCount << "\n";
}
}
namespace {

/// Recursively visit \p DIE and all of its children and attach every
/// subprogram DIE to the matching entry of \p BinaryFunctions.
///
/// A subprogram is matched by address: when the DIE has a low/high PC pair the
/// low PC is used as the lookup key, otherwise the low PC of each of its
/// address ranges is tried. Subprograms with no entry in \p BinaryFunctions
/// are silently skipped.
void findSubprograms(const DWARFDie DIE,
                     std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
  if (DIE.isSubprogramDIE()) {
    uint64_t LowPC, HighPC, SectionIndex;
    if (DIE.getLowAndHighPC(LowPC, HighPC, SectionIndex)) {
      auto It = BinaryFunctions.find(LowPC);
      if (It != BinaryFunctions.end()) {
        It->second.addSubprogramDIE(DIE);
      } else {
        // The function must have been optimized away by GC.
      }
    } else {
      // Compute the ranges once and iterate over that result instead of
      // querying the DIE a second time.
      const auto RangesVector = DIE.getAddressRanges();
      for (const auto &Range : RangesVector) {
        auto It = BinaryFunctions.find(Range.LowPC);
        if (It != BinaryFunctions.end()) {
          It->second.addSubprogramDIE(DIE);
        }
      }
    }
  }

  // Subprogram DIEs may be nested, so always descend into the children.
  for (auto ChildDIE = DIE.getFirstChild(); ChildDIE && !ChildDIE.isNULL();
       ChildDIE = ChildDIE.getSibling()) {
    findSubprograms(ChildDIE, BinaryFunctions);
  }
}

} // namespace
/// Register file number \p FileIndex (1-based) from the line table of the
/// compile unit at offset \p SrcCUID with the MC context under \p DestCUID,
/// and return the value produced by MCContext::getDwarfFile().
unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
                                               const uint32_t SrcCUID,
                                               unsigned FileIndex) {
  auto SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
  auto LineTable = DwCtx->getLineTableForUnit(SrcUnit);
  const auto &FileNames = LineTable->Prologue.FileNames;
  // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
  // means empty dir.
  assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
         "FileIndex out of range for the compilation unit.");
  const auto &FileEntry = FileNames[FileIndex - 1];

  StringRef Dir = "";
  if (FileEntry.DirIdx != 0) {
    const auto &IncludeDir =
        LineTable->Prologue.IncludeDirectories[FileEntry.DirIdx - 1];
    if (auto DirName = IncludeDir.getAsCString())
      Dir = *DirName;
  }

  StringRef FileName = "";
  if (auto FName = FileEntry.Name.getAsCString())
    FileName = *FName;
  assert(FileName != "");

  return cantFail(Ctx->getDwarfFile(Dir, FileName, 0, nullptr, None, DestCUID));
}
std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
SortedFunctions.begin(),
[](std::pair<const uint64_t, BinaryFunction> &BFI) {
return &BFI.second;
});
std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
[] (const BinaryFunction *A, const BinaryFunction *B) {
if (A->hasValidIndex() && B->hasValidIndex()) {
return A->getIndex() < B->getIndex();
}
return A->hasValidIndex();
});
return SortedFunctions;
}
/// Prepare DWARF-derived state for later processing. This runs three steps:
///  1. registers the file names of every compile unit's line table with the
///     MC context (Ctx), keyed by the compile unit offset;
///  2. runs findSubprograms() over every compile unit to attach subprogram
///     DIEs to entries of BinaryFunctions, either inline or on the thread
///     pool depending on opts::NoThreads;
///  3. for functions that still have no subprogram DIE, looks the function
///     address up in .debug_aranges and, on a hit, attaches a DIE built from
///     the owning compile unit.
void BinaryContext::preprocessDebugInfo() {
// Populate MCContext with DWARF files.
for (const auto &CU : DwCtx->compile_units()) {
const auto CUID = CU->getOffset();
// NOTE(review): LineTable is dereferenced without a null check; this assumes
// getLineTableForUnit() always yields a table here - TODO confirm.
auto *LineTable = DwCtx->getLineTableForUnit(CU.get());
const auto &FileNames = LineTable->Prologue.FileNames;
// Make sure empty debug line tables are registered too.
if (FileNames.empty()) {
cantFail(Ctx->getDwarfFile("", "<unknown>", 0, nullptr, None, CUID));
continue;
}
for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
// Dir indexes start at 1, as DWARF file numbers, and a dir index 0
// means empty dir.
StringRef Dir = "";
if (FileNames[I].DirIdx != 0)
if (auto DirName =
LineTable->Prologue.IncludeDirectories[FileNames[I].DirIdx - 1]
.getAsCString())
Dir = *DirName;
StringRef FileName = "";
if (auto FName = FileNames[I].Name.getAsCString())
FileName = *FName;
assert(FileName != "");
cantFail(Ctx->getDwarfFile(Dir, FileName, 0, nullptr, None, CUID));
}
}
// For each CU, iterate over its children DIEs and match subprogram DIEs to
// BinaryFunctions.
// Run findSubprograms on a range of compilation units
// NOTE(review): blocks scheduled below all receive the same BinaryFunctions
// map and call addSubprogramDIE() on its entries; this presumably relies on
// addSubprogramDIE() being safe to call concurrently - TODO confirm.
auto processBlock = [&](auto BlockBegin, auto BlockEnd) {
for (auto It = BlockBegin; It != BlockEnd; ++It) {
findSubprograms((*It)->getUnitDIE(false), BinaryFunctions);
}
};
if (opts::NoThreads) {
// Process all compile units in one call on the current thread.
processBlock(DwCtx->compile_units().begin(), DwCtx->compile_units().end());
} else {
auto &ThreadPool = ParallelUtilities::getThreadPool();
// Divide compilation units uniformly into tasks.
// NOTE(review): the divisor is opts::TaskCount * opts::ThreadCount; this
// assumes neither option is zero - TODO confirm.
unsigned BlockCost =
DwCtx->getNumCompileUnits() / (opts::TaskCount * opts::ThreadCount);
// Never schedule empty blocks when there are fewer units than tasks.
if (BlockCost == 0)
BlockCost = 1;
auto BlockBegin = DwCtx->compile_units().begin();
unsigned CurrentCost = 0;
for (auto It = DwCtx->compile_units().begin();
It != DwCtx->compile_units().end(); It++) {
CurrentCost++;
// Once BlockCost units are accumulated, submit [BlockBegin, It] as a task
// and start a new block right after It.
if (CurrentCost >= BlockCost) {
ThreadPool.async(processBlock, BlockBegin, std::next(It));
BlockBegin = std::next(It);
CurrentCost = 0;
}
}
// Submit whatever is left after the last full block (the range is empty when
// the unit count is a multiple of BlockCost), then wait for all tasks.
ThreadPool.async(processBlock, BlockBegin, DwCtx->compile_units().end());
ThreadPool.wait();
}
// Some functions may not have a corresponding subprogram DIE
// yet they will be included in some CU and will have line number information.
// Hence we need to associate them with the CU and include in CU ranges.
for (auto &AddrFunctionPair : BinaryFunctions) {
auto FunctionAddress = AddrFunctionPair.first;
auto &Function = AddrFunctionPair.second;
if (!Function.getSubprogramDIEs().empty())
continue;
if (auto DebugAranges = DwCtx->getDebugAranges()) {
auto CUOffset = DebugAranges->findAddress(FunctionAddress);
// -1U is treated as "address not covered by any compile unit".
if (CUOffset != -1U) {
Function.addSubprogramDIE(
DWARFDie(DwCtx->getCompileUnitForOffset(CUOffset), nullptr));
continue;
}
}
#ifdef DWARF_LOOKUP_ALL_RANGES
// Last resort - iterate over all compile units. This should not happen
// very often. If it does, we need to create a separate lookup table
// similar to .debug_aranges internally. This slows down processing
// considerably.
for (const auto &CU : DwCtx->compile_units()) {
const auto *CUDie = CU->getUnitDIE();
for (const auto &Range : CUDie->getAddressRanges(CU.get())) {
if (FunctionAddress >= Range.first &&
FunctionAddress < Range.second) {
Function.addSubprogramDIE(DWARFDie(CU.get(), nullptr));
break;
}
}
}
#endif
}
}
/// Print a short textual form of the CFI instruction \p Inst to \p OS: the
/// operation name followed by the register and/or offset operands relevant to
/// that operation. Operations without a dedicated case are printed as
/// "Op#<numeric operation code>". No trailing newline is emitted.
void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
  uint32_t Operation = Inst.getOperation();
  switch (Operation) {
  case MCCFIInstruction::OpSameValue:
    OS << "OpSameValue Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpRememberState:
    OS << "OpRememberState";
    break;
  case MCCFIInstruction::OpRestoreState:
    OS << "OpRestoreState";
    break;
  case MCCFIInstruction::OpOffset:
    OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpDefCfaRegister:
    OS << "OpDefCfaRegister Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpDefCfaOffset:
    OS << "OpDefCfaOffset " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpDefCfa:
    OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpRelOffset:
    OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpAdjustCfaOffset:
    // Previously printed as "OfAdjustCfaOffset"; every other case uses the
    // "Op" prefix matching the enumerator name.
    OS << "OpAdjustCfaOffset " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpEscape:
    OS << "OpEscape";
    break;
  case MCCFIInstruction::OpRestore:
    OS << "OpRestore Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpUndefined:
    OS << "OpUndefined Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpRegister:
    OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
       << Inst.getRegister2();
    break;
  case MCCFIInstruction::OpWindowSave:
    OS << "OpWindowSave";
    break;
  case MCCFIInstruction::OpGnuArgsSize:
    OS << "OpGnuArgsSize";
    break;
  default:
    OS << "Op#" << Operation;
    break;
  }
}
/// Print one line describing \p Instruction to \p OS, followed by optional
/// annotations.
///
/// \param OS               stream receiving the output.
/// \param Instruction      instruction to print.
/// \param Offset           value printed as the hexadecimal prefix of the line
///                         and passed to printRelocations().
/// \param Function         function used to look up CFI, line table, memory
///                         data and relocations; may be null, in which case
///                         those parts are omitted.
/// \param PrintMCInst      when true, additionally dump the MCInst itself.
/// \param PrintMemData     when true (or opts::PrintMemData is set), print
///                         memory profile entries attached to the instruction.
/// \param PrintRelocations when true (or opts::PrintRelocations is set), print
///                         relocations covering the instruction.
void BinaryContext::printInstruction(raw_ostream &OS,
const MCInst &Instruction,
uint64_t Offset,
const BinaryFunction* Function,
bool PrintMCInst,
bool PrintMemData,
bool PrintRelocations) const {
// EH labels are printed on their own line without an offset prefix.
if (MIB->isEHLabel(Instruction)) {
OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n';
return;
}
OS << format(" %08" PRIx64 ": ", Offset);
// CFI pseudo-instructions: print the immediate of operand 0 and, when the
// function is known, the decoded CFI instruction.
if (MIB->isCFI(Instruction)) {
// Note: this local intentionally or not shadows the Offset parameter within
// this branch.
uint32_t Offset = Instruction.getOperand(0).getImm();
OS << "\t!CFI\t$" << Offset << "\t; ";
if (Function)
printCFI(OS, *Function->getCFIFor(Instruction));
OS << "\n";
return;
}
InstPrinter->printInst(&Instruction, OS, "", *STI);
if (MIB->isCall(Instruction)) {
if (MIB->isTailCall(Instruction))
OS << " # TAILCALL ";
if (MIB->isInvoke(Instruction)) {
// For invokes, print the handler (or '0' when the first EH info member is
// null) and the action value.
const auto EHInfo = MIB->getEHInfo(Instruction);
OS << " # handler: ";
if (EHInfo->first)
OS << *EHInfo->first;
else
OS << '0';
OS << "; action: " << EHInfo->second;
// Negative values are not printed.
const auto GnuArgsSize = MIB->getGnuArgsSize(Instruction);
if (GnuArgsSize >= 0)
OS << "; GNU_args_size = " << GnuArgsSize;
}
} else if (MIB->isIndirectBranch(Instruction)) {
if (auto JTAddress = MIB->getJumpTable(Instruction)) {
OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
} else {
OS << " # UNKNOWN CONTROL FLOW";
}
}
MIB->printAnnotations(Instruction, OS);
// The line table is consulted only when a function is supplied and
// opts::PrintDebugInfo is enabled.
const DWARFDebugLine::LineTable *LineTable =
Function && opts::PrintDebugInfo ? Function->getDWARFUnitLineTable().second
: nullptr;
if (LineTable) {
auto RowRef = DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
if (RowRef != DebugLineTableRowRef::NULL_ROW) {
// RowIndex and Row.File are used as 1-based indices here.
const auto &Row = LineTable->Rows[RowRef.RowIndex - 1];
StringRef FileName = "";
if (auto FName =
LineTable->Prologue.FileNames[Row.File - 1].Name.getAsCString())
FileName = *FName;
OS << " # debug line " << FileName << ":" << Row.Line;
if (Row.Column) {
OS << ":" << Row.Column;
}
}
}
if ((opts::PrintMemData || PrintMemData) && Function) {
const auto *MD = Function->getMemData();
const auto MemDataOffset =
MIB->tryGetAnnotationAs<uint64_t>(Instruction, "MemDataOffset");
if (MD && MemDataOffset) {
// The " # Loads: " header precedes the first entry; later entries are
// comma-separated.
bool DidPrint = false;
for (auto &MI : MD->getMemInfoRange(MemDataOffset.get())) {
OS << (DidPrint ? ", " : " # Loads: ");
OS << MI.Addr << "/" << MI.Count;
DidPrint = true;
}
}
}
if ((opts::PrintRelocations || PrintRelocations) && Function) {
// Size of this single instruction, computed over the one-element range.
const auto Size = computeCodeSize(&Instruction, &Instruction + 1);
Function->printRelocations(OS, Offset, Size);
}
OS << "\n";
if (PrintMCInst) {
Instruction.dump_pretty(OS, InstPrinter.get());
OS << "\n";
}
}
/// Return the bytes of \p Function as stored in the contents of its section.
/// Yields std::errc::bad_address when the section is not a text section, is
/// virtual, or has zero size.
ErrorOr<ArrayRef<uint8_t>>
BinaryContext::getFunctionData(const BinaryFunction &Function) const {
  auto &Section = Function.getSection();
  assert(Section.containsRange(Function.getAddress(), Function.getSize()) &&
         "wrong section for function");

  const bool HasCode =
      Section.isText() && !Section.isVirtual() && Section.getSize();
  if (!HasCode)
    return std::make_error_code(std::errc::bad_address);

  StringRef SectionContents = Section.getContents();
  assert(SectionContents.size() == Section.getSize() &&
         "section size mismatch");

  // Function offset from the section start.
  const auto OffsetInSection = Function.getAddress() - Section.getAddress();
  const auto *SectionBytes =
      reinterpret_cast<const uint8_t *>(SectionContents.data());
  return ArrayRef<uint8_t>(SectionBytes + OffsetInSection, Function.getSize());
}
/// Find the section registered in AddressToSection that covers \p Address.
/// The candidate is the section with the greatest start address not exceeding
/// \p Address; a zero-sized section is treated as covering exactly its start
/// address. Yields std::errc::bad_address when nothing matches.
ErrorOr<BinarySection&> BinaryContext::getSectionForAddress(uint64_t Address) {
  auto SI = AddressToSection.upper_bound(Address);
  if (SI == AddressToSection.begin())
    return std::make_error_code(std::errc::bad_address);

  // Step back to the last section starting at or before Address.
  --SI;
  auto &Candidate = *SI->second;
  auto SectionEnd = SI->first + Candidate.getSize();
  if (!Candidate.getSize())
    SectionEnd += 1;

  if (SectionEnd <= Address)
    return std::make_error_code(std::errc::bad_address);
  return Candidate;
}
/// Return the name of the section found by getSectionForAddress() for
/// \p Address, or std::errc::bad_address when that lookup fails.
ErrorOr<StringRef>
BinaryContext::getSectionNameForAddress(uint64_t Address) const {
  auto Section = getSectionForAddress(Address);
  if (!Section)
    return std::make_error_code(std::errc::bad_address);
  return Section->getName();
}
/// Add \p Section to the Sections set and to the name lookup map; sections
/// with a non-zero address are also added to the address lookup map. The
/// section must be named and must not have been registered before (both are
/// checked with assertions). Returns a reference to the same section.
BinarySection &BinaryContext::registerSection(BinarySection *Section) {
  assert(!Section->getName().empty() &&
         "can't register sections without a name");

  const bool Inserted = Sections.insert(Section).second;
  assert(Inserted && "can't register the same section twice.");
  (void)Inserted;

  // Only register sections with addresses in the AddressToSection map.
  if (const auto SectionAddress = Section->getAddress())
    AddressToSection.insert(std::make_pair(SectionAddress, Section));
  NameToSection.insert(std::make_pair(Section->getName(), Section));

  DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
  return *Section;
}
/// Allocate a BinarySection for the object-file section \p Section and
/// register it.
BinarySection &BinaryContext::registerSection(SectionRef Section) {
  auto *NewSection = new BinarySection(*this, Section);
  return registerSection(NewSection);
}
/// Allocate a BinarySection named \p SectionName constructed from
/// \p OriginalSection and register it.
BinarySection &
BinaryContext::registerSection(StringRef SectionName,
                               const BinarySection &OriginalSection) {
  auto *NewSection = new BinarySection(*this, SectionName, OriginalSection);
  return registerSection(NewSection);
}
/// Update the section named \p Name with the supplied properties if it is
/// already registered, otherwise create and register a new section with them.
/// Updating requires the name to be unique and must not change whether the
/// section is allocatable (both are checked with assertions).
BinarySection &BinaryContext::registerOrUpdateSection(StringRef Name,
                                                      unsigned ELFType,
                                                      unsigned ELFFlags,
                                                      uint8_t *Data,
                                                      uint64_t Size,
                                                      unsigned Alignment,
                                                      bool IsLocal) {
  auto NamedSections = getSectionByName(Name);

  // No section with this name yet: create one.
  if (NamedSections.begin() == NamedSections.end())
    return registerSection(new BinarySection(*this, Name, Data, Size, Alignment,
                                             ELFType, ELFFlags, IsLocal));

  assert(std::next(NamedSections.begin()) == NamedSections.end() &&
         "can only update unique sections");
  auto *Section = NamedSections.begin()->second;

  DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
  const auto Flag = Section->isAllocatable();
  Section->update(Data, Size, Alignment, ELFType, ELFFlags, IsLocal);
  DEBUG(dbgs() << *Section << "\n");
  assert(Flag == Section->isAllocatable() &&
         "can't change section allocation status");
  return *Section;
}
/// Remove \p Section from the Sections set and from the address and name
/// lookup maps, then delete the section object. Returns false, leaving
/// everything untouched, when the section is not in the Sections set.
bool BinaryContext::deregisterSection(BinarySection &Section) {
  auto *SectionPtr = &Section;
  auto Itr = Sections.find(SectionPtr);
  if (Itr == Sections.end())
    return false;

  // Several sections may share a key, so erase only the entry that points at
  // this exact section.
  auto AddrRange = AddressToSection.equal_range(SectionPtr->getAddress());
  for (auto AI = AddrRange.first; AI != AddrRange.second; ++AI) {
    if (AI->second == SectionPtr) {
      AddressToSection.erase(AI);
      break;
    }
  }

  auto NameRange = NameToSection.equal_range(SectionPtr->getName());
  for (auto NI = NameRange.first; NI != NameRange.second; ++NI) {
    if (NI->second == SectionPtr) {
      NameToSection.erase(NI);
      break;
    }
  }

  Sections.erase(Itr);
  delete SectionPtr;
  return true;
}
/// Print every registered section to \p OS, one per line, each prefixed with
/// "BOLT-INFO: ".
void BinaryContext::printSections(raw_ostream &OS) const {
  for (auto SI = Sections.begin(), SE = Sections.end(); SI != SE; ++SI)
    OS << "BOLT-INFO: " << **SI << "\n";
}
/// Return the section named "<absolute>", creating it with type SHT_NULL and
/// no flags the first time it is requested.
BinarySection &BinaryContext::absoluteSection() {
  auto Existing = getUniqueSectionByName("<absolute>");
  if (!Existing)
    return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
  return *Existing;
}
/// Read an unsigned integer of \p Size bytes located at \p Address from the
/// contents of the section covering that address. Yields
/// std::errc::bad_address when no section covers the address and 0 when the
/// section is virtual.
ErrorOr<uint64_t>
BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
                                         size_t Size) const {
  const auto Section = getSectionForAddress(Address);
  if (!Section)
    return std::make_error_code(std::errc::bad_address);

  if (Section->isVirtual())
    return 0;

  // The extractor works with an offset relative to the section start.
  uint32_t OffsetInSection =
      static_cast<uint32_t>(Address - Section->getAddress());
  DataExtractor Extractor(Section->getContents(), AsmInfo->isLittleEndian(),
                          AsmInfo->getCodePointerSize());
  return Extractor.getUnsigned(&OffsetInSection, Size);
}
/// Read a signed integer of \p Size bytes located at \p Address from the
/// contents of the section covering that address. Yields
/// std::errc::bad_address when no section covers the address and 0 when the
/// section is virtual.
ErrorOr<uint64_t>
BinaryContext::getSignedValueAtAddress(uint64_t Address,
                                       size_t Size) const {
  const auto Section = getSectionForAddress(Address);
  if (!Section)
    return std::make_error_code(std::errc::bad_address);

  if (Section->isVirtual())
    return 0;

  // The extractor works with an offset relative to the section start.
  uint32_t OffsetInSection =
      static_cast<uint32_t>(Address - Section->getAddress());
  DataExtractor Extractor(Section->getContents(), AsmInfo->isLittleEndian(),
                          AsmInfo->getCodePointerSize());
  return Extractor.getSigned(&OffsetInSection, Size);
}
/// Record a relocation at \p Address in the section covering that address.
/// The address is converted to a section-relative offset before being handed
/// to the section. A covering section must exist (checked with an assertion).
void BinaryContext::addRelocation(uint64_t Address,
                                  MCSymbol *Symbol,
                                  uint64_t Type,
                                  uint64_t Addend,
                                  uint64_t Value) {
  auto Section = getSectionForAddress(Address);
  assert(Section && "cannot find section for address");
  const auto SectionOffset = Address - Section->getAddress();
  Section->addRelocation(SectionOffset, Symbol, Type, Addend, Value);
}
/// Ask the section covering \p Address to remove the relocation at the
/// corresponding section-relative offset and return that section's result.
/// A covering section must exist (checked with an assertion).
bool BinaryContext::removeRelocationAt(uint64_t Address) {
  auto Section = getSectionForAddress(Address);
  assert(Section && "cannot find section for address");
  const auto SectionOffset = Address - Section->getAddress();
  return Section->removeRelocationAt(SectionOffset);
}
/// Return whatever the section covering \p Address reports for the relocation
/// at the corresponding section-relative offset, or nullptr when no section
/// covers the address.
const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
  auto Section = getSectionForAddress(Address);
  if (Section) {
    const auto SectionOffset = Address - Section->getAddress();
    return Section->getRelocationAt(SectionOffset);
  }
  return nullptr;
}
/// Print a bug-report banner to errs() containing the name, raw bytes and
/// dump of \p Function followed by \p Message, then terminate the process
/// with exit code 1. This function does not return.
void BinaryContext::exitWithBugReport(StringRef Message,
                                      const BinaryFunction &Function) const {
  errs() << "=======================================\n";
  errs() << "BOLT is unable to proceed because it couldn't properly understand "
            "this function.\n";
  errs() << "If you are running the most recent version of BOLT, you may "
            "want to "
            "report this and paste this dump.\nPlease check that there is no "
            "sensitive contents being shared in this dump.\n";
  errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";

  // getFunctionData() reports an error for non-text, virtual or empty
  // sections; dereferencing the result in that state is invalid, so only
  // print the bytes when they are actually available.
  if (auto FunctionData = getFunctionData(Function)) {
    ScopedPrinter SP(errs());
    SP.printBinaryBlock("Function contents", *FunctionData);
  } else {
    errs() << "Function contents are not available.\n";
  }
  errs() << "\n";
  Function.dump();
  errs() << "ERROR: " << Message;
  errs() << "\n=======================================\n";
  exit(1);
}
/// Allocate a new BinaryFunction named \p Name, append it to
/// InjectedBinaryFunctions, and map its symbol to the new function. Returns
/// the newly created function.
BinaryFunction *
BinaryContext::createInjectedBinaryFunction(const std::string &Name,
                                            bool IsSimple) {
  auto *Injected = new BinaryFunction(Name, *this, IsSimple);
  InjectedBinaryFunctions.push_back(Injected);
  setSymbolToFunctionMap(Injected->getSymbol(), Injected);
  return Injected;
}
/// Emit the body of \p BF into a throw-away object streamer and measure the
/// result.
///
/// \returns a pair (hot size, cold size) computed as the distance between the
/// start/end labels placed around each emitted part. The cold size is 0 when
/// BF.isSplit() is false.
///
/// Note that BF.fixBranches() is called first, so \p BF itself is modified.
std::pair<size_t, size_t>
BinaryContext::calculateEmittedSize(BinaryFunction &BF) {
// Adjust branch instruction to match the current layout.
BF.fixBranches();
// Create local MC context to isolate the effect of ephemeral code emission.
auto MCEInstance = createIndependentMCCodeEmitter();
auto *LocalCtx = MCEInstance.LocalCtx.get();
auto *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
// The emitted bytes go to an in-memory buffer that is not used afterwards;
// only the layout is of interest.
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
// The streamer receives the asm backend and code emitter wrapped in
// unique_ptrs.
std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
*TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), VecOS,
std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
/* RelaxAll */ false,
/* IncrementalLinkerCompatible */ false,
/* DWARFMustBeAtTheEnd */ false));
Streamer->InitSections(false);
auto *Section = MCEInstance.LocalMOFI->getTextSection();
Section->setHasInstructions(true);
// Labels bracketing the hot ("__h*") and cold ("__c*") parts.
auto *StartLabel = LocalCtx->getOrCreateSymbol("__hstart");
auto *EndLabel = LocalCtx->getOrCreateSymbol("__hend");
auto *ColdStartLabel = LocalCtx->getOrCreateSymbol("__cstart");
auto *ColdEndLabel = LocalCtx->getOrCreateSymbol("__cend");
Streamer->SwitchSection(Section);
Streamer->EmitLabel(StartLabel);
BF.emitBody(*Streamer, /*EmitColdPart = */false, /*EmitCodeOnly = */true);
Streamer->EmitLabel(EndLabel);
// The cold part, if any, is emitted into its own executable section.
if (BF.isSplit()) {
auto *ColdSection =
LocalCtx->getELFSection(BF.getColdCodeSectionName(),
ELF::SHT_PROGBITS,
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
ColdSection->setHasInstructions(true);
Streamer->SwitchSection(ColdSection);
Streamer->EmitLabel(ColdStartLabel);
BF.emitBody(*Streamer, /*EmitColdPart = */true, /*EmitCodeOnly = */true);
Streamer->EmitLabel(ColdEndLabel);
}
// To avoid calling MCObjectStreamer::flushPendingLabels() which is private.
Streamer->EmitBytes(StringRef(""));
// Lay out the assembled fragments so that label offsets can be queried.
auto &Assembler =
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
MCAsmLayout Layout(Assembler);
Assembler.layout(Layout);
const auto HotSize = Layout.getSymbolOffset(*EndLabel) -
Layout.getSymbolOffset(*StartLabel);
// The cold labels are only emitted for split functions, so they are only
// queried in that case.
const auto ColdSize = BF.isSplit() ? Layout.getSymbolOffset(*ColdEndLabel) -
Layout.getSymbolOffset(*ColdStartLabel)
: 0ULL;
// Clean-up the effect of the code emission.
for (const auto &Symbol : Assembler.symbols()) {
auto *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
MutableSymbol->setUndefined();
MutableSymbol->setIsRegistered(false);
}
return std::make_pair(HotSize, ColdSize);
}
/// Find the function whose address range contains \p Address.
///
/// \param CheckPastEnd when true, the address one past the end of the
///                     function is also accepted.
/// \param UseMaxSize   when true, the range is measured with getMaxSize()
///                     instead of getSize().
/// \param Shallow      when true, return the function found directly;
///                     otherwise follow getParentFunction() links to the
///                     outermost parent.
/// \returns the function, or nullptr when no function contains the address.
BinaryFunction *
BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address,
                                                  bool CheckPastEnd,
                                                  bool UseMaxSize,
                                                  bool Shallow) {
  auto FI = BinaryFunctions.upper_bound(Address);
  if (FI == BinaryFunctions.begin())
    return nullptr;

  // Step back to the last function starting at or before Address.
  --FI;
  auto *Candidate = &FI->second;

  const auto UsedSize = UseMaxSize ? Candidate->getMaxSize()
                                   : Candidate->getSize();
  const auto RangeEnd = FI->first + UsedSize + (CheckPastEnd ? 1 : 0);
  if (Address >= RangeEnd)
    return nullptr;

  if (!Shallow) {
    while (Candidate->getParentFunction())
      Candidate = Candidate->getParentFunction();
  }
  return Candidate;
}
/// Return the function associated with the symbol of the data object found at
/// exactly \p Address, or nullptr when there is no such object or function.
/// Unless \p Shallow is set, getParentFunction() links are followed to the
/// outermost parent.
BinaryFunction *
BinaryContext::getBinaryFunctionAtAddress(uint64_t Address, bool Shallow) {
  const auto *BD = getBinaryDataAtAddress(Address);
  if (!BD)
    return nullptr;

  auto *Result = getFunctionForSymbol(BD->getSymbol());
  if (!Result)
    return nullptr;

  while (Result->getParentFunction() && !Shallow)
    Result = Result->getParentFunction();
  return Result;
}
/// For every range in \p InputRanges, collect the output address ranges of
/// all functions whose address lies in [LowPC, HighPC) and return them
/// concatenated in visiting order.
DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
      const DWARFAddressRangesVector &InputRanges) const {
  DebugAddressRangesVector OutputRanges;

  for (const auto &Range : InputRanges) {
    // Visit functions starting at or after LowPC until one starts at or
    // beyond HighPC.
    for (auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
         BFI != BinaryFunctions.end() &&
         BFI->second.getAddress() < Range.HighPC;
         ++BFI) {
      const auto FunctionRanges = BFI->second.getOutputAddressRanges();
      std::copy(std::begin(FunctionRanges),
                std::end(FunctionRanges),
                std::back_inserter(OutputRanges));
    }
  }

  return OutputRanges;
}