Files
llvm/bolt/lib/Core/GDBIndex.cpp
itrofimow cccc9d3221 [BOLT] Don't emit invalid (gdb-breaking) address ranges in gdb-index (#151923)
Empty address map ranges in gdb-index break gdb, so don't emit them
2025-09-26 13:37:44 -07:00

287 lines
11 KiB
C++

//===- bolt/Core/GDBIndex.cpp - GDB Index support ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/GDBIndex.h"
using namespace llvm::bolt;
using namespace llvm::support::endian;
void GDBIndex::addGDBTypeUnitEntry(const GDBIndexTUEntry &&Entry) {
std::lock_guard<std::mutex> Lock(GDBIndexMutex);
if (!BC.getGdbIndexSection())
return;
GDBIndexTUEntryVector.emplace_back(Entry);
}
void GDBIndex::updateGdbIndexSection(
const CUOffsetMap &CUMap, const uint32_t NumCUs,
DebugARangesSectionWriter &ARangesSectionWriter) {
if (!BC.getGdbIndexSection())
return;
// See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
// for .gdb_index section format.
StringRef GdbIndexContents = BC.getGdbIndexSection()->getContents();
const char *Data = GdbIndexContents.data();
// Parse the header.
const uint32_t Version = read32le(Data);
if (Version != 7 && Version != 8) {
errs() << "BOLT-ERROR: can only process .gdb_index versions 7 and 8\n";
exit(1);
}
// Some .gdb_index generators use file offsets while others use section
// offsets. Hence we can only rely on offsets relative to each other,
// and ignore their absolute values.
const uint32_t CUListOffset = read32le(Data + 4);
const uint32_t CUTypesOffset = read32le(Data + 8);
const uint32_t AddressTableOffset = read32le(Data + 12);
const uint32_t SymbolTableOffset = read32le(Data + 16);
const uint32_t ConstantPoolOffset = read32le(Data + 20);
Data += 24;
// Map CUs offsets to indices and verify existing index table.
std::map<uint32_t, uint32_t> OffsetToIndexMap;
const uint32_t CUListSize = CUTypesOffset - CUListOffset;
const uint32_t TUListSize = AddressTableOffset - CUTypesOffset;
const unsigned NUmCUsEncoded = CUListSize / 16;
unsigned MaxDWARFVersion = BC.DwCtx->getMaxVersion();
unsigned NumDWARF5TUs =
getGDBIndexTUEntryVector().size() - BC.DwCtx->getNumTypeUnits();
bool SkipTypeUnits = false;
// For DWARF5 Types are in .debug_info.
// LLD doesn't generate Types CU List, and in CU list offset
// only includes CUs.
// GDB 11+ includes only CUs in CU list and generates Types
// list.
// GDB 9 includes CUs and TUs in CU list and generates TYpes
// list. The NumCUs is CUs + TUs, so need to modify the check.
// For split-dwarf
// GDB-11, DWARF5: TU units from dwo are not included.
// GDB-11, DWARF4: TU units from dwo are included.
if (MaxDWARFVersion >= 5)
SkipTypeUnits = !TUListSize ? true
: ((NUmCUsEncoded + NumDWARF5TUs) ==
BC.DwCtx->getNumCompileUnits());
if (!((CUListSize == NumCUs * 16) ||
(CUListSize == (NumCUs + NumDWARF5TUs) * 16))) {
errs() << "BOLT-ERROR: .gdb_index: CU count mismatch\n";
exit(1);
}
DenseSet<uint64_t> OriginalOffsets;
for (unsigned Index = 0, PresentUnitsIndex = 0,
Units = BC.DwCtx->getNumCompileUnits();
Index < Units; ++Index) {
const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
if (SkipTypeUnits && CU->isTypeUnit())
continue;
const uint64_t Offset = read64le(Data);
Data += 16;
if (CU->getOffset() != Offset) {
errs() << "BOLT-ERROR: .gdb_index CU offset mismatch\n";
exit(1);
}
OriginalOffsets.insert(Offset);
OffsetToIndexMap[Offset] = PresentUnitsIndex++;
}
// Ignore old address table.
const uint32_t OldAddressTableSize = SymbolTableOffset - AddressTableOffset;
// Move Data to the beginning of symbol table.
Data += SymbolTableOffset - CUTypesOffset;
// Calculate the size of the new address table.
const auto IsValidAddressRange = [](const DebugAddressRange &Range) {
return Range.HighPC > Range.LowPC;
};
uint32_t NewAddressTableSize = 0;
for (const auto &CURangesPair : ARangesSectionWriter.getCUAddressRanges()) {
const SmallVector<DebugAddressRange, 2> &Ranges = CURangesPair.second;
NewAddressTableSize +=
llvm::count_if(Ranges,
[&IsValidAddressRange](const DebugAddressRange &Range) {
return IsValidAddressRange(Range);
}) *
20;
}
// Difference between old and new table (and section) sizes.
// Could be negative.
int32_t Delta = NewAddressTableSize - OldAddressTableSize;
size_t NewGdbIndexSize = GdbIndexContents.size() + Delta;
// Free'd by ExecutableFileMemoryManager.
auto *NewGdbIndexContents = new uint8_t[NewGdbIndexSize];
uint8_t *Buffer = NewGdbIndexContents;
write32le(Buffer, Version);
write32le(Buffer + 4, CUListOffset);
write32le(Buffer + 8, CUTypesOffset);
write32le(Buffer + 12, AddressTableOffset);
write32le(Buffer + 16, SymbolTableOffset + Delta);
write32le(Buffer + 20, ConstantPoolOffset + Delta);
Buffer += 24;
using MapEntry = std::pair<uint32_t, CUInfo>;
std::vector<MapEntry> CUVector(CUMap.begin(), CUMap.end());
// Remove the CUs we won't emit anyway.
CUVector.erase(std::remove_if(CUVector.begin(), CUVector.end(),
[&OriginalOffsets](const MapEntry &It) {
// Skipping TU for DWARF5 when they are not
// included in CU list.
return OriginalOffsets.count(It.first) == 0;
}),
CUVector.end());
// Need to sort since we write out all of TUs in .debug_info before CUs.
std::sort(CUVector.begin(), CUVector.end(),
[](const MapEntry &E1, const MapEntry &E2) -> bool {
return E1.second.Offset < E2.second.Offset;
});
// Create the original CU index -> updated CU index mapping,
// as the sort above could've changed the order and we have to update
// indices correspondingly in address map and constant pool.
std::unordered_map<uint32_t, uint32_t> OriginalCUIndexToUpdatedCUIndexMap;
OriginalCUIndexToUpdatedCUIndexMap.reserve(CUVector.size());
for (uint32_t I = 0; I < CUVector.size(); ++I) {
OriginalCUIndexToUpdatedCUIndexMap[OffsetToIndexMap.at(CUVector[I].first)] =
I;
}
const auto RemapCUIndex = [&OriginalCUIndexToUpdatedCUIndexMap,
CUVectorSize = CUVector.size(),
TUVectorSize = getGDBIndexTUEntryVector().size()](
uint32_t OriginalIndex) {
if (OriginalIndex >= CUVectorSize) {
if (OriginalIndex >= CUVectorSize + TUVectorSize) {
errs() << "BOLT-ERROR: .gdb_index unknown CU index\n";
exit(1);
}
// The index is into TU CU List, which we don't reorder, so return as is.
return OriginalIndex;
}
const auto It = OriginalCUIndexToUpdatedCUIndexMap.find(OriginalIndex);
if (It == OriginalCUIndexToUpdatedCUIndexMap.end()) {
errs() << "BOLT-ERROR: .gdb_index unknown CU index\n";
exit(1);
}
return It->second;
};
// Writing out CU List <Offset, Size>
for (auto &CUInfo : CUVector) {
write64le(Buffer, CUInfo.second.Offset);
// Length encoded in CU doesn't contain first 4 bytes that encode length.
write64le(Buffer + 8, CUInfo.second.Length + 4);
Buffer += 16;
}
sortGDBIndexTUEntryVector();
// Rewrite TU CU List, since abbrevs can be different.
// Entry example:
// 0: offset = 0x00000000, type_offset = 0x0000001e, type_signature =
// 0x418503b8111e9a7b Spec says " triplet, the first value is the CU offset,
// the second value is the type offset in the CU, and the third value is the
// type signature" Looking at what is being generated by gdb-add-index. The
// first entry is TU offset, second entry is offset from it, and third entry
// is the type signature.
if (TUListSize)
for (const GDBIndexTUEntry &Entry : getGDBIndexTUEntryVector()) {
write64le(Buffer, Entry.UnitOffset);
write64le(Buffer + 8, Entry.TypeDIERelativeOffset);
write64le(Buffer + 16, Entry.TypeHash);
Buffer += sizeof(GDBIndexTUEntry);
}
// Generate new address table.
for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
ARangesSectionWriter.getCUAddressRanges()) {
const uint32_t OriginalCUIndex = OffsetToIndexMap[CURangesPair.first];
const uint32_t UpdatedCUIndex = RemapCUIndex(OriginalCUIndex);
const DebugAddressRangesVector &Ranges = CURangesPair.second;
for (const DebugAddressRange &Range : Ranges) {
// Don't emit ranges that break gdb,
// https://sourceware.org/bugzilla/show_bug.cgi?id=33247.
// We've seen [0, 0) ranges here, for instance.
if (IsValidAddressRange(Range)) {
write64le(Buffer, Range.LowPC);
write64le(Buffer + 8, Range.HighPC);
write32le(Buffer + 16, UpdatedCUIndex);
Buffer += 20;
}
}
}
const size_t TrailingSize =
GdbIndexContents.data() + GdbIndexContents.size() - Data;
assert(Buffer + TrailingSize == NewGdbIndexContents + NewGdbIndexSize &&
"size calculation error");
// Copy over the rest of the original data.
memcpy(Buffer, Data, TrailingSize);
// Fixup CU-indices in constant pool.
const char *const OriginalConstantPoolData =
GdbIndexContents.data() + ConstantPoolOffset;
uint8_t *const UpdatedConstantPoolData =
NewGdbIndexContents + ConstantPoolOffset + Delta;
const char *OriginalSymbolTableData =
GdbIndexContents.data() + SymbolTableOffset;
std::set<uint32_t> CUVectorOffsets;
// Parse the symbol map and extract constant pool CU offsets from it.
while (OriginalSymbolTableData < OriginalConstantPoolData) {
const uint32_t NameOffset = read32le(OriginalSymbolTableData);
const uint32_t CUVectorOffset = read32le(OriginalSymbolTableData + 4);
OriginalSymbolTableData += 8;
// Iff both are zero, then the slot is considered empty in the hash-map.
if (NameOffset || CUVectorOffset) {
CUVectorOffsets.insert(CUVectorOffset);
}
}
// Update the CU-indicies in the constant pool
for (const auto CUVectorOffset : CUVectorOffsets) {
const char *CurrentOriginalConstantPoolData =
OriginalConstantPoolData + CUVectorOffset;
uint8_t *CurrentUpdatedConstantPoolData =
UpdatedConstantPoolData + CUVectorOffset;
const uint32_t Num = read32le(CurrentOriginalConstantPoolData);
CurrentOriginalConstantPoolData += 4;
CurrentUpdatedConstantPoolData += 4;
for (uint32_t J = 0; J < Num; ++J) {
const uint32_t OriginalCUIndexAndAttributes =
read32le(CurrentOriginalConstantPoolData);
CurrentOriginalConstantPoolData += 4;
// We only care for the index, which is the lowest 24 bits, other bits are
// left as is.
const uint32_t OriginalCUIndex =
OriginalCUIndexAndAttributes & ((1 << 24) - 1);
const uint32_t Attributes = OriginalCUIndexAndAttributes >> 24;
const uint32_t UpdatedCUIndexAndAttributes =
RemapCUIndex(OriginalCUIndex) | (Attributes << 24);
write32le(CurrentUpdatedConstantPoolData, UpdatedCUIndexAndAttributes);
CurrentUpdatedConstantPoolData += 4;
}
}
// Register the new section.
BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
NewGdbIndexSize);
}