From 8c0f3b6e8f8db76e4ef47f38fb7b32ba9be1913b Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov <99913765+grigorypas@users.noreply.github.com> Date: Thu, 11 Sep 2025 10:41:11 -0700 Subject: [PATCH] [BOLT] Fix debug line emission for functions in multiple compilation units (#151230) This patch fixes a bug in BOLT's debug line emission where functions that belong to multiple compilation units (such as inline functions in header files) were not handled correctly. Previously, BOLT incorrectly assumed that a binary function could belong to only one compilation unit, leading to incomplete or incorrect debug line information. ### **Problem** When a function appears in multiple compilation units (common scenarios include): * Template instantiated functions * Inline functions defined in header files included by multiple source files BOLT would only emit debug line information for one compilation unit, losing debug information for other CUs where the function was compiled. This resulted in incomplete debugging information and could cause debuggers to fail to set breakpoints or show incorrect source locations. ### **Root Cause** The issue was in BOLT's assumption that each binary function maps to exactly one compilation unit. However, when the same function (e.g., an inline function from a header) is compiled into multiple object files, it legitimately belongs to multiple CUs in the final binary. 
--- bolt/include/bolt/Core/BinaryContext.h | 6 + bolt/include/bolt/Core/BinaryFunction.h | 26 +- bolt/include/bolt/Core/DebugData.h | 113 ++++++-- bolt/lib/Core/BinaryContext.cpp | 55 ++-- bolt/lib/Core/BinaryEmitter.cpp | 194 ++++++++------ bolt/lib/Core/BinaryFunction.cpp | 55 ++-- bolt/lib/Core/DebugData.cpp | 2 - bolt/test/X86/multi-cu-debug-line.s | 327 ++++++++++++++++++++++++ bolt/test/lit.cfg.py | 1 + bolt/test/process-debug-line | 105 ++++++++ bolt/unittests/Core/CMakeLists.txt | 1 + bolt/unittests/Core/ClusteredRows.cpp | 141 ++++++++++ 12 files changed, 870 insertions(+), 156 deletions(-) create mode 100644 bolt/test/X86/multi-cu-debug-line.s create mode 100755 bolt/test/process-debug-line create mode 100644 bolt/unittests/Core/ClusteredRows.cpp diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 91ecf89da618..72c8817daa71 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -288,6 +288,12 @@ public: /// overwritten, but it is okay to re-generate debug info for them. std::set ProcessedCUs; + /// DWARF-related container to manage lifecycle of groups of rows from line + /// tables associated with instructions. Since binary functions can span + /// multiple compilation units, instructions may reference debug line + /// information from multiple CUs. 
+ ClusteredRowsContainer ClusteredRows; + // Setup MCPlus target builder void initializeTarget(std::unique_ptr TargetBuilder) { MIB = std::move(TargetBuilder); diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index b59926cc7557..51b139a15e1a 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -35,6 +35,7 @@ #include "bolt/Core/JumpTable.h" #include "bolt/Core/MCPlus.h" #include "bolt/Utils/NameResolver.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -423,8 +424,9 @@ private: /// Original LSDA type encoding unsigned LSDATypeEncoding{dwarf::DW_EH_PE_omit}; - /// Containing compilation unit for the function. - DWARFUnit *DwarfUnit{nullptr}; + /// All compilation units this function belongs to. + /// Maps DWARF unit offset to the unit pointer. + DenseMap DwarfUnitMap; /// Last computed hash value. Note that the value could be recomputed using /// different parameters by every pass. @@ -2409,15 +2411,21 @@ public: void computeBlockHashes(HashFunction HashFunction = HashFunction::Default) const; - void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; } + void addDWARFUnit(DWARFUnit *Unit) { DwarfUnitMap[Unit->getOffset()] = Unit; } - /// Return DWARF compile unit for this function. - DWARFUnit *getDWARFUnit() const { return DwarfUnit; } + void removeDWARFUnit(DWARFUnit *Unit) { + DwarfUnitMap.erase(Unit->getOffset()); + } - /// Return line info table for this function. - const DWARFDebugLine::LineTable *getDWARFLineTable() const { - return getDWARFUnit() ? BC.DwCtx->getLineTableForUnit(getDWARFUnit()) - : nullptr; + /// Return DWARF compile units for this function. + /// Returns a reference to the map of DWARF unit offsets to units. 
+ const DenseMap &getDWARFUnits() const { + return DwarfUnitMap; + } + + const DWARFDebugLine::LineTable * + getDWARFLineTableForUnit(DWARFUnit *Unit) const { + return BC.DwCtx->getLineTableForUnit(Unit); } /// Finalize profile for the function. diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h index 6ea3b1af1024..814978965ce3 100644 --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -135,8 +135,6 @@ struct DebugLineTableRowRef { uint32_t DwCompileUnitIndex; uint32_t RowIndex; - const static DebugLineTableRowRef NULL_ROW; - bool operator==(const DebugLineTableRowRef &Rhs) const { return DwCompileUnitIndex == Rhs.DwCompileUnitIndex && RowIndex == Rhs.RowIndex; @@ -145,24 +143,6 @@ struct DebugLineTableRowRef { bool operator!=(const DebugLineTableRowRef &Rhs) const { return !(*this == Rhs); } - - static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) { - union { - decltype(Loc.getPointer()) Ptr; - DebugLineTableRowRef Ref; - } U; - U.Ptr = Loc.getPointer(); - return U.Ref; - } - - SMLoc toSMLoc() const { - union { - decltype(SMLoc().getPointer()) Ptr; - DebugLineTableRowRef Ref; - } U; - U.Ref = *this; - return SMLoc::getFromPointer(U.Ptr); - } }; /// Common buffer vector used for debug info handling. @@ -210,7 +190,7 @@ public: static bool classof(const DebugRangesSectionWriter *Writer) { return Writer->getKind() == RangesWriterKind::DebugRangesWriter; } - + /// Append a range to the main buffer. void appendToRangeBuffer(const DebugBufferVector &CUBuffer); @@ -852,6 +832,97 @@ public: // Returns DWARF Version for this line table. uint16_t getDwarfVersion() const { return DwarfVersion; } }; + +/// ClusteredRows represents a collection of debug line table row references. +/// +/// MEMORY LAYOUT AND DESIGN: +/// This class uses a flexible array member pattern to store all +/// DebugLineTableRowRef elements in a single contiguous memory allocation. 
+/// The memory layout is: +/// +/// +------------------+ +/// | ClusteredRows | <- Object header (Size + first element) +/// | - Size | +/// | - Rows (element) | <- First DebugLineTableRowRef element +/// +------------------+ +/// | element[1] | <- Additional DebugLineTableRowRef elements +/// | element[2] | stored immediately after the object +/// | ... | +/// | element[Size-1] | +/// +------------------+ +/// +/// The 'Rows' member serves as both the first element storage and the base +/// address for pointer arithmetic to access subsequent elements. +class ClusteredRows { +public: + ArrayRef getRows() const { + return ArrayRef(beginPtrConst(), Size); + } + + /// Returns the number of elements in the array. + uint64_t size() const { return Size; } + + /// We re-purpose SMLoc inside MCInst to store the pointer + /// to ClusteredRows. fromSMLoc() and toSMLoc() are helper + /// functions to convert between SMLoc and ClusteredRows. + + static const ClusteredRows *fromSMLoc(const SMLoc &Loc) { + return reinterpret_cast(Loc.getPointer()); + } + SMLoc toSMLoc() const { + return SMLoc::getFromPointer(reinterpret_cast(this)); + } + + /// Given a vector of DebugLineTableRowRef, this method + /// copies the elements into pre-allocated memory. + template void populate(const T Vec) { + assert(Vec.size() == Size && "Sizes must match"); + DebugLineTableRowRef *CurRawPtr = beginPtr(); + for (DebugLineTableRowRef RowRef : Vec) { + *CurRawPtr = RowRef; + ++CurRawPtr; + } + } + +private: + uint64_t Size; + DebugLineTableRowRef Rows; + + ClusteredRows(uint64_t Size) : Size(Size) {} + + /// Total size of the object including the array. 
+ static uint64_t getTotalSize(uint64_t Size) { + assert(Size > 0 && "Size must be greater than 0"); + return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef); + } + const DebugLineTableRowRef *beginPtrConst() const { + return reinterpret_cast(&Rows); + } + DebugLineTableRowRef *beginPtr() { + return reinterpret_cast(&Rows); + } + + friend class ClusteredRowsContainer; +}; + +/// ClusteredRowsContainer manages the lifecycle of ClusteredRows objects. +class ClusteredRowsContainer { +public: + ClusteredRows *createClusteredRows(uint64_t Size) { + auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size))) + ClusteredRows(Size); + Clusters.push_back(CR); + return CR; + } + ~ClusteredRowsContainer() { + for (auto *CR : Clusters) + std::free(CR); + } + +private: + std::vector Clusters; +}; + } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 23a5a65c2c5f..6d16edfff73d 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1693,22 +1693,39 @@ void BinaryContext::preprocessDebugInfo() { auto It = llvm::partition_point( AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); - if (It != AllRanges.end() && It->LowPC <= FunctionAddress) - Function.setDWARFUnit(It->Unit); + if (It == AllRanges.end() || It->LowPC > FunctionAddress) { + continue; + } + Function.addDWARFUnit(It->Unit); + + // Go forward and add all units from ranges that cover the function. + while (++It != AllRanges.end()) { + if (It->LowPC > FunctionAddress || FunctionAddress >= It->HighPC) + break; + Function.addDWARFUnit(It->Unit); + } } // Discover units with debug info that needs to be updated. 
for (const auto &KV : BinaryFunctions) { const BinaryFunction &BF = KV.second; - if (shouldEmit(BF) && BF.getDWARFUnit()) - ProcessedCUs.insert(BF.getDWARFUnit()); + if (shouldEmit(BF) && !BF.getDWARFUnits().empty()) + for (const auto &[_, Unit] : BF.getDWARFUnits()) + ProcessedCUs.insert(Unit); } - // Clear debug info for functions from units that we are not going to process. for (auto &KV : BinaryFunctions) { BinaryFunction &BF = KV.second; - if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) - BF.setDWARFUnit(nullptr); + // Collect units to remove to avoid iterator invalidation + SmallVector UnitsToRemove; + for (const auto &[_, Unit] : BF.getDWARFUnits()) { + if (!ProcessedCUs.count(Unit)) + UnitsToRemove.push_back(Unit); + } + // Remove the collected units + for (auto *Unit : UnitsToRemove) { + BF.removeDWARFUnit(Unit); + } } if (opts::Verbosity >= 1) { @@ -1903,23 +1920,23 @@ bool BinaryContext::isMarker(const SymbolRef &Symbol) const { static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, const BinaryFunction *Function, DWARFContext *DwCtx) { - DebugLineTableRowRef RowRef = - DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); - if (RowRef == DebugLineTableRowRef::NULL_ROW) + const ClusteredRows *LineTableRows = + ClusteredRows::fromSMLoc(Instruction.getLoc()); + if (LineTableRows == nullptr) return; - const DWARFDebugLine::LineTable *LineTable; - if (Function && Function->getDWARFUnit() && - Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { - LineTable = Function->getDWARFLineTable(); - } else { - LineTable = DwCtx->getLineTableForUnit( - DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); - } - assert(LineTable && "line table expected for instruction with debug info"); + // File name and line number should be the same for all CUs. + // So it is sufficient to check the first one. 
+ DebugLineTableRowRef RowRef = LineTableRows->getRows().front(); + const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit( + DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); + + if (!LineTable) + return; const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; StringRef FileName = ""; + if (std::optional FName = dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name)) FileName = *FName; diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 7b5cd276fee8..7aaf721da976 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -177,7 +177,8 @@ private: /// Note that it does not automatically result in the insertion of the EOS /// marker in the line table program, but provides one to the DWARF generator /// when it needs it. - void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol); + void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol, + const DWARFUnit &Unit); /// Emit debug line info for unprocessed functions from CUs that include /// emitted functions. @@ -436,8 +437,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, Streamer.emitELFSize(StartSymbol, SizeExpr); } - if (opts::UpdateDebugSections && Function.getDWARFUnit()) - emitLineInfoEnd(Function, EndSymbol); + if (opts::UpdateDebugSections && !Function.getDWARFUnits().empty()) + for (const auto &[_, Unit] : Function.getDWARFUnits()) + emitLineInfoEnd(Function, EndSymbol, *Unit); // Exception handling info for the function. emitLSDA(Function, FF); @@ -486,7 +488,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, // A symbol to be emitted before the instruction to mark its location. 
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr); - if (opts::UpdateDebugSections && BF.getDWARFUnit()) { + if (opts::UpdateDebugSections && !BF.getDWARFUnits().empty()) { LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen, FirstInstr, InstrLabel); FirstInstr = false; @@ -679,74 +681,100 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart, SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, SMLoc PrevLoc, bool FirstInstr, MCSymbol *&InstrLabel) { - DWARFUnit *FunctionCU = BF.getDWARFUnit(); - const DWARFDebugLine::LineTable *FunctionLineTable = BF.getDWARFLineTable(); - assert(FunctionCU && "cannot emit line info for function without CU"); - - DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc); - - // Check if no new line info needs to be emitted. - if (RowReference == DebugLineTableRowRef::NULL_ROW || + if (NewLoc.getPointer() == nullptr || NewLoc.getPointer() == PrevLoc.getPointer()) return PrevLoc; + const ClusteredRows *Cluster = ClusteredRows::fromSMLoc(NewLoc); - unsigned CurrentFilenum = 0; - const DWARFDebugLine::LineTable *CurrentLineTable = FunctionLineTable; + auto addToLineTable = [&](DebugLineTableRowRef RowReference, + const DWARFUnit &TargetCU, unsigned Flags, + MCSymbol &InstrLabel, + const DWARFDebugLine::Row &CurrentRow) { + const uint64_t TargetUnitIndex = TargetCU.getOffset(); + unsigned TargetFilenum = CurrentRow.File; + const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex; + // If the CU id from the current instruction location does not + // match the target CU id, it means that we have come across some + // inlined code (by BOLT). We must look up the CU for the instruction's + // original function and get the line table from that. + if (TargetUnitIndex != CurrentUnitIndex) { + // Add filename from the inlined function to the current CU. 
+ TargetFilenum = BC.addDebugFilenameToUnit( + TargetUnitIndex, CurrentUnitIndex, CurrentRow.File); + } + BC.Ctx->setCurrentDwarfLoc(TargetFilenum, CurrentRow.Line, + CurrentRow.Column, Flags, CurrentRow.Isa, + CurrentRow.Discriminator); + const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); + BC.Ctx->clearDwarfLocSeen(); + const MCLineSection::MCLineDivisionMap &MapLineEntries = + BC.getDwarfLineTable(TargetUnitIndex) + .getMCLineSections() + .getMCLineEntries(); + const auto *It = MapLineEntries.find(Streamer.getCurrentSectionOnly()); + MCDwarfLineEntry NewLineEntry = MCDwarfLineEntry(&InstrLabel, DwarfLoc); - // If the CU id from the current instruction location does not - // match the CU id from the current function, it means that we - // have come across some inlined code. We must look up the CU - // for the instruction's original function and get the line table - // from that. - const uint64_t FunctionUnitIndex = FunctionCU->getOffset(); - const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex; - if (CurrentUnitIndex != FunctionUnitIndex) { - CurrentLineTable = BC.DwCtx->getLineTableForUnit( - BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex)); - // Add filename from the inlined function to the current CU. - CurrentFilenum = BC.addDebugFilenameToUnit( - FunctionUnitIndex, CurrentUnitIndex, - CurrentLineTable->Rows[RowReference.RowIndex - 1].File); - } + // Check if line table exists and has entries before doing comparison. + if (It != MapLineEntries.end() && !It->second.empty()) { + // Check if the new line entry has the same debug info as the last one + // to avoid duplicates. We don't compare labels since different + // instructions can have the same line info. 
+ const auto &LastEntry = It->second.back(); + if (LastEntry.getFileNum() == NewLineEntry.getFileNum() && + LastEntry.getLine() == NewLineEntry.getLine() && + LastEntry.getColumn() == NewLineEntry.getColumn() && + LastEntry.getFlags() == NewLineEntry.getFlags() && + LastEntry.getIsa() == NewLineEntry.getIsa() && + LastEntry.getDiscriminator() == NewLineEntry.getDiscriminator()) + return; + } - const DWARFDebugLine::Row &CurrentRow = - CurrentLineTable->Rows[RowReference.RowIndex - 1]; - if (!CurrentFilenum) - CurrentFilenum = CurrentRow.File; - - unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) | - (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) | - (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) | - (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin); - - // Always emit is_stmt at the beginning of function fragment. - if (FirstInstr) - Flags |= DWARF2_FLAG_IS_STMT; - - BC.Ctx->setCurrentDwarfLoc(CurrentFilenum, CurrentRow.Line, CurrentRow.Column, - Flags, CurrentRow.Isa, CurrentRow.Discriminator); - const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); - BC.Ctx->clearDwarfLocSeen(); + BC.getDwarfLineTable(TargetUnitIndex) + .getMCLineSections() + .addLineEntry(NewLineEntry, Streamer.getCurrentSectionOnly()); + }; if (!InstrLabel) InstrLabel = BC.Ctx->createTempSymbol(); + for (DebugLineTableRowRef RowReference : Cluster->getRows()) { + const DWARFDebugLine::LineTable *CurrentLineTable = + BC.DwCtx->getLineTableForUnit( + BC.DwCtx->getCompileUnitForOffset(RowReference.DwCompileUnitIndex)); + const DWARFDebugLine::Row &CurrentRow = + CurrentLineTable->Rows[RowReference.RowIndex - 1]; + unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) | + (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) | + (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) | + (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin); - BC.getDwarfLineTable(FunctionUnitIndex) - .getMCLineSections() - .addLineEntry(MCDwarfLineEntry(InstrLabel, DwarfLoc), - 
Streamer.getCurrentSectionOnly()); + // Always emit is_stmt at the beginning of function fragment. + if (FirstInstr) + Flags |= DWARF2_FLAG_IS_STMT; + const auto &FunctionDwarfUnits = BF.getDWARFUnits(); + auto It = FunctionDwarfUnits.find(RowReference.DwCompileUnitIndex); + if (It != FunctionDwarfUnits.end()) { + addToLineTable(RowReference, *It->second, Flags, *InstrLabel, CurrentRow); + continue; + } + // This row is from a CU that did not contain the original function. + // This might happen if BOLT moved/inlined that instruction from other CUs. + // In this case, we need to insert it into all CUs that the function + // originally belonged to. + for (const auto &[_, Unit] : BF.getDWARFUnits()) { + addToLineTable(RowReference, *Unit, Flags, *InstrLabel, CurrentRow); + } + } return NewLoc; } void BinaryEmitter::emitLineInfoEnd(const BinaryFunction &BF, - MCSymbol *FunctionEndLabel) { - DWARFUnit *FunctionCU = BF.getDWARFUnit(); - assert(FunctionCU && "DWARF unit expected"); + MCSymbol *FunctionEndLabel, + const DWARFUnit &Unit) { BC.Ctx->setCurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_END_SEQUENCE, 0, 0); const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); BC.Ctx->clearDwarfLocSeen(); - BC.getDwarfLineTable(FunctionCU->getOffset()) + BC.getDwarfLineTable(Unit.getOffset()) .getMCLineSections() .addLineEntry(MCDwarfLineEntry(FunctionEndLabel, DwarfLoc), Streamer.getCurrentSectionOnly()); @@ -1115,36 +1143,40 @@ void BinaryEmitter::emitDebugLineInfoForOriginalFunctions() { if (Function.isEmitted()) continue; - const DWARFDebugLine::LineTable *LineTable = Function.getDWARFLineTable(); - if (!LineTable) - continue; // nothing to update for this function + // Loop through all CUs in the function + for (const auto &[_, Unit] : Function.getDWARFUnits()) { + const DWARFDebugLine::LineTable *LineTable = + Function.getDWARFLineTableForUnit(Unit); + if (!LineTable) + continue; // nothing to update for this unit - const uint64_t Address = Function.getAddress(); - std::vector 
Results; - if (!LineTable->lookupAddressRange( - {Address, object::SectionedAddress::UndefSection}, - Function.getSize(), Results)) - continue; + const uint64_t Address = Function.getAddress(); + std::vector Results; + if (!LineTable->lookupAddressRange( + {Address, object::SectionedAddress::UndefSection}, + Function.getSize(), Results)) + continue; - if (Results.empty()) - continue; + if (Results.empty()) + continue; - // The first row returned could be the last row matching the start address. - // Find the first row with the same address that is not the end of the - // sequence. - uint64_t FirstRow = Results.front(); - while (FirstRow > 0) { - const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1]; - if (PrevRow.Address.Address != Address || PrevRow.EndSequence) - break; - --FirstRow; + // The first row returned could be the last row matching the start + // address. Find the first row with the same address that is not the end + // of the sequence. + uint64_t FirstRow = Results.front(); + while (FirstRow > 0) { + const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1]; + if (PrevRow.Address.Address != Address || PrevRow.EndSequence) + break; + --FirstRow; + } + + const uint64_t EndOfSequenceAddress = + Function.getAddress() + Function.getMaxSize(); + BC.getDwarfLineTable(Unit->getOffset()) + .addLineTableSequence(LineTable, FirstRow, Results.back(), + EndOfSequenceAddress); } - - const uint64_t EndOfSequenceAddress = - Function.getAddress() + Function.getMaxSize(); - BC.getDwarfLineTable(Function.getDWARFUnit()->getOffset()) - .addLineTableSequence(LineTable, FirstRow, Results.back(), - EndOfSequenceAddress); } // For units that are completely unprocessed, use original debug line contents diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 6cac2d0cca2c..578a87dc6c09 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -179,37 +179,29 @@ template static bool 
emptyRange(const R &Range) { } /// Gets debug line information for the instruction located at the given -/// address in the original binary. The SMLoc's pointer is used -/// to point to this information, which is represented by a -/// DebugLineTableRowRef. The returned pointer is null if no debug line -/// information for this instruction was found. -static SMLoc findDebugLineInformationForInstructionAt( +/// address in the original binary. Returns an optional DebugLineTableRowRef +/// that references the corresponding row in the DWARF line table. Since binary +/// functions can span multiple compilation units, this function helps +/// associate instructions with their debug line information from the +/// appropriate CU. Returns std::nullopt if no debug line information for +/// this instruction was found. +static std::optional +findDebugLineInformationForInstructionAt( uint64_t Address, DWARFUnit *Unit, const DWARFDebugLine::LineTable *LineTable) { - // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef, - // which occupies 64 bits. Thus, we can only proceed if the struct fits into - // the pointer itself. 
- static_assert( - sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef), - "Cannot fit instruction debug line information into SMLoc's pointer"); - - SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc(); uint32_t RowIndex = LineTable->lookupAddress( {Address, object::SectionedAddress::UndefSection}); if (RowIndex == LineTable->UnknownRowIndex) - return NullResult; + return std::nullopt; assert(RowIndex < LineTable->Rows.size() && "Line Table lookup returned invalid index."); - decltype(SMLoc().getPointer()) Ptr; - DebugLineTableRowRef *InstructionLocation = - reinterpret_cast(&Ptr); + DebugLineTableRowRef InstructionLocation; + InstructionLocation.DwCompileUnitIndex = Unit->getOffset(); + InstructionLocation.RowIndex = RowIndex + 1; - InstructionLocation->DwCompileUnitIndex = Unit->getOffset(); - InstructionLocation->RowIndex = RowIndex + 1; - - return SMLoc::getFromPointer(Ptr); + return InstructionLocation; } static std::string buildSectionName(StringRef Prefix, StringRef Name, @@ -1496,9 +1488,24 @@ Error BinaryFunction::disassemble() { } add_instruction: - if (getDWARFLineTable()) { - Instruction.setLoc(findDebugLineInformationForInstructionAt( - AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable())); + if (!getDWARFUnits().empty()) { + SmallVector Rows; + for (const auto &[_, Unit] : getDWARFUnits()) { + const DWARFDebugLine::LineTable *LineTable = + getDWARFLineTableForUnit(Unit); + if (!LineTable) + continue; + if (std::optional RowRef = + findDebugLineInformationForInstructionAt(AbsoluteInstrAddr, + Unit, LineTable)) + Rows.emplace_back(*RowRef); + } + if (!Rows.empty()) { + ClusteredRows *Cluster = + BC.ClusteredRows.createClusteredRows(Rows.size()); + Cluster->populate(Rows); + Instruction.setLoc(Cluster->toSMLoc()); + } } // Record offset of the instruction for profile matching. 
diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp index 521eb8d91bbc..e05f28f08572 100644 --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -101,8 +101,6 @@ std::optional findAttributeInfo(const DWARFDie DIE, return findAttributeInfo(DIE, AbbrevDecl, *Index); } -const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0}; - LLVM_ATTRIBUTE_UNUSED static void printLE64(const std::string &S) { for (uint32_t I = 0, Size = S.size(); I < Size; ++I) { diff --git a/bolt/test/X86/multi-cu-debug-line.s b/bolt/test/X86/multi-cu-debug-line.s new file mode 100644 index 000000000000..15f49a211e58 --- /dev/null +++ b/bolt/test/X86/multi-cu-debug-line.s @@ -0,0 +1,327 @@ +## Test that BOLT correctly handles debug line information for functions +## that belong to multiple compilation units (e.g., inline functions in +## common header files). This is the assembly version of the multi-cu-debug-line.test. +## The test covers two scenarios: +## 1. Normal processing: .debug_line section shows lines for the function +## in all CUs where it was compiled, with no duplicate rows within CUs +## 2. 
Functions not processed: When BOLT doesn't process functions (using +## --funcs with nonexistent function), original debug info is preserved + +# REQUIRES: system-linux + +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/multi-cu-file1.s -o %t/multi-cu-file1.o +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/multi-cu-file2.s -o %t/multi-cu-file2.o +# RUN: %clang %cflags %t/multi-cu-file1.o %t/multi-cu-file2.o -o %t.exe -Wl,-q + +## Test 1: Normal BOLT processing (functions are processed/optimized) +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-line %t.bolt > %t.debug-line.txt +# RUN: FileCheck %s --check-prefix=BASIC --input-file %t.debug-line.txt + +## Check that debug line information is present for both compilation units +# BASIC: debug_line[{{.*}}] +# BASIC: file_names[{{.*}}]: +# BASIC: name: "{{.*}}multi-cu-file1.c" +# BASIC: debug_line[{{.*}}] +# BASIC: file_names[{{.*}}]: +# BASIC: name: "{{.*}}multi-cu-file2.c" + +## Use our helper script to create a normalized table without addresses +# RUN: process-debug-line %t.debug-line.txt > %t.normalized-debug-line.txt +# RUN: FileCheck %s --check-prefix=NORMALIZED --input-file %t.normalized-debug-line.txt + +## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h +## in both compilation units +# NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h + +## Verify that we have line entries for the inline function in multiple CUs +## by checking that the header file appears multiple times in different contexts +# RUN: grep -c "multi-cu-common.h" %t.debug-line.txt > 
%t.header-count.txt +# RUN: FileCheck %s --check-prefix=MULTI-CU --input-file %t.header-count.txt + +## The header should appear in debug line info for multiple CUs +# MULTI-CU: {{[2-9]|[1-9][0-9]+}} + +## Check that there are no duplicate line table rows within the same CU +## This verifies the fix for the bug where duplicate entries were created +# RUN: sort %t.normalized-debug-line.txt | uniq -c | \ +# RUN: awk '$1 > 1 {print "DUPLICATE_ROW: " $0}' > %t.duplicates.txt +# RUN: FileCheck %s --check-prefix=NO-DUPLICATES --input-file %t.duplicates.txt --allow-empty + +## Should have no duplicate normalized rows (file should be empty) +## Note: Cross-CU duplicates are expected and valid (same function in different CUs) +## but within-CU duplicates would indicate a bug +# NO-DUPLICATES-NOT: DUPLICATE_ROW + +## Test 2: Functions not processed by BOLT (using --funcs with nonexistent function) +## This tests the code path where BOLT preserves original debug info +# RUN: llvm-bolt %t.exe -o %t.not-emitted.bolt --update-debug-sections --funcs=nonexistent_function +# RUN: llvm-dwarfdump --debug-line %t.not-emitted.bolt > %t.not-emitted.debug-line.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-BASIC --input-file %t.not-emitted.debug-line.txt + +## Check that debug line information is still present for both compilation units when functions aren't processed +# PRESERVED-BASIC: debug_line[{{.*}}] +# PRESERVED-BASIC: file_names[{{.*}}]: +# PRESERVED-BASIC: name: "{{.*}}multi-cu-file1.c" +# PRESERVED-BASIC: debug_line[{{.*}}] +# PRESERVED-BASIC: file_names[{{.*}}]: +# PRESERVED-BASIC: name: "{{.*}}multi-cu-file2.c" + +## Create normalized output for the not-emitted case +# RUN: process-debug-line %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-NORMALIZED --input-file %t.not-emitted.normalized.txt + +## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h +## in both 
compilation units (preserved from original)
+# PRESERVED-NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h
+# PRESERVED-NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h
+
+## Verify that we have line entries for the inline function in multiple CUs (preserved)
+## by checking that the header file appears multiple times in different contexts
+# RUN: grep -c "multi-cu-common.h" %t.not-emitted.debug-line.txt > %t.preserved-header-count.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-MULTI-CU --input-file %t.preserved-header-count.txt
+
+## The header should appear in debug line info for multiple CUs (preserved from original)
+# PRESERVED-MULTI-CU: {{[2-9]|[1-9][0-9]+}}
+
+## Check that original debug info is preserved for main functions
+# RUN: grep "multi-cu-file1.c.*multi-cu-file1.c" %t.not-emitted.normalized.txt > %t.preserved-main.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-MAIN --input-file %t.preserved-main.txt
+
+# PRESERVED-MAIN: multi-cu-file1.c {{[0-9]+}} {{[0-9]+}} multi-cu-file1.c
+
+## Check that original debug info is preserved for file2 functions
+# RUN: grep "multi-cu-file2.c.*multi-cu-file2.c" %t.not-emitted.normalized.txt > %t.preserved-file2.txt
+# RUN: FileCheck %s --check-prefix=PRESERVED-FILE2 --input-file %t.preserved-file2.txt
+
+# PRESERVED-FILE2: multi-cu-file2.c {{[0-9]+}} {{[0-9]+}} multi-cu-file2.c
+
+;--- multi-cu-file1.s
+        .text # first of two CUs: main() plus its own copy of common_inline_function()
+        .file 1 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-file1.c"
+        .file 2 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-common.h"
+
+        .globl main
+        .type main,@function
+main:
+.Lfunc_begin0:
+        .loc 1 4 0 # file index 1 (multi-cu-file1.c), line 4, column 0
+        callq common_inline_function
+        .loc 1 8 0 # file index 1, line 8
+        retq
+.Lfunc_end0:
+        .size main, .Lfunc_end0-main
+
+        .type common_inline_function,@function # no .globl, so the symbol stays local to this object
+common_inline_function:
+.Lfunc_begin1:
+        .loc 2 5 0 # file index 2 (multi-cu-common.h), line 5
+        movl $42, %eax
+        .loc 2 6 0 # header line 6
+        addl $10, %eax
+        .loc 2 7 0 # header line 7
+        retq
+.Lfunc_end1:
+        .size common_inline_function, .Lfunc_end1-common_inline_function
+
+        .section .debug_abbrev,"",@progbits
+        .byte 1 # Abbreviation Code
+        .byte 17 # DW_TAG_compile_unit
+        .byte 1 # DW_CHILDREN_yes
+        .byte 37 # DW_AT_producer
+        .byte 14 # DW_FORM_strp
+        .byte 19 # DW_AT_language
+        .byte 5 # DW_FORM_data2
+        .byte 3 # DW_AT_name
+        .byte 14 # DW_FORM_strp
+        .byte 16 # DW_AT_stmt_list
+        .byte 23 # DW_FORM_sec_offset
+        .byte 27 # DW_AT_comp_dir
+        .byte 14 # DW_FORM_strp
+        .byte 17 # DW_AT_low_pc
+        .byte 1 # DW_FORM_addr
+        .byte 18 # DW_AT_high_pc
+        .byte 6 # DW_FORM_data4 (constant form: high_pc is an offset from low_pc)
+        .byte 0 # EOM(1)
+        .byte 0 # EOM(2)
+        .byte 2 # Abbreviation Code
+        .byte 46 # DW_TAG_subprogram
+        .byte 0 # DW_CHILDREN_no
+        .byte 17 # DW_AT_low_pc
+        .byte 1 # DW_FORM_addr
+        .byte 18 # DW_AT_high_pc
+        .byte 6 # DW_FORM_data4 (constant form: high_pc is an offset from low_pc)
+        .byte 3 # DW_AT_name
+        .byte 14 # DW_FORM_strp
+        .byte 58 # DW_AT_decl_file
+        .byte 11 # DW_FORM_data1
+        .byte 59 # DW_AT_decl_line
+        .byte 11 # DW_FORM_data1
+        .byte 0 # EOM(1)
+        .byte 0 # EOM(2)
+        .byte 0 # EOM(3) (ends the abbreviation table)
+
+        .section .debug_info,"",@progbits
+.Lcu_begin0:
+        .long .Ldebug_info_end0-.Ldebug_info_start0
+.Ldebug_info_start0:
+        .short 4 # DWARF version number
+        .long .debug_abbrev # Offset Into Abbrev. Section
+        .byte 8 # Address Size (in bytes)
+        .byte 1 # Abbrev [1] 0xb:0x30 DW_TAG_compile_unit
+        .long .Linfo_string0 # DW_AT_producer
+        .short 29 # DW_AT_language (29 = 0x1d, DW_LANG_C11)
+        .long .Linfo_string1 # DW_AT_name
+        .long .Lline_table_start0 # DW_AT_stmt_list
+        .long .Linfo_string2 # DW_AT_comp_dir
+        .quad .Lfunc_begin0 # DW_AT_low_pc
+        .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc (size from main through common_inline_function)
+        .byte 2 # Abbrev [2] 0x2a:0x10 DW_TAG_subprogram
+        .quad .Lfunc_begin0 # DW_AT_low_pc
+        .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+        .long .Linfo_string3 # DW_AT_name
+        .byte 1 # DW_AT_decl_file (file index 1)
+        .byte 4 # DW_AT_decl_line
+        .byte 2 # Abbrev [2] 0x3a:0x10 DW_TAG_subprogram
+        .quad .Lfunc_begin1 # DW_AT_low_pc
+        .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc
+        .long .Linfo_string4 # DW_AT_name
+        .byte 2 # DW_AT_decl_file (file index 2, the shared header)
+        .byte 5 # DW_AT_decl_line
+        .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+
+        .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+        .asciz "clang version 18.0.0"
+.Linfo_string1:
+        .asciz "/repo/llvm-project/bolt/test/Inputs/multi-cu-file1.c"
+.Linfo_string2:
+        .asciz "/repo/llvm-project"
+.Linfo_string3:
+        .asciz "main"
+.Linfo_string4:
+        .asciz "common_inline_function"
+
+        .section .debug_line,"",@progbits
+.Lline_table_start0: # NOTE(review): no bytes are written by hand here; presumably the assembler builds the line table from the .file/.loc directives - confirm
+
+;--- multi-cu-file2.s
+        .text # second of two CUs: helper_function() plus its own copy of common_inline_function()
+        .file 1 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-file2.c"
+        .file 2 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-common.h"
+
+        .globl helper_function
+        .type helper_function,@function
+helper_function:
+.Lfunc_begin0:
+        .loc 1 4 0 # file index 1 (multi-cu-file2.c), line 4, column 0
+        callq common_inline_function
+        .loc 1 8 0 # file index 1, line 8
+        retq
+.Lfunc_end0:
+        .size helper_function, .Lfunc_end0-helper_function
+
+        .type common_inline_function,@function # no .globl, so the symbol stays local to this object
+common_inline_function:
+.Lfunc_begin1:
+        .loc 2 5 0 # file index 2 (multi-cu-common.h), line 5
+        movl $42, %eax
+        .loc 2 6 0 # header line 6
+        addl $10, %eax
+        .loc 2 7 0 # header line 7
+        retq
+.Lfunc_end1:
+        .size common_inline_function, .Lfunc_end1-common_inline_function
+
+        .section .debug_abbrev,"",@progbits
+        .byte 1 # Abbreviation Code
+        .byte 17 # DW_TAG_compile_unit
+        .byte 1 # DW_CHILDREN_yes
+        .byte 37 # DW_AT_producer
+        .byte 14 # DW_FORM_strp
+        .byte 19 # DW_AT_language
+        .byte 5 # DW_FORM_data2
+        .byte 3 # DW_AT_name
+        .byte 14 # DW_FORM_strp
+        .byte 16 # DW_AT_stmt_list
+        .byte 23 # DW_FORM_sec_offset
+        .byte 27 # DW_AT_comp_dir
+        .byte 14 # DW_FORM_strp
+        .byte 17 # DW_AT_low_pc
+        .byte 1 # DW_FORM_addr
+        .byte 18 # DW_AT_high_pc
+        .byte 6 # DW_FORM_data4 (constant form: high_pc is an offset from low_pc)
+        .byte 0 # EOM(1)
+        .byte 0 # EOM(2)
+        .byte 2 # Abbreviation Code
+        .byte 46 # DW_TAG_subprogram
+        .byte 0 # DW_CHILDREN_no
+        .byte 17 # DW_AT_low_pc
+        .byte 1 # DW_FORM_addr
+        .byte 18 # DW_AT_high_pc
+        .byte 6 # DW_FORM_data4 (constant form: high_pc is an offset from low_pc)
+        .byte 3 # DW_AT_name
+        .byte 14 # DW_FORM_strp
+        .byte 58 # DW_AT_decl_file
+        .byte 11 # DW_FORM_data1
+        .byte 59 # DW_AT_decl_line
+        .byte 11 # DW_FORM_data1
+        .byte 0 # EOM(1)
+        .byte 0 # EOM(2)
+        .byte 0 # EOM(3) (ends the abbreviation table)
+
+        .section .debug_info,"",@progbits
+.Lcu_begin0:
+        .long .Ldebug_info_end0-.Ldebug_info_start0
+.Ldebug_info_start0:
+        .short 4 # DWARF version number
+        .long .debug_abbrev # Offset Into Abbrev.
Section
+        .byte 8 # Address Size (in bytes)
+        .byte 1 # Abbrev [1] 0xb:0x30 DW_TAG_compile_unit
+        .long .Linfo_string0 # DW_AT_producer
+        .short 29 # DW_AT_language
+        .long .Linfo_string1 # DW_AT_name
+        .long .Lline_table_start0 # DW_AT_stmt_list
+        .long .Linfo_string2 # DW_AT_comp_dir
+        .quad .Lfunc_begin0 # DW_AT_low_pc
+        .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc
+        .byte 2 # Abbrev [2] 0x2a:0x10 DW_TAG_subprogram
+        .quad .Lfunc_begin0 # DW_AT_low_pc
+        .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+        .long .Linfo_string3 # DW_AT_name
+        .byte 1 # DW_AT_decl_file
+        .byte 4 # DW_AT_decl_line
+        .byte 2 # Abbrev [2] 0x3a:0x10 DW_TAG_subprogram
+        .quad .Lfunc_begin1 # DW_AT_low_pc
+        .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc
+        .long .Linfo_string4 # DW_AT_name
+        .byte 2 # DW_AT_decl_file
+        .byte 5 # DW_AT_decl_line
+        .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+
+        .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+        .asciz "clang version 18.0.0"
+.Linfo_string1:
+        .asciz "/repo/llvm-project/bolt/test/Inputs/multi-cu-file2.c"
+.Linfo_string2:
+        .asciz "/repo/llvm-project"
+.Linfo_string3:
+        .asciz "helper_function"
+.Linfo_string4:
+        .asciz "common_inline_function"
+
+        .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py
index bef570ba50a0..3299051db498 100644
--- a/bolt/test/lit.cfg.py
+++ b/bolt/test/lit.cfg.py
@@ -138,6 +138,7 @@ tools = [
         unresolved="fatal",
         extra_args=[link_fdata_cmd],
     ),
+    ToolSubst("process-debug-line", unresolved="fatal"),
     ToolSubst("merge-fdata", unresolved="fatal"),
     ToolSubst("llvm-readobj", unresolved="fatal"),
     ToolSubst("llvm-dwp", unresolved="fatal"),
diff --git a/bolt/test/process-debug-line b/bolt/test/process-debug-line
new file mode 100755
index 000000000000..44cbcd1e5984
--- /dev/null
+++ b/bolt/test/process-debug-line
@@ -0,0 +1,105 @@
+#!/bin/sh
+
+# Script to process llvm-dwarfdump --debug-line output and create a normalized table
+# Usage: process-debug-line <debug_line_file>
+#
+# Output format: CU_FILE LINE COLUMN FILE_NAME [additional_info]
+# This strips addresses to make rows unique and adds context about which CU and file each line belongs to
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 <debug_line_file>" >&2
+    exit 1
+fi
+
+debug_line_file="$1"
+
+if [ ! -f "$debug_line_file" ]; then
+    echo "Error: File '$debug_line_file' not found" >&2
+    exit 1
+fi
+
+awk '
+BEGIN {
+    current_cu_file = ""
+    # current_file_names maps a file index, as printed by llvm-dwarfdump,
+    # to the file name for the CU being parsed. awk creates array entries
+    # on demand and a missing entry reads as "", so nothing needs to be
+    # pre-filled; the array is emptied whenever a new CU starts.
+    split("", current_file_names)
+}
+
+# Track debug_line sections (new CU)
+/^debug_line\[/ {
+    current_cu_file = ""
+    # Forget every file name of the previous CU. Clearing only a fixed
+    # index range would leave stale names behind when a CU has a larger
+    # file table, and rows of this CU could then resolve to them.
+    # split("", array) is the portable awk idiom for emptying an array.
+    split("", current_file_names)
+    next
+}
+
+# Capture file names and their indices
+/^file_names\[.*\]:/ {
+    # Extract file index using simple string operations
+    line_copy = $0
+    gsub(/file_names\[/, "", line_copy)
+    gsub(/\]:.*/, "", line_copy)
+    gsub(/[ \t]/, "", line_copy)
+    file_index = line_copy
+
+    # The next line holds the name as: name: "filename". Skip if input ends
+    if ((getline) <= 0) next
+    if (match($0, /name:[ \t]*"/)) {
+        filename = $0
+        gsub(/.*name:[ \t]*"/, "", filename)
+        gsub(/".*/, "", filename)
+        current_file_names[file_index] = filename
+
+        # Extract basename for main CU file (first .c/.cpp/.cc file we see)
+        if (current_cu_file == "" && match(filename, /\.(c|cpp|cc)$/)) {
+            cu_filename = filename
+            gsub(/.*\//, "", cu_filename)
+            current_cu_file = cu_filename
+        }
+    }
+    next
+}
+
+# Process line table entries
+/^0x[0-9a-f]+/ {
+    # Parse the line entry: Address Line Column File ISA Discriminator OpIndex Flags
+    if (NF >= 4) {
+        line = $2
+        column = $3
+        file_index = $4
+
+        # Get the filename for this file index
+        filename = current_file_names[file_index]
+        if (filename == "") {
+            filename = "UNKNOWN_FILE_" file_index
+        } else {
+            # Extract just the basename
+            basename = filename
+            gsub(/.*\//, "", basename)
+            filename = basename
+        }
+
+        # Build additional info (flags, etc.)
+        additional_info = ""
+        for (i = 8; i <= NF; i++) {
+            if (additional_info != "") {
+                additional_info = additional_info " "
+            }
+            additional_info = additional_info $i
+        }
+
+        # Output normalized row: CU_FILE LINE COLUMN FILE_NAME [additional_info]
+        printf "%s %s %s %s", current_cu_file, line, column, filename
+        if (additional_info != "") {
+            printf " %s", additional_info
+        }
+        printf "\n"
+    }
+}
+' "$debug_line_file"
diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt
index f10b0d947206..297dec744920 100644
--- a/bolt/unittests/Core/CMakeLists.txt
+++ b/bolt/unittests/Core/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS

 add_bolt_unittest(CoreTests
   BinaryContext.cpp
+  ClusteredRows.cpp
   MCPlusBuilder.cpp
   MemoryMaps.cpp
   DynoStats.cpp
diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp
new file mode 100644
index 000000000000..4665022c91fd
--- /dev/null
+++ b/bolt/unittests/Core/ClusteredRows.cpp
@@ -0,0 +1,141 @@
+//===- bolt/unittests/Core/ClusteredRows.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/DebugData.h"
+#include "llvm/Support/SMLoc.h"
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace llvm;
+using namespace llvm::bolt;
+
+namespace {
+
+class ClusteredRowsTest : public ::testing::Test {
+protected:
+  void SetUp() override { // NOTE(review): confirm restored type name
+    Container = std::make_unique<ClusteredRowsContainer>();
+  }
+
+  std::unique_ptr<ClusteredRowsContainer> Container; // rebuilt by SetUp()
+};
+
+TEST_F(ClusteredRowsTest, CreateSingleElement) {
+  ClusteredRows *CR = Container->createClusteredRows(1);
+  ASSERT_NE(CR, nullptr);
+  EXPECT_EQ(CR->size(), 1u);
+
+  // Populate the single slot, then read it back through getRows()
+  std::vector<DebugLineTableRowRef> TestRefs = {{42, 100}};
+  CR->populate(TestRefs);
+
+  ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
+  ASSERT_EQ(Rows.size(), 1u); // fatal: Rows[0] is read next
+  EXPECT_EQ(Rows[0].DwCompileUnitIndex, 42u);
+  EXPECT_EQ(Rows[0].RowIndex, 100u);
+}
+
+TEST_F(ClusteredRowsTest, CreateMultipleElements) {
+  ClusteredRows *CR = Container->createClusteredRows(3);
+  ASSERT_NE(CR, nullptr);
+  EXPECT_EQ(CR->size(), 3u);
+
+  // Populate three slots and check that order and values are kept
+  std::vector<DebugLineTableRowRef> TestRefs = {{10, 20}, {30, 40}, {50, 60}};
+  CR->populate(TestRefs);
+
+  ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
+  ASSERT_EQ(Rows.size(), 3u); // fatal: Rows[0..2] are read next
+
+  EXPECT_EQ(Rows[0].DwCompileUnitIndex, 10u);
+  EXPECT_EQ(Rows[0].RowIndex, 20u);
+
+  EXPECT_EQ(Rows[1].DwCompileUnitIndex, 30u);
+  EXPECT_EQ(Rows[1].RowIndex, 40u);
+
+  EXPECT_EQ(Rows[2].DwCompileUnitIndex, 50u);
+  EXPECT_EQ(Rows[2].RowIndex, 60u);
+}
+
+TEST_F(ClusteredRowsTest, SMLoc_Conversion) {
+  ClusteredRows *CR = Container->createClusteredRows(2);
+  ASSERT_NE(CR, nullptr);
+
+  // Converting to an SMLoc must yield a valid location
+  SMLoc Loc = CR->toSMLoc();
+  EXPECT_TRUE(Loc.isValid());
+
+  // Converting back must return the very same object
+  const ClusteredRows *CR2 = ClusteredRows::fromSMLoc(Loc);
+  EXPECT_EQ(CR, CR2);
+  EXPECT_EQ(CR2->size(), 2u);
+}
+
+TEST_F(ClusteredRowsTest, PopulateWithArrayRef) {
+  ClusteredRows *CR = Container->createClusteredRows(4);
+  ASSERT_NE(CR, nullptr);
+
+  // populate() is fed an ArrayRef here instead of a std::vector
+  DebugLineTableRowRef TestArray[] = {{1, 2}, {3, 4}, {5, 6}, {7, 8}};
+  ArrayRef<DebugLineTableRowRef> TestRefs(TestArray, 4);
+  CR->populate(TestRefs);
+
+  ArrayRef<DebugLineTableRowRef> Rows = CR->getRows();
+  ASSERT_EQ(Rows.size(), 4u); // fatal: the loop indexes Rows[0..3]
+
+  for (size_t i = 0; i < 4; ++i) {
+    EXPECT_EQ(Rows[i].DwCompileUnitIndex, TestArray[i].DwCompileUnitIndex);
+    EXPECT_EQ(Rows[i].RowIndex, TestArray[i].RowIndex);
+  }
+}
+
+TEST_F(ClusteredRowsTest, MultipleClusteredRows) {
+  // Several ClusteredRows objects created from one container
+  ClusteredRows *CR1 = Container->createClusteredRows(2);
+  ClusteredRows *CR2 = Container->createClusteredRows(3);
+  ClusteredRows *CR3 = Container->createClusteredRows(1);
+
+  ASSERT_NE(CR1, nullptr);
+  ASSERT_NE(CR2, nullptr);
+  ASSERT_NE(CR3, nullptr);
+
+  // Ensure they are different objects
+  EXPECT_NE(CR1, CR2);
+  EXPECT_NE(CR2, CR3);
+  EXPECT_NE(CR1, CR3);
+
+  // Verify sizes (fatal: the rows are indexed after population)
+  ASSERT_EQ(CR1->size(), 2u);
+  ASSERT_EQ(CR2->size(), 3u);
+  ASSERT_EQ(CR3->size(), 1u);
+
+  // Populate each with different data
+  std::vector<DebugLineTableRowRef> TestRefs1 = {{100, 200}, {300, 400}};
+  std::vector<DebugLineTableRowRef> TestRefs2 = {{10, 20}, {30, 40}, {50, 60}};
+  std::vector<DebugLineTableRowRef> TestRefs3 = {{999, 888}};
+
+  CR1->populate(TestRefs1);
+  CR2->populate(TestRefs2);
+  CR3->populate(TestRefs3);
+
+  // Each object must hold only the data written to it
+  ArrayRef<DebugLineTableRowRef> Rows1 = CR1->getRows();
+  ArrayRef<DebugLineTableRowRef> Rows2 = CR2->getRows();
+  ArrayRef<DebugLineTableRowRef> Rows3 = CR3->getRows();
+
+  EXPECT_EQ(Rows1[0].DwCompileUnitIndex, 100u);
+  EXPECT_EQ(Rows1[1].RowIndex, 400u);
+
+  EXPECT_EQ(Rows2[1].DwCompileUnitIndex, 30u);
+  EXPECT_EQ(Rows2[2].RowIndex, 60u);
+
+  EXPECT_EQ(Rows3[0].DwCompileUnitIndex, 999u);
+  EXPECT_EQ(Rows3[0].RowIndex, 888u);
+}
+
+} // namespace