From 2a3c08f620fc89823ebf1d2af4ea0beb97671db2 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 12 Nov 2024 09:34:53 +0100 Subject: [PATCH] [lldb] (Begin to) support discontinuous lldb_private::Functions (#115730) This is the beginning of a different, more fundamental approach to handling. This PR tries to tries to minimize functional changes. It only makes sure that we store the true set of ranges inside the function object, so that subsequent patches can make use of it. --- lldb/include/lldb/Symbol/Function.h | 8 +- .../Breakpad/SymbolFileBreakpad.cpp | 2 +- .../Plugins/SymbolFile/CTF/SymbolFileCTF.cpp | 2 +- .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 2 +- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 24 ++- .../SymbolFile/DWARF/DWARFASTParserClang.h | 2 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 34 ++-- .../NativePDB/SymbolFileNativePDB.cpp | 2 +- .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 6 +- .../SymbolFile/Symtab/SymbolFileSymtab.cpp | 2 +- lldb/source/Symbol/Function.cpp | 39 ++-- .../DWARF/x86/discontinuous-function.s | 168 ++++++++++++++++++ 12 files changed, 233 insertions(+), 58 deletions(-) create mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 8255c2baea77..bbfc25fe74ea 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -428,7 +428,7 @@ public: /// The section offset based address for this function. Function(CompileUnit *comp_unit, lldb::user_id_t func_uid, lldb::user_id_t func_type_uid, const Mangled &mangled, - Type *func_type, const AddressRange &range); + Type *func_type, AddressRanges ranges); /// Destructor. ~Function() override; @@ -649,8 +649,12 @@ protected: /// All lexical blocks contained in this function. Block m_block; + /// List of address ranges belonging to the function. + AddressRanges m_ranges; + /// The function address range that covers the widest range needed to contain - /// all blocks + /// all blocks. DEPRECATED: do not use this field in new code as the range may + /// include addresses belonging to other functions. AddressRange m_range; /// The frame base expression for variables that are relative to the frame diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index 9e78ba8174e3..fadc19676609 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -255,7 +255,7 @@ FunctionSP SymbolFileBreakpad::GetOrCreateFunction(CompileUnit &comp_unit) { section_sp, address - section_sp->GetFileAddress(), record->Size); // Use the CU's id because every CU has only one function inside. func_sp = std::make_shared(&comp_unit, id, 0, func_name, - nullptr, func_range); + nullptr, AddressRanges{func_range}); comp_unit.AddFunction(func_sp); } } diff --git a/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp b/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp index bb738c3dcc54..15e8d38e7f33 100644 --- a/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp +++ b/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp @@ -829,7 +829,7 @@ size_t SymbolFileCTF::ParseFunctions(CompileUnit &cu) { lldb::user_id_t func_uid = m_functions.size(); FunctionSP function_sp = std::make_shared( &cu, func_uid, function_type_uid, symbol->GetMangled(), type_sp.get(), - func_range); + AddressRanges{func_range}); m_functions.emplace_back(function_sp); cu.AddFunction(function_sp); } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 971cbe47fb70..80f7becc1b24 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -42,7 +42,7 @@ public: virtual Function *ParseFunctionFromDWARF(CompileUnit &comp_unit, const DWARFDIE &die, - const AddressRange &range) = 0; + AddressRanges ranges) = 0; virtual bool CompleteTypeFromDWARF(const DWARFDIE &die, Type *type, const CompilerType &compiler_type) = 0; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index a30d898a93cc..b69b433fdeb6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2340,12 +2340,9 @@ DWARFASTParserClang::ConstructDemangledNameFromDWARF(const DWARFDIE &die) { return ConstString(sstr.GetString()); } -Function * -DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit, - const DWARFDIE &die, - const AddressRange &func_range) { - assert(func_range.GetBaseAddress().IsValid()); - DWARFRangeList func_ranges; +Function *DWARFASTParserClang::ParseFunctionFromDWARF( + CompileUnit &comp_unit, const DWARFDIE &die, AddressRanges func_ranges) { + DWARFRangeList unused_func_ranges; const char *name = nullptr; const char *mangled = nullptr; std::optional decl_file; @@ -2361,9 +2358,9 @@ DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit, if (tag != DW_TAG_subprogram) return nullptr; - if (die.GetDIENamesAndRanges(name, mangled, func_ranges, decl_file, decl_line, - decl_column, call_file, call_line, call_column, - &frame_base)) { + if (die.GetDIENamesAndRanges(name, mangled, unused_func_ranges, decl_file, + decl_line, decl_column, call_file, call_line, + call_column, &frame_base)) { Mangled func_name; if (mangled) func_name.SetValue(ConstString(mangled)); @@ -2395,11 +2392,10 @@ DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit, assert(func_type == nullptr || func_type != DIE_IS_BEING_PARSED); const user_id_t func_user_id = die.GetID(); - func_sp = - std::make_shared(&comp_unit, - func_user_id, // UserID is the DIE offset - func_user_id, func_name, func_type, - func_range); // first address range + func_sp = std::make_shared( + &comp_unit, + func_user_id, // UserID is the DIE offset + func_user_id, func_name, func_type, std::move(func_ranges)); if (func_sp.get() != nullptr) { if (frame_base.IsValid()) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 1ffb09b2fc19..6c3aac6d452e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -59,7 +59,7 @@ public: lldb_private::Function * ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, const lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::AddressRange &func_range) override; + lldb_private::AddressRanges func_ranges) override; bool CompleteTypeFromDWARF( const lldb_private::plugin::dwarf::DWARFDIE &die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index e19c490bf44d..666595a6d063 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -918,32 +918,20 @@ Function *SymbolFileDWARF::ParseFunction(CompileUnit &comp_unit, if (!dwarf_ast) return nullptr; - DWARFRangeList ranges = die.GetDIE()->GetAttributeAddressRanges( - die.GetCU(), /*check_hi_lo_pc=*/true); - if (ranges.IsEmpty()) - return nullptr; - - // Union of all ranges in the function DIE (if the function is - // discontiguous) - lldb::addr_t lowest_func_addr = ranges.GetMinRangeBase(0); - lldb::addr_t highest_func_addr = ranges.GetMaxRangeEnd(0); - if (lowest_func_addr == LLDB_INVALID_ADDRESS || - lowest_func_addr >= highest_func_addr || - lowest_func_addr < m_first_code_address) - return nullptr; - + AddressRanges ranges; ModuleSP module_sp(die.GetModule()); - AddressRange func_range; - func_range.GetBaseAddress().ResolveAddressUsingFileSections( - lowest_func_addr, module_sp->GetSectionList()); - if (!func_range.GetBaseAddress().IsValid()) + for (const auto &range : die.GetDIE()->GetAttributeAddressRanges( + die.GetCU(), /*check_hi_lo_pc=*/true)) { + if (range.base < m_first_code_address) + continue; + if (Address base_addr(range.base, module_sp->GetSectionList()); + base_addr.IsValid() && FixupAddress(base_addr)) + ranges.emplace_back(std::move(base_addr), range.size); + } + if (ranges.empty()) return nullptr; - func_range.SetByteSize(highest_func_addr - lowest_func_addr); - if (!FixupAddress(func_range.GetBaseAddress())) - return nullptr; - - return dwarf_ast->ParseFunctionFromDWARF(comp_unit, die, func_range); + return dwarf_ast->ParseFunctionFromDWARF(comp_unit, die, std::move(ranges)); } ConstString diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index c784c2e28f64..c0416b4d0681 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -504,7 +504,7 @@ lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id, Mangled mangled(proc.Name); FunctionSP func_sp = std::make_shared( &comp_unit, toOpaqueUid(func_id), toOpaqueUid(sig_id), mangled, - func_type.get(), func_range); + func_type.get(), AddressRanges{func_range}); comp_unit.AddFunction(func_sp); diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index 4fc48b4d1333..c7b23aedfdcc 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -310,9 +310,9 @@ SymbolFilePDB::ParseCompileUnitFunctionForPDBFunc(const PDBSymbolFunc &pdb_func, Mangled mangled = GetMangledForPDBFunc(pdb_func); - FunctionSP func_sp = - std::make_shared(&comp_unit, pdb_func.getSymIndexId(), - func_type_uid, mangled, func_type, func_range); + FunctionSP func_sp = std::make_shared( + &comp_unit, pdb_func.getSymIndexId(), func_type_uid, mangled, func_type, + AddressRanges{func_range}); comp_unit.AddFunction(func_sp); diff --git a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp index 8c17017442b1..e1e11d274d6e 100644 --- a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp +++ b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp @@ -186,7 +186,7 @@ size_t SymbolFileSymtab::ParseFunctions(CompileUnit &comp_unit) { // for this function curr_symbol->GetMangled(), // Linker/mangled name nullptr, // no return type for a code symbol... - func_range)); // first address range + AddressRanges{func_range})); if (func_sp.get() != nullptr) { comp_unit.AddFunction(func_sp); diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index 04766e26674c..f590cf4632bc 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -254,12 +254,32 @@ Function *IndirectCallEdge::GetCallee(ModuleList &images, /// @} +AddressRange CollapseRanges(llvm::ArrayRef ranges) { + if (ranges.empty()) + return AddressRange(); + if (ranges.size() == 1) + return ranges[0]; + + Address lowest_addr = ranges[0].GetBaseAddress(); + addr_t highest_addr = lowest_addr.GetFileAddress() + ranges[0].GetByteSize(); + for (const AddressRange &range : ranges.drop_front()) { + Address range_begin = range.GetBaseAddress(); + addr_t range_end = range_begin.GetFileAddress() + range.GetByteSize(); + if (range_begin.GetFileAddress() < lowest_addr.GetFileAddress()) + lowest_addr = range_begin; + if (range_end > highest_addr) + highest_addr = range_end; + } + return AddressRange(lowest_addr, highest_addr - lowest_addr.GetFileAddress()); +} + // Function::Function(CompileUnit *comp_unit, lldb::user_id_t func_uid, lldb::user_id_t type_uid, const Mangled &mangled, Type *type, - const AddressRange &range) + AddressRanges ranges) : UserID(func_uid), m_comp_unit(comp_unit), m_type_uid(type_uid), - m_type(type), m_mangled(mangled), m_block(func_uid), m_range(range), + m_type(type), m_mangled(mangled), m_block(func_uid), + m_ranges(std::move(ranges)), m_range(CollapseRanges(m_ranges)), m_frame_base(), m_flags(), m_prologue_byte_size(0) { m_block.SetParentScope(this); assert(comp_unit != nullptr); @@ -406,14 +426,13 @@ void Function::GetDescription(Stream *s, lldb::DescriptionLevel level, llvm::interleaveComma(decl_context, *s, [&](auto &ctx) { ctx.Dump(*s); }); *s << "}"; } - *s << ", range = "; - Address::DumpStyle fallback_style; - if (level == eDescriptionLevelVerbose) - fallback_style = Address::DumpStyleModuleWithFileAddress; - else - fallback_style = Address::DumpStyleFileAddress; - GetAddressRange().Dump(s, target, Address::DumpStyleLoadAddress, - fallback_style); + *s << ", range" << (m_ranges.size() > 1 ? "s" : "") << " = "; + Address::DumpStyle fallback_style = + level == eDescriptionLevelVerbose + ? Address::DumpStyleModuleWithFileAddress + : Address::DumpStyleFileAddress; + for (const AddressRange &range : m_ranges) + range.Dump(s, target, Address::DumpStyleLoadAddress, fallback_style); } void Function::Dump(Stream *s, bool show_context) const { diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s new file mode 100644 index 000000000000..2584158207cc --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s @@ -0,0 +1,168 @@ +# An example of a function which has been split into two parts. Roughly +# corresponds to this C code. +# int baz(); +# int bar() { return 47; } +# int foo(int flag) { return flag ? bar() : baz(); } +# The function bar has been placed "in the middle" of foo. + +# RUN: llvm-mc -triple x86_64-pc-linux -filetype=obj %s -o %t +# RUN: %lldb %t -o "image lookup -v -n foo" -o exit | FileCheck %s + +# CHECK: 1 match found in {{.*}} +# CHECK: Summary: {{.*}}`foo +# CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x0000000000000007)[0x0000000000000007-0x000000000000000e)[0x0000000000000014-0x000000000000001b)[0x000000000000001b-0x000000000000001c) + + .text + + .type foo,@function +foo: + .cfi_startproc + cmpl $0, %edi + je foo.__part.2 + jmp foo.__part.1 + .cfi_endproc +.Lfoo_end: + .size foo, .Lfoo_end-foo + +foo.__part.1: + .cfi_startproc + callq bar + jmp foo.__part.3 +.Lfoo.__part.1_end: + .size foo.__part.1, .Lfoo.__part.1_end-foo.__part.1 + .cfi_endproc + +bar: + .cfi_startproc + movl $47, %eax + retq + .cfi_endproc +.Lbar_end: + .size bar, .Lbar_end-bar + +foo.__part.2: + .cfi_startproc + callq baz + jmp foo.__part.3 +.Lfoo.__part.2_end: + .size foo.__part.2, .Lfoo.__part.2_end-foo.__part.2 + .cfi_endproc + +foo.__part.3: + .cfi_startproc + retq +.Lfoo.__part.3_end: + .size foo.__part.3, .Lfoo.__part.3_end-foo.__part.3 + .cfi_endproc + + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 1 # DW_FORM_addr + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .short 29 # DW_AT_language + .quad 0 # DW_AT_low_pc + .byte 1 # DW_AT_ranges + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] DW_TAG_subprogram + .quad bar # DW_AT_low_pc + .quad .Lbar_end # DW_AT_high_pc + .asciz "bar" # DW_AT_name + .byte 3 # Abbrev [3] DW_TAG_subprogram + .byte 0 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .asciz "foo" # DW_AT_name + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 2 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 + .long .Ldebug_ranges1-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 6 # DW_RLE_start_end + .quad foo + .quad .Lfoo_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.1 + .quad .Lfoo.__part.1_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.2 + .quad .Lfoo.__part.2_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.3 + .quad .Lfoo.__part.3_end + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges1: + .byte 6 # DW_RLE_start_end + .quad bar + .quad .Lbar_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.1 + .quad .Lfoo.__part.1_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.2 + .quad .Lfoo.__part.2_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.3 + .quad .Lfoo.__part.3_end + .byte 6 # DW_RLE_start_end + .quad foo + .quad .Lfoo_end + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + + .section ".note.GNU-stack","",@progbits