contrib/plugins/uftrace_symbols.py: generate debug files to map symbols to source

Enhance uftrace_symbols.py to generate .dbg files containing the source location of every symbol present in the .sym file. This allows using `uftrace {replay,dump} --srcline` to show the origin of functions, connecting the trace to the original source code. It was first implemented with the pyelftools DWARF parser, which was far too slow (~minutes) to get locations for every symbol in the Linux kernel. Thus, we use `addr2line` instead, which runs in seconds. In addition, there were some bugs with the latest pyelftools release that required running the master version, which is not installable with pip. Since we now require binutils (addr2line) anyway, we can drop the pyelftools-based implementation and simply rely on `nm` to get symbol information, which is faster and better. Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Message-ID: <20251016150357.876415-8-alex.bennee@linaro.org> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
2025-11-03 08:00:38 +08:00 · 2025-10-16 16:03:52 +01:00
parent 9f714c4b20
commit 8a545a336d
1 changed files with 76 additions and 40 deletions
--- a/contrib/plugins/uftrace_symbols.py
+++ b/contrib/plugins/uftrace_symbols.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #
-# Create symbols and mapping files for uftrace.
+# Create symbols, debug and mapping files for uftrace.
 #
 # Copyright 2025 Linaro Ltd
 # Author: Pierrick Bouvier <pierrick.bouvier@linaro.org>
@ -9,44 +9,71 @@
 # SPDX-License-Identifier: GPL-2.0-or-later
 import argparse
 import elftools # pip install pyelftools
 import os
 import subprocess
-from elftools.elf.elffile import ELFFile
+class Symbol:
-from elftools.elf.sections import SymbolTableSection
+    def __init__(self, name, addr, size):
        self.name = name
        # clamp addr to 48 bits, like uftrace entries
        self.addr = addr & 0xffffffffffff
        self.full_addr = addr
        self.size = size
-def elf_func_symbols(elf):
+    def set_loc(self, file, line):
-    symbol_tables = [(idx, s) for idx, s in enumerate(elf.iter_sections())
+        self.file = file
-                  if isinstance(s, SymbolTableSection)]
+        self.line = line
-    symbols = []
+
-    for _, section in symbol_tables:
+def get_symbols(elf_file):
-        for _, symbol in enumerate(section.iter_symbols()):
+    symbols=[]
-            if symbol_size(symbol) == 0:
+    try:
-                continue
+        out = subprocess.check_output(['nm', '--print-size', elf_file],
-            type = symbol['st_info']['type']
+                                      stderr=subprocess.STDOUT,
-            if type == 'STT_FUNC' or type == 'STT_NOTYPE':
+                                      text=True)
-                symbols.append(symbol)
+    except subprocess.CalledProcessError as e:
-    symbols.sort(key = lambda x: symbol_addr(x))
+        print(e.output)
        raise
    out = out.strip().split('\n')
    for line in out:
        info = line.split(' ')
        if len(info) == 3:
            # missing size information
            continue
        addr, size, type, name = info
        # add only symbols from .text section
        if type.lower() != 't':
            continue
        addr = int(addr, 16)
        size = int(size, 16)
        symbols.append(Symbol(name, addr, size))
    symbols.sort(key = lambda x: x.addr)
    return symbols
-def symbol_size(symbol):
+def find_symbols_locations(elf_file, symbols):
-    return symbol['st_size']
+    addresses = '\n'.join([hex(x.full_addr) for x in symbols])
-
+    try:
-def symbol_addr(symbol):
+        out = subprocess.check_output(['addr2line', '--exe', elf_file],
-    addr = symbol['st_value']
+                                      stderr=subprocess.STDOUT,
-    # clamp addr to 48 bits, like uftrace entries
+                                      input=addresses, text=True)
-    return addr & 0xffffffffffff
+    except subprocess.CalledProcessError as e:
-
+        print(e.output)
-def symbol_name(symbol):
+        raise
-    return symbol.name
+    out = out.strip().split('\n')
    assert len(out) == len(symbols)
    for i in range(len(symbols)):
        s = symbols[i]
        file, line = out[i].split(':')
        # addr2line may return 'line (discriminator [0-9]+)' sometimes,
        # remove this to keep only line number.
        line = line.split(' ')[0]
        s.set_loc(file, line)
 class BinaryFile:
    def __init__(self, path, map_offset):
        self.fullpath = os.path.realpath(path)
        self.map_offset = map_offset
-        with open(path, 'rb') as f:
+        self.symbols = get_symbols(self.fullpath)
-            self.elf = ELFFile(f)
+        find_symbols_locations(self.fullpath, self.symbols)
            self.symbols = elf_func_symbols(self.elf)
    def path(self):
        """Return the path of this binary, as stored in ``self.fullpath``."""
        return self.fullpath
@ -56,24 +83,31 @@ class BinaryFile:
    def addr_end(self):
        last_sym = self.symbols[-1]
-        return symbol_addr(last_sym) + symbol_size(last_sym) + self.map_offset
+        return last_sym.addr + last_sym.size + self.map_offset
    def generate_symbol_file(self, prefix_symbols):
        binary_name = os.path.basename(self.fullpath)
-        sym_file_path = f'./uftrace.data/{binary_name}.sym'
+        sym_file_path = os.path.join('uftrace.data', f'{binary_name}.sym')
        print(f'{sym_file_path} ({len(self.symbols)} symbols)')
        with open(sym_file_path, 'w') as sym_file:
            # print hexadecimal addresses on 48 bits
            addrx = "0>12x"
            for s in self.symbols:
-                addr = symbol_addr(s)
+                addr = s.addr
                addr = f'{addr:{addrx}}'
-                size = f'{symbol_size(s):{addrx}}'
+                size = f'{s.size:{addrx}}'
                name = symbol_name(s)
                if prefix_symbols:
-                    name = f'{binary_name}:{name}'
+                    name = f'{binary_name}:{s.name}'
                print(addr, size, 'T', name, file=sym_file)
    def generate_debug_file(self):
        """Write the ``.dbg`` file for this binary under ``uftrace.data``.

        The file is named after the basename of ``self.fullpath`` and is
        opened relative to the current working directory. For each entry of
        ``self.symbols``, two lines are written:

        - ``F: <hex addr> <name>``
        - ``L: <line> <file>``

        Each symbol is expected to provide ``addr``, ``name``, ``line`` and
        ``file`` attributes.
        """
        base = os.path.basename(self.fullpath)
        target = os.path.join('uftrace.data', f'{base}.dbg')
        with open(target, 'w') as out:
            for sym in self.symbols:
                # Function record first, then its source location record.
                print(f'F: {hex(sym.addr)} {sym.name}', file=out)
                print(f'L: {sym.line} {sym.file}', file=out)
 def parse_parameter(p):
    s = p.split(":")
    path = s[0]
@ -84,7 +118,7 @@ def parse_parameter(p):
    offset = s[1]
    if not offset.startswith('0x'):
        err = f'offset "{offset}" is not an hexadecimal constant. '
-        err += 'It should starts with "0x".'
+        err += 'It should start with "0x".'
        raise ValueError(err)
    offset = int(offset, 16)
    return path, offset
@ -97,7 +131,7 @@ def is_from_user_mode(map_file_path):
    return False
 def generate_map(binaries):
-    map_file_path = './uftrace.data/sid-0.map'
+    map_file_path = os.path.join('uftrace.data', 'sid-0.map')
    if is_from_user_mode(map_file_path):
        print(f'do not overwrite {map_file_path} generated from qemu-user')
@ -124,7 +158,8 @@ def generate_map(binaries):
 def main():
    parser = argparse.ArgumentParser(description=
-                                     'generate symbol files for uftrace')
+                                     'generate symbol files for uftrace. '
                                     'Require binutils (nm and addr2line).')
    parser.add_argument('elf_file', nargs='+',
                        help='path to an ELF file. '
                        'Use /path/to/file:0xdeadbeef to add a mapping offset.')
@ -133,8 +168,8 @@ def main():
                        action=argparse.BooleanOptionalAction)
    args = parser.parse_args()
-    if not os.path.exists('./uftrace.data'):
+    if not os.path.exists('uftrace.data'):
-        os.mkdir('./uftrace.data')
+        os.mkdir('uftrace.data')
    binaries = []
    for file in args.elf_file:
@ -145,6 +180,7 @@ def main():
    for b in binaries:
        b.generate_symbol_file(args.prefix_symbols)
        b.generate_debug_file()
    generate_map(binaries)