[lldb/crashlog] Load inlined symbols into interactive crashlog

Sometimes, crash reports come with inlined symbols. These provide the exact stack trace from the user's binary. However, when investigating a crash, it is very likely that the images related to the crashed thread are not available on the debugging user's system, or that their versions don't match. This causes interactive crashlog to show a degraded backtrace in lldb. This patch addresses that issue by parsing the inlined symbols from the crash report and loading them into lldb's target. It is a follow-up to 27f27d1, focusing on loading inlined symbols from legacy (non-JSON) crash reports. To do so, it updates the stack frame regular expression to make the capture groups more granular, so that the symbol name, the offset, and the source location (if available) can be extracted, while also making the expression more maintainable. Now, when parsing the crash report, we build a data structure containing all the symbol information for each stack frame. Then, after launching the scripted process for interactive mode, we write a JSON symbol file for each module, containing only the symbols that belong to that module. Finally, we load the JSON symbol files into lldb before showing the user the process status and backtrace. rdar://97345586 Differential Revision: https://reviews.llvm.org/D146765 Signed-off-by: Med Ismail Bennani <ismail@bennani.ma>
2026-01-15 12:25:46 +08:00 · 2023-05-19 20:05:05 -07:00
parent 34d8cd1538
commit dfdd898862
4 changed files with 314 additions and 23 deletions
--- a/lldb/examples/python/crashlog.py
+++ b/lldb/examples/python/crashlog.py
@@ -432,6 +432,8 @@ class CrashLogParser:
        self.path = os.path.expanduser(path)
        self.verbose = verbose
        self.crashlog = CrashLog(debugger, self.path, self.verbose)
+        # List of DarwinImages sorted by their index.
+        self.images = list()

    @abc.abstractmethod
    def parse(self):
@@ -459,8 +461,6 @@ class JSONCrashLogParser(CrashLogParser):

    def __init__(self, debugger, path, verbose):
        super().__init__(debugger, path, verbose)
-        # List of DarwinImages sorted by their index.
-        self.images = list()

    def parse(self):
        try:
@@ -603,14 +603,45 @@ class JSONCrashLogParser(CrashLogParser):
                print("error: can't parse application specific backtrace.")
                return False

-            (frame_id, frame_img_name, frame_addr,
-                frame_ofs) = frame_match.groups()
+            frame_id = frame_img_name = frame_addr = frame_symbol = frame_offset = frame_file = frame_line = frame_column = None
+
+            if len(frame_match.groups()) == 3:
+                # Get the image UUID from the frame image name.
+                (frame_id, frame_img_name, frame_addr) = frame_match.groups()
+            elif len(frame_match.groups()) == 5:
+                (frame_id, frame_img_name, frame_addr,
+                        frame_symbol, frame_offset) = frame_match.groups()
+            elif len(frame_match.groups()) == 7:
+                (frame_id, frame_img_name, frame_addr,
+                        frame_symbol, frame_offset,
+                        frame_file, frame_line) = frame_match.groups()
+            elif len(frame_match.groups()) == 8:
+                (frame_id, frame_img_name, frame_addr,
+                        frame_symbol, frame_offset,
+                        frame_file, frame_line, frame_column) = frame_match.groups()

            thread.add_ident(frame_img_name)
            if frame_img_name not in self.crashlog.idents:
                self.crashlog.idents.append(frame_img_name)
-            thread.frames.append(self.crashlog.Frame(int(frame_id), int(
-                frame_addr, 0), frame_ofs))
+
+            description = ""
+            if frame_img_name and frame_addr and frame_symbol:
+                description = frame_symbol
+                frame_offset_value = 0
+                if frame_offset:
+                    description += " + " + frame_offset
+                    frame_offset_value = int(frame_offset, 0)
+                for image in self.images:
+                    if image.identifier == frame_img_name:
+                        image.symbols[frame_symbol] = {
+                            "name": frame_symbol,
+                            "type": "code",
+                            "address": int(frame_addr, 0) - frame_offset_value,
+                        }
+
+            thread.frames.append(
+                self.crashlog.Frame(int(frame_id), int(frame_addr, 0), description)
+            )

        return True

@@ -657,19 +688,48 @@ class TextCrashLogParser(CrashLogParser):
    thread_instrs_regex = re.compile(r'^Thread \d+ instruction stream')
    thread_regex = re.compile(r'^Thread (\d+).*:')
    app_backtrace_regex = re.compile(r'^Application Specific Backtrace (\d+).*:')
-    version = r'\(.+\)|(?:arm|x86_)[0-9a-z]+'
-    frame_regex = re.compile(r'^(\d+)\s+'              # id
-                             r'(.+?)\s+'               # img_name
-                             r'(?:' +version+ r'\s+)?' # img_version
-                             r'(0x[0-9a-fA-F]{4,})'    # addr (4 chars or more)
-                             r'(?: +(.*))?'            # offs
+
+    class VersionRegex:
+        version = r'\(.+\)|(?:arm|x86_)[0-9a-z]+'
+
+    class FrameRegex(VersionRegex):
+        @classmethod
+        def get(cls):
+            index    = r'^(\d+)\s+'
+            img_name = r'(.+?)\s+'
+            version  = r'(?:' + super().version + r'\s+)?'
+            address  = r'(0x[0-9a-fA-F]{4,})' # 4 digits or more
+
+            symbol   = """
+                        (?:
+                            [ ]+
+                            (?P<symbol>.+)
+                            (?:
+                                [ ]\+[ ]
+                                (?P<symbol_offset>\d+)
                            )
+                            (?:
+                                [ ]\(
+                                (?P<file_name>[^:]+):(?P<line_number>\d+)
+                                (?:
+                                    :(?P<column_num>\d+)
+                                )?
+                            )?
+                        )?
+                       """
+
+            return re.compile(index + img_name + version + address + symbol,
+                              flags=re.VERBOSE)
+
+    frame_regex = FrameRegex.get()
    null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{4,} +')
    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'          # img_lo
                                  r'\s+-\s+'                   #   -
                                  r'(0x[0-9a-fA-F]+)\s+'       # img_hi
                                  r'[+]?(.+?)\s+'              # img_name
-                                  r'(?:(' +version+ r')\s+)?'  # img_version
+                                  r'(?:(' +
+                                  VersionRegex.version +         # img_version
+                                  r')\s+)?'
                                  r'(?:<([-0-9a-fA-F]+)>\s+)?' # img_uuid
                                  r'(\?+|/.*)'                 # img_path
                                 )
@@ -690,6 +750,7 @@ class TextCrashLogParser(CrashLogParser):
            CrashLogParseMode.SYSTEM : self.parse_system,
            CrashLogParseMode.INSTRS : self.parse_instructions,
        }
+        self.symbols = {}

    def parse(self):
        with open(self.path,'r', encoding='utf-8') as f:
@@ -844,29 +905,76 @@ class TextCrashLogParser(CrashLogParser):
            print('warning: thread parser ignored null-frame: "%s"' % line)
            return
        frame_match = self.frame_regex.search(line)
-        if frame_match:
-            (frame_id, frame_img_name, frame_addr,
-                frame_ofs) = frame_match.groups()
-            ident = frame_img_name
-            self.thread.add_ident(ident)
-            if ident not in self.crashlog.idents:
-                self.crashlog.idents.append(ident)
-            self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
-                frame_addr, 0), frame_ofs))
-        else:
+        if not frame_match:
            print('error: frame regex failed for line: "%s"' % line)
+            return
+
+        frame_id = frame_img_name = frame_addr = frame_symbol = frame_offset = frame_file = frame_line = frame_column = None
+
+        if len(frame_match.groups()) == 3:
+            # Get the image UUID from the frame image name.
+            (frame_id, frame_img_name, frame_addr) = frame_match.groups()
+        elif len(frame_match.groups()) == 5:
+            (frame_id, frame_img_name, frame_addr,
+                    frame_symbol, frame_offset) = frame_match.groups()
+        elif len(frame_match.groups()) == 7:
+            (frame_id, frame_img_name, frame_addr,
+                    frame_symbol, frame_offset,
+                    frame_file, frame_line) = frame_match.groups()
+        elif len(frame_match.groups()) == 8:
+            (frame_id, frame_img_name, frame_addr,
+                    frame_symbol, frame_offset,
+                    frame_file, frame_line, frame_column) = frame_match.groups()
+
+        self.thread.add_ident(frame_img_name)
+        if frame_img_name not in self.crashlog.idents:
+            self.crashlog.idents.append(frame_img_name)
+
+        description = ""
+        # Since images are parsed after threads, we need to build a
+        # map for every image with a list of all the symbols and addresses
+        if frame_img_name and frame_addr and frame_symbol:
+            description = frame_symbol
+            frame_offset_value = 0
+            if frame_offset:
+                description += " + " + frame_offset
+                frame_offset_value = int(frame_offset, 0)
+            if frame_img_name not in self.symbols:
+                self.symbols[frame_img_name] = list()
+            self.symbols[frame_img_name].append(
+                {
+                    "name": frame_symbol,
+                    "address": int(frame_addr, 0) - frame_offset_value,
+                }
+            )
+
+        self.thread.frames.append(
+            self.crashlog.Frame(int(frame_id), int(frame_addr, 0), description)
+        )

    def parse_images(self, line):
        image_match = self.image_regex_uuid.search(line)
        if image_match:
            (img_lo, img_hi, img_name, img_version,
                img_uuid, img_path) = image_match.groups()
+
            image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
                                            img_name.strip(),
                                            img_version.strip()
                                            if img_version else "",
                                            uuid.UUID(img_uuid), img_path,
                                            self.verbose)
+            unqualified_img_name = os.path.basename(img_path)
+            if unqualified_img_name in self.symbols:
+                for symbol in self.symbols[unqualified_img_name]:
+                    image.symbols[symbol["name"]] = {
+                        "name": symbol["name"],
+                        "type": "code",
+                        # NOTE: "address" is actually the symbol image offset
+                        "address": symbol["address"] - int(img_lo, 0),
+                    }
+
+            self.images.append(image)
            self.crashlog.images.append(image)
        else:
            print("error: image regex failed for: %s" % line)