[lldb/crashlog] Load inlined symbol into interactive crashlog

Sometimes, crash reports come with inlined symbols. These provide the
exact stacktrace from the user binary.

However, when investigating a crash, it's very likely that the images related
to the crashed thread are not available on the debugging user's system, or
that their versions don't match. This causes interactive crashlog to show
a degraded backtrace in lldb.

This patch aims to address that issue by parsing the inlined symbols
from the crash report and loading them into lldb's target.

This patch is a follow-up to 27f27d1, focusing on loading inlined
symbols from legacy (non-JSON) crash reports.

To do so, it updates the stack frame regular expression to make the
capture groups more granular, to be able to extract the symbol name, the
offset and the source location if available, while making it more
maintainable.

So now, when parsing the crash report, we build a data structure
containing all the symbol information for each stack frame. Then, after
launching the scripted process for interactive mode, we write a JSON
symbol file for each module, containing only the symbols that belong to it.

Finally, we load the JSON symbol files into lldb before showing the user
the process status and backtrace.

rdar://97345586

Differential Revision: https://reviews.llvm.org/D146765

Signed-off-by: Med Ismail Bennani <ismail@bennani.ma>
This commit is contained in:
Med Ismail Bennani
2023-05-19 20:05:05 -07:00
parent 34d8cd1538
commit dfdd898862
4 changed files with 314 additions and 23 deletions

View File

@@ -432,6 +432,8 @@ class CrashLogParser:
self.path = os.path.expanduser(path)
self.verbose = verbose
self.crashlog = CrashLog(debugger, self.path, self.verbose)
# List of DarwinImages sorted by their index.
self.images = list()
@abc.abstractmethod
def parse(self):
@@ -459,8 +461,6 @@ class JSONCrashLogParser(CrashLogParser):
def __init__(self, debugger, path, verbose):
super().__init__(debugger, path, verbose)
# List of DarwinImages sorted by their index.
self.images = list()
def parse(self):
try:
@@ -603,14 +603,45 @@ class JSONCrashLogParser(CrashLogParser):
print("error: can't parse application specific backtrace.")
return False
(frame_id, frame_img_name, frame_addr,
frame_ofs) = frame_match.groups()
frame_id = frame_img_name = frame_addr = frame_symbol = frame_offset = frame_file = frame_line = frame_column = None
if len(frame_match.groups()) == 3:
# Get the image UUID from the frame image name.
(frame_id, frame_img_name, frame_addr) = frame_match.groups()
elif len(frame_match.groups()) == 5:
(frame_id, frame_img_name, frame_addr,
frame_symbol, frame_offset) = frame_match.groups()
elif len(frame_match.groups()) == 7:
(frame_id, frame_img_name, frame_addr,
frame_symbol, frame_offset,
frame_file, frame_line) = frame_match.groups()
elif len(frame_match.groups()) == 8:
(frame_id, frame_img_name, frame_addr,
frame_symbol, frame_offset,
frame_file, frame_line, frame_column) = frame_match.groups()
thread.add_ident(frame_img_name)
if frame_img_name not in self.crashlog.idents:
self.crashlog.idents.append(frame_img_name)
thread.frames.append(self.crashlog.Frame(int(frame_id), int(
frame_addr, 0), frame_ofs))
description = ""
if frame_img_name and frame_addr and frame_symbol:
description = frame_symbol
frame_offset_value = 0
if frame_offset:
description += " + " + frame_offset
frame_offset_value = int(frame_offset, 0)
for image in self.images:
if image.identifier == frame_img_name:
image.symbols[frame_symbol] = {
"name": frame_symbol,
"type": "code",
"address": int(frame_addr, 0) - frame_offset_value,
}
thread.frames.append(
self.crashlog.Frame(int(frame_id), int(frame_addr, 0), description)
)
return True
@@ -657,19 +688,48 @@ class TextCrashLogParser(CrashLogParser):
thread_instrs_regex = re.compile(r'^Thread \d+ instruction stream')
thread_regex = re.compile(r'^Thread (\d+).*:')
app_backtrace_regex = re.compile(r'^Application Specific Backtrace (\d+).*:')
version = r'\(.+\)|(?:arm|x86_)[0-9a-z]+'
frame_regex = re.compile(r'^(\d+)\s+' # id
r'(.+?)\s+' # img_name
r'(?:' +version+ r'\s+)?' # img_version
r'(0x[0-9a-fA-F]{4,})' # addr (4 chars or more)
r'(?: +(.*))?' # offs
class VersionRegex:
    """Holds the regex fragment that matches an image version token."""

    # Either a parenthesised string such as "(1.2.3)" or a token that starts
    # with "arm" or "x86_" followed by lowercase letters/digits.
    version = r'\(.+\)|(?:arm|x86_)[0-9a-z]+'

class FrameRegex(VersionRegex):
    """Builds the compiled pattern used to parse a single stack-frame line."""

    @classmethod
    def get(cls):
        """Return the compiled frame regular expression (``re.VERBOSE`` mode).

        Unnamed groups 1-3 capture the frame index, the image name and the
        frame address. The named groups ``symbol``, ``symbol_offset``,
        ``file_name``, ``line_number`` and ``column_num`` are optional and
        are ``None`` on the match object when they did not participate.
        """
        index = r'^(\d+)\s+'
        img_name = r'(.+?)\s+'
        # NOTE(review): the alternation inside `version` is not grouped on its
        # own here, so the trailing \s+ binds only to the arm/x86_ alternative.
        # Left unchanged to preserve existing matching behavior; confirm intent.
        version = r'(?:' + super().version + r'\s+)?'
        address = r'(0x[0-9a-fA-F]{4,})'  # 4 digits or more

        # Must be a raw string: it contains regex escapes (\+, \d, \() that
        # are invalid *string* escapes and would otherwise trigger a
        # DeprecationWarning/SyntaxWarning when the module is compiled.
        # Whitespace in this fragment is ignored because of re.VERBOSE, hence
        # the explicit "[ ]" wherever a literal space is required.
        symbol = r"""
                    (?:
                        [ ]+
                        (?P<symbol>.+)
                        (?:
                            [ ]\+[ ]
                            (?P<symbol_offset>\d+)
                        )
                        (?:
                            [ ]\(
                            (?P<file_name>[^:]+):(?P<line_number>\d+)
                            (?:
                                :(?P<column_num>\d+)
                            )?
                        )?
                    )?
                  """

        return re.compile(index + img_name + version + address + symbol,
                          flags=re.VERBOSE)
frame_regex = FrameRegex.get()
null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{4,} +')
image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)' # img_lo
r'\s+-\s+' # -
r'(0x[0-9a-fA-F]+)\s+' # img_hi
r'[+]?(.+?)\s+' # img_name
r'(?:(' +version+ r')\s+)?' # img_version
r'(?:(' +
VersionRegex.version + # img_version
r')\s+)?'
r'(?:<([-0-9a-fA-F]+)>\s+)?' # img_uuid
r'(\?+|/.*)' # img_path
)
@@ -690,6 +750,7 @@ class TextCrashLogParser(CrashLogParser):
CrashLogParseMode.SYSTEM : self.parse_system,
CrashLogParseMode.INSTRS : self.parse_instructions,
}
self.symbols = {}
def parse(self):
with open(self.path,'r', encoding='utf-8') as f:
@@ -844,29 +905,76 @@ class TextCrashLogParser(CrashLogParser):
print('warning: thread parser ignored null-frame: "%s"' % line)
return
frame_match = self.frame_regex.search(line)
if frame_match:
(frame_id, frame_img_name, frame_addr,
frame_ofs) = frame_match.groups()
ident = frame_img_name
self.thread.add_ident(ident)
if ident not in self.crashlog.idents:
self.crashlog.idents.append(ident)
self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
frame_addr, 0), frame_ofs))
else:
if not frame_match:
print('error: frame regex failed for line: "%s"' % line)
return
frame_id = frame_img_name = frame_addr = frame_symbol = frame_offset = frame_file = frame_line = frame_column = None
if len(frame_match.groups()) == 3:
# Get the image UUID from the frame image name.
(frame_id, frame_img_name, frame_addr) = frame_match.groups()
elif len(frame_match.groups()) == 5:
(frame_id, frame_img_name, frame_addr,
frame_symbol, frame_offset) = frame_match.groups()
elif len(frame_match.groups()) == 7:
(frame_id, frame_img_name, frame_addr,
frame_symbol, frame_offset,
frame_file, frame_line) = frame_match.groups()
elif len(frame_match.groups()) == 8:
(frame_id, frame_img_name, frame_addr,
frame_symbol, frame_offset,
frame_file, frame_line, frame_column) = frame_match.groups()
self.thread.add_ident(frame_img_name)
if frame_img_name not in self.crashlog.idents:
self.crashlog.idents.append(frame_img_name)
description = ""
# Since images are parsed after threads, we need to build a
# map for every image with a list of all the symbols and addresses
if frame_img_name and frame_addr and frame_symbol:
description = frame_symbol
frame_offset_value = 0
if frame_offset:
description += " + " + frame_offset
frame_offset_value = int(frame_offset, 0)
if frame_img_name not in self.symbols:
self.symbols[frame_img_name] = list()
self.symbols[frame_img_name].append(
{
"name": frame_symbol,
"address": int(frame_addr, 0) - frame_offset_value,
}
)
self.thread.frames.append(
self.crashlog.Frame(int(frame_id), int(frame_addr, 0), description)
)
def parse_images(self, line):
    """Parse one binary-image line and register the image it describes.

    On a successful match a ``DarwinImage`` is created from the captured
    fields, any symbols previously collected in ``self.symbols`` under the
    basename of the image path are attached to it, and the image is appended
    to both ``self.images`` and ``self.crashlog.images``. When the line does
    not match, an error is printed and nothing is registered.
    """
    match = self.image_regex_uuid.search(line)
    if not match:
        print("error: image regex failed for: %s" % line)
        return

    img_lo, img_hi, img_name, img_version, img_uuid, img_path = match.groups()
    load_address = int(img_lo, 0)

    image = self.crashlog.DarwinImage(
        load_address,
        int(img_hi, 0),
        img_name.strip(),
        img_version.strip() if img_version else "",
        uuid.UUID(img_uuid),
        img_path,
        self.verbose,
    )

    # Symbols gathered while parsing threads are keyed by the image name seen
    # in the frame line; look them up using the basename of the image path.
    for entry in self.symbols.get(os.path.basename(img_path), ()):
        image.symbols[entry["name"]] = {
            "name": entry["name"],
            "type": "code",
            # NOTE: "address" is actually the symbol image offset
            "address": entry["address"] - load_address,
        }

    self.images.append(image)
    self.crashlog.images.append(image)