From 17e4c217b66305e60657a48f10fe3c428c2fe4d2 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Fri, 10 Jun 2022 16:11:36 -0700 Subject: [PATCH] [Symbolizer] Implement contextual symbolizer markup elements. This change implements the contextual symbolizer markup elements: reset, module, and mmap. These provide information about the runtime context of the binary necessary to resolve addresses to symbolic values. Summary information is printed to the output about this context. Multiple mmap elements for the same module line are coalesced together. The standard requires that such elements occur on their own lines to allow for this; accordingly, anything after a contextual element on a line is silently discarded. Implementing this cleanly requires that the filter drive the parser; this allows skipped sections to avoid being parsed. This also makes the filter quite a bit easier to use, at the cost of some unused flexibility. Reviewed By: peter.smith Differential Revision: https://reviews.llvm.org/D129519 --- llvm/docs/CommandGuide/llvm-symbolizer.rst | 3 + llvm/docs/SymbolizerMarkupFormat.rst | 6 +- .../include/llvm/DebugInfo/Symbolize/Markup.h | 4 + .../llvm/DebugInfo/Symbolize/MarkupFilter.h | 98 ++++- llvm/lib/DebugInfo/Symbolize/Markup.cpp | 3 + llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp | 393 +++++++++++++++++- ...ze-filter-markup-context-line-elision.test | 12 + ...ymbolize-filter-markup-error-location.test | 2 +- .../symbolize-filter-markup-mmap.test | 40 ++ .../symbolize-filter-markup-module.test | 26 ++ .../symbolize-filter-markup-parse-fields.test | 44 ++ .../symbolize-filter-markup-reset.test | 21 + .../tools/llvm-symbolizer/llvm-symbolizer.cpp | 12 +- 13 files changed, 618 insertions(+), 46 deletions(-) create mode 100644 llvm/test/DebugInfo/symbolize-filter-markup-context-line-elision.test create mode 100644 llvm/test/DebugInfo/symbolize-filter-markup-mmap.test create mode 100644 llvm/test/DebugInfo/symbolize-filter-markup-module.test create mode 
100644 llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test create mode 100644 llvm/test/DebugInfo/symbolize-filter-markup-reset.test diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst index 22ed6d9de00a..33b5fa5b0fe1 100644 --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -255,6 +255,9 @@ OPTIONS markup elements are supported: * ``{{symbol}}`` + * ``{{reset}}`` + * ``{{module}}`` + * ``{{mmap}}`` .. _llvm-symbolizer-opt-f: diff --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst index dfd9d6b5b770..95ac5d89d84e 100644 --- a/llvm/docs/SymbolizerMarkupFormat.rst +++ b/llvm/docs/SymbolizerMarkupFormat.rst @@ -360,7 +360,7 @@ elements should have appeared somewhere earlier in the logging stream. It should always be possible for the symbolizing filter to be implemented as a single pass over the raw logging stream, accumulating context and massaging text as it goes. -``{{{reset}}}`` [#not_yet_implemented]_ +``{{{reset}}}`` This should be output before any other contextual element. The need for this contextual element is to support implementations that handle logs coming from @@ -372,7 +372,7 @@ over the raw logging stream, accumulating context and massaging text as it goes. previous process's contextual elements is not assumed for new process that just happens have the same identifying information. -``{{{module:%i:%s:%s:...}}}`` [#not_yet_implemented]_ +``{{{module:%i:%s:%s:...}}}`` This element represents a so-called "module". A "module" is a single linked binary, such as a loaded ELF file. Usually each module occupies a contiguous @@ -399,7 +399,7 @@ over the raw logging stream, accumulating context and massaging text as it goes. 
{{{module:1:libc.so:elf:83238ab56ba10497}}} -``{{{mmap:%p:%i:...}}}`` [#not_yet_implemented]_ +``{{{mmap:%p:%i:...}}}`` This contextual element is used to give information about a particular region in memory. ``%p`` is the starting address and ``%i`` gives the size in hex of the diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h index 2628b47cf6d3..4f2b0de481ec 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h @@ -84,6 +84,10 @@ public: /// \returns the next markup node or None if none remain. Optional nextNode(); + bool isSGR(const MarkupNode &Node) const { + return SGRSyntax.match(Node.Text); + } + private: Optional parseElement(StringRef Line); void parseTextOutsideMarkup(StringRef Text); diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h index b7d70ccafe66..a224c3205f0b 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h @@ -17,6 +17,9 @@ #include "Markup.h" +#include + +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -29,45 +32,106 @@ class MarkupFilter { public: MarkupFilter(raw_ostream &OS, Optional ColorsEnabled = llvm::None); - /// Begins a logical \p Line of markup. + /// Filters a line containing symbolizer markup and writes the human-readable + /// results to the output stream. /// - /// This must be called for each line of the input stream before calls to - /// filter() for elements of that line. The provided \p Line must be the same - /// one that was passed to parseLine() to produce the elements to be later - /// passed to filter(). - /// - /// This informs the filter that a new line is beginning and establishes a - /// context for error location reporting. 
- void beginLine(StringRef Line); + /// Invalid or unimplemented markup elements are removed. Some output may be + /// deferred until a future filter() or finish() call. + void filter(StringRef Line); - /// Handle a \p Node of symbolizer markup. - /// - /// If the node is a recognized, valid markup element, it is replaced with a - /// human-readable string. If the node isn't an element or the element isn't - /// recognized, it is output verbatim. If the element is recognized but isn't - /// valid, it is omitted from the output. - void filter(const MarkupNode &Node); + /// Records that the input stream has ended and writes any deferred output. + void finish(); private: + struct Module { + uint64_t ID; + std::string Name; + SmallVector BuildID; + }; + + struct MMap { + uint64_t Addr; + uint64_t Size; + const Module *Module; + std::string Mode; // Lowercase + uint64_t ModuleRelativeAddr; + + bool contains(uint64_t Addr) const; + }; + + // An informational module line currently being constructed. As many mmap + // elements as possible are folded into one ModuleInfo line. 
+ struct ModuleInfoLine { + const Module *Module; + + SmallVector MMaps = {}; + }; + + bool tryContextualElement(const MarkupNode &Node, + const SmallVector &DeferredNodes); + bool tryMMap(const MarkupNode &Element, + const SmallVector &DeferredNodes); + bool tryReset(const MarkupNode &Element, + const SmallVector &DeferredNodes); + bool tryModule(const MarkupNode &Element, + const SmallVector &DeferredNodes); + + void beginModuleInfoLine(const Module *M); + void endAnyModuleInfoLine(); + + void filterNode(const MarkupNode &Node); + + bool tryPresentation(const MarkupNode &Node); + bool trySymbol(const MarkupNode &Node); + bool trySGR(const MarkupNode &Node); void highlight(); + void highlightValue(); void restoreColor(); void resetColor(); + Optional parseModule(const MarkupNode &Element) const; + Optional parseMMap(const MarkupNode &Element) const; + + Optional parseAddr(StringRef Str) const; + Optional parseModuleID(StringRef Str) const; + Optional parseSize(StringRef Str) const; + Optional> parseBuildID(StringRef Str) const; + Optional parseMode(StringRef Str) const; + bool checkTag(const MarkupNode &Node) const; - bool checkNumFields(const MarkupNode &Node, size_t Size) const; + bool checkNumFields(const MarkupNode &Element, size_t Size) const; + bool checkNumFieldsAtLeast(const MarkupNode &Element, size_t Size) const; void reportTypeError(StringRef Str, StringRef TypeName) const; void reportLocation(StringRef::iterator Loc) const; + const MMap *overlappingMMap(const MMap &Map) const; + + StringRef lineEnding() const; + raw_ostream &OS; const bool ColorsEnabled; + MarkupParser Parser; + + // Current line being filtered. StringRef Line; + // A module info line currently being built. This incorporates as much mmap + // information as possible before being emitted. + Optional MIL; + + // SGR state. Optional Color; bool Bold = false; + + // Map from Module ID to Module. + DenseMap> Modules; + + // Ordered map from starting address to mmap. 
+ std::map MMaps; }; } // end namespace symbolize diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp index 9bc65e763287..aa8a89812227 100644 --- a/llvm/lib/DebugInfo/Symbolize/Markup.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp @@ -100,6 +100,9 @@ Optional MarkupParser::nextNode() { } void MarkupParser::flush() { + Buffer.clear(); + NextIdx = 0; + Line = {}; if (InProgressMultiline.empty()) return; FinishedMultiline.swap(InProgressMultiline); diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp index 3363fe5e531f..ad21df62b9b3 100644 --- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -10,14 +10,22 @@ /// This file defines the implementation of a filter that replaces symbolizer /// markup with human-readable expressions. /// +/// See https://llvm.org/docs/SymbolizerMarkupFormat.html +/// //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/Symbolize/MarkupFilter.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/Symbolize/Markup.h" +#include "llvm/Debuginfod/Debuginfod.h" #include "llvm/Demangle/Demangle.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -28,30 +36,195 @@ MarkupFilter::MarkupFilter(raw_ostream &OS, Optional ColorsEnabled) : OS(OS), ColorsEnabled(ColorsEnabled.value_or( WithColor::defaultAutoDetectFunction()(OS))) {} -void MarkupFilter::beginLine(StringRef Line) { +void MarkupFilter::filter(StringRef Line) { this->Line = Line; resetColor(); + + Parser.parseLine(Line); + SmallVector DeferredNodes; + // See if the line is contextual (i.e. contains a contextual element). 
+ // In this case, anything after the contextual element is elided, or the whole + // line may be elided. + while (Optional Node = Parser.nextNode()) { + // If this was a contextual line, then summarily stop processing. + if (tryContextualElement(*Node, DeferredNodes)) + return; + // This node may yet be part of an elided contextual line. + DeferredNodes.push_back(*Node); + } + + // This was not a contextual line, so nothing in it should be elided. + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); } -void MarkupFilter::filter(const MarkupNode &Node) { +void MarkupFilter::finish() { + Parser.flush(); + while (Optional Node = Parser.nextNode()) + filterNode(*Node); + endAnyModuleInfoLine(); + resetColor(); + Modules.clear(); + MMaps.clear(); +} + +// See if the given node is a contextual element and handle it if so. This may +// either output or defer the element; in the former case, it will first emit +// any DeferredNodes. +// +// Returns true if the given element was a contextual element. In this case, +// DeferredNodes should be considered handled and should not be emitted. The +// rest of the containing line must also be ignored in case the element was +// deferred to a following line. 
+bool MarkupFilter::tryContextualElement( + const MarkupNode &Node, const SmallVector &DeferredNodes) { + if (tryMMap(Node, DeferredNodes)) + return true; + if (tryReset(Node, DeferredNodes)) + return true; + return tryModule(Node, DeferredNodes); +} + +bool MarkupFilter::tryMMap(const MarkupNode &Node, + const SmallVector &DeferredNodes) { + if (Node.Tag != "mmap") + return false; + Optional ParsedMMap = parseMMap(Node); + if (!ParsedMMap) + return true; + + if (const MMap *M = overlappingMMap(*ParsedMMap)) { + WithColor::error(errs()) + << formatv("overlapping mmap: #{0:x} [{1:x},{2:x})\n", M->Module->ID, + M->Addr, M->Addr + M->Size); + reportLocation(Node.Fields[0].begin()); + return true; + } + + auto Res = MMaps.emplace(ParsedMMap->Addr, std::move(*ParsedMMap)); + assert(Res.second && "Overlap check should ensure emplace succeeds."); + MMap &MMap = Res.first->second; + + if (!MIL || MIL->Module != MMap.Module) { + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); + beginModuleInfoLine(MMap.Module); + OS << "; adds"; + } + MIL->MMaps.push_back(&MMap); + return true; +} + +bool MarkupFilter::tryReset(const MarkupNode &Node, + const SmallVector &DeferredNodes) { + if (Node.Tag != "reset") + return false; + if (!checkNumFields(Node, 0)) + return true; + + if (!Modules.empty() || !MMaps.empty()) { + Modules.clear(); + MMaps.clear(); + + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); + highlight(); + OS << "[[[reset]]]" << lineEnding(); + restoreColor(); + } + return true; +} + +bool MarkupFilter::tryModule(const MarkupNode &Node, + const SmallVector &DeferredNodes) { + if (Node.Tag != "module") + return false; + Optional ParsedModule = parseModule(Node); + if (!ParsedModule) + return true; + + auto Res = Modules.try_emplace( + ParsedModule->ID, std::make_unique(std::move(*ParsedModule))); + if (!Res.second) { + WithColor::error(errs()) << "duplicate module ID\n"; + 
reportLocation(Node.Fields[0].begin()); + return true; + } + Module &Module = *Res.first->second; + + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); + beginModuleInfoLine(&Module); + OS << "; BuildID="; + highlightValue(); + OS << toHex(Module.BuildID, /*LowerCase=*/true); + highlight(); + return true; +} + +void MarkupFilter::beginModuleInfoLine(const Module *M) { + highlight(); + OS << "[[[ELF module"; + highlightValue(); + OS << formatv(" #{0:x} \"{1}\"", M->ID, M->Name); + highlight(); + MIL = ModuleInfoLine{M}; +} + +void MarkupFilter::endAnyModuleInfoLine() { + if (!MIL) + return; + llvm::stable_sort(MIL->MMaps, [](const MMap *A, const MMap *B) { + return A->Addr < B->Addr; + }); + for (const MMap *M : MIL->MMaps) { + OS << (M == MIL->MMaps.front() ? ' ' : '-'); + highlightValue(); + OS << formatv("{0:x}", M->Addr); + highlight(); + OS << '('; + highlightValue(); + OS << M->Mode; + highlight(); + OS << ')'; + } + OS << "]]]" << lineEnding(); + restoreColor(); + MIL.reset(); +} + +// Handle a node that is known not to be a contextual element. 
+void MarkupFilter::filterNode(const MarkupNode &Node) { if (!checkTag(Node)) return; - + if (tryPresentation(Node)) + return; if (trySGR(Node)) return; - if (Node.Tag == "symbol") { - if (!checkNumFields(Node, 1)) - return; - highlight(); - OS << llvm::demangle(Node.Fields.front().str()); - restoreColor(); - return; - } - OS << Node.Text; } +bool MarkupFilter::tryPresentation(const MarkupNode &Node) { + return trySymbol(Node); +} + +bool MarkupFilter::trySymbol(const MarkupNode &Node) { + if (Node.Tag != "symbol") + return false; + if (!checkNumFields(Node, 1)) + return true; + + highlight(); + OS << llvm::demangle(Node.Fields.front().str()); + restoreColor(); + return true; +} + bool MarkupFilter::trySGR(const MarkupNode &Node) { if (Node.Text == "\033[0m") { resetColor(); @@ -93,6 +266,13 @@ void MarkupFilter::highlight() { Bold); } +// Begin highlighting a field within a highlighted markup string. +void MarkupFilter::highlightValue() { + if (!ColorsEnabled) + return; + OS.changeColor(raw_ostream::Colors::GREEN, Bold); +} + // Set the output stream's color to the current color and bold state of the SGR // abstract machine. void MarkupFilter::restoreColor() { @@ -117,6 +297,139 @@ void MarkupFilter::resetColor() { OS.resetColor(); } +// This macro helps reduce the amount of indirection done through Optional +// below, since the usual case upon returning a None Optional is to return None. 
+#define ASSIGN_OR_RETURN_NONE(TYPE, NAME, EXPR) \ + auto NAME##Opt = (EXPR); \ + if (!NAME##Opt) \ + return None; \ + TYPE NAME = std::move(*NAME##Opt) + +Optional +MarkupFilter::parseModule(const MarkupNode &Element) const { + if (!checkNumFieldsAtLeast(Element, 3)) + return None; + ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[0])); + StringRef Name = Element.Fields[1]; + StringRef Type = Element.Fields[2]; + if (Type != "elf") { + WithColor::error() << "unknown module type\n"; + reportLocation(Type.begin()); + return None; + } + if (!checkNumFields(Element, 4)) + return None; + ASSIGN_OR_RETURN_NONE(SmallVector, BuildID, + parseBuildID(Element.Fields[3])); + return Module{ID, Name.str(), std::move(BuildID)}; +} + +Optional +MarkupFilter::parseMMap(const MarkupNode &Element) const { + if (!checkNumFieldsAtLeast(Element, 3)) + return None; + ASSIGN_OR_RETURN_NONE(uint64_t, Addr, parseAddr(Element.Fields[0])); + ASSIGN_OR_RETURN_NONE(uint64_t, Size, parseSize(Element.Fields[1])); + StringRef Type = Element.Fields[2]; + if (Type != "load") { + WithColor::error() << "unknown mmap type\n"; + reportLocation(Type.begin()); + return None; + } + if (!checkNumFields(Element, 6)) + return None; + ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[3])); + ASSIGN_OR_RETURN_NONE(std::string, Mode, parseMode(Element.Fields[4])); + auto It = Modules.find(ID); + if (It == Modules.end()) { + WithColor::error() << "unknown module ID\n"; + reportLocation(Element.Fields[3].begin()); + return None; + } + ASSIGN_OR_RETURN_NONE(uint64_t, ModuleRelativeAddr, + parseAddr(Element.Fields[5])); + return MMap{Addr, Size, It->second.get(), std::move(Mode), + ModuleRelativeAddr}; +} + +// Parse an address (%p in the spec). 
+Optional MarkupFilter::parseAddr(StringRef Str) const { + if (Str.empty()) { + reportTypeError(Str, "address"); + return None; + } + if (all_of(Str, [](char C) { return C == '0'; })) + return 0; + if (!Str.startswith("0x")) { + reportTypeError(Str, "address"); + return None; + } + uint64_t Addr; + if (Str.drop_front(2).getAsInteger(16, Addr)) { + reportTypeError(Str, "address"); + return None; + } + return Addr; +} + +// Parse a module ID (%i in the spec). +Optional MarkupFilter::parseModuleID(StringRef Str) const { + uint64_t ID; + if (Str.getAsInteger(0, ID)) { + reportTypeError(Str, "module ID"); + return None; + } + return ID; +} + +// Parse a size (%i in the spec). +Optional MarkupFilter::parseSize(StringRef Str) const { + uint64_t ID; + if (Str.getAsInteger(0, ID)) { + reportTypeError(Str, "size"); + return None; + } + return ID; +} + +// Parse a build ID (%x in the spec). +Optional> MarkupFilter::parseBuildID(StringRef Str) const { + std::string Bytes; + if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) { + reportTypeError(Str, "build ID"); + return None; + } + ArrayRef BuildID(reinterpret_cast(Bytes.data()), + Bytes.size()); + return SmallVector(BuildID.begin(), BuildID.end()); +} + +// Parses the mode string for an mmap element. +Optional MarkupFilter::parseMode(StringRef Str) const { + if (Str.empty()) { + reportTypeError(Str, "mode"); + return None; + } + + // Pop off each of r/R, w/W, and x/X from the front, in that order. + StringRef Remainder = Str; + if (!Remainder.empty() && tolower(Remainder.front()) == 'r') + Remainder = Remainder.drop_front(); + if (!Remainder.empty() && tolower(Remainder.front()) == 'w') + Remainder = Remainder.drop_front(); + if (!Remainder.empty() && tolower(Remainder.front()) == 'x') + Remainder = Remainder.drop_front(); + + // If anything remains, then the string wasn't a mode. + if (!Remainder.empty()) { + reportTypeError(Str, "mode"); + return None; + } + + // Normalize the mode. 
+ return Str.lower(); +} + bool MarkupFilter::checkTag(const MarkupNode &Node) const { if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) { WithColor::error(errs()) << "tags must be all lowercase characters\n"; @@ -126,18 +439,66 @@ bool MarkupFilter::checkTag(const MarkupNode &Node) const { return true; } -bool MarkupFilter::checkNumFields(const MarkupNode &Node, size_t Size) const { - if (Node.Fields.size() != Size) { +bool MarkupFilter::checkNumFields(const MarkupNode &Element, + size_t Size) const { + if (Element.Fields.size() != Size) { WithColor::error(errs()) << "expected " << Size << " fields; found " - << Node.Fields.size() << "\n"; - reportLocation(Node.Tag.end()); + << Element.Fields.size() << "\n"; + reportLocation(Element.Tag.end()); return false; } return true; } +bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element, + size_t Size) const { + if (Element.Fields.size() < Size) { + WithColor::error(errs()) + << "expected at least " << Size << " fields; found " + << Element.Fields.size() << "\n"; + reportLocation(Element.Tag.end()); + return false; + } + return true; +} + +void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const { + WithColor::error(errs()) << "expected " << TypeName << "; found '" << Str + << "'\n"; + reportLocation(Str.begin()); +} + +// Prints two lines that point out the given location in the current Line using +// a caret. The iterator must be within the bounds of the most recent line +// passed to filter(). void MarkupFilter::reportLocation(StringRef::iterator Loc) const { errs() << Line; WithColor(errs().indent(Loc - Line.begin()), HighlightColor::String) << '^'; errs() << '\n'; } + +// Checks for an existing mmap that overlaps the given one and returns a +// pointer to one of them. +const MarkupFilter::MMap *MarkupFilter::overlappingMMap(const MMap &Map) const { + // If the given map contains the start of another mmap, they overlap. 
+ auto I = MMaps.upper_bound(Map.Addr); + if (I != MMaps.end() && Map.contains(I->second.Addr)) + return &I->second; + + // If no element starts inside the given mmap, the only possible overlap would + // be if the preceding mmap contains the start point of the given mmap. + if (I != MMaps.begin()) { + --I; + if (I->second.contains(Map.Addr)) + return &I->second; + } + return nullptr; +} + +StringRef MarkupFilter::lineEnding() const { + return Line.endswith("\r\n") ? "\r\n" : "\n"; +} + +bool MarkupFilter::MMap::contains(uint64_t Addr) const { + return this->Addr <= Addr && Addr - this->Addr < Size; // overflow-safe +} diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-context-line-elision.test b/llvm/test/DebugInfo/symbolize-filter-markup-context-line-elision.test new file mode 100644 index 000000000000..af187f8954bd --- /dev/null +++ b/llvm/test/DebugInfo/symbolize-filter-markup-context-line-elision.test @@ -0,0 +1,12 @@ +RUN: split-file %s %t +RUN: llvm-symbolizer --filter-markup < %t/log | \ +RUN: FileCheck --match-full-lines --implicit-check-not {{.}} \ +RUN: --strict-whitespace %s + +CHECK:keep[[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=ab 0x0(r)[[END:\]{3}]] +CHECK:keep[[BEGIN]]ELF module #0x1 "b.o"; BuildID=cd[[END]] + +;--- log +keep{{{module:0:a.o:elf:ab}}}skip +skip{{{mmap:0:1:load:0:r:0}}}skip +keep{{{module:1:b.o:elf:cd}}}skip diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-error-location.test b/llvm/test/DebugInfo/symbolize-filter-markup-error-location.test index 4d05bfd39ca9..400131d9e549 100644 --- a/llvm/test/DebugInfo/symbolize-filter-markup-error-location.test +++ b/llvm/test/DebugInfo/symbolize-filter-markup-error-location.test @@ -1,5 +1,5 @@ RUN: split-file %s %t -RUN: llvm-symbolizer --debug-file-directory=%p/Inputs --filter-markup < %t/log > /dev/null 2> %t.err +RUN: llvm-symbolizer --filter-markup < %t/log > /dev/null 2> %t.err RUN: FileCheck %s -input-file=%t.err --match-full-lines --strict-whitespace CHECK:error: 
expected 1 fields; found 
0 diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-mmap.test b/llvm/test/DebugInfo/symbolize-filter-markup-mmap.test new file mode 100644 index 000000000000..506d7a926bae --- /dev/null +++ b/llvm/test/DebugInfo/symbolize-filter-markup-mmap.test @@ -0,0 +1,40 @@ +RUN: split-file %s %t +RUN: llvm-symbolizer --filter-markup < %t/log > %t.out 2> %t.err +RUN: FileCheck %s --input-file=%t.out --match-full-lines \ +RUN: --implicit-check-not {{.}} +RUN: FileCheck %s --check-prefix=ERR -input-file=%t.err --match-full-lines + +CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=abb50d82b6bdc861 0x0(rwx)-0x1(r)-0x2(w)-0x3(x)-0x4(rwx)-0xa(r)[[END:\]{3}]] + +ERR: error: expected at least 3 fields; found 0 +ERR: error: unknown mmap type +ERR: error: expected 6 fields; found 3 +ERR: error: expected address; found '1' +ERR: error: expected size; found '-1' +ERR: error: expected mode; found '' +ERR: error: expected mode; found 'g' +ERR: error: expected mode; found 'wr' +ERR: error: overlapping mmap: #0x0 [0xa,0xc) +ERR: error: overlapping mmap: #0x0 [0xa,0xc) +ERR: error: overlapping mmap: #0x0 [0xa,0xc) + +;--- log +{{{module:0:a.o:elf:abb50d82b6bdc861}}} +{{{mmap:0x1:1:load:0:r:0}}} +{{{mmap:0x2:1:load:0:w:0}}} +{{{mmap:0x3:1:load:0:x:0}}} +{{{mmap:0x4:1:load:0:rwx:0}}} +{{{mmap:0x0:1:load:0:RWX:0}}} +{{{mmap:0xa:2:load:0:r:0}}} + +{{{mmap}}} +{{{mmap:0:1:unknown}}} +{{{mmap:0:10000000:load}}} +{{{mmap:1:10000000:load:0:r:0}}} +{{{mmap:0:-1:load:0:r:0}}} +{{{mmap:0:10000000:load:0::0}}} +{{{mmap:0:10000000:load:0:g:0}}} +{{{mmap:0:10000000:load:0:wr:0}}} +{{{mmap:0xa:1:load:0:r:0}}} +{{{mmap:0x9:2:load:0:r:0}}} +{{{mmap:0x9:5:load:0:r:0}}} diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-module.test b/llvm/test/DebugInfo/symbolize-filter-markup-module.test new file mode 100644 index 000000000000..74d6347941dc --- /dev/null +++ b/llvm/test/DebugInfo/symbolize-filter-markup-module.test @@ -0,0 +1,26 @@ +RUN: split-file %s %t +RUN: llvm-symbolizer --filter-markup < 
%t/log > %t.out 2> %t.err +RUN: FileCheck %s --input-file=%t.out --match-full-lines \ +RUN: --implicit-check-not {{.}} +RUN: FileCheck %s --check-prefix=ERR -input-file=%t.err --match-full-lines + +CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=ab[[END:\]{3}]] +CHECK: [[BEGIN]]ELF module #0x1 "b.o"; BuildID=abb50d82b6bdc861[[END]] +CHECK: [[BEGIN]]ELF module #0x2 "c.o"; BuildID=cd[[END]] +CHECK: [[BEGIN]]ELF module #0x1 "b.o"; adds 0x0(r)[[END]] + +ERR: error: expected at least 3 fields; found 0 +ERR: error: unknown module type +ERR: error: duplicate module ID +ERR: error: expected 4 fields; found 3 + +;--- log +{{{module:0:a.o:elf:ab}}} +{{{module:1:b.o:elf:abb50d82b6bdc861}}} +{{{module:2:c.o:elf:cd}}} +{{{mmap:0:10000000:load:1:r:0}}} + +{{{module}}} +{{{module:3:d.o:foo}}} +{{{module:0:d.o:elf:ef}}} +{{{module:4:d.o:elf}}} diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test b/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test new file mode 100644 index 000000000000..13e1d7f786c4 --- /dev/null +++ b/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test @@ -0,0 +1,44 @@ +RUN: split-file %s %t +RUN: llvm-symbolizer --filter-markup < %t/log 2> %t.err +RUN: FileCheck %s -input-file=%t.err --match-full-lines + +CHECK-NOT: '0x4f' +CHECK-NOT: '00' +CHECK: error: expected address; found '' +CHECK: error: expected address; found '42' +CHECK: error: expected address; found '0xgg' + +CHECK-NOT: '0' +CHECK: error: expected module ID; found '' +CHECK: error: expected module ID; found '-1' +CHECK-NOT: '077' +CHECK: error: expected module ID; found '079' +CHECK-NOT: '0xff' +CHECK: error: expected module ID; found '0xfg' +CHECK: error: expected module ID; found '0x' + +CHECK: error: expected build ID; found '' +CHECK: error: expected build ID; found '0' +CHECK-NOT: '0xff' +CHECK: error: expected build ID; found 'fg' + +;--- log +{{{mmap:0x4f:1:unknown}}} +{{{mmap:00:1:unknown}}} +{{{mmap::1:unknown}}} +{{{mmap:42:1:unknown}}} 
+{{{mmap:0xgg:1:unknown}}} + +{{{module:0::elf:00}}} +{{{module:::elf:00}}} +{{{module:-1::elf:00}}} +{{{module:077::elf:00}}} +{{{module:079::elf:00}}} +{{{module:0xff::elf:00}}} +{{{module:0xfg::elf:00}}} +{{{module:0x::elf:00}}} + +{{{module:1::elf:}}} +{{{module:2::elf:0}}} +{{{module:3::elf:ff}}} +{{{module:4::elf:fg}}} diff --git a/llvm/test/DebugInfo/symbolize-filter-markup-reset.test b/llvm/test/DebugInfo/symbolize-filter-markup-reset.test new file mode 100644 index 000000000000..1abb90582dfe --- /dev/null +++ b/llvm/test/DebugInfo/symbolize-filter-markup-reset.test @@ -0,0 +1,21 @@ +RUN: split-file %s %t +RUN: llvm-symbolizer --filter-markup < %t/log > %t.out 2> %t.err +RUN: FileCheck %s --input-file=%t.out --match-full-lines \ +RUN: --implicit-check-not {{.}} +RUN: FileCheck %s --check-prefix=ERR -input-file=%t.err --match-full-lines + +CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "a.o"; BuildID=ab 0x0(r)[[END:\]{3}]] +CHECK: {{ }}[[BEGIN]]reset[[END]] +CHECK: [[BEGIN:\[{3}]]ELF module #0x0 "b.o"; BuildID=cd 0x1(r)[[END:\]{3}]] + +ERR: error: expected 0 fields; found 1 + +;--- log + {{{reset}}} +{{{module:0:a.o:elf:ab}}} +{{{mmap:0:1:load:0:r:0}}} + {{{reset}}} +{{{module:0:b.o:elf:cd}}} +{{{mmap:0x1:1:load:0:r:0}}} + +{{{reset:}}} diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index b782c7a1720a..fb223d1ee8a9 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -365,20 +365,14 @@ static SmallVector parseBuildIDArg(const opt::InputArgList &Args, return BuildID; } -// Symbolize the markup from stdin and write the result to stdout. +// Symbolize markup from stdin and write the result to stdout. 
static void filterMarkup(const opt::InputArgList &Args) { - MarkupParser Parser; MarkupFilter Filter(outs(), parseColorArg(Args)); for (std::string InputString; std::getline(std::cin, InputString);) { InputString += '\n'; - Parser.parseLine(InputString); - Filter.beginLine(InputString); - while (Optional Element = Parser.nextNode()) - Filter.filter(*Element); + Filter.filter(InputString); } - Parser.flush(); - while (Optional Element = Parser.nextNode()) - Filter.filter(*Element); + Filter.finish(); } ExitOnError ExitOnErr;