LLVM  16.0.0git
Markup.h
Go to the documentation of this file.
1 //===- Markup.h -------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file declares the log symbolizer markup data model and parser.
11 ///
12 /// See https://llvm.org/docs/SymbolizerMarkupFormat.html
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
17 #define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
18 
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/Regex.h"
24 
25 namespace llvm {
26 namespace symbolize {
27 
28 /// A node of symbolizer markup.
29 ///
30 /// If only the Text field is set, this represents a region of text outside a
31 /// markup element. ANSI SGR control codes are also reported this way; if
32 /// detected, then the control code will be the entirety of the Text field, and
33 /// any surrounding text will be reported as preceding and following nodes.
34 struct MarkupNode {
35  /// The full text of this node in the input.
36  StringRef Text;
37 
38  /// If this represents an element, the tag. Otherwise, empty.
39  StringRef Tag;
40 
41  /// If this represents an element with fields, a list of the field contents.
42  /// Otherwise, empty.
43  SmallVector<StringRef> Fields;
44 
45  bool operator==(const MarkupNode &Other) const {
46  return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
47  }
48  bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
49 };
50 
51 /// Parses a log containing symbolizer markup into a sequence of nodes.
52 class MarkupParser {
53 public:
54  MarkupParser(StringSet<> MultilineTags = {});
55 
56  /// Parses an individual \p Line of input.
57  ///
58  /// Nodes from the previous parseLine() call that haven't yet been extracted
59  /// by nextNode() are discarded. The nodes returned by nextNode() may
60  /// reference the input string, so it must be retained by the caller until the
61  /// last use.
62  ///
63  /// Note that some elements may span multiple lines. If a line ends with the
64  /// start of one of these elements, then no nodes will be produced until the
65  /// either the end or something that cannot be part of an element is
66  /// encountered. This may only occur after multiple calls to parseLine(),
67  /// corresponding to the lines of the multi-line element.
68  void parseLine(StringRef Line);
69 
70  /// Inform the parser of that the input stream has ended.
71  ///
72  /// This allows the parser to finish any deferred processing (e.g., an
73  /// in-progress multi-line element) and may cause nextNode() to return
74  /// additional nodes.
75  void flush();
76 
77  /// Returns the next node in the input sequence.
78  ///
79  /// Calling nextNode() may invalidate the contents of the node returned by the
80  /// previous call.
81  ///
82  /// \returns the next markup node or None if none remain.
84 
85  bool isSGR(const MarkupNode &Node) const {
86  return SGRSyntax.match(Node.Text);
87  }
88 
89 private:
90  Optional<MarkupNode> parseElement(StringRef Line);
91  void parseTextOutsideMarkup(StringRef Text);
92  Optional<StringRef> parseMultiLineBegin(StringRef Line);
93  Optional<StringRef> parseMultiLineEnd(StringRef Line);
94 
95  // Tags of elements that can span multiple lines.
96  const StringSet<> MultilineTags;
97 
98  // Contents of a multi-line element that has finished being parsed. Retained
99  // to keep returned StringRefs for the contents valid.
100  std::string FinishedMultiline;
101 
102  // Contents of a multi-line element that is still in the process of receiving
103  // lines.
104  std::string InProgressMultiline;
105 
106  // The line currently being parsed.
107  StringRef Line;
108 
109  // Buffer for nodes parsed from the current line.
111 
112  // Next buffer index to return.
113  size_t NextIdx;
114 
115  // Regular expression matching supported ANSI SGR escape sequences.
116  const Regex SGRSyntax;
117 };
118 
119 } // end namespace symbolize
120 } // end namespace llvm
121 
122 #endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Optional.h
StringRef.h
llvm::symbolize::MarkupParser
Parses a log containing symbolizer markup into a sequence of nodes.
Definition: Markup.h:52
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1181
llvm::symbolize::MarkupNode::operator==
bool operator==(const MarkupNode &Other) const
Definition: Markup.h:45
llvm::Optional
Definition: APInt.h:33
llvm::dwarf::Tag
Tag
Definition: Dwarf.h:105
llvm::Regex::match
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:86
llvm::symbolize::MarkupParser::flush
void flush()
Inform the parser that the input stream has ended.
Definition: Markup.cpp:102
llvm::symbolize::MarkupNode::Fields
SmallVector< StringRef > Fields
If this represents an element with fields, a list of the field contents.
Definition: Markup.h:43
llvm::symbolize::MarkupParser::MarkupParser
MarkupParser(StringSet<> MultilineTags={})
Definition: Markup.cpp:28
llvm::symbolize::MarkupParser::parseLine
void parseLine(StringRef Line)
Parses an individual Line of input.
Definition: Markup.cpp:38
llvm::symbolize::MarkupParser::isSGR
bool isSGR(const MarkupNode &Node) const
Definition: Markup.h:85
Regex.h
llvm::symbolize::MarkupNode::Tag
StringRef Tag
If this represents an element, the tag. Otherwise, empty.
Definition: Markup.h:39
llvm::symbolize::MarkupParser::nextNode
Optional< MarkupNode > nextNode()
Returns the next node in the input sequence.
Definition: Markup.cpp:45
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
Node
Definition: ItaniumDemangle.h:155
StringSet.h
llvm::symbolize::MarkupNode::Text
StringRef Text
The full text of this node in the input.
Definition: Markup.h:36
SmallVector.h
llvm::symbolize::MarkupNode::operator!=
bool operator!=(const MarkupNode &Other) const
Definition: Markup.h:48
llvm::Regex
Definition: Regex.h:28
llvm::symbolize::MarkupNode
A node of symbolizer markup.
Definition: Markup.h:34
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1247