LLVM  13.0.0git
YAMLParser.cpp
Go to the documentation of this file.
1 //===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a YAML parser.
10 //
11 //===----------------------------------------------------------------------===//
12 
#include "llvm/Support/YAMLParser.h"
#include "llvm/ADT/AllocatorList.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/Unicode.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
39 
40 using namespace llvm;
41 using namespace yaml;
42 
/// The Unicode encoding forms that getUnicodeEncoding() can report for a
/// YAML stream.
enum UnicodeEncodingForm {
  UEF_UTF32_LE, ///< UTF-32 Little Endian
  UEF_UTF32_BE, ///< UTF-32 Big Endian
  UEF_UTF16_LE, ///< UTF-16 Little Endian
  UEF_UTF16_BE, ///< UTF-16 Big Endian
  UEF_UTF8,     ///< UTF-8 or ascii.
  UEF_Unknown   ///< Not a valid Unicode encoding.
};
51 
52 /// EncodingInfo - Holds the encoding type and length of the byte order mark if
53 /// it exists. Length is in {0, 2, 3, 4}.
54 using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
55 
56 /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
57 /// encoding form of \a Input.
58 ///
59 /// @param Input A string of length 0 or more.
60 /// @returns An EncodingInfo indicating the Unicode encoding form of the input
61 /// and how long the byte order mark is if one exists.
63  if (Input.empty())
64  return std::make_pair(UEF_Unknown, 0);
65 
66  switch (uint8_t(Input[0])) {
67  case 0x00:
68  if (Input.size() >= 4) {
69  if ( Input[1] == 0
70  && uint8_t(Input[2]) == 0xFE
71  && uint8_t(Input[3]) == 0xFF)
72  return std::make_pair(UEF_UTF32_BE, 4);
73  if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
74  return std::make_pair(UEF_UTF32_BE, 0);
75  }
76 
77  if (Input.size() >= 2 && Input[1] != 0)
78  return std::make_pair(UEF_UTF16_BE, 0);
79  return std::make_pair(UEF_Unknown, 0);
80  case 0xFF:
81  if ( Input.size() >= 4
82  && uint8_t(Input[1]) == 0xFE
83  && Input[2] == 0
84  && Input[3] == 0)
85  return std::make_pair(UEF_UTF32_LE, 4);
86 
87  if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
88  return std::make_pair(UEF_UTF16_LE, 2);
89  return std::make_pair(UEF_Unknown, 0);
90  case 0xFE:
91  if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
92  return std::make_pair(UEF_UTF16_BE, 2);
93  return std::make_pair(UEF_Unknown, 0);
94  case 0xEF:
95  if ( Input.size() >= 3
96  && uint8_t(Input[1]) == 0xBB
97  && uint8_t(Input[2]) == 0xBF)
98  return std::make_pair(UEF_UTF8, 3);
99  return std::make_pair(UEF_Unknown, 0);
100  }
101 
102  // It could still be utf-32 or utf-16.
103  if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
104  return std::make_pair(UEF_UTF32_LE, 0);
105 
106  if (Input.size() >= 2 && Input[1] == 0)
107  return std::make_pair(UEF_UTF16_LE, 0);
108 
109  return std::make_pair(UEF_UTF8, 0);
110 }
111 
112 /// Pin the vtables to this file.
113 void Node::anchor() {}
114 void NullNode::anchor() {}
115 void ScalarNode::anchor() {}
116 void BlockScalarNode::anchor() {}
117 void KeyValueNode::anchor() {}
118 void MappingNode::anchor() {}
119 void SequenceNode::anchor() {}
120 void AliasNode::anchor() {}
121 
122 namespace llvm {
123 namespace yaml {
124 
125 /// Token - A single YAML token.
126 struct Token {
127  enum TokenKind {
128  TK_Error, // Uninitialized token.
150  TK_Tag
151  } Kind = TK_Error;
152 
153  /// A string of length 0 or more whose begin() points to the logical location
154  /// of the token in the input.
156 
157  /// The value of a block scalar node.
158  std::string Value;
159 
160  Token() = default;
161 };
162 
163 } // end namespace yaml
164 } // end namespace llvm
165 
167 
168 namespace {
169 
170 /// This struct is used to track simple keys.
171 ///
172 /// Simple keys are handled by creating an entry in SimpleKeys for each Token
173 /// which could legally be the start of a simple key. When peekNext is called,
174 /// if the Token To be returned is referenced by a SimpleKey, we continue
175 /// tokenizing until that potential simple key has either been found to not be
176 /// a simple key (we moved on to the next line or went further than 1024 chars).
177 /// Or when we run into a Value, and then insert a Key token (and possibly
178 /// others) before the SimpleKey's Tok.
179 struct SimpleKey {
181  unsigned Column = 0;
182  unsigned Line = 0;
183  unsigned FlowLevel = 0;
184  bool IsRequired = false;
185 
186  bool operator ==(const SimpleKey &Other) {
187  return Tok == Other.Tok;
188  }
189 };
190 
191 } // end anonymous namespace
192 
193 /// The Unicode scalar value of a UTF-8 minimal well-formed code unit
194 /// subsequence and the subsequence's length in code units (uint8_t).
195 /// A length of 0 represents an error.
196 using UTF8Decoded = std::pair<uint32_t, unsigned>;
197 
199  StringRef::iterator Position= Range.begin();
200  StringRef::iterator End = Range.end();
201  // 1 byte: [0x00, 0x7f]
202  // Bit pattern: 0xxxxxxx
203  if (Position < End && (*Position & 0x80) == 0) {
204  return std::make_pair(*Position, 1);
205  }
206  // 2 bytes: [0x80, 0x7ff]
207  // Bit pattern: 110xxxxx 10xxxxxx
208  if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) &&
209  ((*(Position + 1) & 0xC0) == 0x80)) {
210  uint32_t codepoint = ((*Position & 0x1F) << 6) |
211  (*(Position + 1) & 0x3F);
212  if (codepoint >= 0x80)
213  return std::make_pair(codepoint, 2);
214  }
215  // 3 bytes: [0x8000, 0xffff]
216  // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
217  if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) &&
218  ((*(Position + 1) & 0xC0) == 0x80) &&
219  ((*(Position + 2) & 0xC0) == 0x80)) {
220  uint32_t codepoint = ((*Position & 0x0F) << 12) |
221  ((*(Position + 1) & 0x3F) << 6) |
222  (*(Position + 2) & 0x3F);
223  // Codepoints between 0xD800 and 0xDFFF are invalid, as
224  // they are high / low surrogate halves used by UTF-16.
225  if (codepoint >= 0x800 &&
226  (codepoint < 0xD800 || codepoint > 0xDFFF))
227  return std::make_pair(codepoint, 3);
228  }
229  // 4 bytes: [0x10000, 0x10FFFF]
230  // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
231  if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) &&
232  ((*(Position + 1) & 0xC0) == 0x80) &&
233  ((*(Position + 2) & 0xC0) == 0x80) &&
234  ((*(Position + 3) & 0xC0) == 0x80)) {
235  uint32_t codepoint = ((*Position & 0x07) << 18) |
236  ((*(Position + 1) & 0x3F) << 12) |
237  ((*(Position + 2) & 0x3F) << 6) |
238  (*(Position + 3) & 0x3F);
239  if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
240  return std::make_pair(codepoint, 4);
241  }
242  return std::make_pair(0, 0);
243 }
244 
245 namespace llvm {
246 namespace yaml {
247 
248 /// Scans YAML tokens from a MemoryBuffer.
249 class Scanner {
250 public:
251  Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
252  std::error_code *EC = nullptr);
253  Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
254  std::error_code *EC = nullptr);
255 
256  /// Parse the next token and return it without popping it.
257  Token &peekNext();
258 
259  /// Parse the next token and pop it from the queue.
260  Token getNext();
261 
262  void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
263  ArrayRef<SMRange> Ranges = None) {
264  SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors);
265  }
266 
267  void setError(const Twine &Message, StringRef::iterator Position) {
268  if (Position >= End)
269  Position = End - 1;
270 
271  // propagate the error if possible
272  if (EC)
273  *EC = make_error_code(std::errc::invalid_argument);
274 
275  // Don't print out more errors after the first one we encounter. The rest
276  // are just the result of the first, and have no meaning.
277  if (!Failed)
278  printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message);
279  Failed = true;
280  }
281 
282  /// Returns true if an error occurred while parsing.
283  bool failed() {
284  return Failed;
285  }
286 
287 private:
288  void init(MemoryBufferRef Buffer);
289 
290  StringRef currentInput() {
291  return StringRef(Current, End - Current);
292  }
293 
294  /// Decode a UTF-8 minimal well-formed code unit subsequence starting
295  /// at \a Position.
296  ///
297  /// If the UTF-8 code units starting at Position do not form a well-formed
298  /// code unit subsequence, then the Unicode scalar value is 0, and the length
299  /// is 0.
301  return ::decodeUTF8(StringRef(Position, End - Position));
302  }
303 
304  // The following functions are based on the gramar rules in the YAML spec. The
305  // style of the function names it meant to closely match how they are written
306  // in the spec. The number within the [] is the number of the grammar rule in
307  // the spec.
308  //
309  // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
310  //
311  // c-
312  // A production starting and ending with a special character.
313  // b-
314  // A production matching a single line break.
315  // nb-
316  // A production starting and ending with a non-break character.
317  // s-
318  // A production starting and ending with a white space character.
319  // ns-
320  // A production starting and ending with a non-space character.
321  // l-
322  // A production matching complete line(s).
323 
324  /// Skip a single nb-char[27] starting at Position.
325  ///
326  /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
327  /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
328  ///
329  /// @returns The code unit after the nb-char, or Position if it's not an
330  /// nb-char.
331  StringRef::iterator skip_nb_char(StringRef::iterator Position);
332 
333  /// Skip a single b-break[28] starting at Position.
334  ///
335  /// A b-break is 0xD 0xA | 0xD | 0xA
336  ///
337  /// @returns The code unit after the b-break, or Position if it's not a
338  /// b-break.
339  StringRef::iterator skip_b_break(StringRef::iterator Position);
340 
341  /// Skip a single s-space[31] starting at Position.
342  ///
343  /// An s-space is 0x20
344  ///
345  /// @returns The code unit after the s-space, or Position if it's not a
346  /// s-space.
347  StringRef::iterator skip_s_space(StringRef::iterator Position);
348 
349  /// Skip a single s-white[33] starting at Position.
350  ///
351  /// A s-white is 0x20 | 0x9
352  ///
353  /// @returns The code unit after the s-white, or Position if it's not a
354  /// s-white.
355  StringRef::iterator skip_s_white(StringRef::iterator Position);
356 
357  /// Skip a single ns-char[34] starting at Position.
358  ///
359  /// A ns-char is nb-char - s-white
360  ///
361  /// @returns The code unit after the ns-char, or Position if it's not a
362  /// ns-char.
363  StringRef::iterator skip_ns_char(StringRef::iterator Position);
364 
365  using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
366 
367  /// Skip minimal well-formed code unit subsequences until Func
368  /// returns its input.
369  ///
370  /// @returns The code unit after the last minimal well-formed code unit
371  /// subsequence that Func accepted.
372  StringRef::iterator skip_while( SkipWhileFunc Func
373  , StringRef::iterator Position);
374 
375  /// Skip minimal well-formed code unit subsequences until Func returns its
376  /// input.
377  void advanceWhile(SkipWhileFunc Func);
378 
379  /// Scan ns-uri-char[39]s starting at Cur.
380  ///
381  /// This updates Cur and Column while scanning.
382  void scan_ns_uri_char();
383 
384  /// Consume a minimal well-formed code unit subsequence starting at
385  /// \a Cur. Return false if it is not the same Unicode scalar value as
386  /// \a Expected. This updates \a Column.
387  bool consume(uint32_t Expected);
388 
389  /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
390  void skip(uint32_t Distance);
391 
392  /// Return true if the minimal well-formed code unit subsequence at
393  /// Pos is whitespace or a new line
394  bool isBlankOrBreak(StringRef::iterator Position);
395 
396  /// Consume a single b-break[28] if it's present at the current position.
397  ///
398  /// Return false if the code unit at the current position isn't a line break.
399  bool consumeLineBreakIfPresent();
400 
401  /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
402  void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
403  , unsigned AtColumn
404  , bool IsRequired);
405 
406  /// Remove simple keys that can no longer be valid simple keys.
407  ///
408  /// Invalid simple keys are not on the current line or are further than 1024
409  /// columns back.
410  void removeStaleSimpleKeyCandidates();
411 
412  /// Remove all simple keys on FlowLevel \a Level.
413  void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
414 
415  /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
416  /// tokens if needed.
417  bool unrollIndent(int ToColumn);
418 
419  /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
420  /// if needed.
421  bool rollIndent( int ToColumn
423  , TokenQueueT::iterator InsertPoint);
424 
425  /// Skip a single-line comment when the comment starts at the current
426  /// position of the scanner.
427  void skipComment();
428 
429  /// Skip whitespace and comments until the start of the next token.
430  void scanToNextToken();
431 
432  /// Must be the first token generated.
433  bool scanStreamStart();
434 
435  /// Generate tokens needed to close out the stream.
436  bool scanStreamEnd();
437 
438  /// Scan a %BLAH directive.
439  bool scanDirective();
440 
441  /// Scan a ... or ---.
442  bool scanDocumentIndicator(bool IsStart);
443 
444  /// Scan a [ or { and generate the proper flow collection start token.
445  bool scanFlowCollectionStart(bool IsSequence);
446 
447  /// Scan a ] or } and generate the proper flow collection end token.
448  bool scanFlowCollectionEnd(bool IsSequence);
449 
450  /// Scan the , that separates entries in a flow collection.
451  bool scanFlowEntry();
452 
453  /// Scan the - that starts block sequence entries.
454  bool scanBlockEntry();
455 
456  /// Scan an explicit ? indicating a key.
457  bool scanKey();
458 
459  /// Scan an explicit : indicating a value.
460  bool scanValue();
461 
462  /// Scan a quoted scalar.
463  bool scanFlowScalar(bool IsDoubleQuoted);
464 
465  /// Scan an unquoted scalar.
466  bool scanPlainScalar();
467 
468  /// Scan an Alias or Anchor starting with * or &.
469  bool scanAliasOrAnchor(bool IsAlias);
470 
471  /// Scan a block scalar starting with | or >.
472  bool scanBlockScalar(bool IsLiteral);
473 
474  /// Scan a chomping indicator in a block scalar header.
475  char scanBlockChompingIndicator();
476 
477  /// Scan an indentation indicator in a block scalar header.
478  unsigned scanBlockIndentationIndicator();
479 
480  /// Scan a block scalar header.
481  ///
482  /// Return false if an error occurred.
483  bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
484  bool &IsDone);
485 
486  /// Look for the indentation level of a block scalar.
487  ///
488  /// Return false if an error occurred.
489  bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
490  unsigned &LineBreaks, bool &IsDone);
491 
492  /// Scan the indentation of a text line in a block scalar.
493  ///
494  /// Return false if an error occurred.
495  bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
496  bool &IsDone);
497 
498  /// Scan a tag of the form !stuff.
499  bool scanTag();
500 
501  /// Dispatch to the next scanning function based on \a *Cur.
502  bool fetchMoreTokens();
503 
504  /// The SourceMgr used for diagnostics and buffer management.
505  SourceMgr &SM;
506 
507  /// The original input.
508  MemoryBufferRef InputBuffer;
509 
510  /// The current position of the scanner.
511  StringRef::iterator Current;
512 
513  /// The end of the input (one past the last character).
515 
516  /// Current YAML indentation level in spaces.
517  int Indent;
518 
519  /// Current column number in Unicode code points.
520  unsigned Column;
521 
522  /// Current line number.
523  unsigned Line;
524 
525  /// How deep we are in flow style containers. 0 Means at block level.
526  unsigned FlowLevel;
527 
528  /// Are we at the start of the stream?
529  bool IsStartOfStream;
530 
531  /// Can the next token be the start of a simple key?
532  bool IsSimpleKeyAllowed;
533 
534  /// True if an error has occurred.
535  bool Failed;
536 
537  /// Should colors be used when printing out the diagnostic messages?
538  bool ShowColors;
539 
540  /// Queue of tokens. This is required to queue up tokens while looking
541  /// for the end of a simple key. And for cases where a single character
542  /// can produce multiple tokens (e.g. BlockEnd).
543  TokenQueueT TokenQueue;
544 
545  /// Indentation levels.
546  SmallVector<int, 4> Indents;
547 
548  /// Potential simple keys.
549  SmallVector<SimpleKey, 4> SimpleKeys;
550 
551  std::error_code *EC;
552 };
553 
554 } // end namespace yaml
555 } // end namespace llvm
556 
557 /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
558 static void encodeUTF8( uint32_t UnicodeScalarValue
559  , SmallVectorImpl<char> &Result) {
560  if (UnicodeScalarValue <= 0x7F) {
561  Result.push_back(UnicodeScalarValue & 0x7F);
562  } else if (UnicodeScalarValue <= 0x7FF) {
563  uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
564  uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
565  Result.push_back(FirstByte);
566  Result.push_back(SecondByte);
567  } else if (UnicodeScalarValue <= 0xFFFF) {
568  uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
569  uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
570  uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
571  Result.push_back(FirstByte);
572  Result.push_back(SecondByte);
573  Result.push_back(ThirdByte);
574  } else if (UnicodeScalarValue <= 0x10FFFF) {
575  uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
576  uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
577  uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
578  uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
579  Result.push_back(FirstByte);
580  Result.push_back(SecondByte);
581  Result.push_back(ThirdByte);
582  Result.push_back(FourthByte);
583  }
584 }
585 
587  SourceMgr SM;
588  Scanner scanner(Input, SM);
589  while (true) {
590  Token T = scanner.getNext();
591  switch (T.Kind) {
593  OS << "Stream-Start: ";
594  break;
595  case Token::TK_StreamEnd:
596  OS << "Stream-End: ";
597  break;
599  OS << "Version-Directive: ";
600  break;
602  OS << "Tag-Directive: ";
603  break;
605  OS << "Document-Start: ";
606  break;
608  OS << "Document-End: ";
609  break;
611  OS << "Block-Entry: ";
612  break;
613  case Token::TK_BlockEnd:
614  OS << "Block-End: ";
615  break;
617  OS << "Block-Sequence-Start: ";
618  break;
620  OS << "Block-Mapping-Start: ";
621  break;
622  case Token::TK_FlowEntry:
623  OS << "Flow-Entry: ";
624  break;
626  OS << "Flow-Sequence-Start: ";
627  break;
629  OS << "Flow-Sequence-End: ";
630  break;
632  OS << "Flow-Mapping-Start: ";
633  break;
635  OS << "Flow-Mapping-End: ";
636  break;
637  case Token::TK_Key:
638  OS << "Key: ";
639  break;
640  case Token::TK_Value:
641  OS << "Value: ";
642  break;
643  case Token::TK_Scalar:
644  OS << "Scalar: ";
645  break;
647  OS << "Block Scalar: ";
648  break;
649  case Token::TK_Alias:
650  OS << "Alias: ";
651  break;
652  case Token::TK_Anchor:
653  OS << "Anchor: ";
654  break;
655  case Token::TK_Tag:
656  OS << "Tag: ";
657  break;
658  case Token::TK_Error:
659  break;
660  }
661  OS << T.Range << "\n";
662  if (T.Kind == Token::TK_StreamEnd)
663  break;
664  else if (T.Kind == Token::TK_Error)
665  return false;
666  }
667  return true;
668 }
669 
671  SourceMgr SM;
672  Scanner scanner(Input, SM);
673  while (true) {
674  Token T = scanner.getNext();
675  if (T.Kind == Token::TK_StreamEnd)
676  break;
677  else if (T.Kind == Token::TK_Error)
678  return false;
679  }
680  return true;
681 }
682 
683 std::string yaml::escape(StringRef Input, bool EscapePrintable) {
684  std::string EscapedInput;
685  for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
686  if (*i == '\\')
687  EscapedInput += "\\\\";
688  else if (*i == '"')
689  EscapedInput += "\\\"";
690  else if (*i == 0)
691  EscapedInput += "\\0";
692  else if (*i == 0x07)
693  EscapedInput += "\\a";
694  else if (*i == 0x08)
695  EscapedInput += "\\b";
696  else if (*i == 0x09)
697  EscapedInput += "\\t";
698  else if (*i == 0x0A)
699  EscapedInput += "\\n";
700  else if (*i == 0x0B)
701  EscapedInput += "\\v";
702  else if (*i == 0x0C)
703  EscapedInput += "\\f";
704  else if (*i == 0x0D)
705  EscapedInput += "\\r";
706  else if (*i == 0x1B)
707  EscapedInput += "\\e";
708  else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
709  std::string HexStr = utohexstr(*i);
710  EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
711  } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
712  UTF8Decoded UnicodeScalarValue
713  = decodeUTF8(StringRef(i, Input.end() - i));
714  if (UnicodeScalarValue.second == 0) {
715  // Found invalid char.
716  SmallString<4> Val;
717  encodeUTF8(0xFFFD, Val);
718  llvm::append_range(EscapedInput, Val);
719  // FIXME: Error reporting.
720  return EscapedInput;
721  }
722  if (UnicodeScalarValue.first == 0x85)
723  EscapedInput += "\\N";
724  else if (UnicodeScalarValue.first == 0xA0)
725  EscapedInput += "\\_";
726  else if (UnicodeScalarValue.first == 0x2028)
727  EscapedInput += "\\L";
728  else if (UnicodeScalarValue.first == 0x2029)
729  EscapedInput += "\\P";
730  else if (!EscapePrintable &&
731  sys::unicode::isPrintable(UnicodeScalarValue.first))
732  EscapedInput += StringRef(i, UnicodeScalarValue.second);
733  else {
734  std::string HexStr = utohexstr(UnicodeScalarValue.first);
735  if (HexStr.size() <= 2)
736  EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
737  else if (HexStr.size() <= 4)
738  EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
739  else if (HexStr.size() <= 8)
740  EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
741  }
742  i += UnicodeScalarValue.second - 1;
743  } else
744  EscapedInput.push_back(*i);
745  }
746  return EscapedInput;
747 }
748 
750  switch (S.size()) {
751  case 1:
752  switch (S.front()) {
753  case 'y':
754  case 'Y':
755  return true;
756  case 'n':
757  case 'N':
758  return false;
759  default:
760  return None;
761  }
762  case 2:
763  switch (S.front()) {
764  case 'O':
765  if (S[1] == 'N') // ON
766  return true;
768  case 'o':
769  if (S[1] == 'n') //[Oo]n
770  return true;
771  return None;
772  case 'N':
773  if (S[1] == 'O') // NO
774  return false;
776  case 'n':
777  if (S[1] == 'o') //[Nn]o
778  return false;
779  return None;
780  default:
781  return None;
782  }
783  case 3:
784  switch (S.front()) {
785  case 'O':
786  if (S.drop_front() == "FF") // OFF
787  return false;
789  case 'o':
790  if (S.drop_front() == "ff") //[Oo]ff
791  return false;
792  return None;
793  case 'Y':
794  if (S.drop_front() == "ES") // YES
795  return true;
797  case 'y':
798  if (S.drop_front() == "es") //[Yy]es
799  return true;
800  return None;
801  default:
802  return None;
803  }
804  case 4:
805  switch (S.front()) {
806  case 'T':
807  if (S.drop_front() == "RUE") // TRUE
808  return true;
810  case 't':
811  if (S.drop_front() == "rue") //[Tt]rue
812  return true;
813  return None;
814  default:
815  return None;
816  }
817  case 5:
818  switch (S.front()) {
819  case 'F':
820  if (S.drop_front() == "ALSE") // FALSE
821  return false;
823  case 'f':
824  if (S.drop_front() == "alse") //[Ff]alse
825  return false;
826  return None;
827  default:
828  return None;
829  }
830  default:
831  return None;
832  }
833 }
834 
835 Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
836  std::error_code *EC)
837  : SM(sm), ShowColors(ShowColors), EC(EC) {
838  init(MemoryBufferRef(Input, "YAML"));
839 }
840 
841 Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
842  std::error_code *EC)
843  : SM(SM_), ShowColors(ShowColors), EC(EC) {
844  init(Buffer);
845 }
846 
847 void Scanner::init(MemoryBufferRef Buffer) {
848  InputBuffer = Buffer;
849  Current = InputBuffer.getBufferStart();
850  End = InputBuffer.getBufferEnd();
851  Indent = -1;
852  Column = 0;
853  Line = 0;
854  FlowLevel = 0;
855  IsStartOfStream = true;
856  IsSimpleKeyAllowed = true;
857  Failed = false;
858  std::unique_ptr<MemoryBuffer> InputBufferOwner =
859  MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false);
860  SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
861 }
862 
864  // If the current token is a possible simple key, keep parsing until we
865  // can confirm.
866  bool NeedMore = false;
867  while (true) {
868  if (TokenQueue.empty() || NeedMore) {
869  if (!fetchMoreTokens()) {
870  TokenQueue.clear();
871  SimpleKeys.clear();
872  TokenQueue.push_back(Token());
873  return TokenQueue.front();
874  }
875  }
876  assert(!TokenQueue.empty() &&
877  "fetchMoreTokens lied about getting tokens!");
878 
879  removeStaleSimpleKeyCandidates();
880  SimpleKey SK;
881  SK.Tok = TokenQueue.begin();
882  if (!is_contained(SimpleKeys, SK))
883  break;
884  else
885  NeedMore = true;
886  }
887  return TokenQueue.front();
888 }
889 
891  Token Ret = peekNext();
892  // TokenQueue can be empty if there was an error getting the next token.
893  if (!TokenQueue.empty())
894  TokenQueue.pop_front();
895 
896  // There cannot be any referenced Token's if the TokenQueue is empty. So do a
897  // quick deallocation of them all.
898  if (TokenQueue.empty())
899  TokenQueue.resetAlloc();
900 
901  return Ret;
902 }
903 
904 StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
905  if (Position == End)
906  return Position;
907  // Check 7 bit c-printable - b-char.
908  if ( *Position == 0x09
909  || (*Position >= 0x20 && *Position <= 0x7E))
910  return Position + 1;
911 
912  // Check for valid UTF-8.
913  if (uint8_t(*Position) & 0x80) {
914  UTF8Decoded u8d = decodeUTF8(Position);
915  if ( u8d.second != 0
916  && u8d.first != 0xFEFF
917  && ( u8d.first == 0x85
918  || ( u8d.first >= 0xA0
919  && u8d.first <= 0xD7FF)
920  || ( u8d.first >= 0xE000
921  && u8d.first <= 0xFFFD)
922  || ( u8d.first >= 0x10000
923  && u8d.first <= 0x10FFFF)))
924  return Position + u8d.second;
925  }
926  return Position;
927 }
928 
929 StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
930  if (Position == End)
931  return Position;
932  if (*Position == 0x0D) {
933  if (Position + 1 != End && *(Position + 1) == 0x0A)
934  return Position + 2;
935  return Position + 1;
936  }
937 
938  if (*Position == 0x0A)
939  return Position + 1;
940  return Position;
941 }
942 
943 StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
944  if (Position == End)
945  return Position;
946  if (*Position == ' ')
947  return Position + 1;
948  return Position;
949 }
950 
951 StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
952  if (Position == End)
953  return Position;
954  if (*Position == ' ' || *Position == '\t')
955  return Position + 1;
956  return Position;
957 }
958 
959 StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
960  if (Position == End)
961  return Position;
962  if (*Position == ' ' || *Position == '\t')
963  return Position;
964  return skip_nb_char(Position);
965 }
966 
967 StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
968  , StringRef::iterator Position) {
969  while (true) {
970  StringRef::iterator i = (this->*Func)(Position);
971  if (i == Position)
972  break;
973  Position = i;
974  }
975  return Position;
976 }
977 
978 void Scanner::advanceWhile(SkipWhileFunc Func) {
979  auto Final = skip_while(Func, Current);
980  Column += Final - Current;
981  Current = Final;
982 }
983 
/// Return true if \a C is an ns-hex-digit[36]: a decimal digit or one of the
/// letters A-F / a-f. Other letters are not hexadecimal digits.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9') || (C >= 'a' && C <= 'f') ||
         (C >= 'A' && C <= 'F');
}
985 
/// Return true if \a C is an ns-word-char[38]: a decimal digit, an ASCII
/// letter, or '-'.
static bool is_ns_word_char(const char C) {
  return C == '-' || (C >= '0' && C <= '9') || (C >= 'a' && C <= 'z') ||
         (C >= 'A' && C <= 'Z');
}
987 
988 void Scanner::scan_ns_uri_char() {
989  while (true) {
990  if (Current == End)
991  break;
992  if (( *Current == '%'
993  && Current + 2 < End
994  && is_ns_hex_digit(*(Current + 1))
995  && is_ns_hex_digit(*(Current + 2)))
996  || is_ns_word_char(*Current)
997  || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
998  != StringRef::npos) {
999  ++Current;
1000  ++Column;
1001  } else
1002  break;
1003  }
1004 }
1005 
1006 bool Scanner::consume(uint32_t Expected) {
1007  if (Expected >= 0x80) {
1008  setError("Cannot consume non-ascii characters", Current);
1009  return false;
1010  }
1011  if (Current == End)
1012  return false;
1013  if (uint8_t(*Current) >= 0x80) {
1014  setError("Cannot consume non-ascii characters", Current);
1015  return false;
1016  }
1017  if (uint8_t(*Current) == Expected) {
1018  ++Current;
1019  ++Column;
1020  return true;
1021  }
1022  return false;
1023 }
1024 
1025 void Scanner::skip(uint32_t Distance) {
1026  Current += Distance;
1027  Column += Distance;
1028  assert(Current <= End && "Skipped past the end");
1029 }
1030 
1031 bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
1032  if (Position == End)
1033  return false;
1034  return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
1035  *Position == '\n';
1036 }
1037 
1038 bool Scanner::consumeLineBreakIfPresent() {
1039  auto Next = skip_b_break(Current);
1040  if (Next == Current)
1041  return false;
1042  Column = 0;
1043  ++Line;
1044  Current = Next;
1045  return true;
1046 }
1047 
1048 void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
1049  , unsigned AtColumn
1050  , bool IsRequired) {
1051  if (IsSimpleKeyAllowed) {
1052  SimpleKey SK;
1053  SK.Tok = Tok;
1054  SK.Line = Line;
1055  SK.Column = AtColumn;
1056  SK.IsRequired = IsRequired;
1057  SK.FlowLevel = FlowLevel;
1058  SimpleKeys.push_back(SK);
1059  }
1060 }
1061 
1062 void Scanner::removeStaleSimpleKeyCandidates() {
1063  for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
1064  i != SimpleKeys.end();) {
1065  if (i->Line != Line || i->Column + 1024 < Column) {
1066  if (i->IsRequired)
1067  setError( "Could not find expected : for simple key"
1068  , i->Tok->Range.begin());
1069  i = SimpleKeys.erase(i);
1070  } else
1071  ++i;
1072  }
1073 }
1074 
1075 void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
1076  if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
1077  SimpleKeys.pop_back();
1078 }
1079 
1080 bool Scanner::unrollIndent(int ToColumn) {
1081  Token T;
1082  // Indentation is ignored in flow.
1083  if (FlowLevel != 0)
1084  return true;
1085 
1086  while (Indent > ToColumn) {
1087  T.Kind = Token::TK_BlockEnd;
1088  T.Range = StringRef(Current, 1);
1089  TokenQueue.push_back(T);
1090  Indent = Indents.pop_back_val();
1091  }
1092 
1093  return true;
1094 }
1095 
1096 bool Scanner::rollIndent( int ToColumn
1098  , TokenQueueT::iterator InsertPoint) {
1099  if (FlowLevel)
1100  return true;
1101  if (Indent < ToColumn) {
1102  Indents.push_back(Indent);
1103  Indent = ToColumn;
1104 
1105  Token T;
1106  T.Kind = Kind;
1107  T.Range = StringRef(Current, 0);
1108  TokenQueue.insert(InsertPoint, T);
1109  }
1110  return true;
1111 }
1112 
1113 void Scanner::skipComment() {
1114  if (Current == End || *Current != '#')
1115  return;
1116  while (true) {
1117  // This may skip more than one byte, thus Column is only incremented
1118  // for code points.
1119  StringRef::iterator I = skip_nb_char(Current);
1120  if (I == Current)
1121  break;
1122  Current = I;
1123  ++Column;
1124  }
1125 }
1126 
1127 void Scanner::scanToNextToken() {
1128  while (true) {
1129  while (Current != End && (*Current == ' ' || *Current == '\t')) {
1130  skip(1);
1131  }
1132 
1133  skipComment();
1134 
1135  // Skip EOL.
1136  StringRef::iterator i = skip_b_break(Current);
1137  if (i == Current)
1138  break;
1139  Current = i;
1140  ++Line;
1141  Column = 0;
1142  // New lines may start a simple key.
1143  if (!FlowLevel)
1144  IsSimpleKeyAllowed = true;
1145  }
1146 }
1147 
1148 bool Scanner::scanStreamStart() {
1149  IsStartOfStream = false;
1150 
1151  EncodingInfo EI = getUnicodeEncoding(currentInput());
1152 
1153  Token T;
1154  T.Kind = Token::TK_StreamStart;
1155  T.Range = StringRef(Current, EI.second);
1156  TokenQueue.push_back(T);
1157  Current += EI.second;
1158  return true;
1159 }
1160 
1161 bool Scanner::scanStreamEnd() {
1162  // Force an ending new line if one isn't present.
1163  if (Column != 0) {
1164  Column = 0;
1165  ++Line;
1166  }
1167 
1168  unrollIndent(-1);
1169  SimpleKeys.clear();
1170  IsSimpleKeyAllowed = false;
1171 
1172  Token T;
1173  T.Kind = Token::TK_StreamEnd;
1174  T.Range = StringRef(Current, 0);
1175  TokenQueue.push_back(T);
1176  return true;
1177 }
1178 
1179 bool Scanner::scanDirective() {
1180  // Reset the indentation level.
1181  unrollIndent(-1);
1182  SimpleKeys.clear();
1183  IsSimpleKeyAllowed = false;
1184 
1185  StringRef::iterator Start = Current;
1186  consume('%');
1187  StringRef::iterator NameStart = Current;
1188  Current = skip_while(&Scanner::skip_ns_char, Current);
1189  StringRef Name(NameStart, Current - NameStart);
1190  Current = skip_while(&Scanner::skip_s_white, Current);
1191 
1192  Token T;
1193  if (Name == "YAML") {
1194  Current = skip_while(&Scanner::skip_ns_char, Current);
1196  T.Range = StringRef(Start, Current - Start);
1197  TokenQueue.push_back(T);
1198  return true;
1199  } else if(Name == "TAG") {
1200  Current = skip_while(&Scanner::skip_ns_char, Current);
1201  Current = skip_while(&Scanner::skip_s_white, Current);
1202  Current = skip_while(&Scanner::skip_ns_char, Current);
1203  T.Kind = Token::TK_TagDirective;
1204  T.Range = StringRef(Start, Current - Start);
1205  TokenQueue.push_back(T);
1206  return true;
1207  }
1208  return false;
1209 }
1210 
1211 bool Scanner::scanDocumentIndicator(bool IsStart) {
1212  unrollIndent(-1);
1213  SimpleKeys.clear();
1214  IsSimpleKeyAllowed = false;
1215 
1216  Token T;
1217  T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
1218  T.Range = StringRef(Current, 3);
1219  skip(3);
1220  TokenQueue.push_back(T);
1221  return true;
1222 }
1223 
1224 bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1225  Token T;
1226  T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1228  T.Range = StringRef(Current, 1);
1229  skip(1);
1230  TokenQueue.push_back(T);
1231 
1232  // [ and { may begin a simple key.
1233  saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
1234 
1235  // And may also be followed by a simple key.
1236  IsSimpleKeyAllowed = true;
1237  ++FlowLevel;
1238  return true;
1239 }
1240 
1241 bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1242  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1243  IsSimpleKeyAllowed = false;
1244  Token T;
1245  T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1247  T.Range = StringRef(Current, 1);
1248  skip(1);
1249  TokenQueue.push_back(T);
1250  if (FlowLevel)
1251  --FlowLevel;
1252  return true;
1253 }
1254 
1255 bool Scanner::scanFlowEntry() {
1256  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1257  IsSimpleKeyAllowed = true;
1258  Token T;
1259  T.Kind = Token::TK_FlowEntry;
1260  T.Range = StringRef(Current, 1);
1261  skip(1);
1262  TokenQueue.push_back(T);
1263  return true;
1264 }
1265 
1266 bool Scanner::scanBlockEntry() {
1267  rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1268  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1269  IsSimpleKeyAllowed = true;
1270  Token T;
1271  T.Kind = Token::TK_BlockEntry;
1272  T.Range = StringRef(Current, 1);
1273  skip(1);
1274  TokenQueue.push_back(T);
1275  return true;
1276 }
1277 
1278 bool Scanner::scanKey() {
1279  if (!FlowLevel)
1280  rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1281 
1282  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1283  IsSimpleKeyAllowed = !FlowLevel;
1284 
1285  Token T;
1286  T.Kind = Token::TK_Key;
1287  T.Range = StringRef(Current, 1);
1288  skip(1);
1289  TokenQueue.push_back(T);
1290  return true;
1291 }
1292 
1293 bool Scanner::scanValue() {
1294  // If the previous token could have been a simple key, insert the key token
1295  // into the token queue.
1296  if (!SimpleKeys.empty()) {
1297  SimpleKey SK = SimpleKeys.pop_back_val();
1298  Token T;
1299  T.Kind = Token::TK_Key;
1300  T.Range = SK.Tok->Range;
1302  for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1303  if (i == SK.Tok)
1304  break;
1305  }
1306  if (i == e) {
1307  Failed = true;
1308  return false;
1309  }
1310  i = TokenQueue.insert(i, T);
1311 
1312  // We may also need to add a Block-Mapping-Start token.
1313  rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1314 
1315  IsSimpleKeyAllowed = false;
1316  } else {
1317  if (!FlowLevel)
1318  rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1319  IsSimpleKeyAllowed = !FlowLevel;
1320  }
1321 
1322  Token T;
1323  T.Kind = Token::TK_Value;
1324  T.Range = StringRef(Current, 1);
1325  skip(1);
1326  TokenQueue.push_back(T);
1327  return true;
1328 }
1329 
1330 // Forbidding inlining improves performance by roughly 20%.
1331 // FIXME: Remove once llvm optimizes this to the faster version without hints.
1332 LLVM_ATTRIBUTE_NOINLINE static bool
1334 
1335 // Returns whether a character at 'Position' was escaped with a leading '\'.
1336 // 'First' specifies the position of the first character in the string.
1338  StringRef::iterator Position) {
1339  assert(Position - 1 >= First);
1340  StringRef::iterator I = Position - 1;
1341  // We calculate the number of consecutive '\'s before the current position
1342  // by iterating backwards through our string.
1343  while (I >= First && *I == '\\') --I;
1344  // (Position - 1 - I) now contains the number of '\'s before the current
1345  // position. If it is odd, the character at 'Position' was escaped.
1346  return (Position - 1 - I) % 2 == 1;
1347 }
1348 
1349 bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1350  StringRef::iterator Start = Current;
1351  unsigned ColStart = Column;
1352  if (IsDoubleQuoted) {
1353  do {
1354  ++Current;
1355  while (Current != End && *Current != '"')
1356  ++Current;
1357  // Repeat until the previous character was not a '\' or was an escaped
1358  // backslash.
1359  } while ( Current != End
1360  && *(Current - 1) == '\\'
1361  && wasEscaped(Start + 1, Current));
1362  } else {
1363  skip(1);
1364  while (Current != End) {
1365  // Skip a ' followed by another '.
1366  if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
1367  skip(2);
1368  continue;
1369  } else if (*Current == '\'')
1370  break;
1371  StringRef::iterator i = skip_nb_char(Current);
1372  if (i == Current) {
1373  i = skip_b_break(Current);
1374  if (i == Current)
1375  break;
1376  Current = i;
1377  Column = 0;
1378  ++Line;
1379  } else {
1380  if (i == End)
1381  break;
1382  Current = i;
1383  ++Column;
1384  }
1385  }
1386  }
1387 
1388  if (Current == End) {
1389  setError("Expected quote at end of scalar", Current);
1390  return false;
1391  }
1392 
1393  skip(1); // Skip ending quote.
1394  Token T;
1395  T.Kind = Token::TK_Scalar;
1396  T.Range = StringRef(Start, Current - Start);
1397  TokenQueue.push_back(T);
1398 
1399  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1400 
1401  IsSimpleKeyAllowed = false;
1402 
1403  return true;
1404 }
1405 
// Scan an unquoted (plain) scalar starting at the cursor and queue it as a
// TK_Scalar token, which is also saved as a simple key candidate.
//
// Returns false after reporting an error for: a ':' inside a flow collection
// that is not followed by a blank, a break or ','; a tab found where
// indentation is expected on a continuation line; or an empty scalar.
// Returns true otherwise.
1406 bool Scanner::scanPlainScalar() {
1407  StringRef::iterator Start = Current;
1408  unsigned ColStart = Column;
1409  unsigned LeadingBlanks = 0;
1410  assert(Indent >= -1 && "Indent must be >= -1 !");
 // Continuation lines are compared against this column (Indent + 1) below.
1411  unsigned indent = static_cast<unsigned>(Indent + 1);
1412  while (Current != End) {
 // A '#' at the start of a run ends the scalar.
1413  if (*Current == '#')
1414  break;
1415 
 // Consume one run of non-blank characters.
1416  while (Current != End && !isBlankOrBreak(Current)) {
1417  if (FlowLevel && *Current == ':' &&
1418  (Current + 1 == End ||
1419  !(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) {
1420  setError("Found unexpected ':' while scanning a plain scalar", Current);
1421  return false;
1422  }
1423 
1424  // Check for the end of the plain scalar.
1425  if ( (*Current == ':' && isBlankOrBreak(Current + 1))
1426  || ( FlowLevel
1427  && (StringRef(Current, 1).find_first_of(",:?[]{}")
1428  != StringRef::npos)))
1429  break;
1430 
1431  StringRef::iterator i = skip_nb_char(Current);
1432  if (i == Current)
1433  break;
1434  Current = i;
1435  ++Column;
1436  }
1437 
1438  // Are we at the end?
1439  if (!isBlankOrBreak(Current))
1440  break;
1441 
1442  // Eat blanks.
 // Tmp looks ahead over blanks and breaks; Current is only moved to Tmp at
 // the bottom of the loop, so trailing whitespace stays out of the token when
 // the scalar ends here. Line and Column are updated during the look-ahead.
1443  StringRef::iterator Tmp = Current;
1444  while (isBlankOrBreak(Tmp)) {
1445  StringRef::iterator i = skip_s_white(Tmp);
1446  if (i != Tmp) {
1447  if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
1448  setError("Found invalid tab character in indentation", Tmp);
1449  return false;
1450  }
1451  Tmp = i;
1452  ++Column;
1453  } else {
1454  i = skip_b_break(Tmp);
 // Remember that at least one line break has been crossed.
1455  if (!LeadingBlanks)
1456  LeadingBlanks = 1;
1457  Tmp = i;
1458  Column = 0;
1459  ++Line;
1460  }
1461  }
1462 
 // In block context, text that resumes left of the required column is not a
 // continuation of this scalar.
1463  if (!FlowLevel && Column < indent)
1464  break;
1465 
1466  Current = Tmp;
1467  }
1468  if (Start == Current) {
1469  setError("Got empty plain scalar", Start);
1470  return false;
1471  }
1472  Token T;
1473  T.Kind = Token::TK_Scalar;
1474  T.Range = StringRef(Start, Current - Start);
1475  TokenQueue.push_back(T);
1476 
1477  // Plain scalars can be simple keys.
1478  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1479 
1480  IsSimpleKeyAllowed = false;
1481 
1482  return true;
1483 }
1484 
1485 bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1486  StringRef::iterator Start = Current;
1487  unsigned ColStart = Column;
1488  skip(1);
1489  while (Current != End) {
1490  if ( *Current == '[' || *Current == ']'
1491  || *Current == '{' || *Current == '}'
1492  || *Current == ','
1493  || *Current == ':')
1494  break;
1495  StringRef::iterator i = skip_ns_char(Current);
1496  if (i == Current)
1497  break;
1498  Current = i;
1499  ++Column;
1500  }
1501 
1502  if (Start + 1 == Current) {
1503  setError("Got empty alias or anchor", Start);
1504  return false;
1505  }
1506 
1507  Token T;
1509  T.Range = StringRef(Start, Current - Start);
1510  TokenQueue.push_back(T);
1511 
1512  // Alias and anchors can be simple keys.
1513  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1514 
1515  IsSimpleKeyAllowed = false;
1516 
1517  return true;
1518 }
1519 
1520 char Scanner::scanBlockChompingIndicator() {
1521  char Indicator = ' ';
1522  if (Current != End && (*Current == '+' || *Current == '-')) {
1523  Indicator = *Current;
1524  skip(1);
1525  }
1526  return Indicator;
1527 }
1528 
1529 /// Get the number of line breaks after chomping.
1530 ///
1531 /// Return the number of trailing line breaks to emit, depending on
1532 /// \p ChompingIndicator.
1533 static unsigned getChompedLineBreaks(char ChompingIndicator,
1534  unsigned LineBreaks, StringRef Str) {
1535  if (ChompingIndicator == '-') // Strip all line breaks.
1536  return 0;
1537  if (ChompingIndicator == '+') // Keep all line breaks.
1538  return LineBreaks;
1539  // Clip trailing lines.
1540  return Str.empty() ? 0 : 1;
1541 }
1542 
1543 unsigned Scanner::scanBlockIndentationIndicator() {
1544  unsigned Indent = 0;
1545  if (Current != End && (*Current >= '1' && *Current <= '9')) {
1546  Indent = unsigned(*Current - '0');
1547  skip(1);
1548  }
1549  return Indent;
1550 }
1551 
1552 bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1553  unsigned &IndentIndicator, bool &IsDone) {
1554  auto Start = Current;
1555 
1556  ChompingIndicator = scanBlockChompingIndicator();
1557  IndentIndicator = scanBlockIndentationIndicator();
1558  // Check for the chomping indicator once again.
1559  if (ChompingIndicator == ' ')
1560  ChompingIndicator = scanBlockChompingIndicator();
1561  Current = skip_while(&Scanner::skip_s_white, Current);
1562  skipComment();
1563 
1564  if (Current == End) { // EOF, we have an empty scalar.
1565  Token T;
1566  T.Kind = Token::TK_BlockScalar;
1567  T.Range = StringRef(Start, Current - Start);
1568  TokenQueue.push_back(T);
1569  IsDone = true;
1570  return true;
1571  }
1572 
1573  if (!consumeLineBreakIfPresent()) {
1574  setError("Expected a line break after block scalar header", Current);
1575  return false;
1576  }
1577  return true;
1578 }
1579 
1580 bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1581  unsigned BlockExitIndent,
1582  unsigned &LineBreaks, bool &IsDone) {
1583  unsigned MaxAllSpaceLineCharacters = 0;
1584  StringRef::iterator LongestAllSpaceLine;
1585 
1586  while (true) {
1587  advanceWhile(&Scanner::skip_s_space);
1588  if (skip_nb_char(Current) != Current) {
1589  // This line isn't empty, so try and find the indentation.
1590  if (Column <= BlockExitIndent) { // End of the block literal.
1591  IsDone = true;
1592  return true;
1593  }
1594  // We found the block's indentation.
1595  BlockIndent = Column;
1596  if (MaxAllSpaceLineCharacters > BlockIndent) {
1597  setError(
1598  "Leading all-spaces line must be smaller than the block indent",
1599  LongestAllSpaceLine);
1600  return false;
1601  }
1602  return true;
1603  }
1604  if (skip_b_break(Current) != Current &&
1605  Column > MaxAllSpaceLineCharacters) {
1606  // Record the longest all-space line in case it's longer than the
1607  // discovered block indent.
1608  MaxAllSpaceLineCharacters = Column;
1609  LongestAllSpaceLine = Current;
1610  }
1611 
1612  // Check for EOF.
1613  if (Current == End) {
1614  IsDone = true;
1615  return true;
1616  }
1617 
1618  if (!consumeLineBreakIfPresent()) {
1619  IsDone = true;
1620  return true;
1621  }
1622  ++LineBreaks;
1623  }
1624  return true;
1625 }
1626 
1627 bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1628  unsigned BlockExitIndent, bool &IsDone) {
1629  // Skip the indentation.
1630  while (Column < BlockIndent) {
1631  auto I = skip_s_space(Current);
1632  if (I == Current)
1633  break;
1634  Current = I;
1635  ++Column;
1636  }
1637 
1638  if (skip_nb_char(Current) == Current)
1639  return true;
1640 
1641  if (Column <= BlockExitIndent) { // End of the block literal.
1642  IsDone = true;
1643  return true;
1644  }
1645 
1646  if (Column < BlockIndent) {
1647  if (Current != End && *Current == '#') { // Trailing comment.
1648  IsDone = true;
1649  return true;
1650  }
1651  setError("A text line is less indented than the block scalar", Current);
1652  return false;
1653  }
1654  return true; // A normal text line.
1655 }
1656 
1657 bool Scanner::scanBlockScalar(bool IsLiteral) {
1658  // Eat '|' or '>'
1659  assert(*Current == '|' || *Current == '>');
1660  skip(1);
1661 
1662  char ChompingIndicator;
1663  unsigned BlockIndent;
1664  bool IsDone = false;
1665  if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
1666  return false;
1667  if (IsDone)
1668  return true;
1669 
1670  auto Start = Current;
1671  unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
1672  unsigned LineBreaks = 0;
1673  if (BlockIndent == 0) {
1674  if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1675  IsDone))
1676  return false;
1677  }
1678 
1679  // Scan the block's scalars body.
1681  while (!IsDone) {
1682  if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1683  return false;
1684  if (IsDone)
1685  break;
1686 
1687  // Parse the current line.
1688  auto LineStart = Current;
1689  advanceWhile(&Scanner::skip_nb_char);
1690  if (LineStart != Current) {
1691  Str.append(LineBreaks, '\n');
1692  Str.append(StringRef(LineStart, Current - LineStart));
1693  LineBreaks = 0;
1694  }
1695 
1696  // Check for EOF.
1697  if (Current == End)
1698  break;
1699 
1700  if (!consumeLineBreakIfPresent())
1701  break;
1702  ++LineBreaks;
1703  }
1704 
1705  if (Current == End && !LineBreaks)
1706  // Ensure that there is at least one line break before the end of file.
1707  LineBreaks = 1;
1708  Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
1709 
1710  // New lines may start a simple key.
1711  if (!FlowLevel)
1712  IsSimpleKeyAllowed = true;
1713 
1714  Token T;
1715  T.Kind = Token::TK_BlockScalar;
1716  T.Range = StringRef(Start, Current - Start);
1717  T.Value = std::string(Str);
1718  TokenQueue.push_back(T);
1719  return true;
1720 }
1721 
1722 bool Scanner::scanTag() {
1723  StringRef::iterator Start = Current;
1724  unsigned ColStart = Column;
1725  skip(1); // Eat !.
1726  if (Current == End || isBlankOrBreak(Current)); // An empty tag.
1727  else if (*Current == '<') {
1728  skip(1);
1729  scan_ns_uri_char();
1730  if (!consume('>'))
1731  return false;
1732  } else {
1733  // FIXME: Actually parse the c-ns-shorthand-tag rule.
1734  Current = skip_while(&Scanner::skip_ns_char, Current);
1735  }
1736 
1737  Token T;
1738  T.Kind = Token::TK_Tag;
1739  T.Range = StringRef(Start, Current - Start);
1740  TokenQueue.push_back(T);
1741 
1742  // Tags can be simple keys.
1743  saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1744 
1745  IsSimpleKeyAllowed = false;
1746 
1747  return true;
1748 }
1749 
1750 bool Scanner::fetchMoreTokens() {
1751  if (IsStartOfStream)
1752  return scanStreamStart();
1753 
1754  scanToNextToken();
1755 
1756  if (Current == End)
1757  return scanStreamEnd();
1758 
1759  removeStaleSimpleKeyCandidates();
1760 
1761  unrollIndent(Column);
1762 
1763  if (Column == 0 && *Current == '%')
1764  return scanDirective();
1765 
1766  if (Column == 0 && Current + 4 <= End
1767  && *Current == '-'
1768  && *(Current + 1) == '-'
1769  && *(Current + 2) == '-'
1770  && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1771  return scanDocumentIndicator(true);
1772 
1773  if (Column == 0 && Current + 4 <= End
1774  && *Current == '.'
1775  && *(Current + 1) == '.'
1776  && *(Current + 2) == '.'
1777  && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1778  return scanDocumentIndicator(false);
1779 
1780  if (*Current == '[')
1781  return scanFlowCollectionStart(true);
1782 
1783  if (*Current == '{')
1784  return scanFlowCollectionStart(false);
1785 
1786  if (*Current == ']')
1787  return scanFlowCollectionEnd(true);
1788 
1789  if (*Current == '}')
1790  return scanFlowCollectionEnd(false);
1791 
1792  if (*Current == ',')
1793  return scanFlowEntry();
1794 
1795  if (*Current == '-' && isBlankOrBreak(Current + 1))
1796  return scanBlockEntry();
1797 
1798  if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1799  return scanKey();
1800 
1801  if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1802  return scanValue();
1803 
1804  if (*Current == '*')
1805  return scanAliasOrAnchor(true);
1806 
1807  if (*Current == '&')
1808  return scanAliasOrAnchor(false);
1809 
1810  if (*Current == '!')
1811  return scanTag();
1812 
1813  if (*Current == '|' && !FlowLevel)
1814  return scanBlockScalar(true);
1815 
1816  if (*Current == '>' && !FlowLevel)
1817  return scanBlockScalar(false);
1818 
1819  if (*Current == '\'')
1820  return scanFlowScalar(false);
1821 
1822  if (*Current == '"')
1823  return scanFlowScalar(true);
1824 
1825  // Get a plain scalar.
1826  StringRef FirstChar(Current, 1);
1827  if (!(isBlankOrBreak(Current)
1828  || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
1829  || (*Current == '-' && !isBlankOrBreak(Current + 1))
1830  || (!FlowLevel && (*Current == '?' || *Current == ':')
1831  && isBlankOrBreak(Current + 1))
1832  || (!FlowLevel && *Current == ':'
1833  && Current + 2 < End
1834  && *(Current + 1) == ':'
1835  && !isBlankOrBreak(Current + 2)))
1836  return scanPlainScalar();
1837 
1838  setError("Unrecognized character while tokenizing.", Current);
1839  return false;
1840 }
1841 
1842 Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
1843  std::error_code *EC)
1844  : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}
1845 
1846 Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
1847  std::error_code *EC)
1848  : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
1849 
// Defaulted destructor, defined out of line in this file.
// NOTE(review): presumably kept here so the members owning Scanner and
// Document are destroyed where those types are complete - confirm against the
// header.
1850 Stream::~Stream() = default;
1851 
1852 bool Stream::failed() { return scanner->failed(); }
1853 
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering skips 1854); only the body is visible.
// Forwards Msg and Kind to the range-based printError, using N's source range
// when N is non-null and a default-constructed SMRange otherwise.
1855  printError(N ? N->getSourceRange() : SMRange(), Msg, Kind);
1856 }
1857 
// Print the diagnostic Msg for Range through the scanner, anchored at the
// start of the range.
// NOTE(review): the line declaring the remaining parameter(s), including the
// Kind used below, is absent from this listing (the embedded numbering skips
// 1859).
1858 void Stream::printError(const SMRange &Range, const Twine &Msg,
1860  scanner->printError(Range.Start, Kind, Msg, Range);
1861 }
1862 
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering skips 1863); it is called as begin() a few
// definitions further down.
// Starts iteration over the stream's documents: consumes the first token,
// creates the first Document and returns an iterator to it. A second call,
// when a document already exists, is a fatal error.
1864  if (CurrentDoc)
1865  report_fatal_error("Can only iterate over the stream once");
1866 
1867  // Skip Stream-Start.
1868  scanner->getNext();
1869 
1870  CurrentDoc.reset(new Document(*this));
1871  return document_iterator(CurrentDoc);
1872 }
1873 
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering skips 1874).
// Returns a default-constructed document_iterator, which the loop over
// begin()/end() below uses as its end marker.
1875  return document_iterator();
1876 }
1877 
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering skips 1878).
// Walks every document from begin() to end() and calls skip() on each.
1879  for (document_iterator i = begin(), e = end(); i != e; ++i)
1880  i->skip();
1881 }
1882 
1883 Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
1884  StringRef T)
1885  : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
1886  SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1887  SourceRange = SMRange(Start, Start);
1888 }
1889 
1890 std::string Node::getVerbatimTag() const {
1891  StringRef Raw = getRawTag();
1892  if (!Raw.empty() && Raw != "!") {
1893  std::string Ret;
1894  if (Raw.find_last_of('!') == 0) {
1895  Ret = std::string(Doc->getTagMap().find("!")->second);
1896  Ret += Raw.substr(1);
1897  return Ret;
1898  } else if (Raw.startswith("!!")) {
1899  Ret = std::string(Doc->getTagMap().find("!!")->second);
1900  Ret += Raw.substr(2);
1901  return Ret;
1902  } else {
1903  StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1904  std::map<StringRef, StringRef>::const_iterator It =
1905  Doc->getTagMap().find(TagHandle);
1906  if (It != Doc->getTagMap().end())
1907  Ret = std::string(It->second);
1908  else {
1909  Token T;
1910  T.Kind = Token::TK_Tag;
1911  T.Range = TagHandle;
1912  setError(Twine("Unknown tag handle ") + TagHandle, T);
1913  }
1914  Ret += Raw.substr(Raw.find_last_of('!') + 1);
1915  return Ret;
1916  }
1917  }
1918 
1919  switch (getType()) {
1920  case NK_Null:
1921  return "tag:yaml.org,2002:null";
1922  case NK_Scalar:
1923  case NK_BlockScalar:
1924  // TODO: Tag resolution.
1925  return "tag:yaml.org,2002:str";
1926  case NK_Mapping:
1927  return "tag:yaml.org,2002:map";
1928  case NK_Sequence:
1929  return "tag:yaml.org,2002:seq";
1930  }
1931 
1932  return "";
1933 }
1934 
// NOTE(review): the signature lines of the four definitions below are absent
// from this listing (the embedded numbering skips 1935, 1939, 1943 and 1947);
// only their bodies are visible. Each one forwards to the owning document.

// Forwards to the document's peekNext().
1936  return Doc->peekNext();
1937 }
1938 
// Forwards to the document's getNext().
1940  return Doc->getNext();
1941 }
1942 
// Forwards to the document's parseBlockNode().
1944  return Doc->parseBlockNode();
1945 }
1946 
// Gives access to the document's NodeAllocator member.
1948  return Doc->NodeAllocator;
1949 }
1950 
1951 void Node::setError(const Twine &Msg, Token &Tok) const {
1952  Doc->setError(Msg, Tok);
1953 }
1954 
1955 bool Node::failed() const {
1956  return Doc->failed();
1957 }
1958 
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering skips 1959); only the body is visible.
//
// Returns the scalar's text with its quoting removed:
//  - double quoted: the quotes are dropped and, if the text contains a
//    backslash or a line break, it is unescaped into Storage;
//  - single quoted: the quotes are dropped and every '' pair is collapsed to
//    a single ' in Storage;
//  - anything else: Value with trailing spaces trimmed.
// Storage is only written when the result cannot be a slice of Value.
1960  // TODO: Handle newlines properly. We need to remove leading whitespace.
1961  if (Value[0] == '"') { // Double quoted.
1962  // Pull off the leading and trailing "s.
1963  StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1964  // Search for characters that would require unescaping the value.
1965  StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
1966  if (i != StringRef::npos)
1967  return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1968  return UnquotedValue;
1969  } else if (Value[0] == '\'') { // Single quoted.
1970  // Pull off the leading and trailing 's.
1971  StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
1972  StringRef::size_type i = UnquotedValue.find('\'');
1973  if (i != StringRef::npos) {
1974  // We're going to need Storage.
1975  Storage.clear();
1976  Storage.reserve(UnquotedValue.size());
 // Each pass copies the text before the next quote, emits one quote and
 // drops two characters from the input (the quote and the one after it).
1977  for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
1978  StringRef Valid(UnquotedValue.begin(), i);
1979  llvm::append_range(Storage, Valid);
1980  Storage.push_back('\'');
1981  UnquotedValue = UnquotedValue.substr(i + 2);
1982  }
1983  llvm::append_range(Storage, UnquotedValue);
1984  return StringRef(Storage.begin(), Storage.size());
1985  }
1986  return UnquotedValue;
1987  }
1988  // Plain or block.
1989  return Value.rtrim(' ');
1990 }
1991 
// Build the unescaped form of a double-quoted scalar in Storage and return a
// StringRef over it.
//
// UnquotedValue is the text between the quotes. i is the offset of the first
// backslash or line break in it; the loop re-searches for the next one after
// each escape is handled. An unescaped line break becomes a single '\n'.
// A backslash followed by a line break removes the break. An unrecognized
// escape, or a backslash that ends the text, reports an error and returns an
// empty string.
//
// NOTE(review): the line declaring the parameter i is absent from this
// listing (the embedded numbering skips 1993).
1992 StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
1994  , SmallVectorImpl<char> &Storage)
1995  const {
1996  // Use Storage to build proper value.
1997  Storage.clear();
1998  Storage.reserve(UnquotedValue.size());
1999  for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
2000  // Insert all previous chars into Storage.
2001  StringRef Valid(UnquotedValue.begin(), i);
2002  llvm::append_range(Storage, Valid);
2003  // Chop off inserted chars.
2004  UnquotedValue = UnquotedValue.substr(i);
2005 
2006  assert(!UnquotedValue.empty() && "Can't be empty!");
2007 
2008  // Parse escape or line break.
2009  switch (UnquotedValue[0]) {
2010  case '\r':
2011  case '\n':
2012  Storage.push_back('\n');
 // A second consecutive '\r' or '\n' is dropped together with the first.
2013  if ( UnquotedValue.size() > 1
2014  && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
2015  UnquotedValue = UnquotedValue.substr(1);
2016  UnquotedValue = UnquotedValue.substr(1);
2017  break;
2018  default:
 // Only a backslash can reach this point, given the characters searched for.
2019  if (UnquotedValue.size() == 1) {
2020  Token T;
2021  T.Range = StringRef(UnquotedValue.begin(), 1);
2022  setError("Unrecognized escape code", T);
2023  return "";
2024  }
 // Drop the backslash; UnquotedValue[0] is now the escape character.
2025  UnquotedValue = UnquotedValue.substr(1);
2026  switch (UnquotedValue[0]) {
2027  default: {
2028  Token T;
2029  T.Range = StringRef(UnquotedValue.begin(), 1);
2030  setError("Unrecognized escape code", T);
2031  return "";
2032  }
2033  case '\r':
2034  case '\n':
2035  // Remove the new line.
2036  if ( UnquotedValue.size() > 1
2037  && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
2038  UnquotedValue = UnquotedValue.substr(1);
2039  // If this was just a single byte newline, it will get skipped
2040  // below.
2041  break;
2042  case '0':
2043  Storage.push_back(0x00);
2044  break;
2045  case 'a':
2046  Storage.push_back(0x07);
2047  break;
2048  case 'b':
2049  Storage.push_back(0x08);
2050  break;
2051  case 't':
2052  case 0x09:
2053  Storage.push_back(0x09);
2054  break;
2055  case 'n':
2056  Storage.push_back(0x0A);
2057  break;
2058  case 'v':
2059  Storage.push_back(0x0B);
2060  break;
2061  case 'f':
2062  Storage.push_back(0x0C);
2063  break;
2064  case 'r':
2065  Storage.push_back(0x0D);
2066  break;
2067  case 'e':
2068  Storage.push_back(0x1B);
2069  break;
2070  case ' ':
2071  Storage.push_back(0x20);
2072  break;
2073  case '"':
2074  Storage.push_back(0x22);
2075  break;
2076  case '/':
2077  Storage.push_back(0x2F);
2078  break;
2079  case '\\':
2080  Storage.push_back(0x5C);
2081  break;
2082  case 'N':
2083  encodeUTF8(0x85, Storage);
2084  break;
2085  case '_':
2086  encodeUTF8(0xA0, Storage);
2087  break;
2088  case 'L':
2089  encodeUTF8(0x2028, Storage);
2090  break;
2091  case 'P':
2092  encodeUTF8(0x2029, Storage);
2093  break;
 // \x takes 2 hex digits, \u takes 4 and \U takes 8. When too few characters
 // remain, nothing is emitted and only the escape letter is dropped below.
 // Digits that do not parse are replaced by the code point 0xFFFD.
2094  case 'x': {
2095  if (UnquotedValue.size() < 3)
2096  // TODO: Report error.
2097  break;
2098  unsigned int UnicodeScalarValue;
2099  if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
2100  // TODO: Report error.
2101  UnicodeScalarValue = 0xFFFD;
2102  encodeUTF8(UnicodeScalarValue, Storage);
2103  UnquotedValue = UnquotedValue.substr(2);
2104  break;
2105  }
2106  case 'u': {
2107  if (UnquotedValue.size() < 5)
2108  // TODO: Report error.
2109  break;
2110  unsigned int UnicodeScalarValue;
2111  if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
2112  // TODO: Report error.
2113  UnicodeScalarValue = 0xFFFD;
2114  encodeUTF8(UnicodeScalarValue, Storage);
2115  UnquotedValue = UnquotedValue.substr(4);
2116  break;
2117  }
2118  case 'U': {
2119  if (UnquotedValue.size() < 9)
2120  // TODO: Report error.
2121  break;
2122  unsigned int UnicodeScalarValue;
2123  if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
2124  // TODO: Report error.
2125  UnicodeScalarValue = 0xFFFD;
2126  encodeUTF8(UnicodeScalarValue, Storage);
2127  UnquotedValue = UnquotedValue.substr(8);
2128  break;
2129  }
2130  }
 // Drop the escape character itself (or the remaining line-break byte).
2131  UnquotedValue = UnquotedValue.substr(1);
2132  }
2133  }
 // Whatever follows the last escape is copied unchanged.
2134  llvm::append_range(Storage, UnquotedValue);
2135  return StringRef(Storage.begin(), Storage.size());
2136 }
2137 
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering skips 2138); it is called as getKey() by
// the definition that follows.
//
// Returns the key node of this entry, parsing it on first use and caching it
// in Key. A NullNode is created when the next token is a block end, a value
// indicator or an error, both before and after an optional TK_Key token has
// been consumed (the second check does not include the error token).
2139  if (Key)
2140  return Key;
2141  // Handle implicit null keys.
2142  {
2143  Token &t = peekNext();
2144  if ( t.Kind == Token::TK_BlockEnd
2145  || t.Kind == Token::TK_Value
2146  || t.Kind == Token::TK_Error) {
2147  return Key = new (getAllocator()) NullNode(Doc);
2148  }
2149  if (t.Kind == Token::TK_Key)
2150  getNext(); // skip TK_Key.
2151  }
2152 
2153  // Handle explicit null keys.
2154  Token &t = peekNext();
2155  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
2156  return Key = new (getAllocator()) NullNode(Doc);
2157  }
2158 
2159  // We've got a normal key.
2160  return Key = parseBlockNode();
2161 }
2162 
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering skips 2163); only the body is visible.
//
// Returns the value node of this entry, parsing it on first use and caching
// it in Value. The key is obtained and skipped first so the token stream is
// positioned after it. A NullNode is returned when the key is null, when the
// document has failed, when no TK_Value token follows the key, or when the
// value indicator is followed directly by a block end or another key. Two of
// those cases also report an error: a null key, and a token after the key
// that is neither TK_Value nor one of the tokens that imply a null value.
2164  if (Value)
2165  return Value;
2166 
2167  if (Node* Key = getKey())
2168  Key->skip();
2169  else {
2170  setError("Null key in Key Value.", peekNext());
2171  return Value = new (getAllocator()) NullNode(Doc);
2172  }
2173 
2174  if (failed())
2175  return Value = new (getAllocator()) NullNode(Doc);
2176 
2177  // Handle implicit null values.
2178  {
2179  Token &t = peekNext();
2180  if ( t.Kind == Token::TK_BlockEnd
2181  || t.Kind == Token::TK_FlowMappingEnd
2182  || t.Kind == Token::TK_Key
2183  || t.Kind == Token::TK_FlowEntry
2184  || t.Kind == Token::TK_Error) {
2185  return Value = new (getAllocator()) NullNode(Doc);
2186  }
2187 
2188  if (t.Kind != Token::TK_Value) {
2189  setError("Unexpected token in Key Value.", t);
2190  return Value = new (getAllocator()) NullNode(Doc);
2191  }
2192  getNext(); // skip TK_Value.
2193  }
2194 
2195  // Handle explicit null values.
2196  Token &t = peekNext();
2197  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
2198  return Value = new (getAllocator()) NullNode(Doc);
2199  }
2200 
2201  // We got a normal value.
2202  return Value = parseBlockNode();
2203 }
2204 
2205 void MappingNode::increment() {
2206  if (failed()) {
2207  IsAtEnd = true;
2208  CurrentEntry = nullptr;
2209  return;
2210  }
2211  if (CurrentEntry) {
2212  CurrentEntry->skip();
2213  if (Type == MT_Inline) {
2214  IsAtEnd = true;
2215  CurrentEntry = nullptr;
2216  return;
2217  }
2218  }
2219  Token T = peekNext();
2220  if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
2221  // KeyValueNode eats the TK_Key. That way it can detect null keys.
2222  CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
2223  } else if (Type == MT_Block) {
2224  switch (T.Kind) {
2225  case Token::TK_BlockEnd:
2226  getNext();
2227  IsAtEnd = true;
2228  CurrentEntry = nullptr;
2229  break;
2230  default:
2231  setError("Unexpected token. Expected Key or Block End", T);
2233  case Token::TK_Error:
2234  IsAtEnd = true;
2235  CurrentEntry = nullptr;
2236  }
2237  } else {
2238  switch (T.Kind) {
2239  case Token::TK_FlowEntry:
2240  // Eat the flow entry and recurse.
2241  getNext();
2242  return increment();
2244  getNext();
2246  case Token::TK_Error:
2247  // Set this to end iterator.
2248  IsAtEnd = true;
2249  CurrentEntry = nullptr;
2250  break;
2251  default:
2252  setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
2253  "Mapping End."
2254  , T);
2255  IsAtEnd = true;
2256  CurrentEntry = nullptr;
2257  }
2258  }
2259 }
2260 
2262  if (failed()) {
2263  IsAtEnd = true;
2264  CurrentEntry = nullptr;
2265  return;
2266  }
2267  if (CurrentEntry)
2268  CurrentEntry->skip();
2269  Token T = peekNext();
2270  if (SeqType == ST_Block) {
2271  switch (T.Kind) {
2272  case Token::TK_BlockEntry:
2273  getNext();
2274  CurrentEntry = parseBlockNode();
2275  if (!CurrentEntry) { // An error occurred.
2276  IsAtEnd = true;
2277  CurrentEntry = nullptr;
2278  }
2279  break;
2280  case Token::TK_BlockEnd:
2281  getNext();
2282  IsAtEnd = true;
2283  CurrentEntry = nullptr;
2284  break;
2285  default:
2286  setError( "Unexpected token. Expected Block Entry or Block End."
2287  , T);
2289  case Token::TK_Error:
2290  IsAtEnd = true;
2291  CurrentEntry = nullptr;
2292  }
2293  } else if (SeqType == ST_Indentless) {
2294  switch (T.Kind) {
2295  case Token::TK_BlockEntry:
2296  getNext();
2297  CurrentEntry = parseBlockNode();
2298  if (!CurrentEntry) { // An error occurred.
2299  IsAtEnd = true;
2300  CurrentEntry = nullptr;
2301  }
2302  break;
2303  default:
2304  case Token::TK_Error:
2305  IsAtEnd = true;
2306  CurrentEntry = nullptr;
2307  }
2308  } else if (SeqType == ST_Flow) {
2309  switch (T.Kind) {
2310  case Token::TK_FlowEntry:
2311  // Eat the flow entry and recurse.
2312  getNext();
2313  WasPreviousTokenFlowEntry = true;
2314  return increment();
2316  getNext();
2318  case Token::TK_Error:
2319  // Set this to end iterator.
2320  IsAtEnd = true;
2321  CurrentEntry = nullptr;
2322  break;
2323  case Token::TK_StreamEnd:
2324  case Token::TK_DocumentEnd:
2326  setError("Could not find closing ]!", T);
2327  // Set this to end iterator.
2328  IsAtEnd = true;
2329  CurrentEntry = nullptr;
2330  break;
2331  default:
2332  if (!WasPreviousTokenFlowEntry) {
2333  setError("Expected , between entries!", T);
2334  IsAtEnd = true;
2335  CurrentEntry = nullptr;
2336  break;
2337  }
2338  // Otherwise it must be a flow entry.
2339  CurrentEntry = parseBlockNode();
2340  if (!CurrentEntry) {
2341  IsAtEnd = true;
2342  }
2343  WasPreviousTokenFlowEntry = false;
2344  break;
2345  }
2346  }
2347 }
2348 
2349 Document::Document(Stream &S) : stream(S), Root(nullptr) {
2350  // Tag maps starts with two default mappings.
2351  TagMap["!"] = "!";
2352  TagMap["!!"] = "tag:yaml.org,2002:";
2353 
2354  if (parseDirectives())
2355  expectToken(Token::TK_DocumentStart);
2356  Token &T = peekNext();
2357  if (T.Kind == Token::TK_DocumentStart)
2358  getNext();
2359 }
2360 
2362  if (stream.scanner->failed())
2363  return false;
2364  if (!Root && !getRoot())
2365  return false;
2366  Root->skip();
2367  Token &T = peekNext();
2368  if (T.Kind == Token::TK_StreamEnd)
2369  return false;
2370  if (T.Kind == Token::TK_DocumentEnd) {
2371  getNext();
2372  return skip();
2373  }
2374  return true;
2375 }
2376 
2377 Token &Document::peekNext() {
2378  return stream.scanner->peekNext();
2379 }
2380 
2381 Token Document::getNext() {
2382  return stream.scanner->getNext();
2383 }
2384 
2385 void Document::setError(const Twine &Message, Token &Location) const {
2386  stream.scanner->setError(Message, Location.Range.begin());
2387 }
2388 
2389 bool Document::failed() const {
2390  return stream.scanner->failed();
2391 }
2392 
2394  Token T = peekNext();
2395  // Handle properties.
2396  Token AnchorInfo;
2397  Token TagInfo;
2398 parse_property:
2399  switch (T.Kind) {
2400  case Token::TK_Alias:
2401  getNext();
2402  return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2403  case Token::TK_Anchor:
2404  if (AnchorInfo.Kind == Token::TK_Anchor) {
2405  setError("Already encountered an anchor for this node!", T);
2406  return nullptr;
2407  }
2408  AnchorInfo = getNext(); // Consume TK_Anchor.
2409  T = peekNext();
2410  goto parse_property;
2411  case Token::TK_Tag:
2412  if (TagInfo.Kind == Token::TK_Tag) {
2413  setError("Already encountered a tag for this node!", T);
2414  return nullptr;
2415  }
2416  TagInfo = getNext(); // Consume TK_Tag.
2417  T = peekNext();
2418  goto parse_property;
2419  default:
2420  break;
2421  }
2422 
2423  switch (T.Kind) {
2424  case Token::TK_BlockEntry:
2425  // We got an unindented BlockEntry sequence. This is not terminated with
2426  // a BlockEnd.
2427  // Don't eat the TK_BlockEntry, SequenceNode needs it.
2428  return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2429  , AnchorInfo.Range.substr(1)
2430  , TagInfo.Range
2433  getNext();
2434  return new (NodeAllocator)
2435  SequenceNode( stream.CurrentDoc
2436  , AnchorInfo.Range.substr(1)
2437  , TagInfo.Range
2440  getNext();
2441  return new (NodeAllocator)
2442  MappingNode( stream.CurrentDoc
2443  , AnchorInfo.Range.substr(1)
2444  , TagInfo.Range
2447  getNext();
2448  return new (NodeAllocator)
2449  SequenceNode( stream.CurrentDoc
2450  , AnchorInfo.Range.substr(1)
2451  , TagInfo.Range
2454  getNext();
2455  return new (NodeAllocator)
2456  MappingNode( stream.CurrentDoc
2457  , AnchorInfo.Range.substr(1)
2458  , TagInfo.Range
2460  case Token::TK_Scalar:
2461  getNext();
2462  return new (NodeAllocator)
2463  ScalarNode( stream.CurrentDoc
2464  , AnchorInfo.Range.substr(1)
2465  , TagInfo.Range
2466  , T.Range);
2467  case Token::TK_BlockScalar: {
2468  getNext();
2469  StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
2470  StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
2471  return new (NodeAllocator)
2472  BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
2473  TagInfo.Range, StrCopy, T.Range);
2474  }
2475  case Token::TK_Key:
2476  // Don't eat the TK_Key, KeyValueNode expects it.
2477  return new (NodeAllocator)
2478  MappingNode( stream.CurrentDoc
2479  , AnchorInfo.Range.substr(1)
2480  , TagInfo.Range
2483  case Token::TK_DocumentEnd:
2484  case Token::TK_StreamEnd:
2485  default:
2486  // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2487  // !!null null.
2488  return new (NodeAllocator) NullNode(stream.CurrentDoc);
2491  case Token::TK_FlowEntry: {
2492  if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
2493  return new (NodeAllocator) NullNode(stream.CurrentDoc);
2494 
2495  setError("Unexpected token", T);
2496  return nullptr;
2497  }
2498  case Token::TK_Error:
2499  return nullptr;
2500  }
2501  llvm_unreachable("Control flow shouldn't reach here.");
2502  return nullptr;
2503 }
2504 
2505 bool Document::parseDirectives() {
2506  bool isDirective = false;
2507  while (true) {
2508  Token T = peekNext();
2509  if (T.Kind == Token::TK_TagDirective) {
2510  parseTAGDirective();
2511  isDirective = true;
2512  } else if (T.Kind == Token::TK_VersionDirective) {
2513  parseYAMLDirective();
2514  isDirective = true;
2515  } else
2516  break;
2517  }
2518  return isDirective;
2519 }
2520 
2521 void Document::parseYAMLDirective() {
2522  getNext(); // Eat %YAML <version>
2523 }
2524 
2525 void Document::parseTAGDirective() {
2526  Token Tag = getNext(); // %TAG <handle> <prefix>
2527  StringRef T = Tag.Range;
2528  // Strip %TAG
2529  T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
2530  std::size_t HandleEnd = T.find_first_of(" \t");
2531  StringRef TagHandle = T.substr(0, HandleEnd);
2532  StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
2533  TagMap[TagHandle] = TagPrefix;
2534 }
2535 
2536 bool Document::expectToken(int TK) {
2537  Token T = getNext();
2538  if (T.Kind != TK) {
2539  setError("Unexpected token", T);
2540  return false;
2541  }
2542  return true;
2543 }
llvm::StringRef::copy
LLVM_NODISCARD StringRef copy(Allocator &A) const
Definition: StringRef.h:175
i
i
Definition: README.txt:29
MemoryBuffer.h
llvm::yaml::AliasNode
Represents an alias to a Node with an anchor.
Definition: YAMLParser.h:514
llvm::AllocatorList::front
T & front()
Definition: AllocatorList.h:176
llvm::SMRange::Start
SMLoc Start
Definition: SMLoc.h:50
llvm::StringRef::startswith
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:286
llvm::AllocatorList::iterator
IteratorImpl< T, typename list_type::iterator > iterator
Definition: AllocatorList.h:124
llvm::yaml::Token::TK_DocumentStart
@ TK_DocumentStart
Definition: YAMLParser.cpp:133
llvm::yaml::Token::TK_TagDirective
@ TK_TagDirective
Definition: YAMLParser.cpp:132
llvm::MemoryBufferRef::getBufferStart
const char * getBufferStart() const
Definition: MemoryBufferRef.h:35
llvm
Definition: AllocatorList.h:23
llvm::yaml::Node::NK_Sequence
@ NK_Sequence
Definition: YAMLParser.h:129
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:704
llvm::StringRef::empty
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::yaml::Token::Kind
enum llvm::yaml::Token::TokenKind Kind
llvm::yaml::Scanner::failed
bool failed()
Returns true if an error occurred while parsing.
Definition: YAMLParser.cpp:283
llvm::yaml::KeyValueNode::skip
void skip() override
Definition: YAMLParser.h:306
llvm::yaml::Node::getRawTag
StringRef getRawTag() const
Get the tag as it was written in the document.
Definition: YAMLParser.h:159
llvm::yaml::Token::TK_DocumentEnd
@ TK_DocumentEnd
Definition: YAMLParser.cpp:134
llvm::yaml::SequenceNode::increment
void increment()
Definition: YAMLParser.cpp:2261
llvm::yaml::operator==
bool operator==(const BinaryRef &LHS, const BinaryRef &RHS)
Definition: YAML.h:98
llvm::yaml::escape
std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...
Definition: YAMLParser.cpp:683
StringRef.h
llvm::yaml::Node
Abstract base class for all Nodes.
Definition: YAMLParser.h:119
YAMLParser.h
UEF_UTF16_LE
@ UEF_UTF16_LE
UTF-16 Little Endian.
Definition: YAMLParser.cpp:46
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:60
llvm::StringRef::find
LLVM_NODISCARD size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:315
llvm::SmallVector< int, 4 >
llvm::yaml::Scanner
Scans YAML tokens from a MemoryBuffer.
Definition: YAMLParser.cpp:249
ErrorHandling.h
llvm::yaml::Token::TK_BlockMappingStart
@ TK_BlockMappingStart
Definition: YAMLParser.cpp:138
llvm::yaml::KeyValueNode::getValue
Node * getValue()
Parse and return the value.
Definition: YAMLParser.cpp:2163
llvm::yaml::Token::TK_Error
@ TK_Error
Definition: YAMLParser.cpp:128
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::yaml::ScalarNode
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
Definition: YAMLParser.h:212
decodeUTF8
static UTF8Decoded decodeUTF8(StringRef Range)
Definition: YAMLParser.cpp:198
llvm::Optional< bool >
llvm::yaml::MappingNode
Represents a YAML map created from either a block map or a flow map.
Definition: YAMLParser.h:414
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:634
llvm::yaml::Document::Document
Document(Stream &ParentStream)
Definition: YAMLParser.cpp:2349
llvm::dwarf::Tag
Tag
Definition: Dwarf.h:104
llvm::yaml::skip
void skip(CollectionType &C)
Definition: YAMLParser.h:398
llvm::MemoryBufferRef
Definition: MemoryBufferRef.h:22
llvm::yaml::scanTokens
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
Definition: YAMLParser.cpp:670
size_t
new
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n y store obj * new
Definition: README.txt:125
encodeUTF8
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
Definition: YAMLParser.cpp:558
llvm::yaml::Token::TokenKind
TokenKind
Definition: YAMLParser.cpp:127
llvm::StringRef::substr
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:609
llvm::yaml::Node::failed
bool failed() const
Definition: YAMLParser.cpp:1955
llvm::yaml::Stream::Document
friend class Document
Definition: YAMLParser.h:112
llvm::yaml::NullNode
A null value.
Definition: YAMLParser.h:197
llvm::StringRef::find_last_of
LLVM_NODISCARD size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition: StringRef.h:436
llvm::MemoryBuffer::getMemBuffer
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Definition: MemoryBuffer.cpp:113
llvm::yaml::KeyValueNode::getKey
Node * getKey()
Parse and return the key.
Definition: YAMLParser.cpp:2138
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::Failed
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:177
llvm::AllocatorList::resetAlloc
void resetAlloc()
Reset the underlying allocator.
Definition: AllocatorList.h:223
SmallString.h
llvm::yaml::Token::TK_Anchor
@ TK_Anchor
Definition: YAMLParser.cpp:149
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::yaml::Token::TK_FlowEntry
@ TK_FlowEntry
Definition: YAMLParser.cpp:139
Twine.h
llvm::yaml::Token::TK_Value
@ TK_Value
Definition: YAMLParser.cpp:145
llvm::yaml::Token::TK_VersionDirective
@ TK_VersionDirective
Definition: YAMLParser.cpp:131
llvm::rust_demangle::BasicType::Str
@ Str
t
bitcast float %x to i32 %s=and i32 %t, 2147483647 %d=bitcast i32 %s to float ret float %d } declare float @fabsf(float %n) define float @bar(float %x) nounwind { %d=call float @fabsf(float %x) ret float %d } This IR(from PR6194):target datalayout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple="x86_64-apple-darwin10.0.0" %0=type { double, double } %struct.float3=type { float, float, float } define void @test(%0, %struct.float3 *nocapture %res) nounwind noinline ssp { entry:%tmp18=extractvalue %0 %0, 0 t
Definition: README-SSE.txt:788
llvm::StringRef::iterator
const char * iterator
Definition: StringRef.h:62
llvm::yaml::SequenceNode
Represents a YAML sequence created from either a block sequence or a flow sequence.
Definition: YAMLParser.h:462
llvm::yaml::Node::Node
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
Definition: YAMLParser.cpp:1883
llvm::yaml::Stream::end
document_iterator end()
Definition: YAMLParser.cpp:1874
llvm::yaml::dumpTokens
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
Definition: YAMLParser.cpp:586
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
UEF_Unknown
@ UEF_Unknown
Not a valid Unicode encoding.
Definition: YAMLParser.cpp:49
SMLoc.h
llvm::yaml::Stream::printError
void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
Definition: YAMLParser.cpp:1854
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::yaml::Node::NK_Mapping
@ NK_Mapping
Definition: YAMLParser.h:128
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::yaml::Node::SourceRange
SMRange SourceRange
Definition: YAMLParser.h:182
llvm::yaml::parseBool
llvm::Optional< bool > parseBool(StringRef S)
Parse S as a bool according to https://yaml.org/type/bool.html.
Definition: YAMLParser.cpp:749
llvm::yaml::Token::TK_StreamStart
@ TK_StreamStart
Definition: YAMLParser.cpp:129
llvm::AllocatorList::begin
iterator begin()
Definition: AllocatorList.h:162
llvm::yaml::Node::skip
virtual void skip()
Definition: YAMLParser.h:176
llvm::StringRef::getAsInteger
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:508
llvm::None
const NoneType None
Definition: None.h:23
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
SourceMgr.h
UnicodeEncodingForm
UnicodeEncodingForm
Definition: YAMLParser.cpp:43
llvm::yaml::MappingNode::MT_Inline
@ MT_Inline
An inline mapping node is used for "[key: value]".
Definition: YAMLParser.h:421
llvm::SmallString< 4 >
llvm::yaml::Stream::skip
void skip()
Definition: YAMLParser.cpp:1878
llvm::yaml::BlockScalarNode
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
Definition: YAMLParser.h:255
llvm::yaml::Scanner::getNext
Token getNext()
Parse the next token and pop it from the queue.
Definition: YAMLParser.cpp:890
llvm::AllocatorList::end
iterator end()
Definition: AllocatorList.h:163
llvm::AllocatorList::pop_front
void pop_front()
Definition: AllocatorList.h:208
llvm::yaml::Node::getAllocator
BumpPtrAllocator & getAllocator()
Definition: YAMLParser.cpp:1947
llvm::yaml::Node::Doc
std::unique_ptr< Document > & Doc
Definition: YAMLParser.h:181
llvm::yaml::Stream::~Stream
~Stream()
llvm::yaml::Document::parseBlockNode
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
Definition: YAMLParser.cpp:2393
llvm::yaml::Scanner::printError
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges=None)
Definition: YAMLParser.cpp:262
llvm::MemoryBufferRef::getBufferEnd
const char * getBufferEnd() const
Definition: MemoryBufferRef.h:36
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::yaml::Node::getNext
Token getNext()
Definition: YAMLParser.cpp:1939
llvm::StringRef::end
iterator end() const
Definition: StringRef.h:130
llvm::yaml::SequenceNode::ST_Flow
@ ST_Flow
Definition: YAMLParser.h:468
llvm::yaml::Token::TK_Tag
@ TK_Tag
Definition: YAMLParser.cpp:150
llvm::SourceMgr::PrintMessage
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
Definition: SourceMgr.cpp:341
llvm::BumpPtrAllocatorImpl
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:67
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::yaml::Token::TK_BlockSequenceStart
@ TK_BlockSequenceStart
Definition: YAMLParser.cpp:137
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
is_ns_word_char
static bool is_ns_word_char(const char C)
Definition: YAMLParser.cpp:986
EncodingInfo
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
Definition: YAMLParser.cpp:54
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::yaml::MappingNode::MT_Block
@ MT_Block
Definition: YAMLParser.h:419
UEF_UTF16_BE
@ UEF_UTF16_BE
UTF-16 Big Endian.
Definition: YAMLParser.cpp:47
StringExtras.h
llvm::yaml::Token
Token - A single YAML token.
Definition: YAMLParser.cpp:126
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::AllocatorList
A linked-list with a custom, local allocator.
Definition: AllocatorList.h:34
llvm::yaml::Token::Range
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input.
Definition: YAMLParser.cpp:155
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1547
ArrayRef.h
llvm::yaml::Node::NK_BlockScalar
@ NK_BlockScalar
Definition: YAMLParser.h:126
wasEscaped
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
Definition: YAMLParser.cpp:1337
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::yaml::Token::TK_FlowSequenceEnd
@ TK_FlowSequenceEnd
Definition: YAMLParser.cpp:141
llvm::yaml::Token::TK_Alias
@ TK_Alias
Definition: YAMLParser.cpp:148
llvm::yaml::Token::TK_BlockEntry
@ TK_BlockEntry
Definition: YAMLParser.cpp:135
llvm::yaml::Token::TK_Scalar
@ TK_Scalar
Definition: YAMLParser.cpp:146
llvm::StringRef::drop_back
LLVM_NODISCARD StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:659
llvm::yaml::Document::getRoot
Node * getRoot()
Parse and return the root level node.
Definition: YAMLParser.h:543
llvm::yaml::Scanner::setError
void setError(const Twine &Message, StringRef::iterator Position)
Definition: YAMLParser.cpp:267
llvm::yaml::Stream::failed
bool failed()
Definition: YAMLParser.cpp:1852
llvm::yaml::SequenceNode::ST_Indentless
@ ST_Indentless
Definition: YAMLParser.h:476
UEF_UTF8
@ UEF_UTF8
UTF-8 or ascii.
Definition: YAMLParser.cpp:48
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
None.h
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::yaml::Token::TK_BlockScalar
@ TK_BlockScalar
Definition: YAMLParser.cpp:147
llvm::AllocatorList::empty
bool empty()
Definition: AllocatorList.h:159
llvm::yaml::Node::getVerbatimTag
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
Definition: YAMLParser.cpp:1890
llvm::SourceMgr
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
Definition: SourceMgr.h:31
llvm::yaml::KeyValueNode
A key and value pair.
Definition: YAMLParser.h:285
uint32_t
Compiler.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1672
UEF_UTF32_LE
@ UEF_UTF32_LE
UTF-32 Little Endian.
Definition: YAMLParser.cpp:44
llvm::yaml::Stream
This class represents a YAML stream potentially containing multiple documents.
Definition: YAMLParser.h:86
llvm::yaml::Stream::Stream
Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
Definition: YAMLParser.cpp:1842
llvm::AllocatorList::insert
iterator insert(iterator I, T &&V)
Definition: AllocatorList.h:184
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
is_ns_hex_digit
static bool is_ns_hex_digit(const char C)
Definition: YAMLParser.cpp:984
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::yaml::Token::TK_BlockEnd
@ TK_BlockEnd
Definition: YAMLParser.cpp:136
llvm::yaml::SequenceNode::ST_Block
@ ST_Block
Definition: YAMLParser.h:467
Unicode.h
llvm::yaml::Document::skip
bool skip()
Finish parsing the current document and return true if there are more.
Definition: YAMLParser.cpp:2361
llvm::make_error_code
std::error_code make_error_code(BitcodeError E)
Definition: BitcodeReader.h:270
skipComment
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
Definition: MILexer.cpp:96
llvm::AllocatorList::clear
void clear()
Definition: AllocatorList.h:206
llvm::yaml::Stream::begin
document_iterator begin()
Definition: YAMLParser.cpp:1863
llvm::yaml::ScalarNode::getValue
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Definition: YAMLParser.cpp:1959
llvm::SourceMgr::DiagKind
DiagKind
Definition: SourceMgr.h:33
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
getUnicodeEncoding
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.
Definition: YAMLParser.cpp:62
llvm::yaml::Token::TK_StreamEnd
@ TK_StreamEnd
Definition: YAMLParser.cpp:130
llvm::yaml::Token::Value
std::string Value
The value of a block scalar node.
Definition: YAMLParser.cpp:158
llvm::yaml::Node::peekNext
Token & peekNext()
Definition: YAMLParser.cpp:1935
llvm::SourceMgr::DK_Error
@ DK_Error
Definition: SourceMgr.h:34
llvm::yaml::Node::NK_Null
@ NK_Null
Definition: YAMLParser.h:124
llvm::SourceMgr::AddNewSourceBuffer
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
Definition: SourceMgr.h:141
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:584
llvm::yaml::MappingNode::MT_Flow
@ MT_Flow
Definition: YAMLParser.h:420
LLVM_ATTRIBUTE_NOINLINE
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
Definition: Compiler.h:230
UEF_UTF32_BE
@ UEF_UTF32_BE
UTF-32 Big Endian.
Definition: YAMLParser.cpp:45
SmallVector.h
llvm::SMRange
Represents a range in source code.
Definition: SMLoc.h:48
llvm::yaml::Node::setError
void setError(const Twine &Message, Token &Location) const
Definition: YAMLParser.cpp:1951
N
#define N
llvm::yaml::Scanner::peekNext
Token & peekNext()
Parse the next token and return it without popping it.
Definition: YAMLParser.cpp:863
llvm::yaml::Token::TK_Key
@ TK_Key
Definition: YAMLParser.cpp:144
llvm::SmallVectorImpl< char >
llvm::SMLoc::getFromPointer
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
llvm::yaml::Token::TK_FlowMappingEnd
@ TK_FlowMappingEnd
Definition: YAMLParser.cpp:143
llvm::yaml::Scanner::Scanner
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
Definition: YAMLParser.cpp:835
llvm::yaml::document_iterator
Iterator abstraction for Documents over a Stream.
Definition: YAMLParser.h:588
llvm::StringRef::find_first_of
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:410
AllocatorList.h
llvm::sys::unicode::isPrintable
bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
Definition: Unicode.cpp:22
llvm::codeview::LocalSymFlags::IsAlias
@ IsAlias
llvm::Type::TypeID
TypeID
Definitions of all of the base types for the Type system.
Definition: Type.h:55
llvm::yaml::Token::TK_FlowSequenceStart
@ TK_FlowSequenceStart
Definition: YAMLParser.cpp:140
llvm::yaml::Node::parseBlockNode
Node * parseBlockNode()
Definition: YAMLParser.cpp:1943
raw_ostream.h
llvm::StringRef::size
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:157
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:623
llvm::StringRef::begin
iterator begin() const
Definition: StringRef.h:128
llvm::yaml::Token::TK_FlowMappingStart
@ TK_FlowMappingStart
Definition: YAMLParser.cpp:142
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::yaml::Node::NK_Scalar
@ NK_Scalar
Definition: YAMLParser.h:125
llvm::yaml::Node::getType
unsigned int getType() const
Definition: YAMLParser.h:178
llvm::AllocatorList::push_back
void push_back(T &&V)
Definition: AllocatorList.h:209
UTF8Decoded
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
Definition: YAMLParser.cpp:196
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1169
getChompedLineBreaks
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
Definition: YAMLParser.cpp:1533