LLVM 17.0.0git
YAMLParser.cpp
Go to the documentation of this file.
1//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a YAML parser.
10//
11//===----------------------------------------------------------------------===//
12
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/ADT/Twine.h"
25#include "llvm/Support/SMLoc.h"
29#include <cassert>
30#include <cstddef>
31#include <cstdint>
32#include <map>
33#include <memory>
34#include <string>
35#include <system_error>
36#include <utility>
37
38using namespace llvm;
39using namespace yaml;
40
42 UEF_UTF32_LE, ///< UTF-32 Little Endian
43 UEF_UTF32_BE, ///< UTF-32 Big Endian
44 UEF_UTF16_LE, ///< UTF-16 Little Endian
45 UEF_UTF16_BE, ///< UTF-16 Big Endian
46 UEF_UTF8, ///< UTF-8 or ascii.
47 UEF_Unknown ///< Not a valid Unicode encoding.
48};
49
50/// EncodingInfo - Holds the encoding type and length of the byte order mark if
51/// it exists. Length is in {0, 2, 3, 4}.
52using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
53
54/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
55/// encoding form of \a Input.
56///
57/// @param Input A string of length 0 or more.
58/// @returns An EncodingInfo indicating the Unicode encoding form of the input
59/// and how long the byte order mark is if one exists.
61 if (Input.empty())
62 return std::make_pair(UEF_Unknown, 0);
63
64 switch (uint8_t(Input[0])) {
65 case 0x00:
66 if (Input.size() >= 4) {
67 if ( Input[1] == 0
68 && uint8_t(Input[2]) == 0xFE
69 && uint8_t(Input[3]) == 0xFF)
70 return std::make_pair(UEF_UTF32_BE, 4);
71 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
72 return std::make_pair(UEF_UTF32_BE, 0);
73 }
74
75 if (Input.size() >= 2 && Input[1] != 0)
76 return std::make_pair(UEF_UTF16_BE, 0);
77 return std::make_pair(UEF_Unknown, 0);
78 case 0xFF:
79 if ( Input.size() >= 4
80 && uint8_t(Input[1]) == 0xFE
81 && Input[2] == 0
82 && Input[3] == 0)
83 return std::make_pair(UEF_UTF32_LE, 4);
84
85 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
86 return std::make_pair(UEF_UTF16_LE, 2);
87 return std::make_pair(UEF_Unknown, 0);
88 case 0xFE:
89 if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
90 return std::make_pair(UEF_UTF16_BE, 2);
91 return std::make_pair(UEF_Unknown, 0);
92 case 0xEF:
93 if ( Input.size() >= 3
94 && uint8_t(Input[1]) == 0xBB
95 && uint8_t(Input[2]) == 0xBF)
96 return std::make_pair(UEF_UTF8, 3);
97 return std::make_pair(UEF_Unknown, 0);
98 }
99
100 // It could still be utf-32 or utf-16.
101 if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
102 return std::make_pair(UEF_UTF32_LE, 0);
103
104 if (Input.size() >= 2 && Input[1] == 0)
105 return std::make_pair(UEF_UTF16_LE, 0);
106
107 return std::make_pair(UEF_UTF8, 0);
108}
109
110/// Pin the vtables to this file.
111void Node::anchor() {}
112void NullNode::anchor() {}
113void ScalarNode::anchor() {}
114void BlockScalarNode::anchor() {}
115void KeyValueNode::anchor() {}
116void MappingNode::anchor() {}
117void SequenceNode::anchor() {}
118void AliasNode::anchor() {}
119
120namespace llvm {
121namespace yaml {
122
123/// Token - A single YAML token.
124struct Token {
126 TK_Error, // Uninitialized token.
148 TK_Tag
149 } Kind = TK_Error;
150
151 /// A string of length 0 or more whose begin() points to the logical location
152 /// of the token in the input.
154
155 /// The value of a block scalar node.
156 std::string Value;
157
158 Token() = default;
159};
160
161} // end namespace yaml
162} // end namespace llvm
163
165
166namespace {
167
168/// This struct is used to track simple keys.
169///
170/// Simple keys are handled by creating an entry in SimpleKeys for each Token
171/// which could legally be the start of a simple key. When peekNext is called,
172 /// if the Token to be returned is referenced by a SimpleKey, we continue
173 /// tokenizing until that potential simple key has either been found to not be
174 /// a simple key (we moved on to the next line or went further than 1024 chars),
175 /// or we run into a Value, at which point a Key token (and possibly others) is
176 /// inserted before the SimpleKey's Tok.
177struct SimpleKey {
179 unsigned Column = 0;
180 unsigned Line = 0;
181 unsigned FlowLevel = 0;
182 bool IsRequired = false;
183
184 bool operator ==(const SimpleKey &Other) {
185 return Tok == Other.Tok;
186 }
187};
188
189} // end anonymous namespace
190
191/// The Unicode scalar value of a UTF-8 minimal well-formed code unit
192/// subsequence and the subsequence's length in code units (uint8_t).
193/// A length of 0 represents an error.
194using UTF8Decoded = std::pair<uint32_t, unsigned>;
195
197 StringRef::iterator Position= Range.begin();
198 StringRef::iterator End = Range.end();
199 // 1 byte: [0x00, 0x7f]
200 // Bit pattern: 0xxxxxxx
201 if (Position < End && (*Position & 0x80) == 0) {
202 return std::make_pair(*Position, 1);
203 }
204 // 2 bytes: [0x80, 0x7ff]
205 // Bit pattern: 110xxxxx 10xxxxxx
206 if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) &&
207 ((*(Position + 1) & 0xC0) == 0x80)) {
208 uint32_t codepoint = ((*Position & 0x1F) << 6) |
209 (*(Position + 1) & 0x3F);
210 if (codepoint >= 0x80)
211 return std::make_pair(codepoint, 2);
212 }
213 // 3 bytes: [0x800, 0xffff]
214 // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
215 if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) &&
216 ((*(Position + 1) & 0xC0) == 0x80) &&
217 ((*(Position + 2) & 0xC0) == 0x80)) {
218 uint32_t codepoint = ((*Position & 0x0F) << 12) |
219 ((*(Position + 1) & 0x3F) << 6) |
220 (*(Position + 2) & 0x3F);
221 // Codepoints between 0xD800 and 0xDFFF are invalid, as
222 // they are high / low surrogate halves used by UTF-16.
223 if (codepoint >= 0x800 &&
224 (codepoint < 0xD800 || codepoint > 0xDFFF))
225 return std::make_pair(codepoint, 3);
226 }
227 // 4 bytes: [0x10000, 0x10FFFF]
228 // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
229 if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) &&
230 ((*(Position + 1) & 0xC0) == 0x80) &&
231 ((*(Position + 2) & 0xC0) == 0x80) &&
232 ((*(Position + 3) & 0xC0) == 0x80)) {
233 uint32_t codepoint = ((*Position & 0x07) << 18) |
234 ((*(Position + 1) & 0x3F) << 12) |
235 ((*(Position + 2) & 0x3F) << 6) |
236 (*(Position + 3) & 0x3F);
237 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
238 return std::make_pair(codepoint, 4);
239 }
240 return std::make_pair(0, 0);
241}
242
243namespace llvm {
244namespace yaml {
245
246/// Scans YAML tokens from a MemoryBuffer.
247class Scanner {
248public:
249 Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
250 std::error_code *EC = nullptr);
251 Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
252 std::error_code *EC = nullptr);
253
254 /// Parse the next token and return it without popping it.
255 Token &peekNext();
256
257 /// Parse the next token and pop it from the queue.
258 Token getNext();
259
260 void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
261 ArrayRef<SMRange> Ranges = std::nullopt) {
262 SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ std::nullopt,
263 ShowColors);
264 }
265
266 void setError(const Twine &Message, StringRef::iterator Position) {
267 if (Position >= End)
268 Position = End - 1;
269
270 // propagate the error if possible
271 if (EC)
272 *EC = make_error_code(std::errc::invalid_argument);
273
274 // Don't print out more errors after the first one we encounter. The rest
275 // are just the result of the first, and have no meaning.
276 if (!Failed)
277 printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message);
278 Failed = true;
279 }
280
281 /// Returns true if an error occurred while parsing.
282 bool failed() {
283 return Failed;
284 }
285
286private:
287 void init(MemoryBufferRef Buffer);
288
289 StringRef currentInput() {
290 return StringRef(Current, End - Current);
291 }
292
293 /// Decode a UTF-8 minimal well-formed code unit subsequence starting
294 /// at \a Position.
295 ///
296 /// If the UTF-8 code units starting at Position do not form a well-formed
297 /// code unit subsequence, then the Unicode scalar value is 0, and the length
298 /// is 0.
300 return ::decodeUTF8(StringRef(Position, End - Position));
301 }
302
303 // The following functions are based on the grammar rules in the YAML spec. The
304 // style of the function names is meant to closely match how they are written
305 // in the spec. The number within the [] is the number of the grammar rule in
306 // the spec.
307 //
308 // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
309 //
310 // c-
311 // A production starting and ending with a special character.
312 // b-
313 // A production matching a single line break.
314 // nb-
315 // A production starting and ending with a non-break character.
316 // s-
317 // A production starting and ending with a white space character.
318 // ns-
319 // A production starting and ending with a non-space character.
320 // l-
321 // A production matching complete line(s).
322
323 /// Skip a single nb-char[27] starting at Position.
324 ///
325 /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
326 /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
327 ///
328 /// @returns The code unit after the nb-char, or Position if it's not an
329 /// nb-char.
330 StringRef::iterator skip_nb_char(StringRef::iterator Position);
331
332 /// Skip a single b-break[28] starting at Position.
333 ///
334 /// A b-break is 0xD 0xA | 0xD | 0xA
335 ///
336 /// @returns The code unit after the b-break, or Position if it's not a
337 /// b-break.
338 StringRef::iterator skip_b_break(StringRef::iterator Position);
339
340 /// Skip a single s-space[31] starting at Position.
341 ///
342 /// An s-space is 0x20
343 ///
344 /// @returns The code unit after the s-space, or Position if it's not a
345 /// s-space.
346 StringRef::iterator skip_s_space(StringRef::iterator Position);
347
348 /// Skip a single s-white[33] starting at Position.
349 ///
350 /// A s-white is 0x20 | 0x9
351 ///
352 /// @returns The code unit after the s-white, or Position if it's not a
353 /// s-white.
354 StringRef::iterator skip_s_white(StringRef::iterator Position);
355
356 /// Skip a single ns-char[34] starting at Position.
357 ///
358 /// A ns-char is nb-char - s-white
359 ///
360 /// @returns The code unit after the ns-char, or Position if it's not a
361 /// ns-char.
362 StringRef::iterator skip_ns_char(StringRef::iterator Position);
363
364 using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
365
366 /// Skip minimal well-formed code unit subsequences until Func
367 /// returns its input.
368 ///
369 /// @returns The code unit after the last minimal well-formed code unit
370 /// subsequence that Func accepted.
371 StringRef::iterator skip_while( SkipWhileFunc Func
372 , StringRef::iterator Position);
373
374 /// Skip minimal well-formed code unit subsequences until Func returns its
375 /// input.
376 void advanceWhile(SkipWhileFunc Func);
377
378 /// Scan ns-uri-char[39]s starting at Cur.
379 ///
380 /// This updates Cur and Column while scanning.
381 void scan_ns_uri_char();
382
383 /// Consume a minimal well-formed code unit subsequence starting at
384 /// \a Cur. Return false if it is not the same Unicode scalar value as
385 /// \a Expected. This updates \a Column.
387
388 /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
389 void skip(uint32_t Distance);
390
391 /// Return true if the minimal well-formed code unit subsequence at
392 /// Pos is whitespace or a new line
393 bool isBlankOrBreak(StringRef::iterator Position);
394
395 /// Return true if \a Line contains only blanks and line breaks, false otherwise.
396 bool isLineEmpty(StringRef Line);
397
398 /// Consume a single b-break[28] if it's present at the current position.
399 ///
400 /// Return false if the code unit at the current position isn't a line break.
401 bool consumeLineBreakIfPresent();
402
403 /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
404 void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
405 , unsigned AtColumn
406 , bool IsRequired);
407
408 /// Remove simple keys that can no longer be valid simple keys.
409 ///
410 /// Invalid simple keys are not on the current line or are further than 1024
411 /// columns back.
412 void removeStaleSimpleKeyCandidates();
413
414 /// Remove all simple keys on FlowLevel \a Level.
415 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
416
417 /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
418 /// tokens if needed.
419 bool unrollIndent(int ToColumn);
420
421 /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
422 /// if needed.
423 bool rollIndent( int ToColumn
424 , Token::TokenKind Kind
425 , TokenQueueT::iterator InsertPoint);
426
427 /// Skip a single-line comment when the comment starts at the current
428 /// position of the scanner.
429 void skipComment();
430
431 /// Skip whitespace and comments until the start of the next token.
432 void scanToNextToken();
433
434 /// Must be the first token generated.
435 bool scanStreamStart();
436
437 /// Generate tokens needed to close out the stream.
438 bool scanStreamEnd();
439
440 /// Scan a %BLAH directive.
441 bool scanDirective();
442
443 /// Scan a ... or ---.
444 bool scanDocumentIndicator(bool IsStart);
445
446 /// Scan a [ or { and generate the proper flow collection start token.
447 bool scanFlowCollectionStart(bool IsSequence);
448
449 /// Scan a ] or } and generate the proper flow collection end token.
450 bool scanFlowCollectionEnd(bool IsSequence);
451
452 /// Scan the , that separates entries in a flow collection.
453 bool scanFlowEntry();
454
455 /// Scan the - that starts block sequence entries.
456 bool scanBlockEntry();
457
458 /// Scan an explicit ? indicating a key.
459 bool scanKey();
460
461 /// Scan an explicit : indicating a value.
462 bool scanValue();
463
464 /// Scan a quoted scalar.
465 bool scanFlowScalar(bool IsDoubleQuoted);
466
467 /// Scan an unquoted scalar.
468 bool scanPlainScalar();
469
470 /// Scan an Alias or Anchor starting with * or &.
471 bool scanAliasOrAnchor(bool IsAlias);
472
473 /// Scan a block scalar starting with | or >.
474 bool scanBlockScalar(bool IsLiteral);
475
476 /// Scan a block scalar style indicator and header.
477 ///
478 /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that
479 /// YAML does not consider the style indicator to be a part of the header.
480 ///
481 /// Return false if an error occurred.
482 bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
483 unsigned &IndentIndicator, bool &IsDone);
484
485 /// Scan a style indicator in a block scalar header.
486 char scanBlockStyleIndicator();
487
488 /// Scan a chomping indicator in a block scalar header.
489 char scanBlockChompingIndicator();
490
491 /// Scan an indentation indicator in a block scalar header.
492 unsigned scanBlockIndentationIndicator();
493
494 /// Scan a block scalar header.
495 ///
496 /// Return false if an error occurred.
497 bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
498 bool &IsDone);
499
500 /// Look for the indentation level of a block scalar.
501 ///
502 /// Return false if an error occurred.
503 bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
504 unsigned &LineBreaks, bool &IsDone);
505
506 /// Scan the indentation of a text line in a block scalar.
507 ///
508 /// Return false if an error occurred.
509 bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
510 bool &IsDone);
511
512 /// Scan a tag of the form !stuff.
513 bool scanTag();
514
515 /// Dispatch to the next scanning function based on \a *Cur.
516 bool fetchMoreTokens();
517
518 /// The SourceMgr used for diagnostics and buffer management.
519 SourceMgr &SM;
520
521 /// The original input.
522 MemoryBufferRef InputBuffer;
523
524 /// The current position of the scanner.
525 StringRef::iterator Current;
526
527 /// The end of the input (one past the last character).
529
530 /// Current YAML indentation level in spaces.
531 int Indent;
532
533 /// Current column number in Unicode code points.
534 unsigned Column;
535
536 /// Current line number.
537 unsigned Line;
538
539 /// How deep we are in flow style containers. 0 Means at block level.
540 unsigned FlowLevel;
541
542 /// Are we at the start of the stream?
543 bool IsStartOfStream;
544
545 /// Can the next token be the start of a simple key?
546 bool IsSimpleKeyAllowed;
547
548 /// True if an error has occurred.
549 bool Failed;
550
551 /// Should colors be used when printing out the diagnostic messages?
552 bool ShowColors;
553
554 /// Queue of tokens. This is required to queue up tokens while looking
555 /// for the end of a simple key. And for cases where a single character
556 /// can produce multiple tokens (e.g. BlockEnd).
557 TokenQueueT TokenQueue;
558
559 /// Indentation levels.
560 SmallVector<int, 4> Indents;
561
562 /// Potential simple keys.
563 SmallVector<SimpleKey, 4> SimpleKeys;
564
565 std::error_code *EC;
566};
567
568} // end namespace yaml
569} // end namespace llvm
570
571/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
572static void encodeUTF8( uint32_t UnicodeScalarValue
573 , SmallVectorImpl<char> &Result) {
574 if (UnicodeScalarValue <= 0x7F) {
575 Result.push_back(UnicodeScalarValue & 0x7F);
576 } else if (UnicodeScalarValue <= 0x7FF) {
577 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
578 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
579 Result.push_back(FirstByte);
580 Result.push_back(SecondByte);
581 } else if (UnicodeScalarValue <= 0xFFFF) {
582 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
583 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
584 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
585 Result.push_back(FirstByte);
586 Result.push_back(SecondByte);
587 Result.push_back(ThirdByte);
588 } else if (UnicodeScalarValue <= 0x10FFFF) {
589 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
590 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
591 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
592 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
593 Result.push_back(FirstByte);
594 Result.push_back(SecondByte);
595 Result.push_back(ThirdByte);
596 Result.push_back(FourthByte);
597 }
598}
599
601 SourceMgr SM;
602 Scanner scanner(Input, SM);
603 while (true) {
604 Token T = scanner.getNext();
605 switch (T.Kind) {
607 OS << "Stream-Start: ";
608 break;
610 OS << "Stream-End: ";
611 break;
613 OS << "Version-Directive: ";
614 break;
616 OS << "Tag-Directive: ";
617 break;
619 OS << "Document-Start: ";
620 break;
622 OS << "Document-End: ";
623 break;
625 OS << "Block-Entry: ";
626 break;
628 OS << "Block-End: ";
629 break;
631 OS << "Block-Sequence-Start: ";
632 break;
634 OS << "Block-Mapping-Start: ";
635 break;
637 OS << "Flow-Entry: ";
638 break;
640 OS << "Flow-Sequence-Start: ";
641 break;
643 OS << "Flow-Sequence-End: ";
644 break;
646 OS << "Flow-Mapping-Start: ";
647 break;
649 OS << "Flow-Mapping-End: ";
650 break;
651 case Token::TK_Key:
652 OS << "Key: ";
653 break;
654 case Token::TK_Value:
655 OS << "Value: ";
656 break;
657 case Token::TK_Scalar:
658 OS << "Scalar: ";
659 break;
661 OS << "Block Scalar: ";
662 break;
663 case Token::TK_Alias:
664 OS << "Alias: ";
665 break;
666 case Token::TK_Anchor:
667 OS << "Anchor: ";
668 break;
669 case Token::TK_Tag:
670 OS << "Tag: ";
671 break;
672 case Token::TK_Error:
673 break;
674 }
675 OS << T.Range << "\n";
676 if (T.Kind == Token::TK_StreamEnd)
677 break;
678 else if (T.Kind == Token::TK_Error)
679 return false;
680 }
681 return true;
682}
683
685 SourceMgr SM;
686 Scanner scanner(Input, SM);
687 while (true) {
688 Token T = scanner.getNext();
689 if (T.Kind == Token::TK_StreamEnd)
690 break;
691 else if (T.Kind == Token::TK_Error)
692 return false;
693 }
694 return true;
695}
696
697std::string yaml::escape(StringRef Input, bool EscapePrintable) {
698 std::string EscapedInput;
699 for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
700 if (*i == '\\')
701 EscapedInput += "\\\\";
702 else if (*i == '"')
703 EscapedInput += "\\\"";
704 else if (*i == 0)
705 EscapedInput += "\\0";
706 else if (*i == 0x07)
707 EscapedInput += "\\a";
708 else if (*i == 0x08)
709 EscapedInput += "\\b";
710 else if (*i == 0x09)
711 EscapedInput += "\\t";
712 else if (*i == 0x0A)
713 EscapedInput += "\\n";
714 else if (*i == 0x0B)
715 EscapedInput += "\\v";
716 else if (*i == 0x0C)
717 EscapedInput += "\\f";
718 else if (*i == 0x0D)
719 EscapedInput += "\\r";
720 else if (*i == 0x1B)
721 EscapedInput += "\\e";
722 else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
723 std::string HexStr = utohexstr(*i);
724 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
725 } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
726 UTF8Decoded UnicodeScalarValue
727 = decodeUTF8(StringRef(i, Input.end() - i));
728 if (UnicodeScalarValue.second == 0) {
729 // Found invalid char.
730 SmallString<4> Val;
731 encodeUTF8(0xFFFD, Val);
732 llvm::append_range(EscapedInput, Val);
733 // FIXME: Error reporting.
734 return EscapedInput;
735 }
736 if (UnicodeScalarValue.first == 0x85)
737 EscapedInput += "\\N";
738 else if (UnicodeScalarValue.first == 0xA0)
739 EscapedInput += "\\_";
740 else if (UnicodeScalarValue.first == 0x2028)
741 EscapedInput += "\\L";
742 else if (UnicodeScalarValue.first == 0x2029)
743 EscapedInput += "\\P";
744 else if (!EscapePrintable &&
745 sys::unicode::isPrintable(UnicodeScalarValue.first))
746 EscapedInput += StringRef(i, UnicodeScalarValue.second);
747 else {
748 std::string HexStr = utohexstr(UnicodeScalarValue.first);
749 if (HexStr.size() <= 2)
750 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
751 else if (HexStr.size() <= 4)
752 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
753 else if (HexStr.size() <= 8)
754 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
755 }
756 i += UnicodeScalarValue.second - 1;
757 } else
758 EscapedInput.push_back(*i);
759 }
760 return EscapedInput;
761}
762
763std::optional<bool> yaml::parseBool(StringRef S) {
764 switch (S.size()) {
765 case 1:
766 switch (S.front()) {
767 case 'y':
768 case 'Y':
769 return true;
770 case 'n':
771 case 'N':
772 return false;
773 default:
774 return std::nullopt;
775 }
776 case 2:
777 switch (S.front()) {
778 case 'O':
779 if (S[1] == 'N') // ON
780 return true;
781 [[fallthrough]];
782 case 'o':
783 if (S[1] == 'n') //[Oo]n
784 return true;
785 return std::nullopt;
786 case 'N':
787 if (S[1] == 'O') // NO
788 return false;
789 [[fallthrough]];
790 case 'n':
791 if (S[1] == 'o') //[Nn]o
792 return false;
793 return std::nullopt;
794 default:
795 return std::nullopt;
796 }
797 case 3:
798 switch (S.front()) {
799 case 'O':
800 if (S.drop_front() == "FF") // OFF
801 return false;
802 [[fallthrough]];
803 case 'o':
804 if (S.drop_front() == "ff") //[Oo]ff
805 return false;
806 return std::nullopt;
807 case 'Y':
808 if (S.drop_front() == "ES") // YES
809 return true;
810 [[fallthrough]];
811 case 'y':
812 if (S.drop_front() == "es") //[Yy]es
813 return true;
814 return std::nullopt;
815 default:
816 return std::nullopt;
817 }
818 case 4:
819 switch (S.front()) {
820 case 'T':
821 if (S.drop_front() == "RUE") // TRUE
822 return true;
823 [[fallthrough]];
824 case 't':
825 if (S.drop_front() == "rue") //[Tt]rue
826 return true;
827 return std::nullopt;
828 default:
829 return std::nullopt;
830 }
831 case 5:
832 switch (S.front()) {
833 case 'F':
834 if (S.drop_front() == "ALSE") // FALSE
835 return false;
836 [[fallthrough]];
837 case 'f':
838 if (S.drop_front() == "alse") //[Ff]alse
839 return false;
840 return std::nullopt;
841 default:
842 return std::nullopt;
843 }
844 default:
845 return std::nullopt;
846 }
847}
848
849Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
850 std::error_code *EC)
851 : SM(sm), ShowColors(ShowColors), EC(EC) {
852 init(MemoryBufferRef(Input, "YAML"));
853}
854
855Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
856 std::error_code *EC)
857 : SM(SM_), ShowColors(ShowColors), EC(EC) {
858 init(Buffer);
859}
860
861void Scanner::init(MemoryBufferRef Buffer) {
862 InputBuffer = Buffer;
863 Current = InputBuffer.getBufferStart();
864 End = InputBuffer.getBufferEnd();
865 Indent = -1;
866 Column = 0;
867 Line = 0;
868 FlowLevel = 0;
869 IsStartOfStream = true;
870 IsSimpleKeyAllowed = true;
871 Failed = false;
872 std::unique_ptr<MemoryBuffer> InputBufferOwner =
873 MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false);
874 SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
875}
876
878 // If the current token is a possible simple key, keep parsing until we
879 // can confirm.
880 bool NeedMore = false;
881 while (true) {
882 if (TokenQueue.empty() || NeedMore) {
883 if (!fetchMoreTokens()) {
884 TokenQueue.clear();
885 SimpleKeys.clear();
886 TokenQueue.push_back(Token());
887 return TokenQueue.front();
888 }
889 }
890 assert(!TokenQueue.empty() &&
891 "fetchMoreTokens lied about getting tokens!");
892
893 removeStaleSimpleKeyCandidates();
894 SimpleKey SK;
895 SK.Tok = TokenQueue.begin();
896 if (!is_contained(SimpleKeys, SK))
897 break;
898 else
899 NeedMore = true;
900 }
901 return TokenQueue.front();
902}
903
905 Token Ret = peekNext();
906 // TokenQueue can be empty if there was an error getting the next token.
907 if (!TokenQueue.empty())
908 TokenQueue.pop_front();
909
910 // There cannot be any referenced Token's if the TokenQueue is empty. So do a
911 // quick deallocation of them all.
912 if (TokenQueue.empty())
913 TokenQueue.resetAlloc();
914
915 return Ret;
916}
917
918StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
919 if (Position == End)
920 return Position;
921 // Check 7 bit c-printable - b-char.
922 if ( *Position == 0x09
923 || (*Position >= 0x20 && *Position <= 0x7E))
924 return Position + 1;
925
926 // Check for valid UTF-8.
927 if (uint8_t(*Position) & 0x80) {
928 UTF8Decoded u8d = decodeUTF8(Position);
929 if ( u8d.second != 0
930 && u8d.first != 0xFEFF
931 && ( u8d.first == 0x85
932 || ( u8d.first >= 0xA0
933 && u8d.first <= 0xD7FF)
934 || ( u8d.first >= 0xE000
935 && u8d.first <= 0xFFFD)
936 || ( u8d.first >= 0x10000
937 && u8d.first <= 0x10FFFF)))
938 return Position + u8d.second;
939 }
940 return Position;
941}
942
943StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
944 if (Position == End)
945 return Position;
946 if (*Position == 0x0D) {
947 if (Position + 1 != End && *(Position + 1) == 0x0A)
948 return Position + 2;
949 return Position + 1;
950 }
951
952 if (*Position == 0x0A)
953 return Position + 1;
954 return Position;
955}
956
957StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
958 if (Position == End)
959 return Position;
960 if (*Position == ' ')
961 return Position + 1;
962 return Position;
963}
964
965StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
966 if (Position == End)
967 return Position;
968 if (*Position == ' ' || *Position == '\t')
969 return Position + 1;
970 return Position;
971}
972
973StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
974 if (Position == End)
975 return Position;
976 if (*Position == ' ' || *Position == '\t')
977 return Position;
978 return skip_nb_char(Position);
979}
980
981StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
982 , StringRef::iterator Position) {
983 while (true) {
984 StringRef::iterator i = (this->*Func)(Position);
985 if (i == Position)
986 break;
987 Position = i;
988 }
989 return Position;
990}
991
992void Scanner::advanceWhile(SkipWhileFunc Func) {
993 auto Final = skip_while(Func, Current);
994 Column += Final - Current;
995 Current = Final;
996}
997
/// Return true if \a C is an ns-hex-digit: one of [0-9], [A-F] or [a-f].
///
/// The previous implementation accepted every ASCII letter, so sequences such
/// as "%zz" were treated as valid percent-escapes. Only real hexadecimal
/// digits are accepted now.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9') || (C >= 'a' && C <= 'f') ||
         (C >= 'A' && C <= 'F');
}
999
/// Return true if \a C is an ns-word-char: a decimal digit, an ASCII letter,
/// or '-'.
///
/// The previous implementation left out decimal digits, so URI text
/// containing digits was not scanned as word characters.
static bool is_ns_word_char(const char C) {
  return C == '-' || (C >= '0' && C <= '9') || (C >= 'a' && C <= 'z') ||
         (C >= 'A' && C <= 'Z');
}
1001
1002void Scanner::scan_ns_uri_char() {
1003 while (true) {
1004 if (Current == End)
1005 break;
1006 if (( *Current == '%'
1007 && Current + 2 < End
1008 && is_ns_hex_digit(*(Current + 1))
1009 && is_ns_hex_digit(*(Current + 2)))
1010 || is_ns_word_char(*Current)
1011 || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
1012 != StringRef::npos) {
1013 ++Current;
1014 ++Column;
1015 } else
1016 break;
1017 }
1018}
1019
1020bool Scanner::consume(uint32_t Expected) {
1021 if (Expected >= 0x80) {
1022 setError("Cannot consume non-ascii characters", Current);
1023 return false;
1024 }
1025 if (Current == End)
1026 return false;
1027 if (uint8_t(*Current) >= 0x80) {
1028 setError("Cannot consume non-ascii characters", Current);
1029 return false;
1030 }
1031 if (uint8_t(*Current) == Expected) {
1032 ++Current;
1033 ++Column;
1034 return true;
1035 }
1036 return false;
1037}
1038
1039void Scanner::skip(uint32_t Distance) {
1040 Current += Distance;
1041 Column += Distance;
1042 assert(Current <= End && "Skipped past the end");
1043}
1044
1045bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
1046 if (Position == End)
1047 return false;
1048 return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
1049 *Position == '\n';
1050}
1051
1052bool Scanner::isLineEmpty(StringRef Line) {
1053 for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
1054 if (!isBlankOrBreak(Position))
1055 return false;
1056 return true;
1057}
1058
1059bool Scanner::consumeLineBreakIfPresent() {
1060 auto Next = skip_b_break(Current);
1061 if (Next == Current)
1062 return false;
1063 Column = 0;
1064 ++Line;
1065 Current = Next;
1066 return true;
1067}
1068
1069void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
1070 , unsigned AtColumn
1071 , bool IsRequired) {
1072 if (IsSimpleKeyAllowed) {
1073 SimpleKey SK;
1074 SK.Tok = Tok;
1075 SK.Line = Line;
1076 SK.Column = AtColumn;
1077 SK.IsRequired = IsRequired;
1078 SK.FlowLevel = FlowLevel;
1079 SimpleKeys.push_back(SK);
1080 }
1081}
1082
1083void Scanner::removeStaleSimpleKeyCandidates() {
1084 for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
1085 i != SimpleKeys.end();) {
1086 if (i->Line != Line || i->Column + 1024 < Column) {
1087 if (i->IsRequired)
1088 setError( "Could not find expected : for simple key"
1089 , i->Tok->Range.begin());
1090 i = SimpleKeys.erase(i);
1091 } else
1092 ++i;
1093 }
1094}
1095
1096void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
1097 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
1098 SimpleKeys.pop_back();
1099}
1100
1101bool Scanner::unrollIndent(int ToColumn) {
1102 Token T;
1103 // Indentation is ignored in flow.
1104 if (FlowLevel != 0)
1105 return true;
1106
1107 while (Indent > ToColumn) {
1108 T.Kind = Token::TK_BlockEnd;
1109 T.Range = StringRef(Current, 1);
1110 TokenQueue.push_back(T);
1111 Indent = Indents.pop_back_val();
1112 }
1113
1114 return true;
1115}
1116
1117bool Scanner::rollIndent( int ToColumn
1118 , Token::TokenKind Kind
1119 , TokenQueueT::iterator InsertPoint) {
1120 if (FlowLevel)
1121 return true;
1122 if (Indent < ToColumn) {
1123 Indents.push_back(Indent);
1124 Indent = ToColumn;
1125
1126 Token T;
1127 T.Kind = Kind;
1128 T.Range = StringRef(Current, 0);
1129 TokenQueue.insert(InsertPoint, T);
1130 }
1131 return true;
1132}
1133
1134void Scanner::skipComment() {
1135 if (Current == End || *Current != '#')
1136 return;
1137 while (true) {
1138 // This may skip more than one byte, thus Column is only incremented
1139 // for code points.
1140 StringRef::iterator I = skip_nb_char(Current);
1141 if (I == Current)
1142 break;
1143 Current = I;
1144 ++Column;
1145 }
1146}
1147
1148void Scanner::scanToNextToken() {
1149 while (true) {
1150 while (Current != End && (*Current == ' ' || *Current == '\t')) {
1151 skip(1);
1152 }
1153
1154 skipComment();
1155
1156 // Skip EOL.
1157 StringRef::iterator i = skip_b_break(Current);
1158 if (i == Current)
1159 break;
1160 Current = i;
1161 ++Line;
1162 Column = 0;
1163 // New lines may start a simple key.
1164 if (!FlowLevel)
1165 IsSimpleKeyAllowed = true;
1166 }
1167}
1168
1169bool Scanner::scanStreamStart() {
1170 IsStartOfStream = false;
1171
1172 EncodingInfo EI = getUnicodeEncoding(currentInput());
1173
1174 Token T;
1175 T.Kind = Token::TK_StreamStart;
1176 T.Range = StringRef(Current, EI.second);
1177 TokenQueue.push_back(T);
1178 Current += EI.second;
1179 return true;
1180}
1181
1182bool Scanner::scanStreamEnd() {
1183 // Force an ending new line if one isn't present.
1184 if (Column != 0) {
1185 Column = 0;
1186 ++Line;
1187 }
1188
1189 unrollIndent(-1);
1190 SimpleKeys.clear();
1191 IsSimpleKeyAllowed = false;
1192
1193 Token T;
1194 T.Kind = Token::TK_StreamEnd;
1195 T.Range = StringRef(Current, 0);
1196 TokenQueue.push_back(T);
1197 return true;
1198}
1199
1200bool Scanner::scanDirective() {
1201 // Reset the indentation level.
1202 unrollIndent(-1);
1203 SimpleKeys.clear();
1204 IsSimpleKeyAllowed = false;
1205
1206 StringRef::iterator Start = Current;
1207 consume('%');
1208 StringRef::iterator NameStart = Current;
1209 Current = skip_while(&Scanner::skip_ns_char, Current);
1210 StringRef Name(NameStart, Current - NameStart);
1211 Current = skip_while(&Scanner::skip_s_white, Current);
1212
1213 Token T;
1214 if (Name == "YAML") {
1215 Current = skip_while(&Scanner::skip_ns_char, Current);
1217 T.Range = StringRef(Start, Current - Start);
1218 TokenQueue.push_back(T);
1219 return true;
1220 } else if(Name == "TAG") {
1221 Current = skip_while(&Scanner::skip_ns_char, Current);
1222 Current = skip_while(&Scanner::skip_s_white, Current);
1223 Current = skip_while(&Scanner::skip_ns_char, Current);
1225 T.Range = StringRef(Start, Current - Start);
1226 TokenQueue.push_back(T);
1227 return true;
1228 }
1229 return false;
1230}
1231
1232bool Scanner::scanDocumentIndicator(bool IsStart) {
1233 unrollIndent(-1);
1234 SimpleKeys.clear();
1235 IsSimpleKeyAllowed = false;
1236
1237 Token T;
1239 T.Range = StringRef(Current, 3);
1240 skip(3);
1241 TokenQueue.push_back(T);
1242 return true;
1243}
1244
1245bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1246 Token T;
1247 T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1249 T.Range = StringRef(Current, 1);
1250 skip(1);
1251 TokenQueue.push_back(T);
1252
1253 // [ and { may begin a simple key.
1254 saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
1255
1256 // And may also be followed by a simple key.
1257 IsSimpleKeyAllowed = true;
1258 ++FlowLevel;
1259 return true;
1260}
1261
1262bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1263 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1264 IsSimpleKeyAllowed = false;
1265 Token T;
1266 T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1268 T.Range = StringRef(Current, 1);
1269 skip(1);
1270 TokenQueue.push_back(T);
1271 if (FlowLevel)
1272 --FlowLevel;
1273 return true;
1274}
1275
1276bool Scanner::scanFlowEntry() {
1277 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1278 IsSimpleKeyAllowed = true;
1279 Token T;
1280 T.Kind = Token::TK_FlowEntry;
1281 T.Range = StringRef(Current, 1);
1282 skip(1);
1283 TokenQueue.push_back(T);
1284 return true;
1285}
1286
1287bool Scanner::scanBlockEntry() {
1288 rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
1289 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1290 IsSimpleKeyAllowed = true;
1291 Token T;
1292 T.Kind = Token::TK_BlockEntry;
1293 T.Range = StringRef(Current, 1);
1294 skip(1);
1295 TokenQueue.push_back(T);
1296 return true;
1297}
1298
1299bool Scanner::scanKey() {
1300 if (!FlowLevel)
1301 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1302
1303 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1304 IsSimpleKeyAllowed = !FlowLevel;
1305
1306 Token T;
1307 T.Kind = Token::TK_Key;
1308 T.Range = StringRef(Current, 1);
1309 skip(1);
1310 TokenQueue.push_back(T);
1311 return true;
1312}
1313
1314bool Scanner::scanValue() {
1315 // If the previous token could have been a simple key, insert the key token
1316 // into the token queue.
1317 if (!SimpleKeys.empty()) {
1318 SimpleKey SK = SimpleKeys.pop_back_val();
1319 Token T;
1320 T.Kind = Token::TK_Key;
1321 T.Range = SK.Tok->Range;
1323 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1324 if (i == SK.Tok)
1325 break;
1326 }
1327 if (i == e) {
1328 Failed = true;
1329 return false;
1330 }
1331 i = TokenQueue.insert(i, T);
1332
1333 // We may also need to add a Block-Mapping-Start token.
1334 rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
1335
1336 IsSimpleKeyAllowed = false;
1337 } else {
1338 if (!FlowLevel)
1339 rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
1340 IsSimpleKeyAllowed = !FlowLevel;
1341 }
1342
1343 Token T;
1344 T.Kind = Token::TK_Value;
1345 T.Range = StringRef(Current, 1);
1346 skip(1);
1347 TokenQueue.push_back(T);
1348 return true;
1349}
1350
1351// Forbidding inlining improves performance by roughly 20%.
1352// FIXME: Remove once llvm optimizes this to the faster version without hints.
1353LLVM_ATTRIBUTE_NOINLINE static bool
1355
1356// Returns whether a character at 'Position' was escaped with a leading '\'.
1357// 'First' specifies the position of the first character in the string.
1359 StringRef::iterator Position) {
1360 assert(Position - 1 >= First);
1361 StringRef::iterator I = Position - 1;
1362 // We calculate the number of consecutive '\'s before the current position
1363 // by iterating backwards through our string.
1364 while (I >= First && *I == '\\') --I;
1365 // (Position - 1 - I) now contains the number of '\'s before the current
1366 // position. If it is odd, the character at 'Position' was escaped.
1367 return (Position - 1 - I) % 2 == 1;
1368}
1369
1370bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1371 StringRef::iterator Start = Current;
1372 unsigned ColStart = Column;
1373 if (IsDoubleQuoted) {
1374 do {
1375 ++Current;
1376 while (Current != End && *Current != '"')
1377 ++Current;
1378 // Repeat until the previous character was not a '\' or was an escaped
1379 // backslash.
1380 } while ( Current != End
1381 && *(Current - 1) == '\\'
1382 && wasEscaped(Start + 1, Current));
1383 } else {
1384 skip(1);
1385 while (Current != End) {
1386 // Skip a ' followed by another '.
1387 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
1388 skip(2);
1389 continue;
1390 } else if (*Current == '\'')
1391 break;
1392 StringRef::iterator i = skip_nb_char(Current);
1393 if (i == Current) {
1394 i = skip_b_break(Current);
1395 if (i == Current)
1396 break;
1397 Current = i;
1398 Column = 0;
1399 ++Line;
1400 } else {
1401 if (i == End)
1402 break;
1403 Current = i;
1404 ++Column;
1405 }
1406 }
1407 }
1408
1409 if (Current == End) {
1410 setError("Expected quote at end of scalar", Current);
1411 return false;
1412 }
1413
1414 skip(1); // Skip ending quote.
1415 Token T;
1416 T.Kind = Token::TK_Scalar;
1417 T.Range = StringRef(Start, Current - Start);
1418 TokenQueue.push_back(T);
1419
1420 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1421
1422 IsSimpleKeyAllowed = false;
1423
1424 return true;
1425}
1426
1427bool Scanner::scanPlainScalar() {
1428 StringRef::iterator Start = Current;
1429 unsigned ColStart = Column;
1430 unsigned LeadingBlanks = 0;
1431 assert(Indent >= -1 && "Indent must be >= -1 !");
1432 unsigned indent = static_cast<unsigned>(Indent + 1);
1433 while (Current != End) {
1434 if (*Current == '#')
1435 break;
1436
1437 while (Current != End && !isBlankOrBreak(Current)) {
1438 if (FlowLevel && *Current == ':' &&
1439 (Current + 1 == End ||
1440 !(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) {
1441 setError("Found unexpected ':' while scanning a plain scalar", Current);
1442 return false;
1443 }
1444
1445 // Check for the end of the plain scalar.
1446 if ( (*Current == ':' && isBlankOrBreak(Current + 1))
1447 || ( FlowLevel
1448 && (StringRef(Current, 1).find_first_of(",:?[]{}")
1449 != StringRef::npos)))
1450 break;
1451
1452 StringRef::iterator i = skip_nb_char(Current);
1453 if (i == Current)
1454 break;
1455 Current = i;
1456 ++Column;
1457 }
1458
1459 // Are we at the end?
1460 if (!isBlankOrBreak(Current))
1461 break;
1462
1463 // Eat blanks.
1464 StringRef::iterator Tmp = Current;
1465 while (isBlankOrBreak(Tmp)) {
1466 StringRef::iterator i = skip_s_white(Tmp);
1467 if (i != Tmp) {
1468 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
1469 setError("Found invalid tab character in indentation", Tmp);
1470 return false;
1471 }
1472 Tmp = i;
1473 ++Column;
1474 } else {
1475 i = skip_b_break(Tmp);
1476 if (!LeadingBlanks)
1477 LeadingBlanks = 1;
1478 Tmp = i;
1479 Column = 0;
1480 ++Line;
1481 }
1482 }
1483
1484 if (!FlowLevel && Column < indent)
1485 break;
1486
1487 Current = Tmp;
1488 }
1489 if (Start == Current) {
1490 setError("Got empty plain scalar", Start);
1491 return false;
1492 }
1493 Token T;
1494 T.Kind = Token::TK_Scalar;
1495 T.Range = StringRef(Start, Current - Start);
1496 TokenQueue.push_back(T);
1497
1498 // Plain scalars can be simple keys.
1499 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1500
1501 IsSimpleKeyAllowed = false;
1502
1503 return true;
1504}
1505
1506bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1507 StringRef::iterator Start = Current;
1508 unsigned ColStart = Column;
1509 skip(1);
1510 while (Current != End) {
1511 if ( *Current == '[' || *Current == ']'
1512 || *Current == '{' || *Current == '}'
1513 || *Current == ','
1514 || *Current == ':')
1515 break;
1516 StringRef::iterator i = skip_ns_char(Current);
1517 if (i == Current)
1518 break;
1519 Current = i;
1520 ++Column;
1521 }
1522
1523 if (Start + 1 == Current) {
1524 setError("Got empty alias or anchor", Start);
1525 return false;
1526 }
1527
1528 Token T;
1530 T.Range = StringRef(Start, Current - Start);
1531 TokenQueue.push_back(T);
1532
1533 // Alias and anchors can be simple keys.
1534 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1535
1536 IsSimpleKeyAllowed = false;
1537
1538 return true;
1539}
1540
1541bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
1542 char &ChompingIndicator,
1543 unsigned &IndentIndicator,
1544 bool &IsDone) {
1545 StyleIndicator = scanBlockStyleIndicator();
1546 if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1547 return false;
1548 return true;
1549}
1550
1551char Scanner::scanBlockStyleIndicator() {
1552 char Indicator = ' ';
1553 if (Current != End && (*Current == '>' || *Current == '|')) {
1554 Indicator = *Current;
1555 skip(1);
1556 }
1557 return Indicator;
1558}
1559
1560char Scanner::scanBlockChompingIndicator() {
1561 char Indicator = ' ';
1562 if (Current != End && (*Current == '+' || *Current == '-')) {
1563 Indicator = *Current;
1564 skip(1);
1565 }
1566 return Indicator;
1567}
1568
1569/// Get the number of line breaks after chomping.
1570///
1571/// Return the number of trailing line breaks to emit, depending on
1572/// \p ChompingIndicator.
1573static unsigned getChompedLineBreaks(char ChompingIndicator,
1574 unsigned LineBreaks, StringRef Str) {
1575 if (ChompingIndicator == '-') // Strip all line breaks.
1576 return 0;
1577 if (ChompingIndicator == '+') // Keep all line breaks.
1578 return LineBreaks;
1579 // Clip trailing lines.
1580 return Str.empty() ? 0 : 1;
1581}
1582
1583unsigned Scanner::scanBlockIndentationIndicator() {
1584 unsigned Indent = 0;
1585 if (Current != End && (*Current >= '1' && *Current <= '9')) {
1586 Indent = unsigned(*Current - '0');
1587 skip(1);
1588 }
1589 return Indent;
1590}
1591
1592bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1593 unsigned &IndentIndicator, bool &IsDone) {
1594 auto Start = Current;
1595
1596 ChompingIndicator = scanBlockChompingIndicator();
1597 IndentIndicator = scanBlockIndentationIndicator();
1598 // Check for the chomping indicator once again.
1599 if (ChompingIndicator == ' ')
1600 ChompingIndicator = scanBlockChompingIndicator();
1601 Current = skip_while(&Scanner::skip_s_white, Current);
1602 skipComment();
1603
1604 if (Current == End) { // EOF, we have an empty scalar.
1605 Token T;
1606 T.Kind = Token::TK_BlockScalar;
1607 T.Range = StringRef(Start, Current - Start);
1608 TokenQueue.push_back(T);
1609 IsDone = true;
1610 return true;
1611 }
1612
1613 if (!consumeLineBreakIfPresent()) {
1614 setError("Expected a line break after block scalar header", Current);
1615 return false;
1616 }
1617 return true;
1618}
1619
1620bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1621 unsigned BlockExitIndent,
1622 unsigned &LineBreaks, bool &IsDone) {
1623 unsigned MaxAllSpaceLineCharacters = 0;
1624 StringRef::iterator LongestAllSpaceLine;
1625
1626 while (true) {
1627 advanceWhile(&Scanner::skip_s_space);
1628 if (skip_nb_char(Current) != Current) {
1629 // This line isn't empty, so try and find the indentation.
1630 if (Column <= BlockExitIndent) { // End of the block literal.
1631 IsDone = true;
1632 return true;
1633 }
1634 // We found the block's indentation.
1635 BlockIndent = Column;
1636 if (MaxAllSpaceLineCharacters > BlockIndent) {
1637 setError(
1638 "Leading all-spaces line must be smaller than the block indent",
1639 LongestAllSpaceLine);
1640 return false;
1641 }
1642 return true;
1643 }
1644 if (skip_b_break(Current) != Current &&
1645 Column > MaxAllSpaceLineCharacters) {
1646 // Record the longest all-space line in case it's longer than the
1647 // discovered block indent.
1648 MaxAllSpaceLineCharacters = Column;
1649 LongestAllSpaceLine = Current;
1650 }
1651
1652 // Check for EOF.
1653 if (Current == End) {
1654 IsDone = true;
1655 return true;
1656 }
1657
1658 if (!consumeLineBreakIfPresent()) {
1659 IsDone = true;
1660 return true;
1661 }
1662 ++LineBreaks;
1663 }
1664 return true;
1665}
1666
1667bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1668 unsigned BlockExitIndent, bool &IsDone) {
1669 // Skip the indentation.
1670 while (Column < BlockIndent) {
1671 auto I = skip_s_space(Current);
1672 if (I == Current)
1673 break;
1674 Current = I;
1675 ++Column;
1676 }
1677
1678 if (skip_nb_char(Current) == Current)
1679 return true;
1680
1681 if (Column <= BlockExitIndent) { // End of the block literal.
1682 IsDone = true;
1683 return true;
1684 }
1685
1686 if (Column < BlockIndent) {
1687 if (Current != End && *Current == '#') { // Trailing comment.
1688 IsDone = true;
1689 return true;
1690 }
1691 setError("A text line is less indented than the block scalar", Current);
1692 return false;
1693 }
1694 return true; // A normal text line.
1695}
1696
1697bool Scanner::scanBlockScalar(bool IsLiteral) {
1698 assert(*Current == '|' || *Current == '>');
1699 char StyleIndicator;
1700 char ChompingIndicator;
1701 unsigned BlockIndent;
1702 bool IsDone = false;
1703 if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
1704 IsDone))
1705 return false;
1706 if (IsDone)
1707 return true;
1708 bool IsFolded = StyleIndicator == '>';
1709
1710 const auto *Start = Current;
1711 unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
1712 unsigned LineBreaks = 0;
1713 if (BlockIndent == 0) {
1714 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1715 IsDone))
1716 return false;
1717 }
1718
1719 // Scan the block's scalars body.
1720 SmallString<256> Str;
1721 while (!IsDone) {
1722 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1723 return false;
1724 if (IsDone)
1725 break;
1726
1727 // Parse the current line.
1728 auto LineStart = Current;
1729 advanceWhile(&Scanner::skip_nb_char);
1730 if (LineStart != Current) {
1731 if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
1732 // The folded style "folds" any single line break between content into a
1733 // single space, except when that content is "empty" (only contains
1734 // whitespace) in which case the line break is left as-is.
1735 if (LineBreaks == 1) {
1736 Str.append(LineBreaks,
1737 isLineEmpty(StringRef(LineStart, Current - LineStart))
1738 ? '\n'
1739 : ' ');
1740 }
1741 // If we saw a single line break, we are completely replacing it and so
1742 // want `LineBreaks == 0`. Otherwise this decrement accounts for the
1743 // fact that the first line break is "trimmed", only being used to
1744 // signal a sequence of line breaks which should not be folded.
1745 LineBreaks--;
1746 }
1747 Str.append(LineBreaks, '\n');
1748 Str.append(StringRef(LineStart, Current - LineStart));
1749 LineBreaks = 0;
1750 }
1751
1752 // Check for EOF.
1753 if (Current == End)
1754 break;
1755
1756 if (!consumeLineBreakIfPresent())
1757 break;
1758 ++LineBreaks;
1759 }
1760
1761 if (Current == End && !LineBreaks)
1762 // Ensure that there is at least one line break before the end of file.
1763 LineBreaks = 1;
1764 Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
1765
1766 // New lines may start a simple key.
1767 if (!FlowLevel)
1768 IsSimpleKeyAllowed = true;
1769
1770 Token T;
1771 T.Kind = Token::TK_BlockScalar;
1772 T.Range = StringRef(Start, Current - Start);
1773 T.Value = std::string(Str);
1774 TokenQueue.push_back(T);
1775 return true;
1776}
1777
1778bool Scanner::scanTag() {
1779 StringRef::iterator Start = Current;
1780 unsigned ColStart = Column;
1781 skip(1); // Eat !.
1782 if (Current == End || isBlankOrBreak(Current)); // An empty tag.
1783 else if (*Current == '<') {
1784 skip(1);
1785 scan_ns_uri_char();
1786 if (!consume('>'))
1787 return false;
1788 } else {
1789 // FIXME: Actually parse the c-ns-shorthand-tag rule.
1790 Current = skip_while(&Scanner::skip_ns_char, Current);
1791 }
1792
1793 Token T;
1794 T.Kind = Token::TK_Tag;
1795 T.Range = StringRef(Start, Current - Start);
1796 TokenQueue.push_back(T);
1797
1798 // Tags can be simple keys.
1799 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1800
1801 IsSimpleKeyAllowed = false;
1802
1803 return true;
1804}
1805
1806bool Scanner::fetchMoreTokens() {
1807 if (IsStartOfStream)
1808 return scanStreamStart();
1809
1810 scanToNextToken();
1811
1812 if (Current == End)
1813 return scanStreamEnd();
1814
1815 removeStaleSimpleKeyCandidates();
1816
1817 unrollIndent(Column);
1818
1819 if (Column == 0 && *Current == '%')
1820 return scanDirective();
1821
1822 if (Column == 0 && Current + 4 <= End
1823 && *Current == '-'
1824 && *(Current + 1) == '-'
1825 && *(Current + 2) == '-'
1826 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1827 return scanDocumentIndicator(true);
1828
1829 if (Column == 0 && Current + 4 <= End
1830 && *Current == '.'
1831 && *(Current + 1) == '.'
1832 && *(Current + 2) == '.'
1833 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1834 return scanDocumentIndicator(false);
1835
1836 if (*Current == '[')
1837 return scanFlowCollectionStart(true);
1838
1839 if (*Current == '{')
1840 return scanFlowCollectionStart(false);
1841
1842 if (*Current == ']')
1843 return scanFlowCollectionEnd(true);
1844
1845 if (*Current == '}')
1846 return scanFlowCollectionEnd(false);
1847
1848 if (*Current == ',')
1849 return scanFlowEntry();
1850
1851 if (*Current == '-' && isBlankOrBreak(Current + 1))
1852 return scanBlockEntry();
1853
1854 if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1855 return scanKey();
1856
1857 if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1858 return scanValue();
1859
1860 if (*Current == '*')
1861 return scanAliasOrAnchor(true);
1862
1863 if (*Current == '&')
1864 return scanAliasOrAnchor(false);
1865
1866 if (*Current == '!')
1867 return scanTag();
1868
1869 if (*Current == '|' && !FlowLevel)
1870 return scanBlockScalar(true);
1871
1872 if (*Current == '>' && !FlowLevel)
1873 return scanBlockScalar(false);
1874
1875 if (*Current == '\'')
1876 return scanFlowScalar(false);
1877
1878 if (*Current == '"')
1879 return scanFlowScalar(true);
1880
1881 // Get a plain scalar.
1882 StringRef FirstChar(Current, 1);
1883 if (!(isBlankOrBreak(Current)
1884 || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
1885 || (*Current == '-' && !isBlankOrBreak(Current + 1))
1886 || (!FlowLevel && (*Current == '?' || *Current == ':')
1887 && isBlankOrBreak(Current + 1))
1888 || (!FlowLevel && *Current == ':'
1889 && Current + 2 < End
1890 && *(Current + 1) == ':'
1891 && !isBlankOrBreak(Current + 2)))
1892 return scanPlainScalar();
1893
1894 setError("Unrecognized character while tokenizing.", Current);
1895 return false;
1896}
1897
1898Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
1899 std::error_code *EC)
1900 : scanner(new Scanner(Input, SM, ShowColors, EC)) {}
1901
1902Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
1903 std::error_code *EC)
1904 : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {}
1905
1906Stream::~Stream() = default;
1907
1908bool Stream::failed() { return scanner->failed(); }
1909
1911 printError(N ? N->getSourceRange() : SMRange(), Msg, Kind);
1912}
1913
1914void Stream::printError(const SMRange &Range, const Twine &Msg,
1915 SourceMgr::DiagKind Kind) {
1916 scanner->printError(Range.Start, Kind, Msg, Range);
1917}
1918
1920 if (CurrentDoc)
1921 report_fatal_error("Can only iterate over the stream once");
1922
1923 // Skip Stream-Start.
1924 scanner->getNext();
1925
1926 CurrentDoc.reset(new Document(*this));
1927 return document_iterator(CurrentDoc);
1928}
1929
1931 return document_iterator();
1932}
1933
1935 for (Document &Doc : *this)
1936 Doc.skip();
1937}
1938
1939Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
1940 StringRef T)
1941 : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
1942 SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
1943 SourceRange = SMRange(Start, Start);
1944}
1945
1946std::string Node::getVerbatimTag() const {
1947 StringRef Raw = getRawTag();
1948 if (!Raw.empty() && Raw != "!") {
1949 std::string Ret;
1950 if (Raw.find_last_of('!') == 0) {
1951 Ret = std::string(Doc->getTagMap().find("!")->second);
1952 Ret += Raw.substr(1);
1953 return Ret;
1954 } else if (Raw.startswith("!!")) {
1955 Ret = std::string(Doc->getTagMap().find("!!")->second);
1956 Ret += Raw.substr(2);
1957 return Ret;
1958 } else {
1959 StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
1960 std::map<StringRef, StringRef>::const_iterator It =
1961 Doc->getTagMap().find(TagHandle);
1962 if (It != Doc->getTagMap().end())
1963 Ret = std::string(It->second);
1964 else {
1965 Token T;
1966 T.Kind = Token::TK_Tag;
1967 T.Range = TagHandle;
1968 setError(Twine("Unknown tag handle ") + TagHandle, T);
1969 }
1970 Ret += Raw.substr(Raw.find_last_of('!') + 1);
1971 return Ret;
1972 }
1973 }
1974
1975 switch (getType()) {
1976 case NK_Null:
1977 return "tag:yaml.org,2002:null";
1978 case NK_Scalar:
1979 case NK_BlockScalar:
1980 // TODO: Tag resolution.
1981 return "tag:yaml.org,2002:str";
1982 case NK_Mapping:
1983 return "tag:yaml.org,2002:map";
1984 case NK_Sequence:
1985 return "tag:yaml.org,2002:seq";
1986 }
1987
1988 return "";
1989}
1990
1992 return Doc->peekNext();
1993}
1994
1996 return Doc->getNext();
1997}
1998
2000 return Doc->parseBlockNode();
2001}
2002
2004 return Doc->NodeAllocator;
2005}
2006
2007void Node::setError(const Twine &Msg, Token &Tok) const {
2008 Doc->setError(Msg, Tok);
2009}
2010
2011bool Node::failed() const {
2012 return Doc->failed();
2013}
2014
2016 // TODO: Handle newlines properly. We need to remove leading whitespace.
2017 if (Value[0] == '"') { // Double quoted.
2018 // Pull off the leading and trailing "s.
2019 StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
2020 // Search for characters that would require unescaping the value.
2021 StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
2022 if (i != StringRef::npos)
2023 return unescapeDoubleQuoted(UnquotedValue, i, Storage);
2024 return UnquotedValue;
2025 } else if (Value[0] == '\'') { // Single quoted.
2026 // Pull off the leading and trailing 's.
2027 StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
2028 StringRef::size_type i = UnquotedValue.find('\'');
2029 if (i != StringRef::npos) {
2030 // We're going to need Storage.
2031 Storage.clear();
2032 Storage.reserve(UnquotedValue.size());
2033 for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
2034 StringRef Valid(UnquotedValue.begin(), i);
2035 llvm::append_range(Storage, Valid);
2036 Storage.push_back('\'');
2037 UnquotedValue = UnquotedValue.substr(i + 2);
2038 }
2039 llvm::append_range(Storage, UnquotedValue);
2040 return StringRef(Storage.begin(), Storage.size());
2041 }
2042 return UnquotedValue;
2043 }
2044 // Plain or block.
2045 return Value.rtrim(' ');
2046}
2047
2048StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
2050 , SmallVectorImpl<char> &Storage)
2051 const {
2052 // Use Storage to build proper value.
2053 Storage.clear();
2054 Storage.reserve(UnquotedValue.size());
2055 for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
2056 // Insert all previous chars into Storage.
2057 StringRef Valid(UnquotedValue.begin(), i);
2058 llvm::append_range(Storage, Valid);
2059 // Chop off inserted chars.
2060 UnquotedValue = UnquotedValue.substr(i);
2061
2062 assert(!UnquotedValue.empty() && "Can't be empty!");
2063
2064 // Parse escape or line break.
2065 switch (UnquotedValue[0]) {
2066 case '\r':
2067 case '\n':
2068 Storage.push_back('\n');
2069 if ( UnquotedValue.size() > 1
2070 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
2071 UnquotedValue = UnquotedValue.substr(1);
2072 UnquotedValue = UnquotedValue.substr(1);
2073 break;
2074 default:
2075 if (UnquotedValue.size() == 1) {
2076 Token T;
2077 T.Range = StringRef(UnquotedValue.begin(), 1);
2078 setError("Unrecognized escape code", T);
2079 return "";
2080 }
2081 UnquotedValue = UnquotedValue.substr(1);
2082 switch (UnquotedValue[0]) {
2083 default: {
2084 Token T;
2085 T.Range = StringRef(UnquotedValue.begin(), 1);
2086 setError("Unrecognized escape code", T);
2087 return "";
2088 }
2089 case '\r':
2090 case '\n':
2091 // Remove the new line.
2092 if ( UnquotedValue.size() > 1
2093 && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
2094 UnquotedValue = UnquotedValue.substr(1);
2095 // If this was just a single byte newline, it will get skipped
2096 // below.
2097 break;
2098 case '0':
2099 Storage.push_back(0x00);
2100 break;
2101 case 'a':
2102 Storage.push_back(0x07);
2103 break;
2104 case 'b':
2105 Storage.push_back(0x08);
2106 break;
2107 case 't':
2108 case 0x09:
2109 Storage.push_back(0x09);
2110 break;
2111 case 'n':
2112 Storage.push_back(0x0A);
2113 break;
2114 case 'v':
2115 Storage.push_back(0x0B);
2116 break;
2117 case 'f':
2118 Storage.push_back(0x0C);
2119 break;
2120 case 'r':
2121 Storage.push_back(0x0D);
2122 break;
2123 case 'e':
2124 Storage.push_back(0x1B);
2125 break;
2126 case ' ':
2127 Storage.push_back(0x20);
2128 break;
2129 case '"':
2130 Storage.push_back(0x22);
2131 break;
2132 case '/':
2133 Storage.push_back(0x2F);
2134 break;
2135 case '\\':
2136 Storage.push_back(0x5C);
2137 break;
2138 case 'N':
2139 encodeUTF8(0x85, Storage);
2140 break;
2141 case '_':
2142 encodeUTF8(0xA0, Storage);
2143 break;
2144 case 'L':
2145 encodeUTF8(0x2028, Storage);
2146 break;
2147 case 'P':
2148 encodeUTF8(0x2029, Storage);
2149 break;
2150 case 'x': {
2151 if (UnquotedValue.size() < 3)
2152 // TODO: Report error.
2153 break;
2154 unsigned int UnicodeScalarValue;
2155 if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
2156 // TODO: Report error.
2157 UnicodeScalarValue = 0xFFFD;
2158 encodeUTF8(UnicodeScalarValue, Storage);
2159 UnquotedValue = UnquotedValue.substr(2);
2160 break;
2161 }
2162 case 'u': {
2163 if (UnquotedValue.size() < 5)
2164 // TODO: Report error.
2165 break;
2166 unsigned int UnicodeScalarValue;
2167 if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
2168 // TODO: Report error.
2169 UnicodeScalarValue = 0xFFFD;
2170 encodeUTF8(UnicodeScalarValue, Storage);
2171 UnquotedValue = UnquotedValue.substr(4);
2172 break;
2173 }
2174 case 'U': {
2175 if (UnquotedValue.size() < 9)
2176 // TODO: Report error.
2177 break;
2178 unsigned int UnicodeScalarValue;
2179 if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
2180 // TODO: Report error.
2181 UnicodeScalarValue = 0xFFFD;
2182 encodeUTF8(UnicodeScalarValue, Storage);
2183 UnquotedValue = UnquotedValue.substr(8);
2184 break;
2185 }
2186 }
2187 UnquotedValue = UnquotedValue.substr(1);
2188 }
2189 }
2190 llvm::append_range(Storage, UnquotedValue);
2191 return StringRef(Storage.begin(), Storage.size());
2192}
2193
2195 if (Key)
2196 return Key;
2197 // Handle implicit null keys.
2198 {
2199 Token &t = peekNext();
2200 if ( t.Kind == Token::TK_BlockEnd
2201 || t.Kind == Token::TK_Value
2202 || t.Kind == Token::TK_Error) {
2203 return Key = new (getAllocator()) NullNode(Doc);
2204 }
2205 if (t.Kind == Token::TK_Key)
2206 getNext(); // skip TK_Key.
2207 }
2208
2209 // Handle explicit null keys.
2210 Token &t = peekNext();
2211 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
2212 return Key = new (getAllocator()) NullNode(Doc);
2213 }
2214
2215 // We've got a normal key.
2216 return Key = parseBlockNode();
2217}
2218
2220 if (Value)
2221 return Value;
2222
2223 if (Node* Key = getKey())
2224 Key->skip();
2225 else {
2226 setError("Null key in Key Value.", peekNext());
2227 return Value = new (getAllocator()) NullNode(Doc);
2228 }
2229
2230 if (failed())
2231 return Value = new (getAllocator()) NullNode(Doc);
2232
2233 // Handle implicit null values.
2234 {
2235 Token &t = peekNext();
2236 if ( t.Kind == Token::TK_BlockEnd
2238 || t.Kind == Token::TK_Key
2240 || t.Kind == Token::TK_Error) {
2241 return Value = new (getAllocator()) NullNode(Doc);
2242 }
2243
2244 if (t.Kind != Token::TK_Value) {
2245 setError("Unexpected token in Key Value.", t);
2246 return Value = new (getAllocator()) NullNode(Doc);
2247 }
2248 getNext(); // skip TK_Value.
2249 }
2250
2251 // Handle explicit null values.
2252 Token &t = peekNext();
2253 if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
2254 return Value = new (getAllocator()) NullNode(Doc);
2255 }
2256
2257 // We got a normal value.
2258 return Value = parseBlockNode();
2259}
2260
// Advances the mapping to its next key/value entry.
// When the parser has failed, or no further entry can be read, IsAtEnd is set
// and CurrentEntry is cleared.
2261void MappingNode::increment() {
2262 if (failed()) {
2263 IsAtEnd = true;
2264 CurrentEntry = nullptr;
2265 return;
2266 }
// Skip whatever is left of the current entry before looking at the next token.
2267 if (CurrentEntry) {
2268 CurrentEntry->skip();
// An inline mapping ends as soon as its current entry has been skipped.
2269 if (Type == MT_Inline) {
2270 IsAtEnd = true;
2271 CurrentEntry = nullptr;
2272 return;
2273 }
2274 }
2275 Token T = peekNext();
2276 if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
2277 // KeyValueNode eats the TK_Key. That way it can detect null keys.
2278 CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
2279 } else if (Type == MT_Block) {
// Block mapping: a block-end token is consumed and ends the iteration; any
// other token is reported as an error before ending it.
2280 switch (T.Kind) {
2281 case Token::TK_BlockEnd:
2282 getNext();
2283 IsAtEnd = true;
2284 CurrentEntry = nullptr;
2285 break;
2286 default:
2287 setError("Unexpected token. Expected Key or Block End", T);
2288 [[fallthrough]];
2289 case Token::TK_Error:
2290 IsAtEnd = true;
2291 CurrentEntry = nullptr;
2292 }
2293 } else {
// Remaining mapping types.
// NOTE(review): listing lines 2295 and 2299 are missing from this extract;
// they presumably hold the case labels for the two getNext() paths below
// (the surviving comment and the error text suggest a flow entry and a flow
// mapping end) - confirm against the original file.
2294 switch (T.Kind) {
2296 // Eat the flow entry and recurse.
2297 getNext();
2298 return increment();
2300 getNext();
2301 [[fallthrough]];
2302 case Token::TK_Error:
2303 // Set this to end iterator.
2304 IsAtEnd = true;
2305 CurrentEntry = nullptr;
2306 break;
2307 default:
2308 setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
2309 "Mapping End."
2310 , T);
2311 IsAtEnd = true;
2312 CurrentEntry = nullptr;
2313 }
2314 }
2315}
2316
// Advances a sequence to its next entry, dispatching on SeqType (ST_Block,
// ST_Indentless, ST_Flow). When the parser has failed or no further entry can
// be read, IsAtEnd is set and CurrentEntry is cleared.
// NOTE(review): listing line 2317, which should hold the function signature,
// is absent from this extract (presumably the sequence counterpart of
// MappingNode::increment() - confirm against the original file).
2318 if (failed()) {
2319 IsAtEnd = true;
2320 CurrentEntry = nullptr;
2321 return;
2322 }
// Skip whatever is left of the current entry before looking at the next token.
2323 if (CurrentEntry)
2324 CurrentEntry->skip();
2325 Token T = peekNext();
2326 if (SeqType == ST_Block) {
2327 switch (T.Kind) {
// NOTE(review): listing line 2328 is missing; it presumably holds the case
// label for the entry path below (the error text names "Block Entry").
2329 getNext();
2330 CurrentEntry = parseBlockNode();
2331 if (!CurrentEntry) { // An error occurred.
2332 IsAtEnd = true;
2333 CurrentEntry = nullptr;
2334 }
2335 break;
2336 case Token::TK_BlockEnd:
2337 getNext();
2338 IsAtEnd = true;
2339 CurrentEntry = nullptr;
2340 break;
2341 default:
2342 setError( "Unexpected token. Expected Block Entry or Block End."
2343 , T);
2344 [[fallthrough]];
2345 case Token::TK_Error:
2346 IsAtEnd = true;
2347 CurrentEntry = nullptr;
2348 }
2349 } else if (SeqType == ST_Indentless) {
2350 switch (T.Kind) {
// NOTE(review): listing line 2351 is missing; presumably the case label for
// the entry path below.
2352 getNext();
2353 CurrentEntry = parseBlockNode();
2354 if (!CurrentEntry) { // An error occurred.
2355 IsAtEnd = true;
2356 CurrentEntry = nullptr;
2357 }
2358 break;
// Unlike the ST_Block branch, any other token ends the sequence here without
// reporting an error.
2359 default:
2360 case Token::TK_Error:
2361 IsAtEnd = true;
2362 CurrentEntry = nullptr;
2363 }
2364 } else if (SeqType == ST_Flow) {
2365 switch (T.Kind) {
// NOTE(review): listing lines 2366, 2371 and 2379-2381 are missing; they
// presumably hold the case labels for the three paths below (flow entry,
// end of the flow sequence, and tokens that mean the closing bracket was never
// found) - confirm against the original file.
2367 // Eat the flow entry and recurse.
2368 getNext();
2369 WasPreviousTokenFlowEntry = true;
2370 return increment();
2372 getNext();
2373 [[fallthrough]];
2374 case Token::TK_Error:
2375 // Set this to end iterator.
2376 IsAtEnd = true;
2377 CurrentEntry = nullptr;
2378 break;
2382 setError("Could not find closing ]!", T);
2383 // Set this to end iterator.
2384 IsAtEnd = true;
2385 CurrentEntry = nullptr;
2386 break;
2387 default:
// An element is only accepted when WasPreviousTokenFlowEntry is set; otherwise
// the missing separator is reported and iteration ends.
2388 if (!WasPreviousTokenFlowEntry) {
2389 setError("Expected , between entries!", T);
2390 IsAtEnd = true;
2391 CurrentEntry = nullptr;
2392 break;
2393 }
2394 // Otherwise it must be a flow entry.
2395 CurrentEntry = parseBlockNode();
2396 if (!CurrentEntry) {
2397 IsAtEnd = true;
2398 }
// Clear the flag so the next element again requires a separator before it.
2399 WasPreviousTokenFlowEntry = false;
2400 break;
2401 }
2402 }
2403}
2404
2405Document::Document(Stream &S) : stream(S), Root(nullptr) {
2406 // Tag maps starts with two default mappings.
2407 TagMap["!"] = "!";
2408 TagMap["!!"] = "tag:yaml.org,2002:";
2409
2410 if (parseDirectives())
2411 expectToken(Token::TK_DocumentStart);
2412 Token &T = peekNext();
2413 if (T.Kind == Token::TK_DocumentStart)
2414 getNext();
2415}
2416
// Finishes the current document and reports whether more input follows
// (the tooltip index at the end of this page describes Document::skip() as
// "Finish parsing the current document and return true if there are more").
// NOTE(review): listing line 2417, which should hold the function signature,
// is absent from this extract.
// Returns false when the scanner has failed, when no root node can be
// obtained, or when the next token is the end of the stream.
2418 if (stream.scanner->failed())
2419 return false;
// getRoot() is only called when Root is still unset; a null result ends the
// call here.
2420 if (!Root && !getRoot())
2421 return false;
2422 Root->skip();
2423 Token &T = peekNext();
2424 if (T.Kind == Token::TK_StreamEnd)
2425 return false;
// Consume a document-end token and re-run the checks on whatever follows it.
2426 if (T.Kind == Token::TK_DocumentEnd) {
2427 getNext();
2428 return skip();
2429 }
2430 return true;
2431}
2432
2433Token &Document::peekNext() {
2434 return stream.scanner->peekNext();
2435}
2436
2437Token Document::getNext() {
2438 return stream.scanner->getNext();
2439}
2440
2441void Document::setError(const Twine &Message, Token &Location) const {
2442 stream.scanner->setError(Message, Location.Range.begin());
2443}
2444
2445bool Document::failed() const {
2446 return stream.scanner->failed();
2447}
2448
// Root routine for parsing a single node (the tooltip index at the end of
// this page describes Document::parseBlockNode() as "Root for parsing a node.
// Returns a single node").
// It first collects node properties (at most one anchor and one tag), then
// allocates the node that matches the next token from NodeAllocator.
// Returns nullptr on an error token, a duplicated property, or an unexpected
// token.
// NOTE(review): listing line 2449, which should hold the function signature,
// is absent from this extract, as are several lines inside the second switch.
2450 Token T = peekNext();
2451 // Handle properties.
2452 Token AnchorInfo;
2453 Token TagInfo;
// Property loop: each anchor or tag is consumed, T is refreshed, and control
// jumps back here until a non-property token is seen.
2454parse_property:
2455 switch (T.Kind) {
2456 case Token::TK_Alias:
2457 getNext();
// substr(1) drops the first character of the token text (presumably the
// leading '*' - confirm).
2458 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2459 case Token::TK_Anchor:
2460 if (AnchorInfo.Kind == Token::TK_Anchor) {
2461 setError("Already encountered an anchor for this node!", T);
2462 return nullptr;
2463 }
2464 AnchorInfo = getNext(); // Consume TK_Anchor.
2465 T = peekNext();
2466 goto parse_property;
2467 case Token::TK_Tag:
2468 if (TagInfo.Kind == Token::TK_Tag) {
2469 setError("Already encountered a tag for this node!", T);
2470 return nullptr;
2471 }
2472 TagInfo = getNext(); // Consume TK_Tag.
2473 T = peekNext();
2474 goto parse_property;
2475 default:
2476 break;
2477 }
2478
// Node construction. Every node receives the anchor text without its first
// character and the raw tag range.
// NOTE(review): listing lines 2480, 2487-2488, 2494-2495, 2501-2502,
// 2508-2509 and 2515 are missing from this extract. They presumably hold the
// case labels and the final constructor argument (sequence/mapping type) of
// the five collection cases below, so those statements appear truncated -
// confirm against the original file.
2479 switch (T.Kind) {
2481 // We got an unindented BlockEntry sequence. This is not terminated with
2482 // a BlockEnd.
2483 // Don't eat the TK_BlockEntry, SequenceNode needs it.
2484 return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2485 , AnchorInfo.Range.substr(1)
2486 , TagInfo.Range
2489 getNext();
2490 return new (NodeAllocator)
2491 SequenceNode( stream.CurrentDoc
2492 , AnchorInfo.Range.substr(1)
2493 , TagInfo.Range
2496 getNext();
2497 return new (NodeAllocator)
2498 MappingNode( stream.CurrentDoc
2499 , AnchorInfo.Range.substr(1)
2500 , TagInfo.Range
2503 getNext();
2504 return new (NodeAllocator)
2505 SequenceNode( stream.CurrentDoc
2506 , AnchorInfo.Range.substr(1)
2507 , TagInfo.Range
2510 getNext();
2511 return new (NodeAllocator)
2512 MappingNode( stream.CurrentDoc
2513 , AnchorInfo.Range.substr(1)
2514 , TagInfo.Range
2516 case Token::TK_Scalar:
2517 getNext();
2518 return new (NodeAllocator)
2519 ScalarNode( stream.CurrentDoc
2520 , AnchorInfo.Range.substr(1)
2521 , TagInfo.Range
2522 , T.Range);
2523 case Token::TK_BlockScalar: {
2524 getNext();
// Copy the block scalar text, including its terminating NUL, into
// NodeAllocator, then drop the NUL from the view handed to the node.
2525 StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
2526 StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
2527 return new (NodeAllocator)
2528 BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
2529 TagInfo.Range, StrCopy, T.Range);
2530 }
2531 case Token::TK_Key:
2532 // Don't eat the TK_Key, KeyValueNode expects it.
// NOTE(review): listing lines 2537-2540 are missing, so the end of this
// MappingNode constructor call and any case labels that preceded "default:"
// are not visible here.
2533 return new (NodeAllocator)
2534 MappingNode( stream.CurrentDoc
2535 , AnchorInfo.Range.substr(1)
2536 , TagInfo.Range
2541 default:
2542 // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2543 // !!null null.
2544 return new (NodeAllocator) NullNode(stream.CurrentDoc);
// NOTE(review): listing lines 2545-2546 are missing; further case labels may
// share the block below with TK_FlowEntry.
2547 case Token::TK_FlowEntry: {
// Yields a NullNode only when the document root is already a mapping or a
// sequence; otherwise the token is reported as unexpected.
2548 if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
2549 return new (NodeAllocator) NullNode(stream.CurrentDoc);
2550
2551 setError("Unexpected token", T);
2552 return nullptr;
2553 }
2554 case Token::TK_Error:
2555 return nullptr;
2556 }
2557 llvm_unreachable("Control flow shouldn't reach here.");
2558 return nullptr;
2559}
2560
2561bool Document::parseDirectives() {
2562 bool isDirective = false;
2563 while (true) {
2564 Token T = peekNext();
2565 if (T.Kind == Token::TK_TagDirective) {
2566 parseTAGDirective();
2567 isDirective = true;
2568 } else if (T.Kind == Token::TK_VersionDirective) {
2569 parseYAMLDirective();
2570 isDirective = true;
2571 } else
2572 break;
2573 }
2574 return isDirective;
2575}
2576
2577void Document::parseYAMLDirective() {
2578 getNext(); // Eat %YAML <version>
2579}
2580
2581void Document::parseTAGDirective() {
2582 Token Tag = getNext(); // %TAG <handle> <prefix>
2583 StringRef T = Tag.Range;
2584 // Strip %TAG
2585 T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
2586 std::size_t HandleEnd = T.find_first_of(" \t");
2587 StringRef TagHandle = T.substr(0, HandleEnd);
2588 StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
2589 TagMap[TagHandle] = TagPrefix;
2590}
2591
2592bool Document::expectToken(int TK) {
2593 Token T = getNext();
2594 if (T.Kind != TK) {
2595 setError("Unexpected token", T);
2596 return false;
2597 }
2598 return true;
2599}
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
Definition: Compiler.h:220
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1260
#define I(x, y, z)
Definition: MD5.cpp:58
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
Definition: MILexer.cpp:94
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.
Definition: YAMLParser.cpp:60
static bool is_ns_hex_digit(const char C)
Definition: YAMLParser.cpp:998
static bool is_ns_word_char(const char C)
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
Definition: YAMLParser.cpp:194
UnicodeEncodingForm
Definition: YAMLParser.cpp:41
@ UEF_UTF32_LE
UTF-32 Little Endian.
Definition: YAMLParser.cpp:42
@ UEF_UTF16_BE
UTF-16 Big Endian.
Definition: YAMLParser.cpp:45
@ UEF_UTF16_LE
UTF-16 Little Endian.
Definition: YAMLParser.cpp:44
@ UEF_UTF32_BE
UTF-32 Big Endian.
Definition: YAMLParser.cpp:43
@ UEF_UTF8
UTF-8 or ascii.
Definition: YAMLParser.cpp:46
@ UEF_Unknown
Not a valid Unicode encoding.
Definition: YAMLParser.cpp:47
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
Definition: YAMLParser.cpp:52
static UTF8Decoded decodeUTF8(StringRef Range)
Definition: YAMLParser.cpp:196
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
Definition: YAMLParser.cpp:572
A linked-list with a custom, local allocator.
Definition: AllocatorList.h:33
void push_back(T &&V)
iterator insert(iterator I, T &&V)
void resetAlloc()
Reset the underlying allocator.
IteratorImpl< T, typename list_type::iterator > iterator
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66
Tagged union holding either a T or a Error.
Definition: Error.h:470
const char * getBufferStart() const
const char * getBufferEnd() const
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
Represents a range in source code.
Definition: SMLoc.h:48
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void reserve(size_type N)
Definition: SmallVector.h:667
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
typename SuperClass::iterator iterator
Definition: SmallVector.h:581
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
Definition: SourceMgr.h:31
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
Definition: SourceMgr.cpp:352
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
Definition: SourceMgr.h:144
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:469
const char * iterator
Definition: StringRef.h:54
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:559
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:597
iterator begin() const
Definition: StringRef.h:111
size_t size_type
Definition: StringRef.h:56
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
char front() const
front - Get the first character in the string.
Definition: StringRef.h:140
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition: StringRef.h:399
bool startswith(StringRef Prefix) const
Definition: StringRef.h:261
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:376
iterator end() const
Definition: StringRef.h:113
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:295
StringRef copy(Allocator &A) const
Definition: StringRef.h:153
static constexpr size_t npos
Definition: StringRef.h:52
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:604
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeID
Definitions of all of the base types for the Type system.
Definition: Type.h:54
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
Represents an alias to a Node with an anchor.
Definition: YAMLParser.h:514
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
Definition: YAMLParser.h:255
A YAML Stream is a sequence of Documents.
Definition: YAMLParser.h:531
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
bool skip()
Finish parsing the current document and return true if there are more.
Node * getRoot()
Parse and return the root level node.
Definition: YAMLParser.h:543
Document(Stream &ParentStream)
A key and value pair.
Definition: YAMLParser.h:285
Node * getValue()
Parse and return the value.
void skip() override
Definition: YAMLParser.h:306
Node * getKey()
Parse and return the key.
Represents a YAML map created from either a block map or a flow map.
Definition: YAMLParser.h:414
@ MT_Inline
An inline mapping node is used for "[key: value]".
Definition: YAMLParser.h:421
Abstract base class for all Nodes.
Definition: YAMLParser.h:119
StringRef getRawTag() const
Get the tag as it was written in the document.
Definition: YAMLParser.h:159
unsigned int getType() const
Definition: YAMLParser.h:178
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
bool failed() const
std::unique_ptr< Document > & Doc
Definition: YAMLParser.h:181
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
virtual void skip()
Definition: YAMLParser.h:176
BumpPtrAllocator & getAllocator()
Node * parseBlockNode()
void setError(const Twine &Message, Token &Location) const
Token & peekNext()
A null value.
Definition: YAMLParser.h:197
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
Definition: YAMLParser.h:212
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Scans YAML tokens from a MemoryBuffer.
Definition: YAMLParser.cpp:247
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
Definition: YAMLParser.cpp:849
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges=std::nullopt)
Definition: YAMLParser.cpp:260
void setError(const Twine &Message, StringRef::iterator Position)
Definition: YAMLParser.cpp:266
Token getNext()
Parse the next token and pop it from the queue.
Definition: YAMLParser.cpp:904
bool failed()
Returns true if an error occurred while parsing.
Definition: YAMLParser.cpp:282
Token & peekNext()
Parse the next token and return it without popping it.
Definition: YAMLParser.cpp:877
Represents a YAML sequence created from either a block sequence or a flow sequence.
Definition: YAMLParser.h:462
This class represents a YAML stream potentially containing multiple documents.
Definition: YAMLParser.h:86
document_iterator end()
document_iterator begin()
Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
friend class Document
Definition: YAMLParser.h:112
void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
Iterator abstraction for Documents over a Stream.
Definition: YAMLParser.h:588
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
constexpr double e
Definition: MathExtras.h:31
bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
Definition: Unicode.cpp:27
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
Definition: YAMLParser.cpp:600
std::optional< bool > parseBool(StringRef S)
Parse S as a bool according to https://yaml.org/type/bool.html.
Definition: YAMLParser.cpp:763
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
Definition: YAMLParser.cpp:684
void skip(CollectionType &C)
Definition: YAMLParser.h:398
std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...
Definition: YAMLParser.cpp:697
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:2038
std::error_code make_error_code(BitcodeError E)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2014
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1869
#define N
Token - A single YAML token.
Definition: YAMLParser.cpp:124
enum llvm::yaml::Token::TokenKind Kind
std::string Value
The value of a block scalar node.
Definition: YAMLParser.cpp:156
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input.
Definition: YAMLParser.cpp:153