35#include <system_error>
64 switch (uint8_t(Input[0])) {
66 if (Input.
size() >= 4) {
68 && uint8_t(Input[2]) == 0xFE
69 && uint8_t(Input[3]) == 0xFF)
71 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
75 if (Input.
size() >= 2 && Input[1] != 0)
79 if ( Input.
size() >= 4
80 && uint8_t(Input[1]) == 0xFE
85 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFE)
89 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFF)
93 if ( Input.
size() >= 3
94 && uint8_t(Input[1]) == 0xBB
95 && uint8_t(Input[2]) == 0xBF)
101 if (Input.
size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
104 if (Input.
size() >= 2 && Input[1] == 0)
// Out-of-line, intentionally empty definitions of anchor() for Node and the
// seven other node classes listed here.
// NOTE(review): presumably these exist so that each class has one virtual
// method defined out of line in this file (pinning its vtable to this
// translation unit) -- confirm against the class declarations, which are not
// visible in this chunk.
111void Node::anchor() {}
112void NullNode::anchor() {}
113void ScalarNode::anchor() {}
114void BlockScalarNode::anchor() {}
115void KeyValueNode::anchor() {}
116void MappingNode::anchor() {}
117void SequenceNode::anchor() {}
118void AliasNode::anchor() {}
181 unsigned FlowLevel = 0;
182 bool IsRequired =
false;
185 return Tok ==
Other.Tok;
201 if (Position <
End && (*Position & 0x80) == 0) {
202 return std::make_pair(*Position, 1);
206 if (Position + 1 <
End && ((*Position & 0xE0) == 0xC0) &&
207 ((*(Position + 1) & 0xC0) == 0x80)) {
208 uint32_t codepoint = ((*Position & 0x1F) << 6) |
209 (*(Position + 1) & 0x3F);
210 if (codepoint >= 0x80)
211 return std::make_pair(codepoint, 2);
215 if (Position + 2 <
End && ((*Position & 0xF0) == 0xE0) &&
216 ((*(Position + 1) & 0xC0) == 0x80) &&
217 ((*(Position + 2) & 0xC0) == 0x80)) {
218 uint32_t codepoint = ((*Position & 0x0F) << 12) |
219 ((*(Position + 1) & 0x3F) << 6) |
220 (*(Position + 2) & 0x3F);
223 if (codepoint >= 0x800 &&
224 (codepoint < 0xD800 || codepoint > 0xDFFF))
225 return std::make_pair(codepoint, 3);
229 if (Position + 3 <
End && ((*Position & 0xF8) == 0xF0) &&
230 ((*(Position + 1) & 0xC0) == 0x80) &&
231 ((*(Position + 2) & 0xC0) == 0x80) &&
232 ((*(Position + 3) & 0xC0) == 0x80)) {
233 uint32_t codepoint = ((*Position & 0x07) << 18) |
234 ((*(Position + 1) & 0x3F) << 12) |
235 ((*(Position + 2) & 0x3F) << 6) |
236 (*(Position + 3) & 0x3F);
237 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
238 return std::make_pair(codepoint, 4);
240 return std::make_pair(0, 0);
250 std::error_code *EC =
nullptr);
252 std::error_code *EC =
nullptr);
262 SM.
PrintMessage(Loc, Kind, Message, Ranges, std::nullopt,
300 return ::decodeUTF8(
StringRef(Position,
End - Position));
376 void advanceWhile(SkipWhileFunc Func);
381 void scan_ns_uri_char();
401 bool consumeLineBreakIfPresent();
412 void removeStaleSimpleKeyCandidates();
415 void removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level);
419 bool unrollIndent(
int ToColumn);
423 bool rollIndent(
int ToColumn
432 void scanToNextToken();
435 bool scanStreamStart();
438 bool scanStreamEnd();
441 bool scanDirective();
444 bool scanDocumentIndicator(
bool IsStart);
447 bool scanFlowCollectionStart(
bool IsSequence);
450 bool scanFlowCollectionEnd(
bool IsSequence);
453 bool scanFlowEntry();
456 bool scanBlockEntry();
465 bool scanFlowScalar(
bool IsDoubleQuoted);
468 bool scanPlainScalar();
471 bool scanAliasOrAnchor(
bool IsAlias);
474 bool scanBlockScalar(
bool IsLiteral);
482 bool scanBlockScalarIndicators(
char &StyleIndicator,
char &ChompingIndicator,
483 unsigned &IndentIndicator,
bool &IsDone);
486 char scanBlockStyleIndicator();
489 char scanBlockChompingIndicator();
492 unsigned scanBlockIndentationIndicator();
497 bool scanBlockScalarHeader(
char &ChompingIndicator,
unsigned &IndentIndicator,
503 bool findBlockScalarIndent(
unsigned &BlockIndent,
unsigned BlockExitIndent,
504 unsigned &LineBreaks,
bool &IsDone);
509 bool scanBlockScalarIndent(
unsigned BlockIndent,
unsigned BlockExitIndent,
516 bool fetchMoreTokens();
543 bool IsStartOfStream;
546 bool IsSimpleKeyAllowed;
574 if (UnicodeScalarValue <= 0x7F) {
575 Result.push_back(UnicodeScalarValue & 0x7F);
576 }
else if (UnicodeScalarValue <= 0x7FF) {
577 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
578 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
579 Result.push_back(FirstByte);
580 Result.push_back(SecondByte);
581 }
else if (UnicodeScalarValue <= 0xFFFF) {
582 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
583 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
584 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
585 Result.push_back(FirstByte);
586 Result.push_back(SecondByte);
587 Result.push_back(ThirdByte);
588 }
else if (UnicodeScalarValue <= 0x10FFFF) {
589 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
590 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
591 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
592 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
593 Result.push_back(FirstByte);
594 Result.push_back(SecondByte);
595 Result.push_back(ThirdByte);
596 Result.push_back(FourthByte);
607 OS <<
"Stream-Start: ";
610 OS <<
"Stream-End: ";
613 OS <<
"Version-Directive: ";
616 OS <<
"Tag-Directive: ";
619 OS <<
"Document-Start: ";
622 OS <<
"Document-End: ";
625 OS <<
"Block-Entry: ";
631 OS <<
"Block-Sequence-Start: ";
634 OS <<
"Block-Mapping-Start: ";
637 OS <<
"Flow-Entry: ";
640 OS <<
"Flow-Sequence-Start: ";
643 OS <<
"Flow-Sequence-End: ";
646 OS <<
"Flow-Mapping-Start: ";
649 OS <<
"Flow-Mapping-End: ";
661 OS <<
"Block Scalar: ";
675 OS <<
T.Range <<
"\n";
698 std::string EscapedInput;
701 EscapedInput +=
"\\\\";
703 EscapedInput +=
"\\\"";
705 EscapedInput +=
"\\0";
707 EscapedInput +=
"\\a";
709 EscapedInput +=
"\\b";
711 EscapedInput +=
"\\t";
713 EscapedInput +=
"\\n";
715 EscapedInput +=
"\\v";
717 EscapedInput +=
"\\f";
719 EscapedInput +=
"\\r";
721 EscapedInput +=
"\\e";
722 else if ((
unsigned char)*i < 0x20) {
723 std::string HexStr = utohexstr(*i);
724 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
725 }
else if (*i & 0x80) {
728 if (UnicodeScalarValue.second == 0) {
736 if (UnicodeScalarValue.first == 0x85)
737 EscapedInput +=
"\\N";
738 else if (UnicodeScalarValue.first == 0xA0)
739 EscapedInput +=
"\\_";
740 else if (UnicodeScalarValue.first == 0x2028)
741 EscapedInput +=
"\\L";
742 else if (UnicodeScalarValue.first == 0x2029)
743 EscapedInput +=
"\\P";
744 else if (!EscapePrintable &&
746 EscapedInput +=
StringRef(i, UnicodeScalarValue.second);
748 std::string HexStr = utohexstr(UnicodeScalarValue.first);
749 if (HexStr.size() <= 2)
750 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
751 else if (HexStr.size() <= 4)
752 EscapedInput +=
"\\u" + std::string(4 - HexStr.size(),
'0') + HexStr;
753 else if (HexStr.size() <= 8)
754 EscapedInput +=
"\\U" + std::string(8 - HexStr.size(),
'0') + HexStr;
756 i += UnicodeScalarValue.second - 1;
758 EscapedInput.push_back(*i);
851 : SM(sm), ShowColors(ShowColors), EC(EC) {
857 : SM(SM_), ShowColors(ShowColors), EC(EC) {
862 InputBuffer = Buffer;
869 IsStartOfStream =
true;
870 IsSimpleKeyAllowed =
true;
872 std::unique_ptr<MemoryBuffer> InputBufferOwner =
880 bool NeedMore =
false;
882 if (TokenQueue.
empty() || NeedMore) {
883 if (!fetchMoreTokens()) {
887 return TokenQueue.
front();
891 "fetchMoreTokens lied about getting tokens!");
893 removeStaleSimpleKeyCandidates();
895 SK.Tok = TokenQueue.
begin();
901 return TokenQueue.
front();
907 if (!TokenQueue.
empty())
912 if (TokenQueue.
empty())
922 if ( *Position == 0x09
923 || (*Position >= 0x20 && *Position <= 0x7E))
927 if (uint8_t(*Position) & 0x80) {
930 && u8d.first != 0xFEFF
931 && ( u8d.first == 0x85
932 || ( u8d.first >= 0xA0
933 && u8d.first <= 0xD7FF)
934 || ( u8d.first >= 0xE000
935 && u8d.first <= 0xFFFD)
936 || ( u8d.first >= 0x10000
937 && u8d.first <= 0x10FFFF)))
938 return Position + u8d.second;
946 if (*Position == 0x0D) {
947 if (Position + 1 != End && *(Position + 1) == 0x0A)
952 if (*Position == 0x0A)
960 if (*Position ==
' ')
968 if (*Position ==
' ' || *Position ==
'\t')
976 if (*Position ==
' ' || *Position ==
'\t')
978 return skip_nb_char(Position);
992void Scanner::advanceWhile(SkipWhileFunc Func) {
993 auto Final = skip_while(Func, Current);
994 Column += Final - Current;
1002void Scanner::scan_ns_uri_char() {
1006 if (( *Current ==
'%'
1007 && Current + 2 < End
1011 ||
StringRef(Current, 1).find_first_of(
"#;/?:@&=+$,_.!~*'()[]")
1022 setError(
"Cannot consume non-ascii characters", Current);
1027 if (uint8_t(*Current) >= 0x80) {
1028 setError(
"Cannot consume non-ascii characters", Current);
1031 if (uint8_t(*Current) ==
Expected) {
1039void Scanner::skip(
uint32_t Distance) {
1040 Current += Distance;
1042 assert(Current <= End &&
"Skipped past the end");
1046 if (Position == End)
1048 return *Position ==
' ' || *Position ==
'\t' || *Position ==
'\r' ||
1052bool Scanner::isLineEmpty(
StringRef Line) {
1053 for (
const auto *Position =
Line.begin(); Position !=
Line.end(); ++Position)
1054 if (!isBlankOrBreak(Position))
1059bool Scanner::consumeLineBreakIfPresent() {
1060 auto Next = skip_b_break(Current);
1061 if (Next == Current)
1071 ,
bool IsRequired) {
1072 if (IsSimpleKeyAllowed) {
1076 SK.Column = AtColumn;
1077 SK.IsRequired = IsRequired;
1078 SK.FlowLevel = FlowLevel;
1083void Scanner::removeStaleSimpleKeyCandidates() {
1085 i != SimpleKeys.
end();) {
1086 if (i->Line != Line || i->Column + 1024 < Column) {
1088 setError(
"Could not find expected : for simple key"
1089 , i->Tok->Range.begin());
1090 i = SimpleKeys.
erase(i);
1096void Scanner::removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level) {
1097 if (!SimpleKeys.
empty() && (SimpleKeys.
end() - 1)->FlowLevel == Level)
1101bool Scanner::unrollIndent(
int ToColumn) {
1107 while (Indent > ToColumn) {
1117bool Scanner::rollIndent(
int ToColumn
1122 if (Indent < ToColumn) {
1129 TokenQueue.
insert(InsertPoint,
T);
1134void Scanner::skipComment() {
1135 if (Current == End || *Current !=
'#')
1148void Scanner::scanToNextToken() {
1150 while (Current != End && (*Current ==
' ' || *Current ==
'\t')) {
1165 IsSimpleKeyAllowed =
true;
1169bool Scanner::scanStreamStart() {
1170 IsStartOfStream =
false;
1178 Current += EI.second;
1182bool Scanner::scanStreamEnd() {
1191 IsSimpleKeyAllowed =
false;
1200bool Scanner::scanDirective() {
1204 IsSimpleKeyAllowed =
false;
1209 Current = skip_while(&Scanner::skip_ns_char, Current);
1211 Current = skip_while(&Scanner::skip_s_white, Current);
1214 if (
Name ==
"YAML") {
1215 Current = skip_while(&Scanner::skip_ns_char, Current);
1220 }
else if(
Name ==
"TAG") {
1221 Current = skip_while(&Scanner::skip_ns_char, Current);
1222 Current = skip_while(&Scanner::skip_s_white, Current);
1223 Current = skip_while(&Scanner::skip_ns_char, Current);
1232bool Scanner::scanDocumentIndicator(
bool IsStart) {
1235 IsSimpleKeyAllowed =
false;
1245bool Scanner::scanFlowCollectionStart(
bool IsSequence) {
1254 saveSimpleKeyCandidate(--TokenQueue.
end(), Column - 1,
false);
1257 IsSimpleKeyAllowed =
true;
1262bool Scanner::scanFlowCollectionEnd(
bool IsSequence) {
1263 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1264 IsSimpleKeyAllowed =
false;
1276bool Scanner::scanFlowEntry() {
1277 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1278 IsSimpleKeyAllowed =
true;
1287bool Scanner::scanBlockEntry() {
1289 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1290 IsSimpleKeyAllowed =
true;
1299bool Scanner::scanKey() {
1303 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1304 IsSimpleKeyAllowed = !FlowLevel;
1314bool Scanner::scanValue() {
1317 if (!SimpleKeys.
empty()) {
1321 T.Range = SK.Tok->Range;
1323 for (i = TokenQueue.
begin(), e = TokenQueue.
end(); i != e; ++i) {
1336 IsSimpleKeyAllowed =
false;
1340 IsSimpleKeyAllowed = !FlowLevel;
1360 assert(Position - 1 >= First);
1364 while (
I >= First && *
I ==
'\\') --
I;
1367 return (Position - 1 -
I) % 2 == 1;
1370bool Scanner::scanFlowScalar(
bool IsDoubleQuoted) {
1372 unsigned ColStart = Column;
1373 if (IsDoubleQuoted) {
1376 while (Current != End && *Current !=
'"')
1380 }
while ( Current != End
1381 && *(Current - 1) ==
'\\'
1385 while (Current != End) {
1387 if (Current + 1 < End && *Current ==
'\'' && *(Current + 1) ==
'\'') {
1390 }
else if (*Current ==
'\'')
1394 i = skip_b_break(Current);
1409 if (Current == End) {
1410 setError(
"Expected quote at end of scalar", Current);
1420 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1422 IsSimpleKeyAllowed =
false;
1427bool Scanner::scanPlainScalar() {
1429 unsigned ColStart = Column;
1430 unsigned LeadingBlanks = 0;
1431 assert(Indent >= -1 &&
"Indent must be >= -1 !");
1432 unsigned indent =
static_cast<unsigned>(Indent + 1);
1433 while (Current != End) {
1434 if (*Current ==
'#')
1437 while (Current != End && !isBlankOrBreak(Current)) {
1438 if (FlowLevel && *Current ==
':' &&
1439 (Current + 1 == End ||
1440 !(isBlankOrBreak(Current + 1) || *(Current + 1) ==
','))) {
1441 setError(
"Found unexpected ':' while scanning a plain scalar", Current);
1446 if ( (*Current ==
':' && isBlankOrBreak(Current + 1))
1448 && (
StringRef(Current, 1).find_first_of(
",:?[]{}")
1460 if (!isBlankOrBreak(Current))
1465 while (isBlankOrBreak(Tmp)) {
1468 if (LeadingBlanks && (Column < indent) && *Tmp ==
'\t') {
1469 setError(
"Found invalid tab character in indentation", Tmp);
1475 i = skip_b_break(Tmp);
1484 if (!FlowLevel && Column < indent)
1489 if (Start == Current) {
1490 setError(
"Got empty plain scalar", Start);
1499 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1501 IsSimpleKeyAllowed =
false;
1506bool Scanner::scanAliasOrAnchor(
bool IsAlias) {
1508 unsigned ColStart = Column;
1510 while (Current != End) {
1511 if ( *Current ==
'[' || *Current ==
']'
1512 || *Current ==
'{' || *Current ==
'}'
1523 if (Start + 1 == Current) {
1524 setError(
"Got empty alias or anchor", Start);
1534 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1536 IsSimpleKeyAllowed =
false;
1541bool Scanner::scanBlockScalarIndicators(
char &StyleIndicator,
1542 char &ChompingIndicator,
1543 unsigned &IndentIndicator,
1545 StyleIndicator = scanBlockStyleIndicator();
1546 if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1551char Scanner::scanBlockStyleIndicator() {
1552 char Indicator =
' ';
1553 if (Current != End && (*Current ==
'>' || *Current ==
'|')) {
1554 Indicator = *Current;
1560char Scanner::scanBlockChompingIndicator() {
1561 char Indicator =
' ';
1562 if (Current != End && (*Current ==
'+' || *Current ==
'-')) {
1563 Indicator = *Current;
1575 if (ChompingIndicator ==
'-')
1577 if (ChompingIndicator ==
'+')
1580 return Str.empty() ? 0 : 1;
1583unsigned Scanner::scanBlockIndentationIndicator() {
1584 unsigned Indent = 0;
1585 if (Current != End && (*Current >=
'1' && *Current <=
'9')) {
1592bool Scanner::scanBlockScalarHeader(
char &ChompingIndicator,
1593 unsigned &IndentIndicator,
bool &IsDone) {
1594 auto Start = Current;
1596 ChompingIndicator = scanBlockChompingIndicator();
1597 IndentIndicator = scanBlockIndentationIndicator();
1599 if (ChompingIndicator ==
' ')
1600 ChompingIndicator = scanBlockChompingIndicator();
1601 Current = skip_while(&Scanner::skip_s_white, Current);
1604 if (Current == End) {
1613 if (!consumeLineBreakIfPresent()) {
1614 setError(
"Expected a line break after block scalar header", Current);
1620bool Scanner::findBlockScalarIndent(
unsigned &BlockIndent,
1621 unsigned BlockExitIndent,
1622 unsigned &LineBreaks,
bool &IsDone) {
1623 unsigned MaxAllSpaceLineCharacters = 0;
1627 advanceWhile(&Scanner::skip_s_space);
1628 if (skip_nb_char(Current) != Current) {
1630 if (Column <= BlockExitIndent) {
1635 BlockIndent = Column;
1636 if (MaxAllSpaceLineCharacters > BlockIndent) {
1638 "Leading all-spaces line must be smaller than the block indent",
1639 LongestAllSpaceLine);
1644 if (skip_b_break(Current) != Current &&
1645 Column > MaxAllSpaceLineCharacters) {
1648 MaxAllSpaceLineCharacters = Column;
1649 LongestAllSpaceLine = Current;
1653 if (Current == End) {
1658 if (!consumeLineBreakIfPresent()) {
1667bool Scanner::scanBlockScalarIndent(
unsigned BlockIndent,
1668 unsigned BlockExitIndent,
bool &IsDone) {
1670 while (Column < BlockIndent) {
1671 auto I = skip_s_space(Current);
1678 if (skip_nb_char(Current) == Current)
1681 if (Column <= BlockExitIndent) {
1686 if (Column < BlockIndent) {
1687 if (Current != End && *Current ==
'#') {
1691 setError(
"A text line is less indented than the block scalar", Current);
1697bool Scanner::scanBlockScalar(
bool IsLiteral) {
1698 assert(*Current ==
'|' || *Current ==
'>');
1699 char StyleIndicator;
1700 char ChompingIndicator;
1701 unsigned BlockIndent;
1702 bool IsDone =
false;
1703 if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
1708 bool IsFolded = StyleIndicator ==
'>';
1710 const auto *Start = Current;
1711 unsigned BlockExitIndent = Indent < 0 ? 0 : (
unsigned)Indent;
1712 unsigned LineBreaks = 0;
1713 if (BlockIndent == 0) {
1714 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1722 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1728 auto LineStart = Current;
1729 advanceWhile(&Scanner::skip_nb_char);
1730 if (LineStart != Current) {
1731 if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
1735 if (LineBreaks == 1) {
1736 Str.append(LineBreaks,
1737 isLineEmpty(
StringRef(LineStart, Current - LineStart))
1747 Str.append(LineBreaks,
'\n');
1748 Str.append(
StringRef(LineStart, Current - LineStart));
1756 if (!consumeLineBreakIfPresent())
1761 if (Current == End && !LineBreaks)
1768 IsSimpleKeyAllowed =
true;
1773 T.Value = std::string(Str);
1778bool Scanner::scanTag() {
1780 unsigned ColStart = Column;
1782 if (Current == End || isBlankOrBreak(Current));
1783 else if (*Current ==
'<') {
1790 Current = skip_while(&Scanner::skip_ns_char, Current);
1799 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1801 IsSimpleKeyAllowed =
false;
1806bool Scanner::fetchMoreTokens() {
1807 if (IsStartOfStream)
1808 return scanStreamStart();
1813 return scanStreamEnd();
1815 removeStaleSimpleKeyCandidates();
1817 unrollIndent(Column);
1819 if (Column == 0 && *Current ==
'%')
1820 return scanDirective();
1822 if (Column == 0 && Current + 4 <= End
1824 && *(Current + 1) ==
'-'
1825 && *(Current + 2) ==
'-'
1826 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1827 return scanDocumentIndicator(
true);
1829 if (Column == 0 && Current + 4 <= End
1831 && *(Current + 1) ==
'.'
1832 && *(Current + 2) ==
'.'
1833 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1834 return scanDocumentIndicator(
false);
1836 if (*Current ==
'[')
1837 return scanFlowCollectionStart(
true);
1839 if (*Current ==
'{')
1840 return scanFlowCollectionStart(
false);
1842 if (*Current ==
']')
1843 return scanFlowCollectionEnd(
true);
1845 if (*Current ==
'}')
1846 return scanFlowCollectionEnd(
false);
1848 if (*Current ==
',')
1849 return scanFlowEntry();
1851 if (*Current ==
'-' && isBlankOrBreak(Current + 1))
1852 return scanBlockEntry();
1854 if (*Current ==
'?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1857 if (*Current ==
':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1860 if (*Current ==
'*')
1861 return scanAliasOrAnchor(
true);
1863 if (*Current ==
'&')
1864 return scanAliasOrAnchor(
false);
1866 if (*Current ==
'!')
1869 if (*Current ==
'|' && !FlowLevel)
1870 return scanBlockScalar(
true);
1872 if (*Current ==
'>' && !FlowLevel)
1873 return scanBlockScalar(
false);
1875 if (*Current ==
'\'')
1876 return scanFlowScalar(
false);
1878 if (*Current ==
'"')
1879 return scanFlowScalar(
true);
1883 if (!(isBlankOrBreak(Current)
1884 || FirstChar.find_first_of(
"-?:,[]{}#&*!|>'\"%@`") !=
StringRef::npos)
1885 || (*Current ==
'-' && !isBlankOrBreak(Current + 1))
1886 || (!FlowLevel && (*Current ==
'?' || *Current ==
':')
1887 && isBlankOrBreak(Current + 1))
1888 || (!FlowLevel && *Current ==
':'
1889 && Current + 2 < End
1890 && *(Current + 1) ==
':'
1891 && !isBlankOrBreak(Current + 2)))
1892 return scanPlainScalar();
1894 setError(
"Unrecognized character while tokenizing.", Current);
1899 std::error_code *EC)
1900 : scanner(new
Scanner(Input, SM, ShowColors, EC)) {}
1903 std::error_code *EC)
1904 : scanner(new
Scanner(InputBuffer, SM, ShowColors, EC)) {}
1916 scanner->printError(Range.Start, Kind, Msg, Range);
1926 CurrentDoc.reset(
new Document(*
this));
1943 SourceRange =
SMRange(Start, Start);
1948 if (!Raw.
empty() && Raw !=
"!") {
1951 Ret = std::string(
Doc->getTagMap().find(
"!")->second);
1955 Ret = std::string(
Doc->getTagMap().find(
"!!")->second);
1960 std::map<StringRef, StringRef>::const_iterator It =
1961 Doc->getTagMap().find(TagHandle);
1962 if (It !=
Doc->getTagMap().end())
1963 Ret = std::string(It->second);
1967 T.Range = TagHandle;
1977 return "tag:yaml.org,2002:null";
1981 return "tag:yaml.org,2002:str";
1983 return "tag:yaml.org,2002:map";
1985 return "tag:yaml.org,2002:seq";
1992 return Doc->peekNext();
1996 return Doc->getNext();
2000 return Doc->parseBlockNode();
2004 return Doc->NodeAllocator;
2008 Doc->setError(Msg, Tok);
2012 return Doc->failed();
2017 if (
Value[0] ==
'"') {
2023 return unescapeDoubleQuoted(UnquotedValue, i, Storage);
2024 return UnquotedValue;
2025 }
else if (
Value[0] ==
'\'') {
2037 UnquotedValue = UnquotedValue.
substr(i + 2);
2042 return UnquotedValue;
2048 return Value.rtrim(
"\x0A\x0D\x20\x09");
2063 UnquotedValue = UnquotedValue.
substr(i);
2065 assert(!UnquotedValue.
empty() &&
"Can't be empty!");
2068 switch (UnquotedValue[0]) {
2072 if ( UnquotedValue.
size() > 1
2073 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
2074 UnquotedValue = UnquotedValue.
substr(1);
2075 UnquotedValue = UnquotedValue.
substr(1);
2078 if (UnquotedValue.
size() == 1) {
2081 setError(
"Unrecognized escape code",
T);
2084 UnquotedValue = UnquotedValue.
substr(1);
2085 switch (UnquotedValue[0]) {
2089 setError(
"Unrecognized escape code",
T);
2095 if ( UnquotedValue.
size() > 1
2096 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
2097 UnquotedValue = UnquotedValue.
substr(1);
2154 if (UnquotedValue.
size() < 3)
2157 unsigned int UnicodeScalarValue;
2160 UnicodeScalarValue = 0xFFFD;
2162 UnquotedValue = UnquotedValue.
substr(2);
2166 if (UnquotedValue.
size() < 5)
2169 unsigned int UnicodeScalarValue;
2172 UnicodeScalarValue = 0xFFFD;
2174 UnquotedValue = UnquotedValue.
substr(4);
2178 if (UnquotedValue.
size() < 9)
2181 unsigned int UnicodeScalarValue;
2184 UnicodeScalarValue = 0xFFFD;
2186 UnquotedValue = UnquotedValue.
substr(8);
2190 UnquotedValue = UnquotedValue.
substr(1);
2248 setError(
"Unexpected token in Key Value.", t);
2264void MappingNode::increment() {
2267 CurrentEntry =
nullptr;
2271 CurrentEntry->
skip();
2274 CurrentEntry =
nullptr;
2287 CurrentEntry =
nullptr;
2290 setError(
"Unexpected token. Expected Key or Block End",
T);
2294 CurrentEntry =
nullptr;
2308 CurrentEntry =
nullptr;
2311 setError(
"Unexpected token. Expected Key, Flow Entry, or Flow "
2315 CurrentEntry =
nullptr;
2323 CurrentEntry =
nullptr;
2327 CurrentEntry->
skip();
2334 if (!CurrentEntry) {
2336 CurrentEntry =
nullptr;
2342 CurrentEntry =
nullptr;
2345 setError(
"Unexpected token. Expected Block Entry or Block End."
2350 CurrentEntry =
nullptr;
2357 if (!CurrentEntry) {
2359 CurrentEntry =
nullptr;
2365 CurrentEntry =
nullptr;
2367 }
else if (SeqType ==
ST_Flow) {
2372 WasPreviousTokenFlowEntry =
true;
2380 CurrentEntry =
nullptr;
2385 setError(
"Could not find closing ]!",
T);
2388 CurrentEntry =
nullptr;
2391 if (!WasPreviousTokenFlowEntry) {
2392 setError(
"Expected , between entries!",
T);
2394 CurrentEntry =
nullptr;
2399 if (!CurrentEntry) {
2402 WasPreviousTokenFlowEntry =
false;
2411 TagMap[
"!!"] =
"tag:yaml.org,2002:";
2413 if (parseDirectives())
2421 if (stream.scanner->failed())
2436Token &Document::peekNext() {
2437 return stream.scanner->peekNext();
2440Token Document::getNext() {
2441 return stream.scanner->getNext();
2444void Document::setError(
const Twine &Message,
Token &Location)
const {
2445 stream.scanner->setError(Message, Location.Range.begin());
2448bool Document::failed()
const {
2449 return stream.scanner->failed();
2461 return new (NodeAllocator)
AliasNode(stream.CurrentDoc,
T.Range.substr(1));
2464 setError(
"Already encountered an anchor for this node!",
T);
2467 AnchorInfo = getNext();
2469 goto parse_property;
2472 setError(
"Already encountered a tag for this node!",
T);
2475 TagInfo = getNext();
2477 goto parse_property;
2487 return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
2493 return new (NodeAllocator)
2500 return new (NodeAllocator)
2507 return new (NodeAllocator)
2514 return new (NodeAllocator)
2521 return new (NodeAllocator)
2528 StringRef NullTerminatedStr(
T.Value.c_str(),
T.Value.length() + 1);
2530 return new (NodeAllocator)
2532 TagInfo.
Range, StrCopy,
T.Range);
2536 return new (NodeAllocator)
2547 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2551 if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
2552 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2554 setError(
"Unexpected token",
T);
2564bool Document::parseDirectives() {
2565 bool isDirective =
false;
2569 parseTAGDirective();
2572 parseYAMLDirective();
2580void Document::parseYAMLDirective() {
2584void Document::parseTAGDirective() {
2588 T =
T.substr(
T.find_first_of(
" \t")).ltrim(
" \t");
2589 std::size_t HandleEnd =
T.find_first_of(
" \t");
2590 StringRef TagHandle =
T.substr(0, HandleEnd);
2591 StringRef TagPrefix =
T.substr(HandleEnd).ltrim(
" \t");
2592 TagMap[TagHandle] = TagPrefix;
2595bool Document::expectToken(
int TK) {
2598 setError(
"Unexpected token",
T);
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
std::optional< std::vector< StOtherPiece > > Other
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallString class.
This file defines the SmallVector class.
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.
static bool is_ns_hex_digit(const char C)
static bool is_ns_word_char(const char C)
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
@ UEF_UTF32_LE
UTF-32 Little Endian.
@ UEF_UTF16_BE
UTF-16 Big Endian.
@ UEF_UTF16_LE
UTF-16 Little Endian.
@ UEF_UTF32_BE
UTF-32 Big Endian.
@ UEF_UTF8
UTF-8 or ASCII.
@ UEF_Unknown
Not a valid Unicode encoding.
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
static UTF8Decoded decodeUTF8(StringRef Range)
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
A linked-list with a custom, local allocator.
iterator insert(iterator I, T &&V)
void resetAlloc()
Reset the underlying allocator.
IteratorImpl< T, typename list_type::iterator > iterator
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Allocate memory in an ever growing pool, as if by bump-pointer.
Tagged union holding either a T or a Error.
const char * getBufferStart() const
const char * getBufferEnd() const
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
Represents a range in source code.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
char front() const
front - Get the first character in the string.
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
bool startswith(StringRef Prefix) const
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
StringRef copy(Allocator &A) const
static constexpr size_t npos
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
TypeID
Definitions of all of the base types for the Type system.
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
Represents an alias to a Node with an anchor.
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
A YAML Stream is a sequence of Documents.
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
bool skip()
Finish parsing the current document and return true if there are more.
Node * getRoot()
Parse and return the root level node.
Document(Stream &ParentStream)
Node * getValue()
Parse and return the value.
Node * getKey()
Parse and return the key.
Represents a YAML map created from either a block map or a flow map.
@ MT_Inline
An inline mapping node is used for "[key: value]".
Abstract base class for all Nodes.
StringRef getRawTag() const
Get the tag as it was written in the document.
unsigned int getType() const
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
std::unique_ptr< Document > & Doc
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
BumpPtrAllocator & getAllocator()
void setError(const Twine &Message, Token &Location) const
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Scans YAML tokens from a MemoryBuffer.
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges=std::nullopt)
void setError(const Twine &Message, StringRef::iterator Position)
Token getNext()
Parse the next token and pop it from the queue.
bool failed()
Returns true if an error occurred while parsing.
Token & peekNext()
Parse the next token and return it without popping it.
Represents a YAML sequence created from either a block sequence or a flow sequence.
This class represents a YAML stream potentially containing multiple documents.
document_iterator begin()
Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
Iterator abstraction for Documents over a Stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
std::optional< bool > parseBool(StringRef S)
Parse S as a bool according to https://yaml.org/type/bool.html.
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
void skip(CollectionType &C)
std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...
This is an optimization pass for GlobalISel generic memory operations.
std::error_code make_error_code(BitcodeError E)
testing::Matcher< const detail::ErrorHolder & > Failed()
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Token - A single YAML token.
enum llvm::yaml::Token::TokenKind Kind
std::string Value
The value of a block scalar node.
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input.