LLVM 17.0.0git
RustDemangle.cpp
Go to the documentation of this file.
1//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a demangler for Rust v0 mangled symbols as specified in
10// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11//
12//===----------------------------------------------------------------------===//
13
17
18#include <algorithm>
19#include <cassert>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23
24using namespace llvm;
25
26using llvm::itanium_demangle::OutputBuffer;
27using llvm::itanium_demangle::ScopedOverride;
28
29namespace {
30
31struct Identifier {
32 std::string_view Name;
33 bool Punycode;
34
35 bool empty() const { return Name.empty(); }
36};
37
38enum class BasicType {
39 Bool,
40 Char,
41 I8,
42 I16,
43 I32,
44 I64,
45 I128,
46 ISize,
47 U8,
48 U16,
49 U32,
50 U64,
51 U128,
52 USize,
53 F32,
54 F64,
55 Str,
56 Placeholder,
57 Unit,
59 Never,
60};
61
62enum class IsInType {
63 No,
64 Yes,
65};
66
67enum class LeaveGenericsOpen {
68 No,
69 Yes,
70};
71
72class Demangler {
73 // Maximum recursion level. Used to avoid stack overflow.
74 size_t MaxRecursionLevel;
75 // Current recursion level.
76 size_t RecursionLevel;
77 size_t BoundLifetimes;
78 // Input string that is being demangled with "_R" prefix removed.
79 std::string_view Input;
80 // Position in the input string.
81 size_t Position;
82 // When true, print methods append the output to the stream.
83 // When false, the output is suppressed.
84 bool Print;
85 // True if an error occurred.
86 bool Error;
87
88public:
89 // Demangled output.
90 OutputBuffer Output;
91
92 Demangler(size_t MaxRecursionLevel = 500);
93
94 bool demangle(std::string_view MangledName);
95
96private:
97 bool demanglePath(IsInType Type,
98 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
99 void demangleImplPath(IsInType InType);
100 void demangleGenericArg();
101 void demangleType();
102 void demangleFnSig();
103 void demangleDynBounds();
104 void demangleDynTrait();
105 void demangleOptionalBinder();
106 void demangleConst();
107 void demangleConstInt();
108 void demangleConstBool();
109 void demangleConstChar();
110
111 template <typename Callable> void demangleBackref(Callable Demangler) {
112 uint64_t Backref = parseBase62Number();
113 if (Error || Backref >= Position) {
114 Error = true;
115 return;
116 }
117
118 if (!Print)
119 return;
120
121 ScopedOverride<size_t> SavePosition(Position, Position);
122 Position = Backref;
123 Demangler();
124 }
125
126 Identifier parseIdentifier();
127 uint64_t parseOptionalBase62Number(char Tag);
128 uint64_t parseBase62Number();
129 uint64_t parseDecimalNumber();
130 uint64_t parseHexNumber(std::string_view &HexDigits);
131
132 void print(char C);
133 void print(std::string_view S);
134 void printDecimalNumber(uint64_t N);
135 void printBasicType(BasicType);
136 void printLifetime(uint64_t Index);
137 void printIdentifier(Identifier Ident);
138
139 char look() const;
140 char consume();
141 bool consumeIf(char Prefix);
142
143 bool addAssign(uint64_t &A, uint64_t B);
144 bool mulAssign(uint64_t &A, uint64_t B);
145};
146
147} // namespace
148
149char *llvm::rustDemangle(const char *MangledName) {
150 if (MangledName == nullptr)
151 return nullptr;
152
153 // Return early if mangled name doesn't look like a Rust symbol.
154 std::string_view Mangled(MangledName);
155 if (!llvm::itanium_demangle::starts_with(Mangled, "_R"))
156 return nullptr;
157
158 Demangler D;
159 if (!D.demangle(Mangled)) {
160 std::free(D.Output.getBuffer());
161 return nullptr;
162 }
163
164 D.Output += '\0';
165
166 return D.Output.getBuffer();
167}
168
169Demangler::Demangler(size_t MaxRecursionLevel)
170 : MaxRecursionLevel(MaxRecursionLevel) {}
171
172static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
173
174static inline bool isHexDigit(const char C) {
175 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
176}
177
178static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
179
180static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
181
182/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
183static inline bool isValid(const char C) {
184 return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
185}
186
187// Demangles Rust v0 mangled symbol. Returns true when successful, and false
188// otherwise. The demangled symbol is stored in Output field. It is
189// responsibility of the caller to free the memory behind the output stream.
190//
191// <symbol-name> = "_R" <path> [<instantiating-crate>]
192bool Demangler::demangle(std::string_view Mangled) {
193 Position = 0;
194 Error = false;
195 Print = true;
196 RecursionLevel = 0;
197 BoundLifetimes = 0;
198
199 if (!llvm::itanium_demangle::starts_with(Mangled, "_R")) {
200 Error = true;
201 return false;
202 }
203 Mangled.remove_prefix(2);
204 size_t Dot = Mangled.find('.');
205 Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);
206
207 demanglePath(IsInType::No);
208
209 if (Position != Input.size()) {
210 ScopedOverride<bool> SavePrint(Print, false);
211 demanglePath(IsInType::No);
212 }
213
214 if (Position != Input.size())
215 Error = true;
216
217 if (Dot != std::string_view::npos) {
218 print(" (");
219 print(Mangled.substr(Dot));
220 print(")");
221 }
222
223 return !Error;
224}
225
226// Demangles a path. InType indicates whether a path is inside a type. When
227// LeaveOpen is true, a closing `>` after generic arguments is omitted from the
228// output. Return value indicates whether generics arguments have been left
229// open.
230//
231// <path> = "C" <identifier> // crate root
232// | "M" <impl-path> <type> // <T> (inherent impl)
233// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
234// | "Y" <type> <path> // <T as Trait> (trait definition)
235// | "N" <ns> <path> <identifier> // ...::ident (nested path)
236// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
237// | <backref>
238// <identifier> = [<disambiguator>] <undisambiguated-identifier>
239// <ns> = "C" // closure
240// | "S" // shim
241// | <A-Z> // other special namespaces
242// | <a-z> // internal namespaces
243bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
244 if (Error || RecursionLevel >= MaxRecursionLevel) {
245 Error = true;
246 return false;
247 }
248 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
249
250 switch (consume()) {
251 case 'C': {
252 parseOptionalBase62Number('s');
253 printIdentifier(parseIdentifier());
254 break;
255 }
256 case 'M': {
257 demangleImplPath(InType);
258 print("<");
259 demangleType();
260 print(">");
261 break;
262 }
263 case 'X': {
264 demangleImplPath(InType);
265 print("<");
266 demangleType();
267 print(" as ");
268 demanglePath(IsInType::Yes);
269 print(">");
270 break;
271 }
272 case 'Y': {
273 print("<");
274 demangleType();
275 print(" as ");
276 demanglePath(IsInType::Yes);
277 print(">");
278 break;
279 }
280 case 'N': {
281 char NS = consume();
282 if (!isLower(NS) && !isUpper(NS)) {
283 Error = true;
284 break;
285 }
286 demanglePath(InType);
287
288 uint64_t Disambiguator = parseOptionalBase62Number('s');
289 Identifier Ident = parseIdentifier();
290
291 if (isUpper(NS)) {
292 // Special namespaces
293 print("::{");
294 if (NS == 'C')
295 print("closure");
296 else if (NS == 'S')
297 print("shim");
298 else
299 print(NS);
300 if (!Ident.empty()) {
301 print(":");
302 printIdentifier(Ident);
303 }
304 print('#');
305 printDecimalNumber(Disambiguator);
306 print('}');
307 } else {
308 // Implementation internal namespaces.
309 if (!Ident.empty()) {
310 print("::");
311 printIdentifier(Ident);
312 }
313 }
314 break;
315 }
316 case 'I': {
317 demanglePath(InType);
318 // Omit "::" when in a type, where it is optional.
319 if (InType == IsInType::No)
320 print("::");
321 print("<");
322 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
323 if (I > 0)
324 print(", ");
325 demangleGenericArg();
326 }
327 if (LeaveOpen == LeaveGenericsOpen::Yes)
328 return true;
329 else
330 print(">");
331 break;
332 }
333 case 'B': {
334 bool IsOpen = false;
335 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
336 return IsOpen;
337 }
338 default:
339 Error = true;
340 break;
341 }
342
343 return false;
344}
345
346// <impl-path> = [<disambiguator>] <path>
347// <disambiguator> = "s" <base-62-number>
348void Demangler::demangleImplPath(IsInType InType) {
349 ScopedOverride<bool> SavePrint(Print, false);
350 parseOptionalBase62Number('s');
351 demanglePath(InType);
352}
353
354// <generic-arg> = <lifetime>
355// | <type>
356// | "K" <const>
357// <lifetime> = "L" <base-62-number>
358void Demangler::demangleGenericArg() {
359 if (consumeIf('L'))
360 printLifetime(parseBase62Number());
361 else if (consumeIf('K'))
362 demangleConst();
363 else
364 demangleType();
365}
366
367// <basic-type> = "a" // i8
368// | "b" // bool
369// | "c" // char
370// | "d" // f64
371// | "e" // str
372// | "f" // f32
373// | "h" // u8
374// | "i" // isize
375// | "j" // usize
376// | "l" // i32
377// | "m" // u32
378// | "n" // i128
379// | "o" // u128
380// | "s" // i16
381// | "t" // u16
382// | "u" // ()
383// | "v" // ...
384// | "x" // i64
385// | "y" // u64
386// | "z" // !
387// | "p" // placeholder (e.g. for generic params), shown as _
388static bool parseBasicType(char C, BasicType &Type) {
389 switch (C) {
390 case 'a':
391 Type = BasicType::I8;
392 return true;
393 case 'b':
394 Type = BasicType::Bool;
395 return true;
396 case 'c':
397 Type = BasicType::Char;
398 return true;
399 case 'd':
400 Type = BasicType::F64;
401 return true;
402 case 'e':
403 Type = BasicType::Str;
404 return true;
405 case 'f':
406 Type = BasicType::F32;
407 return true;
408 case 'h':
409 Type = BasicType::U8;
410 return true;
411 case 'i':
412 Type = BasicType::ISize;
413 return true;
414 case 'j':
415 Type = BasicType::USize;
416 return true;
417 case 'l':
418 Type = BasicType::I32;
419 return true;
420 case 'm':
421 Type = BasicType::U32;
422 return true;
423 case 'n':
424 Type = BasicType::I128;
425 return true;
426 case 'o':
427 Type = BasicType::U128;
428 return true;
429 case 'p':
430 Type = BasicType::Placeholder;
431 return true;
432 case 's':
433 Type = BasicType::I16;
434 return true;
435 case 't':
436 Type = BasicType::U16;
437 return true;
438 case 'u':
439 Type = BasicType::Unit;
440 return true;
441 case 'v':
442 Type = BasicType::Variadic;
443 return true;
444 case 'x':
445 Type = BasicType::I64;
446 return true;
447 case 'y':
448 Type = BasicType::U64;
449 return true;
450 case 'z':
451 Type = BasicType::Never;
452 return true;
453 default:
454 return false;
455 }
456}
457
458void Demangler::printBasicType(BasicType Type) {
459 switch (Type) {
460 case BasicType::Bool:
461 print("bool");
462 break;
463 case BasicType::Char:
464 print("char");
465 break;
466 case BasicType::I8:
467 print("i8");
468 break;
469 case BasicType::I16:
470 print("i16");
471 break;
472 case BasicType::I32:
473 print("i32");
474 break;
475 case BasicType::I64:
476 print("i64");
477 break;
478 case BasicType::I128:
479 print("i128");
480 break;
481 case BasicType::ISize:
482 print("isize");
483 break;
484 case BasicType::U8:
485 print("u8");
486 break;
487 case BasicType::U16:
488 print("u16");
489 break;
490 case BasicType::U32:
491 print("u32");
492 break;
493 case BasicType::U64:
494 print("u64");
495 break;
496 case BasicType::U128:
497 print("u128");
498 break;
499 case BasicType::USize:
500 print("usize");
501 break;
502 case BasicType::F32:
503 print("f32");
504 break;
505 case BasicType::F64:
506 print("f64");
507 break;
508 case BasicType::Str:
509 print("str");
510 break;
511 case BasicType::Placeholder:
512 print("_");
513 break;
514 case BasicType::Unit:
515 print("()");
516 break;
517 case BasicType::Variadic:
518 print("...");
519 break;
520 case BasicType::Never:
521 print("!");
522 break;
523 }
524}
525
526// <type> = | <basic-type>
527// | <path> // named type
528// | "A" <type> <const> // [T; N]
529// | "S" <type> // [T]
530// | "T" {<type>} "E" // (T1, T2, T3, ...)
531// | "R" [<lifetime>] <type> // &T
532// | "Q" [<lifetime>] <type> // &mut T
533// | "P" <type> // *const T
534// | "O" <type> // *mut T
535// | "F" <fn-sig> // fn(...) -> ...
536// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
537// | <backref> // backref
538void Demangler::demangleType() {
539 if (Error || RecursionLevel >= MaxRecursionLevel) {
540 Error = true;
541 return;
542 }
543 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
544
545 size_t Start = Position;
546 char C = consume();
547 BasicType Type;
548 if (parseBasicType(C, Type))
549 return printBasicType(Type);
550
551 switch (C) {
552 case 'A':
553 print("[");
554 demangleType();
555 print("; ");
556 demangleConst();
557 print("]");
558 break;
559 case 'S':
560 print("[");
561 demangleType();
562 print("]");
563 break;
564 case 'T': {
565 print("(");
566 size_t I = 0;
567 for (; !Error && !consumeIf('E'); ++I) {
568 if (I > 0)
569 print(", ");
570 demangleType();
571 }
572 if (I == 1)
573 print(",");
574 print(")");
575 break;
576 }
577 case 'R':
578 case 'Q':
579 print('&');
580 if (consumeIf('L')) {
581 if (auto Lifetime = parseBase62Number()) {
582 printLifetime(Lifetime);
583 print(' ');
584 }
585 }
586 if (C == 'Q')
587 print("mut ");
588 demangleType();
589 break;
590 case 'P':
591 print("*const ");
592 demangleType();
593 break;
594 case 'O':
595 print("*mut ");
596 demangleType();
597 break;
598 case 'F':
599 demangleFnSig();
600 break;
601 case 'D':
602 demangleDynBounds();
603 if (consumeIf('L')) {
604 if (auto Lifetime = parseBase62Number()) {
605 print(" + ");
606 printLifetime(Lifetime);
607 }
608 } else {
609 Error = true;
610 }
611 break;
612 case 'B':
613 demangleBackref([&] { demangleType(); });
614 break;
615 default:
616 Position = Start;
617 demanglePath(IsInType::Yes);
618 break;
619 }
620}
621
622// <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
623// <abi> = "C"
624// | <undisambiguated-identifier>
625void Demangler::demangleFnSig() {
626 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
627 demangleOptionalBinder();
628
629 if (consumeIf('U'))
630 print("unsafe ");
631
632 if (consumeIf('K')) {
633 print("extern \"");
634 if (consumeIf('C')) {
635 print("C");
636 } else {
637 Identifier Ident = parseIdentifier();
638 if (Ident.Punycode)
639 Error = true;
640 for (char C : Ident.Name) {
641 // When mangling ABI string, the "-" is replaced with "_".
642 if (C == '_')
643 C = '-';
644 print(C);
645 }
646 }
647 print("\" ");
648 }
649
650 print("fn(");
651 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
652 if (I > 0)
653 print(", ");
654 demangleType();
655 }
656 print(")");
657
658 if (consumeIf('u')) {
659 // Skip the unit type from the output.
660 } else {
661 print(" -> ");
662 demangleType();
663 }
664}
665
666// <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
667void Demangler::demangleDynBounds() {
668 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
669 print("dyn ");
670 demangleOptionalBinder();
671 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
672 if (I > 0)
673 print(" + ");
674 demangleDynTrait();
675 }
676}
677
678// <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
679// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
680void Demangler::demangleDynTrait() {
681 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
682 while (!Error && consumeIf('p')) {
683 if (!IsOpen) {
684 IsOpen = true;
685 print('<');
686 } else {
687 print(", ");
688 }
689 print(parseIdentifier().Name);
690 print(" = ");
691 demangleType();
692 }
693 if (IsOpen)
694 print(">");
695}
696
697// Demangles optional binder and updates the number of bound lifetimes.
698//
699// <binder> = "G" <base-62-number>
700void Demangler::demangleOptionalBinder() {
701 uint64_t Binder = parseOptionalBase62Number('G');
702 if (Error || Binder == 0)
703 return;
704
705 // In valid inputs each bound lifetime is referenced later. Referencing a
706 // lifetime requires at least one byte of input. Reject inputs that are too
707 // short to reference all bound lifetimes. Otherwise demangling of invalid
708 // binders could generate excessive amounts of output.
709 if (Binder >= Input.size() - BoundLifetimes) {
710 Error = true;
711 return;
712 }
713
714 print("for<");
715 for (size_t I = 0; I != Binder; ++I) {
716 BoundLifetimes += 1;
717 if (I > 0)
718 print(", ");
719 printLifetime(1);
720 }
721 print("> ");
722}
723
724// <const> = <basic-type> <const-data>
725// | "p" // placeholder
726// | <backref>
727void Demangler::demangleConst() {
728 if (Error || RecursionLevel >= MaxRecursionLevel) {
729 Error = true;
730 return;
731 }
732 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
733
734 char C = consume();
735 BasicType Type;
736 if (parseBasicType(C, Type)) {
737 switch (Type) {
738 case BasicType::I8:
739 case BasicType::I16:
740 case BasicType::I32:
741 case BasicType::I64:
742 case BasicType::I128:
743 case BasicType::ISize:
744 case BasicType::U8:
745 case BasicType::U16:
746 case BasicType::U32:
747 case BasicType::U64:
748 case BasicType::U128:
749 case BasicType::USize:
750 demangleConstInt();
751 break;
752 case BasicType::Bool:
753 demangleConstBool();
754 break;
755 case BasicType::Char:
756 demangleConstChar();
757 break;
758 case BasicType::Placeholder:
759 print('_');
760 break;
761 default:
762 Error = true;
763 break;
764 }
765 } else if (C == 'B') {
766 demangleBackref([&] { demangleConst(); });
767 } else {
768 Error = true;
769 }
770}
771
772// <const-data> = ["n"] <hex-number>
773void Demangler::demangleConstInt() {
774 if (consumeIf('n'))
775 print('-');
776
777 std::string_view HexDigits;
778 uint64_t Value = parseHexNumber(HexDigits);
779 if (HexDigits.size() <= 16) {
780 printDecimalNumber(Value);
781 } else {
782 print("0x");
783 print(HexDigits);
784 }
785}
786
787// <const-data> = "0_" // false
788// | "1_" // true
789void Demangler::demangleConstBool() {
790 std::string_view HexDigits;
791 parseHexNumber(HexDigits);
792 if (HexDigits == "0")
793 print("false");
794 else if (HexDigits == "1")
795 print("true");
796 else
797 Error = true;
798}
799
800/// Returns true if CodePoint represents a printable ASCII character.
801static bool isAsciiPrintable(uint64_t CodePoint) {
802 return 0x20 <= CodePoint && CodePoint <= 0x7e;
803}
804
805// <const-data> = <hex-number>
806void Demangler::demangleConstChar() {
807 std::string_view HexDigits;
808 uint64_t CodePoint = parseHexNumber(HexDigits);
809 if (Error || HexDigits.size() > 6) {
810 Error = true;
811 return;
812 }
813
814 print("'");
815 switch (CodePoint) {
816 case '\t':
817 print(R"(\t)");
818 break;
819 case '\r':
820 print(R"(\r)");
821 break;
822 case '\n':
823 print(R"(\n)");
824 break;
825 case '\\':
826 print(R"(\\)");
827 break;
828 case '"':
829 print(R"(")");
830 break;
831 case '\'':
832 print(R"(\')");
833 break;
834 default:
835 if (isAsciiPrintable(CodePoint)) {
836 char C = CodePoint;
837 print(C);
838 } else {
839 print(R"(\u{)");
840 print(HexDigits);
841 print('}');
842 }
843 break;
844 }
845 print('\'');
846}
847
848// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
849Identifier Demangler::parseIdentifier() {
850 bool Punycode = consumeIf('u');
851 uint64_t Bytes = parseDecimalNumber();
852
853 // Underscore resolves the ambiguity when identifier starts with a decimal
854 // digit or another underscore.
855 consumeIf('_');
856
857 if (Error || Bytes > Input.size() - Position) {
858 Error = true;
859 return {};
860 }
861 std::string_view S = Input.substr(Position, Bytes);
862 Position += Bytes;
863
864 if (!std::all_of(S.begin(), S.end(), isValid)) {
865 Error = true;
866 return {};
867 }
868
869 return {S, Punycode};
870}
871
872// Parses optional base 62 number. The presence of a number is determined using
873// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
874//
875// This function is indended for parsing disambiguators and binders which when
876// not present have their value interpreted as 0, and otherwise as decoded
877// value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
878// 2. When "G" is absent value is 0.
879uint64_t Demangler::parseOptionalBase62Number(char Tag) {
880 if (!consumeIf(Tag))
881 return 0;
882
883 uint64_t N = parseBase62Number();
884 if (Error || !addAssign(N, 1))
885 return 0;
886
887 return N;
888}
889
890// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
891// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
892// "1_" encodes 2, etc.
893//
894// <base-62-number> = {<0-9a-zA-Z>} "_"
895uint64_t Demangler::parseBase62Number() {
896 if (consumeIf('_'))
897 return 0;
898
899 uint64_t Value = 0;
900
901 while (true) {
902 uint64_t Digit;
903 char C = consume();
904
905 if (C == '_') {
906 break;
907 } else if (isDigit(C)) {
908 Digit = C - '0';
909 } else if (isLower(C)) {
910 Digit = 10 + (C - 'a');
911 } else if (isUpper(C)) {
912 Digit = 10 + 26 + (C - 'A');
913 } else {
914 Error = true;
915 return 0;
916 }
917
918 if (!mulAssign(Value, 62))
919 return 0;
920
921 if (!addAssign(Value, Digit))
922 return 0;
923 }
924
925 if (!addAssign(Value, 1))
926 return 0;
927
928 return Value;
929}
930
931// Parses a decimal number that had been encoded without any leading zeros.
932//
933// <decimal-number> = "0"
934// | <1-9> {<0-9>}
935uint64_t Demangler::parseDecimalNumber() {
936 char C = look();
937 if (!isDigit(C)) {
938 Error = true;
939 return 0;
940 }
941
942 if (C == '0') {
943 consume();
944 return 0;
945 }
946
947 uint64_t Value = 0;
948
949 while (isDigit(look())) {
950 if (!mulAssign(Value, 10)) {
951 Error = true;
952 return 0;
953 }
954
955 uint64_t D = consume() - '0';
956 if (!addAssign(Value, D))
957 return 0;
958 }
959
960 return Value;
961}
962
963// Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
964// value and stores hex digits in HexDigits. The return value is unspecified if
965// HexDigits.size() > 16.
966//
967// <hex-number> = "0_"
968// | <1-9a-f> {<0-9a-f>} "_"
969uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
970 size_t Start = Position;
971 uint64_t Value = 0;
972
973 if (!isHexDigit(look()))
974 Error = true;
975
976 if (consumeIf('0')) {
977 if (!consumeIf('_'))
978 Error = true;
979 } else {
980 while (!Error && !consumeIf('_')) {
981 char C = consume();
982 Value *= 16;
983 if (isDigit(C))
984 Value += C - '0';
985 else if ('a' <= C && C <= 'f')
986 Value += 10 + (C - 'a');
987 else
988 Error = true;
989 }
990 }
991
992 if (Error) {
993 HexDigits = std::string_view();
994 return 0;
995 }
996
997 size_t End = Position - 1;
998 assert(Start < End);
999 HexDigits = Input.substr(Start, End - Start);
1000 return Value;
1001}
1002
1003void Demangler::print(char C) {
1004 if (Error || !Print)
1005 return;
1006
1007 Output += C;
1008}
1009
1010void Demangler::print(std::string_view S) {
1011 if (Error || !Print)
1012 return;
1013
1014 Output += S;
1015}
1016
1017void Demangler::printDecimalNumber(uint64_t N) {
1018 if (Error || !Print)
1019 return;
1020
1021 Output << N;
1022}
1023
1024// Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1025// starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1026// bound by one of the enclosing binders.
1027void Demangler::printLifetime(uint64_t Index) {
1028 if (Index == 0) {
1029 print("'_");
1030 return;
1031 }
1032
1033 if (Index - 1 >= BoundLifetimes) {
1034 Error = true;
1035 return;
1036 }
1037
1038 uint64_t Depth = BoundLifetimes - Index;
1039 print('\'');
1040 if (Depth < 26) {
1041 char C = 'a' + Depth;
1042 print(C);
1043 } else {
1044 print('z');
1045 printDecimalNumber(Depth - 26 + 1);
1046 }
1047}
1048
1049static inline bool decodePunycodeDigit(char C, size_t &Value) {
1050 if (isLower(C)) {
1051 Value = C - 'a';
1052 return true;
1053 }
1054
1055 if (isDigit(C)) {
1056 Value = 26 + (C - '0');
1057 return true;
1058 }
1059
1060 return false;
1061}
1062
1063static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1064 char *Buffer = Output.getBuffer();
1065 char *Start = Buffer + StartIdx;
1066 char *End = Buffer + Output.getCurrentPosition();
1067 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1068}
1069
1070// Encodes code point as UTF-8 and stores results in Output. Returns false if
1071// CodePoint is not a valid unicode scalar value.
1072static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1073 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1074 return false;
1075
1076 if (CodePoint <= 0x7F) {
1077 Output[0] = CodePoint;
1078 return true;
1079 }
1080
1081 if (CodePoint <= 0x7FF) {
1082 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1083 Output[1] = 0x80 | (CodePoint & 0x3F);
1084 return true;
1085 }
1086
1087 if (CodePoint <= 0xFFFF) {
1088 Output[0] = 0xE0 | (CodePoint >> 12);
1089 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1090 Output[2] = 0x80 | (CodePoint & 0x3F);
1091 return true;
1092 }
1093
1094 if (CodePoint <= 0x10FFFF) {
1095 Output[0] = 0xF0 | (CodePoint >> 18);
1096 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1097 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1098 Output[3] = 0x80 | (CodePoint & 0x3F);
1099 return true;
1100 }
1101
1102 return false;
1103}
1104
1105// Decodes string encoded using punycode and appends results to Output.
1106// Returns true if decoding was successful.
1107static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {
1108 size_t OutputSize = Output.getCurrentPosition();
1109 size_t InputIdx = 0;
1110
1111 // Rust uses an underscore as a delimiter.
1112 size_t DelimiterPos = std::string_view::npos;
1113 for (size_t I = 0; I != Input.size(); ++I)
1114 if (Input[I] == '_')
1115 DelimiterPos = I;
1116
1117 if (DelimiterPos != std::string_view::npos) {
1118 // Copy basic code points before the last delimiter to the output.
1119 for (; InputIdx != DelimiterPos; ++InputIdx) {
1120 char C = Input[InputIdx];
1121 if (!isValid(C))
1122 return false;
1123 // Code points are padded with zeros while decoding is in progress.
1124 char UTF8[4] = {C};
1125 Output += std::string_view(UTF8, 4);
1126 }
1127 // Skip over the delimiter.
1128 ++InputIdx;
1129 }
1130
1131 size_t Base = 36;
1132 size_t Skew = 38;
1133 size_t Bias = 72;
1134 size_t N = 0x80;
1135 size_t TMin = 1;
1136 size_t TMax = 26;
1137 size_t Damp = 700;
1138
1139 auto Adapt = [&](size_t Delta, size_t NumPoints) {
1140 Delta /= Damp;
1141 Delta += Delta / NumPoints;
1142 Damp = 2;
1143
1144 size_t K = 0;
1145 while (Delta > (Base - TMin) * TMax / 2) {
1146 Delta /= Base - TMin;
1147 K += Base;
1148 }
1149 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1150 };
1151
1152 // Main decoding loop.
1153 for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1154 size_t OldI = I;
1155 size_t W = 1;
1156 size_t Max = std::numeric_limits<size_t>::max();
1157 for (size_t K = Base; true; K += Base) {
1158 if (InputIdx == Input.size())
1159 return false;
1160 char C = Input[InputIdx++];
1161 size_t Digit = 0;
1162 if (!decodePunycodeDigit(C, Digit))
1163 return false;
1164
1165 if (Digit > (Max - I) / W)
1166 return false;
1167 I += Digit * W;
1168
1169 size_t T;
1170 if (K <= Bias)
1171 T = TMin;
1172 else if (K >= Bias + TMax)
1173 T = TMax;
1174 else
1175 T = K - Bias;
1176
1177 if (Digit < T)
1178 break;
1179
1180 if (W > Max / (Base - T))
1181 return false;
1182 W *= (Base - T);
1183 }
1184 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1185 Bias = Adapt(I - OldI, NumPoints);
1186
1187 if (I / NumPoints > Max - N)
1188 return false;
1189 N += I / NumPoints;
1190 I = I % NumPoints;
1191
1192 // Insert N at position I in the output.
1193 char UTF8[4] = {};
1194 if (!encodeUTF8(N, UTF8))
1195 return false;
1196 Output.insert(OutputSize + I * 4, UTF8, 4);
1197 }
1198
1199 removeNullBytes(Output, OutputSize);
1200 return true;
1201}
1202
1203void Demangler::printIdentifier(Identifier Ident) {
1204 if (Error || !Print)
1205 return;
1206
1207 if (Ident.Punycode) {
1208 if (!decodePunycode(Ident.Name, Output))
1209 Error = true;
1210 } else {
1211 print(Ident.Name);
1212 }
1213}
1214
1215char Demangler::look() const {
1216 if (Error || Position >= Input.size())
1217 return 0;
1218
1219 return Input[Position];
1220}
1221
1222char Demangler::consume() {
1223 if (Error || Position >= Input.size()) {
1224 Error = true;
1225 return 0;
1226 }
1227
1228 return Input[Position++];
1229}
1230
1231bool Demangler::consumeIf(char Prefix) {
1232 if (Error || Position >= Input.size() || Input[Position] != Prefix)
1233 return false;
1234
1235 Position += 1;
1236 return true;
1237}
1238
1239/// Computes A + B. When computation wraps around sets the error and returns
1240/// false. Otherwise assigns the result to A and returns true.
1241bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1242 if (A > std::numeric_limits<uint64_t>::max() - B) {
1243 Error = true;
1244 return false;
1245 }
1246
1247 A += B;
1248 return true;
1249}
1250
1251/// Computes A * B. When computation wraps around sets the error and returns
1252/// false. Otherwise assigns the result to A and returns true.
1253bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1254 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1255 Error = true;
1256 return false;
1257 }
1258
1259 A *= B;
1260 return true;
1261}
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
std::string Name
bool End
Definition: ELF_riscv.cpp:464
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
static bool isDigit(const char C)
static bool isAsciiPrintable(uint64_t CodePoint)
Returns true if CodePoint represents a printable ASCII character.
static bool isHexDigit(const char C)
static bool isLower(const char C)
static bool parseBasicType(char C, BasicType &Type)
static bool isUpper(const char C)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
Definition: YAMLParser.cpp:572
char * getBuffer()
Definition: Utility.h:182
void setCurrentPosition(size_t NewPos)
Definition: Utility.h:173
size_t getCurrentPosition() const
Definition: Utility.h:172
void insert(size_t Pos, const char *S, size_t N)
Definition: Utility.h:162
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Print(const T &, const DataFlowGraph &) -> Print< T >
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::string demangle(const std::string &MangledName)
Attempt to demangle a string using different demangling schemes.
Definition: Demangle.cpp:29
char * rustDemangle(const char *MangledName)
unsigned char UTF8
Definition: ConvertUTF.h:130
#define N