LLVM  16.0.0git
RustDemangle.cpp
Go to the documentation of this file.
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/Demangle.h"
16 #include "llvm/Demangle/Utility.h"
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 
26 using llvm::itanium_demangle::OutputBuffer;
27 using llvm::itanium_demangle::ScopedOverride;
28 using llvm::itanium_demangle::StringView;
29 
30 namespace {
31 
32 struct Identifier {
33  StringView Name;
34  bool Punycode;
35 
36  bool empty() const { return Name.empty(); }
37 };
38 
39 enum class BasicType {
40  Bool,
41  Char,
42  I8,
43  I16,
44  I32,
45  I64,
46  I128,
47  ISize,
48  U8,
49  U16,
50  U32,
51  U64,
52  U128,
53  USize,
54  F32,
55  F64,
56  Str,
57  Placeholder,
58  Unit,
59  Variadic,
60  Never,
61 };
62 
63 enum class IsInType {
64  No,
65  Yes,
66 };
67 
68 enum class LeaveGenericsOpen {
69  No,
70  Yes,
71 };
72 
73 class Demangler {
74  // Maximum recursion level. Used to avoid stack overflow.
75  size_t MaxRecursionLevel;
76  // Current recursion level.
77  size_t RecursionLevel;
78  size_t BoundLifetimes;
79  // Input string that is being demangled with "_R" prefix removed.
80  StringView Input;
81  // Position in the input string.
82  size_t Position;
83  // When true, print methods append the output to the stream.
84  // When false, the output is suppressed.
85  bool Print;
86  // True if an error occurred.
87  bool Error;
88 
89 public:
90  // Demangled output.
91  OutputBuffer Output;
92 
93  Demangler(size_t MaxRecursionLevel = 500);
94 
95  bool demangle(StringView MangledName);
96 
97 private:
98  bool demanglePath(IsInType Type,
99  LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100  void demangleImplPath(IsInType InType);
101  void demangleGenericArg();
102  void demangleType();
103  void demangleFnSig();
104  void demangleDynBounds();
105  void demangleDynTrait();
106  void demangleOptionalBinder();
107  void demangleConst();
108  void demangleConstInt();
109  void demangleConstBool();
110  void demangleConstChar();
111 
112  template <typename Callable> void demangleBackref(Callable Demangler) {
113  uint64_t Backref = parseBase62Number();
114  if (Error || Backref >= Position) {
115  Error = true;
116  return;
117  }
118 
119  if (!Print)
120  return;
121 
122  ScopedOverride<size_t> SavePosition(Position, Position);
123  Position = Backref;
124  Demangler();
125  }
126 
127  Identifier parseIdentifier();
128  uint64_t parseOptionalBase62Number(char Tag);
129  uint64_t parseBase62Number();
130  uint64_t parseDecimalNumber();
131  uint64_t parseHexNumber(StringView &HexDigits);
132 
133  void print(char C);
134  void print(StringView S);
135  void printDecimalNumber(uint64_t N);
136  void printBasicType(BasicType);
137  void printLifetime(uint64_t Index);
138  void printIdentifier(Identifier Ident);
139 
140  char look() const;
141  char consume();
142  bool consumeIf(char Prefix);
143 
144  bool addAssign(uint64_t &A, uint64_t B);
145  bool mulAssign(uint64_t &A, uint64_t B);
146 };
147 
148 } // namespace
149 
150 char *llvm::rustDemangle(const char *MangledName) {
151  if (MangledName == nullptr)
152  return nullptr;
153 
154  // Return early if mangled name doesn't look like a Rust symbol.
155  StringView Mangled(MangledName);
156  if (!Mangled.startsWith("_R"))
157  return nullptr;
158 
159  Demangler D;
160  if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024))
161  return nullptr;
162 
163  if (!D.demangle(Mangled)) {
164  std::free(D.Output.getBuffer());
165  return nullptr;
166  }
167 
168  D.Output += '\0';
169 
170  return D.Output.getBuffer();
171 }
172 
173 Demangler::Demangler(size_t MaxRecursionLevel)
174  : MaxRecursionLevel(MaxRecursionLevel) {}
175 
176 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
177 
178 static inline bool isHexDigit(const char C) {
179  return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
180 }
181 
182 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
183 
184 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
185 
186 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
187 static inline bool isValid(const char C) {
188  return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
189 }
190 
191 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
192 // otherwise. The demangled symbol is stored in Output field. It is
193 // responsibility of the caller to free the memory behind the output stream.
194 //
195 // <symbol-name> = "_R" <path> [<instantiating-crate>]
196 bool Demangler::demangle(StringView Mangled) {
197  Position = 0;
198  Error = false;
199  Print = true;
200  RecursionLevel = 0;
201  BoundLifetimes = 0;
202 
203  if (!Mangled.consumeFront("_R")) {
204  Error = true;
205  return false;
206  }
207  size_t Dot = Mangled.find('.');
208  Input = Mangled.substr(0, Dot);
209  StringView Suffix = Mangled.dropFront(Dot);
210 
211  demanglePath(IsInType::No);
212 
213  if (Position != Input.size()) {
214  ScopedOverride<bool> SavePrint(Print, false);
215  demanglePath(IsInType::No);
216  }
217 
218  if (Position != Input.size())
219  Error = true;
220 
221  if (!Suffix.empty()) {
222  print(" (");
223  print(Suffix);
224  print(")");
225  }
226 
227  return !Error;
228 }
229 
230 // Demangles a path. InType indicates whether a path is inside a type. When
231 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
232 // output. Return value indicates whether generics arguments have been left
233 // open.
234 //
235 // <path> = "C" <identifier> // crate root
236 // | "M" <impl-path> <type> // <T> (inherent impl)
237 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
238 // | "Y" <type> <path> // <T as Trait> (trait definition)
239 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
240 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
241 // | <backref>
242 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
243 // <ns> = "C" // closure
244 // | "S" // shim
245 // | <A-Z> // other special namespaces
246 // | <a-z> // internal namespaces
247 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
248  if (Error || RecursionLevel >= MaxRecursionLevel) {
249  Error = true;
250  return false;
251  }
252  ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
253 
254  switch (consume()) {
255  case 'C': {
256  parseOptionalBase62Number('s');
257  printIdentifier(parseIdentifier());
258  break;
259  }
260  case 'M': {
261  demangleImplPath(InType);
262  print("<");
263  demangleType();
264  print(">");
265  break;
266  }
267  case 'X': {
268  demangleImplPath(InType);
269  print("<");
270  demangleType();
271  print(" as ");
272  demanglePath(IsInType::Yes);
273  print(">");
274  break;
275  }
276  case 'Y': {
277  print("<");
278  demangleType();
279  print(" as ");
280  demanglePath(IsInType::Yes);
281  print(">");
282  break;
283  }
284  case 'N': {
285  char NS = consume();
286  if (!isLower(NS) && !isUpper(NS)) {
287  Error = true;
288  break;
289  }
290  demanglePath(InType);
291 
292  uint64_t Disambiguator = parseOptionalBase62Number('s');
293  Identifier Ident = parseIdentifier();
294 
295  if (isUpper(NS)) {
296  // Special namespaces
297  print("::{");
298  if (NS == 'C')
299  print("closure");
300  else if (NS == 'S')
301  print("shim");
302  else
303  print(NS);
304  if (!Ident.empty()) {
305  print(":");
306  printIdentifier(Ident);
307  }
308  print('#');
309  printDecimalNumber(Disambiguator);
310  print('}');
311  } else {
312  // Implementation internal namespaces.
313  if (!Ident.empty()) {
314  print("::");
315  printIdentifier(Ident);
316  }
317  }
318  break;
319  }
320  case 'I': {
321  demanglePath(InType);
322  // Omit "::" when in a type, where it is optional.
323  if (InType == IsInType::No)
324  print("::");
325  print("<");
326  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
327  if (I > 0)
328  print(", ");
329  demangleGenericArg();
330  }
331  if (LeaveOpen == LeaveGenericsOpen::Yes)
332  return true;
333  else
334  print(">");
335  break;
336  }
337  case 'B': {
338  bool IsOpen = false;
339  demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
340  return IsOpen;
341  }
342  default:
343  Error = true;
344  break;
345  }
346 
347  return false;
348 }
349 
350 // <impl-path> = [<disambiguator>] <path>
351 // <disambiguator> = "s" <base-62-number>
352 void Demangler::demangleImplPath(IsInType InType) {
353  ScopedOverride<bool> SavePrint(Print, false);
354  parseOptionalBase62Number('s');
355  demanglePath(InType);
356 }
357 
358 // <generic-arg> = <lifetime>
359 // | <type>
360 // | "K" <const>
361 // <lifetime> = "L" <base-62-number>
362 void Demangler::demangleGenericArg() {
363  if (consumeIf('L'))
364  printLifetime(parseBase62Number());
365  else if (consumeIf('K'))
366  demangleConst();
367  else
368  demangleType();
369 }
370 
371 // <basic-type> = "a" // i8
372 // | "b" // bool
373 // | "c" // char
374 // | "d" // f64
375 // | "e" // str
376 // | "f" // f32
377 // | "h" // u8
378 // | "i" // isize
379 // | "j" // usize
380 // | "l" // i32
381 // | "m" // u32
382 // | "n" // i128
383 // | "o" // u128
384 // | "s" // i16
385 // | "t" // u16
386 // | "u" // ()
387 // | "v" // ...
388 // | "x" // i64
389 // | "y" // u64
390 // | "z" // !
391 // | "p" // placeholder (e.g. for generic params), shown as _
392 static bool parseBasicType(char C, BasicType &Type) {
393  switch (C) {
394  case 'a':
395  Type = BasicType::I8;
396  return true;
397  case 'b':
399  return true;
400  case 'c':
401  Type = BasicType::Char;
402  return true;
403  case 'd':
404  Type = BasicType::F64;
405  return true;
406  case 'e':
407  Type = BasicType::Str;
408  return true;
409  case 'f':
410  Type = BasicType::F32;
411  return true;
412  case 'h':
413  Type = BasicType::U8;
414  return true;
415  case 'i':
416  Type = BasicType::ISize;
417  return true;
418  case 'j':
419  Type = BasicType::USize;
420  return true;
421  case 'l':
422  Type = BasicType::I32;
423  return true;
424  case 'm':
425  Type = BasicType::U32;
426  return true;
427  case 'n':
428  Type = BasicType::I128;
429  return true;
430  case 'o':
431  Type = BasicType::U128;
432  return true;
433  case 'p':
434  Type = BasicType::Placeholder;
435  return true;
436  case 's':
437  Type = BasicType::I16;
438  return true;
439  case 't':
440  Type = BasicType::U16;
441  return true;
442  case 'u':
443  Type = BasicType::Unit;
444  return true;
445  case 'v':
447  return true;
448  case 'x':
449  Type = BasicType::I64;
450  return true;
451  case 'y':
453  return true;
454  case 'z':
455  Type = BasicType::Never;
456  return true;
457  default:
458  return false;
459  }
460 }
461 
462 void Demangler::printBasicType(BasicType Type) {
463  switch (Type) {
464  case BasicType::Bool:
465  print("bool");
466  break;
467  case BasicType::Char:
468  print("char");
469  break;
470  case BasicType::I8:
471  print("i8");
472  break;
473  case BasicType::I16:
474  print("i16");
475  break;
476  case BasicType::I32:
477  print("i32");
478  break;
479  case BasicType::I64:
480  print("i64");
481  break;
482  case BasicType::I128:
483  print("i128");
484  break;
485  case BasicType::ISize:
486  print("isize");
487  break;
488  case BasicType::U8:
489  print("u8");
490  break;
491  case BasicType::U16:
492  print("u16");
493  break;
494  case BasicType::U32:
495  print("u32");
496  break;
497  case BasicType::U64:
498  print("u64");
499  break;
500  case BasicType::U128:
501  print("u128");
502  break;
503  case BasicType::USize:
504  print("usize");
505  break;
506  case BasicType::F32:
507  print("f32");
508  break;
509  case BasicType::F64:
510  print("f64");
511  break;
512  case BasicType::Str:
513  print("str");
514  break;
515  case BasicType::Placeholder:
516  print("_");
517  break;
518  case BasicType::Unit:
519  print("()");
520  break;
521  case BasicType::Variadic:
522  print("...");
523  break;
524  case BasicType::Never:
525  print("!");
526  break;
527  }
528 }
529 
530 // <type> = | <basic-type>
531 // | <path> // named type
532 // | "A" <type> <const> // [T; N]
533 // | "S" <type> // [T]
534 // | "T" {<type>} "E" // (T1, T2, T3, ...)
535 // | "R" [<lifetime>] <type> // &T
536 // | "Q" [<lifetime>] <type> // &mut T
537 // | "P" <type> // *const T
538 // | "O" <type> // *mut T
539 // | "F" <fn-sig> // fn(...) -> ...
540 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
541 // | <backref> // backref
542 void Demangler::demangleType() {
543  if (Error || RecursionLevel >= MaxRecursionLevel) {
544  Error = true;
545  return;
546  }
547  ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
548 
549  size_t Start = Position;
550  char C = consume();
551  BasicType Type;
552  if (parseBasicType(C, Type))
553  return printBasicType(Type);
554 
555  switch (C) {
556  case 'A':
557  print("[");
558  demangleType();
559  print("; ");
560  demangleConst();
561  print("]");
562  break;
563  case 'S':
564  print("[");
565  demangleType();
566  print("]");
567  break;
568  case 'T': {
569  print("(");
570  size_t I = 0;
571  for (; !Error && !consumeIf('E'); ++I) {
572  if (I > 0)
573  print(", ");
574  demangleType();
575  }
576  if (I == 1)
577  print(",");
578  print(")");
579  break;
580  }
581  case 'R':
582  case 'Q':
583  print('&');
584  if (consumeIf('L')) {
585  if (auto Lifetime = parseBase62Number()) {
586  printLifetime(Lifetime);
587  print(' ');
588  }
589  }
590  if (C == 'Q')
591  print("mut ");
592  demangleType();
593  break;
594  case 'P':
595  print("*const ");
596  demangleType();
597  break;
598  case 'O':
599  print("*mut ");
600  demangleType();
601  break;
602  case 'F':
603  demangleFnSig();
604  break;
605  case 'D':
606  demangleDynBounds();
607  if (consumeIf('L')) {
608  if (auto Lifetime = parseBase62Number()) {
609  print(" + ");
610  printLifetime(Lifetime);
611  }
612  } else {
613  Error = true;
614  }
615  break;
616  case 'B':
617  demangleBackref([&] { demangleType(); });
618  break;
619  default:
620  Position = Start;
621  demanglePath(IsInType::Yes);
622  break;
623  }
624 }
625 
626 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
627 // <abi> = "C"
628 // | <undisambiguated-identifier>
629 void Demangler::demangleFnSig() {
630  ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
631  demangleOptionalBinder();
632 
633  if (consumeIf('U'))
634  print("unsafe ");
635 
636  if (consumeIf('K')) {
637  print("extern \"");
638  if (consumeIf('C')) {
639  print("C");
640  } else {
641  Identifier Ident = parseIdentifier();
642  if (Ident.Punycode)
643  Error = true;
644  for (char C : Ident.Name) {
645  // When mangling ABI string, the "-" is replaced with "_".
646  if (C == '_')
647  C = '-';
648  print(C);
649  }
650  }
651  print("\" ");
652  }
653 
654  print("fn(");
655  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
656  if (I > 0)
657  print(", ");
658  demangleType();
659  }
660  print(")");
661 
662  if (consumeIf('u')) {
663  // Skip the unit type from the output.
664  } else {
665  print(" -> ");
666  demangleType();
667  }
668 }
669 
670 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
671 void Demangler::demangleDynBounds() {
672  ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
673  print("dyn ");
674  demangleOptionalBinder();
675  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
676  if (I > 0)
677  print(" + ");
678  demangleDynTrait();
679  }
680 }
681 
682 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
683 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
684 void Demangler::demangleDynTrait() {
685  bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
686  while (!Error && consumeIf('p')) {
687  if (!IsOpen) {
688  IsOpen = true;
689  print('<');
690  } else {
691  print(", ");
692  }
693  print(parseIdentifier().Name);
694  print(" = ");
695  demangleType();
696  }
697  if (IsOpen)
698  print(">");
699 }
700 
701 // Demangles optional binder and updates the number of bound lifetimes.
702 //
703 // <binder> = "G" <base-62-number>
704 void Demangler::demangleOptionalBinder() {
705  uint64_t Binder = parseOptionalBase62Number('G');
706  if (Error || Binder == 0)
707  return;
708 
709  // In valid inputs each bound lifetime is referenced later. Referencing a
710  // lifetime requires at least one byte of input. Reject inputs that are too
711  // short to reference all bound lifetimes. Otherwise demangling of invalid
712  // binders could generate excessive amounts of output.
713  if (Binder >= Input.size() - BoundLifetimes) {
714  Error = true;
715  return;
716  }
717 
718  print("for<");
719  for (size_t I = 0; I != Binder; ++I) {
720  BoundLifetimes += 1;
721  if (I > 0)
722  print(", ");
723  printLifetime(1);
724  }
725  print("> ");
726 }
727 
728 // <const> = <basic-type> <const-data>
729 // | "p" // placeholder
730 // | <backref>
731 void Demangler::demangleConst() {
732  if (Error || RecursionLevel >= MaxRecursionLevel) {
733  Error = true;
734  return;
735  }
736  ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
737 
738  char C = consume();
739  BasicType Type;
740  if (parseBasicType(C, Type)) {
741  switch (Type) {
742  case BasicType::I8:
743  case BasicType::I16:
744  case BasicType::I32:
745  case BasicType::I64:
746  case BasicType::I128:
747  case BasicType::ISize:
748  case BasicType::U8:
749  case BasicType::U16:
750  case BasicType::U32:
751  case BasicType::U64:
752  case BasicType::U128:
753  case BasicType::USize:
754  demangleConstInt();
755  break;
756  case BasicType::Bool:
757  demangleConstBool();
758  break;
759  case BasicType::Char:
760  demangleConstChar();
761  break;
762  case BasicType::Placeholder:
763  print('_');
764  break;
765  default:
766  Error = true;
767  break;
768  }
769  } else if (C == 'B') {
770  demangleBackref([&] { demangleConst(); });
771  } else {
772  Error = true;
773  }
774 }
775 
776 // <const-data> = ["n"] <hex-number>
777 void Demangler::demangleConstInt() {
778  if (consumeIf('n'))
779  print('-');
780 
781  StringView HexDigits;
782  uint64_t Value = parseHexNumber(HexDigits);
783  if (HexDigits.size() <= 16) {
784  printDecimalNumber(Value);
785  } else {
786  print("0x");
787  print(HexDigits);
788  }
789 }
790 
791 // <const-data> = "0_" // false
792 // | "1_" // true
793 void Demangler::demangleConstBool() {
794  StringView HexDigits;
795  parseHexNumber(HexDigits);
796  if (HexDigits == "0")
797  print("false");
798  else if (HexDigits == "1")
799  print("true");
800  else
801  Error = true;
802 }
803 
804 /// Returns true if CodePoint represents a printable ASCII character.
805 static bool isAsciiPrintable(uint64_t CodePoint) {
806  return 0x20 <= CodePoint && CodePoint <= 0x7e;
807 }
808 
809 // <const-data> = <hex-number>
810 void Demangler::demangleConstChar() {
811  StringView HexDigits;
812  uint64_t CodePoint = parseHexNumber(HexDigits);
813  if (Error || HexDigits.size() > 6) {
814  Error = true;
815  return;
816  }
817 
818  print("'");
819  switch (CodePoint) {
820  case '\t':
821  print(R"(\t)");
822  break;
823  case '\r':
824  print(R"(\r)");
825  break;
826  case '\n':
827  print(R"(\n)");
828  break;
829  case '\\':
830  print(R"(\\)");
831  break;
832  case '"':
833  print(R"(")");
834  break;
835  case '\'':
836  print(R"(\')");
837  break;
838  default:
839  if (isAsciiPrintable(CodePoint)) {
840  char C = CodePoint;
841  print(C);
842  } else {
843  print(R"(\u{)");
844  print(HexDigits);
845  print('}');
846  }
847  break;
848  }
849  print('\'');
850 }
851 
852 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
853 Identifier Demangler::parseIdentifier() {
854  bool Punycode = consumeIf('u');
855  uint64_t Bytes = parseDecimalNumber();
856 
857  // Underscore resolves the ambiguity when identifier starts with a decimal
858  // digit or another underscore.
859  consumeIf('_');
860 
861  if (Error || Bytes > Input.size() - Position) {
862  Error = true;
863  return {};
864  }
865  StringView S = Input.substr(Position, Bytes);
866  Position += Bytes;
867 
868  if (!std::all_of(S.begin(), S.end(), isValid)) {
869  Error = true;
870  return {};
871  }
872 
873  return {S, Punycode};
874 }
875 
876 // Parses optional base 62 number. The presence of a number is determined using
877 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
878 //
879 // This function is indended for parsing disambiguators and binders which when
880 // not present have their value interpreted as 0, and otherwise as decoded
881 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
882 // 2. When "G" is absent value is 0.
883 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
884  if (!consumeIf(Tag))
885  return 0;
886 
887  uint64_t N = parseBase62Number();
888  if (Error || !addAssign(N, 1))
889  return 0;
890 
891  return N;
892 }
893 
894 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
895 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
896 // "1_" encodes 2, etc.
897 //
898 // <base-62-number> = {<0-9a-zA-Z>} "_"
899 uint64_t Demangler::parseBase62Number() {
900  if (consumeIf('_'))
901  return 0;
902 
903  uint64_t Value = 0;
904 
905  while (true) {
906  uint64_t Digit;
907  char C = consume();
908 
909  if (C == '_') {
910  break;
911  } else if (isDigit(C)) {
912  Digit = C - '0';
913  } else if (isLower(C)) {
914  Digit = 10 + (C - 'a');
915  } else if (isUpper(C)) {
916  Digit = 10 + 26 + (C - 'A');
917  } else {
918  Error = true;
919  return 0;
920  }
921 
922  if (!mulAssign(Value, 62))
923  return 0;
924 
925  if (!addAssign(Value, Digit))
926  return 0;
927  }
928 
929  if (!addAssign(Value, 1))
930  return 0;
931 
932  return Value;
933 }
934 
935 // Parses a decimal number that had been encoded without any leading zeros.
936 //
937 // <decimal-number> = "0"
938 // | <1-9> {<0-9>}
939 uint64_t Demangler::parseDecimalNumber() {
940  char C = look();
941  if (!isDigit(C)) {
942  Error = true;
943  return 0;
944  }
945 
946  if (C == '0') {
947  consume();
948  return 0;
949  }
950 
951  uint64_t Value = 0;
952 
953  while (isDigit(look())) {
954  if (!mulAssign(Value, 10)) {
955  Error = true;
956  return 0;
957  }
958 
959  uint64_t D = consume() - '0';
960  if (!addAssign(Value, D))
961  return 0;
962  }
963 
964  return Value;
965 }
966 
967 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
968 // value and stores hex digits in HexDigits. The return value is unspecified if
969 // HexDigits.size() > 16.
970 //
971 // <hex-number> = "0_"
972 // | <1-9a-f> {<0-9a-f>} "_"
973 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
974  size_t Start = Position;
975  uint64_t Value = 0;
976 
977  if (!isHexDigit(look()))
978  Error = true;
979 
980  if (consumeIf('0')) {
981  if (!consumeIf('_'))
982  Error = true;
983  } else {
984  while (!Error && !consumeIf('_')) {
985  char C = consume();
986  Value *= 16;
987  if (isDigit(C))
988  Value += C - '0';
989  else if ('a' <= C && C <= 'f')
990  Value += 10 + (C - 'a');
991  else
992  Error = true;
993  }
994  }
995 
996  if (Error) {
997  HexDigits = StringView();
998  return 0;
999  }
1000 
1001  size_t End = Position - 1;
1002  assert(Start < End);
1003  HexDigits = Input.substr(Start, End - Start);
1004  return Value;
1005 }
1006 
1007 void Demangler::print(char C) {
1008  if (Error || !Print)
1009  return;
1010 
1011  Output += C;
1012 }
1013 
1014 void Demangler::print(StringView S) {
1015  if (Error || !Print)
1016  return;
1017 
1018  Output += S;
1019 }
1020 
1021 void Demangler::printDecimalNumber(uint64_t N) {
1022  if (Error || !Print)
1023  return;
1024 
1025  Output << N;
1026 }
1027 
1028 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1029 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1030 // bound by one of the enclosing binders.
1031 void Demangler::printLifetime(uint64_t Index) {
1032  if (Index == 0) {
1033  print("'_");
1034  return;
1035  }
1036 
1037  if (Index - 1 >= BoundLifetimes) {
1038  Error = true;
1039  return;
1040  }
1041 
1042  uint64_t Depth = BoundLifetimes - Index;
1043  print('\'');
1044  if (Depth < 26) {
1045  char C = 'a' + Depth;
1046  print(C);
1047  } else {
1048  print('z');
1049  printDecimalNumber(Depth - 26 + 1);
1050  }
1051 }
1052 
1053 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1054  if (isLower(C)) {
1055  Value = C - 'a';
1056  return true;
1057  }
1058 
1059  if (isDigit(C)) {
1060  Value = 26 + (C - '0');
1061  return true;
1062  }
1063 
1064  return false;
1065 }
1066 
1067 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1068  char *Buffer = Output.getBuffer();
1069  char *Start = Buffer + StartIdx;
1070  char *End = Buffer + Output.getCurrentPosition();
1071  Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1072 }
1073 
1074 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1075 // CodePoint is not a valid unicode scalar value.
1076 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1077  if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1078  return false;
1079 
1080  if (CodePoint <= 0x7F) {
1081  Output[0] = CodePoint;
1082  return true;
1083  }
1084 
1085  if (CodePoint <= 0x7FF) {
1086  Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1087  Output[1] = 0x80 | (CodePoint & 0x3F);
1088  return true;
1089  }
1090 
1091  if (CodePoint <= 0xFFFF) {
1092  Output[0] = 0xE0 | (CodePoint >> 12);
1093  Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1094  Output[2] = 0x80 | (CodePoint & 0x3F);
1095  return true;
1096  }
1097 
1098  if (CodePoint <= 0x10FFFF) {
1099  Output[0] = 0xF0 | (CodePoint >> 18);
1100  Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1101  Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1102  Output[3] = 0x80 | (CodePoint & 0x3F);
1103  return true;
1104  }
1105 
1106  return false;
1107 }
1108 
1109 // Decodes string encoded using punycode and appends results to Output.
1110 // Returns true if decoding was successful.
1111 static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1112  size_t OutputSize = Output.getCurrentPosition();
1113  size_t InputIdx = 0;
1114 
1115  // Rust uses an underscore as a delimiter.
1116  size_t DelimiterPos = StringView::npos;
1117  for (size_t I = 0; I != Input.size(); ++I)
1118  if (Input[I] == '_')
1119  DelimiterPos = I;
1120 
1121  if (DelimiterPos != StringView::npos) {
1122  // Copy basic code points before the last delimiter to the output.
1123  for (; InputIdx != DelimiterPos; ++InputIdx) {
1124  char C = Input[InputIdx];
1125  if (!isValid(C))
1126  return false;
1127  // Code points are padded with zeros while decoding is in progress.
1128  char UTF8[4] = {C};
1129  Output += StringView(UTF8, UTF8 + 4);
1130  }
1131  // Skip over the delimiter.
1132  ++InputIdx;
1133  }
1134 
1135  size_t Base = 36;
1136  size_t Skew = 38;
1137  size_t Bias = 72;
1138  size_t N = 0x80;
1139  size_t TMin = 1;
1140  size_t TMax = 26;
1141  size_t Damp = 700;
1142 
1143  auto Adapt = [&](size_t Delta, size_t NumPoints) {
1144  Delta /= Damp;
1145  Delta += Delta / NumPoints;
1146  Damp = 2;
1147 
1148  size_t K = 0;
1149  while (Delta > (Base - TMin) * TMax / 2) {
1150  Delta /= Base - TMin;
1151  K += Base;
1152  }
1153  return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1154  };
1155 
1156  // Main decoding loop.
1157  for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1158  size_t OldI = I;
1159  size_t W = 1;
1161  for (size_t K = Base; true; K += Base) {
1162  if (InputIdx == Input.size())
1163  return false;
1164  char C = Input[InputIdx++];
1165  size_t Digit = 0;
1166  if (!decodePunycodeDigit(C, Digit))
1167  return false;
1168 
1169  if (Digit > (Max - I) / W)
1170  return false;
1171  I += Digit * W;
1172 
1173  size_t T;
1174  if (K <= Bias)
1175  T = TMin;
1176  else if (K >= Bias + TMax)
1177  T = TMax;
1178  else
1179  T = K - Bias;
1180 
1181  if (Digit < T)
1182  break;
1183 
1184  if (W > Max / (Base - T))
1185  return false;
1186  W *= (Base - T);
1187  }
1188  size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1189  Bias = Adapt(I - OldI, NumPoints);
1190 
1191  if (I / NumPoints > Max - N)
1192  return false;
1193  N += I / NumPoints;
1194  I = I % NumPoints;
1195 
1196  // Insert N at position I in the output.
1197  char UTF8[4] = {};
1198  if (!encodeUTF8(N, UTF8))
1199  return false;
1200  Output.insert(OutputSize + I * 4, UTF8, 4);
1201  }
1202 
1203  removeNullBytes(Output, OutputSize);
1204  return true;
1205 }
1206 
1207 void Demangler::printIdentifier(Identifier Ident) {
1208  if (Error || !Print)
1209  return;
1210 
1211  if (Ident.Punycode) {
1212  if (!decodePunycode(Ident.Name, Output))
1213  Error = true;
1214  } else {
1215  print(Ident.Name);
1216  }
1217 }
1218 
1219 char Demangler::look() const {
1220  if (Error || Position >= Input.size())
1221  return 0;
1222 
1223  return Input[Position];
1224 }
1225 
1226 char Demangler::consume() {
1227  if (Error || Position >= Input.size()) {
1228  Error = true;
1229  return 0;
1230  }
1231 
1232  return Input[Position++];
1233 }
1234 
1235 bool Demangler::consumeIf(char Prefix) {
1236  if (Error || Position >= Input.size() || Input[Position] != Prefix)
1237  return false;
1238 
1239  Position += 1;
1240  return true;
1241 }
1242 
1243 /// Computes A + B. When computation wraps around sets the error and returns
1244 /// false. Otherwise assigns the result to A and returns true.
1245 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1247  Error = true;
1248  return false;
1249  }
1250 
1251  A += B;
1252  return true;
1253 }
1254 
1255 /// Computes A * B. When computation wraps around sets the error and returns
1256 /// false. Otherwise assigns the result to A and returns true.
1257 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1258  if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1259  Error = true;
1260  return false;
1261  }
1262 
1263  A *= B;
1264  return true;
1265 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::wasm::ValType::I32
@ I32
llvm::lltok::Error
@ Error
Definition: LLToken.h:21
llvm::AMDGPU::HSAMD::ValueType::U16
@ U16
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::pdb::PDB_BuiltinType::Char
@ Char
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:161
T
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
U64
instcombine should handle this C2 when and C2 are unsigned Similarly for udiv and signed operands Currently InstCombine avoids this transform but will do it when the signs of the operands and the sign of the divide match See the FIXME in InstructionCombining cpp in the visitSetCondInst method after the switch case for Instruction::UDiv(around line 4447) for more details. The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of this const ruct.[LOOP OPTIMIZATION] SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization opportunities in its double_array_divs_variable function typedef unsigned long long U64
Definition: README.txt:268
llvm::AMDGPU::HSAMD::ValueType::I8
@ I8
llvm::AMDGPU::HSAMD::ValueType::U32
@ U32
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Utility.h
llvm::rustDemangle
char * rustDemangle(const char *MangledName)
Definition: RustDemangle.cpp:150
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::dwarf::Tag
Tag
Definition: Dwarf.h:105
Demangler
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
Definition: ItaniumDemangle.cpp:366
encodeUTF8
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
Definition: YAMLParser.cpp:572
llvm::SwiftAsyncFramePointerMode::Never
@ Never
Never set the bit.
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1590
llvm::InlinerFunctionImportStatsOpts::No
@ No
llvm::wasm::ValType::F64
@ F64
parseBasicType
static bool parseBasicType(char C, BasicType &Type)
Definition: RustDemangle.cpp:392
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::wasm::ValType::F32
@ F32
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
StringView::npos
static const size_t npos
Definition: StringView.h:30
isLower
static bool isLower(const char C)
Definition: RustDemangle.cpp:182
llvm::demangle
std::string demangle(const std::string &MangledName)
Attempt to demangle a string using different demangling schemes.
Definition: Demangle.cpp:29
StringView.h
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
isAsciiPrintable
static bool isAsciiPrintable(uint64_t CodePoint)
Returns true if CodePoint represents a printable ASCII character.
Definition: RustDemangle.cpp:805
llvm::wasm::ValType::I64
@ I64
I
#define I(x, y, z)
Definition: MD5.cpp:58
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AMDGPU::HSAMD::ValueType::I16
@ I16
llvm::codeview::consume
Error consume(BinaryStreamReader &Reader)
Definition: RecordSerialization.h:45
llvm::sys::fs::remove
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
isDigit
static bool isDigit(const char C)
Definition: RustDemangle.cpp:176
Demangle.h
A
* A
Definition: README_ALTIVEC.txt:89
llvm::rdf::Print
Print(const T &, const DataFlowGraph &) -> Print< T >
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::AMDGPU::HSAMD::ValueType::U8
@ U8
llvm::pdb::Bool
@ Bool
Definition: PDBTypes.h:407
llvm::MCID::Variadic
@ Variadic
Definition: MCInstrDesc.h:149
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:256
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
initializeOutputBuffer
bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB, size_t InitSize)
Definition: Utility.h:201
Bool
@ Bool
Definition: TargetLibraryInfo.cpp:45
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:155
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::object::Identifier
@ Identifier
Definition: COFFModuleDefinition.cpp:34
llvm::PrevailingType::Yes
@ Yes
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:187
llvm::pdb::DbgHeaderType::Max
@ Max
N
#define N
isHexDigit
static bool isHexDigit(const char C)
Definition: RustDemangle.cpp:178
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:48
consume
static bool consume(InternalInstruction *insn, T &ptr)
Definition: X86Disassembler.cpp:192
isUpper
static bool isUpper(const char C)
Definition: RustDemangle.cpp:184
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::UTF8
unsigned char UTF8
Definition: ConvertUTF.h:130
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58