LLVM  14.0.0git
RustDemangle.cpp
Go to the documentation of this file.
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/Demangle.h"
16 #include "llvm/Demangle/Utility.h"
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 
26 using llvm::itanium_demangle::OutputBuffer;
27 using llvm::itanium_demangle::StringView;
28 using llvm::itanium_demangle::SwapAndRestore;
29 
30 namespace {
31 
32 struct Identifier {
33  StringView Name;
34  bool Punycode;
35 
36  bool empty() const { return Name.empty(); }
37 };
38 
39 enum class BasicType {
40  Bool,
41  Char,
42  I8,
43  I16,
44  I32,
45  I64,
46  I128,
47  ISize,
48  U8,
49  U16,
50  U32,
51  U64,
52  U128,
53  USize,
54  F32,
55  F64,
56  Str,
57  Placeholder,
58  Unit,
59  Variadic,
60  Never,
61 };
62 
63 enum class IsInType {
64  No,
65  Yes,
66 };
67 
68 enum class LeaveGenericsOpen {
69  No,
70  Yes,
71 };
72 
73 class Demangler {
74  // Maximum recursion level. Used to avoid stack overflow.
75  size_t MaxRecursionLevel;
76  // Current recursion level.
77  size_t RecursionLevel;
78  size_t BoundLifetimes;
79  // Input string that is being demangled with "_R" prefix removed.
80  StringView Input;
81  // Position in the input string.
82  size_t Position;
83  // When true, print methods append the output to the stream.
84  // When false, the output is suppressed.
85  bool Print;
86  // True if an error occurred.
87  bool Error;
88 
89 public:
90  // Demangled output.
91  OutputBuffer Output;
92 
93  Demangler(size_t MaxRecursionLevel = 500);
94 
95  bool demangle(StringView MangledName);
96 
97 private:
98  bool demanglePath(IsInType Type,
99  LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
100  void demangleImplPath(IsInType InType);
101  void demangleGenericArg();
102  void demangleType();
103  void demangleFnSig();
104  void demangleDynBounds();
105  void demangleDynTrait();
106  void demangleOptionalBinder();
107  void demangleConst();
108  void demangleConstInt();
109  void demangleConstBool();
110  void demangleConstChar();
111 
112  template <typename Callable> void demangleBackref(Callable Demangler) {
113  uint64_t Backref = parseBase62Number();
114  if (Error || Backref >= Position) {
115  Error = true;
116  return;
117  }
118 
119  if (!Print)
120  return;
121 
122  SwapAndRestore<size_t> SavePosition(Position, Position);
123  Position = Backref;
124  Demangler();
125  }
126 
127  Identifier parseIdentifier();
128  uint64_t parseOptionalBase62Number(char Tag);
129  uint64_t parseBase62Number();
130  uint64_t parseDecimalNumber();
131  uint64_t parseHexNumber(StringView &HexDigits);
132 
133  void print(char C);
134  void print(StringView S);
135  void printDecimalNumber(uint64_t N);
136  void printBasicType(BasicType);
137  void printLifetime(uint64_t Index);
138  void printIdentifier(Identifier Ident);
139 
140  char look() const;
141  char consume();
142  bool consumeIf(char Prefix);
143 
144  bool addAssign(uint64_t &A, uint64_t B);
145  bool mulAssign(uint64_t &A, uint64_t B);
146 };
147 
148 } // namespace
149 
150 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
151  int *Status) {
152  if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
153  if (Status != nullptr)
155  return nullptr;
156  }
157 
158  // Return early if mangled name doesn't look like a Rust symbol.
159  StringView Mangled(MangledName);
160  if (!Mangled.startsWith("_R")) {
161  if (Status != nullptr)
163  return nullptr;
164  }
165 
166  Demangler D;
167  if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) {
168  if (Status != nullptr)
170  return nullptr;
171  }
172 
173  if (!D.demangle(Mangled)) {
174  if (Status != nullptr)
176  std::free(D.Output.getBuffer());
177  return nullptr;
178  }
179 
180  D.Output += '\0';
181  char *Demangled = D.Output.getBuffer();
182  size_t DemangledLen = D.Output.getCurrentPosition();
183 
184  if (Buf != nullptr) {
185  if (DemangledLen <= *N) {
186  std::memcpy(Buf, Demangled, DemangledLen);
187  std::free(Demangled);
188  Demangled = Buf;
189  } else {
190  std::free(Buf);
191  }
192  }
193 
194  if (N != nullptr)
195  *N = DemangledLen;
196 
197  if (Status != nullptr)
199 
200  return Demangled;
201 }
202 
203 Demangler::Demangler(size_t MaxRecursionLevel)
204  : MaxRecursionLevel(MaxRecursionLevel) {}
205 
206 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
207 
208 static inline bool isHexDigit(const char C) {
209  return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
210 }
211 
212 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
213 
214 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
215 
216 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
217 static inline bool isValid(const char C) {
218  return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
219 }
220 
221 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
222 // otherwise. The demangled symbol is stored in Output field. It is
223 // responsibility of the caller to free the memory behind the output stream.
224 //
225 // <symbol-name> = "_R" <path> [<instantiating-crate>]
226 bool Demangler::demangle(StringView Mangled) {
227  Position = 0;
228  Error = false;
229  Print = true;
230  RecursionLevel = 0;
231  BoundLifetimes = 0;
232 
233  if (!Mangled.consumeFront("_R")) {
234  Error = true;
235  return false;
236  }
237  size_t Dot = Mangled.find('.');
238  Input = Mangled.substr(0, Dot);
239  StringView Suffix = Mangled.dropFront(Dot);
240 
241  demanglePath(IsInType::No);
242 
243  if (Position != Input.size()) {
244  SwapAndRestore<bool> SavePrint(Print, false);
245  demanglePath(IsInType::No);
246  }
247 
248  if (Position != Input.size())
249  Error = true;
250 
251  if (!Suffix.empty()) {
252  print(" (");
253  print(Suffix);
254  print(")");
255  }
256 
257  return !Error;
258 }
259 
260 // Demangles a path. InType indicates whether a path is inside a type. When
261 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
262 // output. Return value indicates whether generics arguments have been left
263 // open.
264 //
265 // <path> = "C" <identifier> // crate root
266 // | "M" <impl-path> <type> // <T> (inherent impl)
267 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
268 // | "Y" <type> <path> // <T as Trait> (trait definition)
269 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
270 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
271 // | <backref>
272 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
273 // <ns> = "C" // closure
274 // | "S" // shim
275 // | <A-Z> // other special namespaces
276 // | <a-z> // internal namespaces
277 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
278  if (Error || RecursionLevel >= MaxRecursionLevel) {
279  Error = true;
280  return false;
281  }
282  SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
283 
284  switch (consume()) {
285  case 'C': {
286  parseOptionalBase62Number('s');
287  printIdentifier(parseIdentifier());
288  break;
289  }
290  case 'M': {
291  demangleImplPath(InType);
292  print("<");
293  demangleType();
294  print(">");
295  break;
296  }
297  case 'X': {
298  demangleImplPath(InType);
299  print("<");
300  demangleType();
301  print(" as ");
302  demanglePath(IsInType::Yes);
303  print(">");
304  break;
305  }
306  case 'Y': {
307  print("<");
308  demangleType();
309  print(" as ");
310  demanglePath(IsInType::Yes);
311  print(">");
312  break;
313  }
314  case 'N': {
315  char NS = consume();
316  if (!isLower(NS) && !isUpper(NS)) {
317  Error = true;
318  break;
319  }
320  demanglePath(InType);
321 
322  uint64_t Disambiguator = parseOptionalBase62Number('s');
323  Identifier Ident = parseIdentifier();
324 
325  if (isUpper(NS)) {
326  // Special namespaces
327  print("::{");
328  if (NS == 'C')
329  print("closure");
330  else if (NS == 'S')
331  print("shim");
332  else
333  print(NS);
334  if (!Ident.empty()) {
335  print(":");
336  printIdentifier(Ident);
337  }
338  print('#');
339  printDecimalNumber(Disambiguator);
340  print('}');
341  } else {
342  // Implementation internal namespaces.
343  if (!Ident.empty()) {
344  print("::");
345  printIdentifier(Ident);
346  }
347  }
348  break;
349  }
350  case 'I': {
351  demanglePath(InType);
352  // Omit "::" when in a type, where it is optional.
353  if (InType == IsInType::No)
354  print("::");
355  print("<");
356  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
357  if (I > 0)
358  print(", ");
359  demangleGenericArg();
360  }
361  if (LeaveOpen == LeaveGenericsOpen::Yes)
362  return true;
363  else
364  print(">");
365  break;
366  }
367  case 'B': {
368  bool IsOpen = false;
369  demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
370  return IsOpen;
371  }
372  default:
373  Error = true;
374  break;
375  }
376 
377  return false;
378 }
379 
380 // <impl-path> = [<disambiguator>] <path>
381 // <disambiguator> = "s" <base-62-number>
382 void Demangler::demangleImplPath(IsInType InType) {
383  SwapAndRestore<bool> SavePrint(Print, false);
384  parseOptionalBase62Number('s');
385  demanglePath(InType);
386 }
387 
388 // <generic-arg> = <lifetime>
389 // | <type>
390 // | "K" <const>
391 // <lifetime> = "L" <base-62-number>
392 void Demangler::demangleGenericArg() {
393  if (consumeIf('L'))
394  printLifetime(parseBase62Number());
395  else if (consumeIf('K'))
396  demangleConst();
397  else
398  demangleType();
399 }
400 
401 // <basic-type> = "a" // i8
402 // | "b" // bool
403 // | "c" // char
404 // | "d" // f64
405 // | "e" // str
406 // | "f" // f32
407 // | "h" // u8
408 // | "i" // isize
409 // | "j" // usize
410 // | "l" // i32
411 // | "m" // u32
412 // | "n" // i128
413 // | "o" // u128
414 // | "s" // i16
415 // | "t" // u16
416 // | "u" // ()
417 // | "v" // ...
418 // | "x" // i64
419 // | "y" // u64
420 // | "z" // !
421 // | "p" // placeholder (e.g. for generic params), shown as _
422 static bool parseBasicType(char C, BasicType &Type) {
423  switch (C) {
424  case 'a':
425  Type = BasicType::I8;
426  return true;
427  case 'b':
429  return true;
430  case 'c':
431  Type = BasicType::Char;
432  return true;
433  case 'd':
434  Type = BasicType::F64;
435  return true;
436  case 'e':
437  Type = BasicType::Str;
438  return true;
439  case 'f':
440  Type = BasicType::F32;
441  return true;
442  case 'h':
443  Type = BasicType::U8;
444  return true;
445  case 'i':
446  Type = BasicType::ISize;
447  return true;
448  case 'j':
449  Type = BasicType::USize;
450  return true;
451  case 'l':
452  Type = BasicType::I32;
453  return true;
454  case 'm':
455  Type = BasicType::U32;
456  return true;
457  case 'n':
458  Type = BasicType::I128;
459  return true;
460  case 'o':
461  Type = BasicType::U128;
462  return true;
463  case 'p':
464  Type = BasicType::Placeholder;
465  return true;
466  case 's':
467  Type = BasicType::I16;
468  return true;
469  case 't':
470  Type = BasicType::U16;
471  return true;
472  case 'u':
473  Type = BasicType::Unit;
474  return true;
475  case 'v':
477  return true;
478  case 'x':
479  Type = BasicType::I64;
480  return true;
481  case 'y':
483  return true;
484  case 'z':
485  Type = BasicType::Never;
486  return true;
487  default:
488  return false;
489  }
490 }
491 
492 void Demangler::printBasicType(BasicType Type) {
493  switch (Type) {
494  case BasicType::Bool:
495  print("bool");
496  break;
497  case BasicType::Char:
498  print("char");
499  break;
500  case BasicType::I8:
501  print("i8");
502  break;
503  case BasicType::I16:
504  print("i16");
505  break;
506  case BasicType::I32:
507  print("i32");
508  break;
509  case BasicType::I64:
510  print("i64");
511  break;
512  case BasicType::I128:
513  print("i128");
514  break;
515  case BasicType::ISize:
516  print("isize");
517  break;
518  case BasicType::U8:
519  print("u8");
520  break;
521  case BasicType::U16:
522  print("u16");
523  break;
524  case BasicType::U32:
525  print("u32");
526  break;
527  case BasicType::U64:
528  print("u64");
529  break;
530  case BasicType::U128:
531  print("u128");
532  break;
533  case BasicType::USize:
534  print("usize");
535  break;
536  case BasicType::F32:
537  print("f32");
538  break;
539  case BasicType::F64:
540  print("f64");
541  break;
542  case BasicType::Str:
543  print("str");
544  break;
545  case BasicType::Placeholder:
546  print("_");
547  break;
548  case BasicType::Unit:
549  print("()");
550  break;
551  case BasicType::Variadic:
552  print("...");
553  break;
554  case BasicType::Never:
555  print("!");
556  break;
557  }
558 }
559 
560 // <type> = | <basic-type>
561 // | <path> // named type
562 // | "A" <type> <const> // [T; N]
563 // | "S" <type> // [T]
564 // | "T" {<type>} "E" // (T1, T2, T3, ...)
565 // | "R" [<lifetime>] <type> // &T
566 // | "Q" [<lifetime>] <type> // &mut T
567 // | "P" <type> // *const T
568 // | "O" <type> // *mut T
569 // | "F" <fn-sig> // fn(...) -> ...
570 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
571 // | <backref> // backref
572 void Demangler::demangleType() {
573  if (Error || RecursionLevel >= MaxRecursionLevel) {
574  Error = true;
575  return;
576  }
577  SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
578 
579  size_t Start = Position;
580  char C = consume();
581  BasicType Type;
582  if (parseBasicType(C, Type))
583  return printBasicType(Type);
584 
585  switch (C) {
586  case 'A':
587  print("[");
588  demangleType();
589  print("; ");
590  demangleConst();
591  print("]");
592  break;
593  case 'S':
594  print("[");
595  demangleType();
596  print("]");
597  break;
598  case 'T': {
599  print("(");
600  size_t I = 0;
601  for (; !Error && !consumeIf('E'); ++I) {
602  if (I > 0)
603  print(", ");
604  demangleType();
605  }
606  if (I == 1)
607  print(",");
608  print(")");
609  break;
610  }
611  case 'R':
612  case 'Q':
613  print('&');
614  if (consumeIf('L')) {
615  if (auto Lifetime = parseBase62Number()) {
616  printLifetime(Lifetime);
617  print(' ');
618  }
619  }
620  if (C == 'Q')
621  print("mut ");
622  demangleType();
623  break;
624  case 'P':
625  print("*const ");
626  demangleType();
627  break;
628  case 'O':
629  print("*mut ");
630  demangleType();
631  break;
632  case 'F':
633  demangleFnSig();
634  break;
635  case 'D':
636  demangleDynBounds();
637  if (consumeIf('L')) {
638  if (auto Lifetime = parseBase62Number()) {
639  print(" + ");
640  printLifetime(Lifetime);
641  }
642  } else {
643  Error = true;
644  }
645  break;
646  case 'B':
647  demangleBackref([&] { demangleType(); });
648  break;
649  default:
650  Position = Start;
651  demanglePath(IsInType::Yes);
652  break;
653  }
654 }
655 
656 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
657 // <abi> = "C"
658 // | <undisambiguated-identifier>
659 void Demangler::demangleFnSig() {
660  SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
661  demangleOptionalBinder();
662 
663  if (consumeIf('U'))
664  print("unsafe ");
665 
666  if (consumeIf('K')) {
667  print("extern \"");
668  if (consumeIf('C')) {
669  print("C");
670  } else {
671  Identifier Ident = parseIdentifier();
672  if (Ident.Punycode)
673  Error = true;
674  for (char C : Ident.Name) {
675  // When mangling ABI string, the "-" is replaced with "_".
676  if (C == '_')
677  C = '-';
678  print(C);
679  }
680  }
681  print("\" ");
682  }
683 
684  print("fn(");
685  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
686  if (I > 0)
687  print(", ");
688  demangleType();
689  }
690  print(")");
691 
692  if (consumeIf('u')) {
693  // Skip the unit type from the output.
694  } else {
695  print(" -> ");
696  demangleType();
697  }
698 }
699 
700 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
701 void Demangler::demangleDynBounds() {
702  SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
703  print("dyn ");
704  demangleOptionalBinder();
705  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
706  if (I > 0)
707  print(" + ");
708  demangleDynTrait();
709  }
710 }
711 
712 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
713 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
714 void Demangler::demangleDynTrait() {
715  bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
716  while (!Error && consumeIf('p')) {
717  if (!IsOpen) {
718  IsOpen = true;
719  print('<');
720  } else {
721  print(", ");
722  }
723  print(parseIdentifier().Name);
724  print(" = ");
725  demangleType();
726  }
727  if (IsOpen)
728  print(">");
729 }
730 
731 // Demangles optional binder and updates the number of bound lifetimes.
732 //
733 // <binder> = "G" <base-62-number>
734 void Demangler::demangleOptionalBinder() {
735  uint64_t Binder = parseOptionalBase62Number('G');
736  if (Error || Binder == 0)
737  return;
738 
739  // In valid inputs each bound lifetime is referenced later. Referencing a
740  // lifetime requires at least one byte of input. Reject inputs that are too
741  // short to reference all bound lifetimes. Otherwise demangling of invalid
742  // binders could generate excessive amounts of output.
743  if (Binder >= Input.size() - BoundLifetimes) {
744  Error = true;
745  return;
746  }
747 
748  print("for<");
749  for (size_t I = 0; I != Binder; ++I) {
750  BoundLifetimes += 1;
751  if (I > 0)
752  print(", ");
753  printLifetime(1);
754  }
755  print("> ");
756 }
757 
758 // <const> = <basic-type> <const-data>
759 // | "p" // placeholder
760 // | <backref>
761 void Demangler::demangleConst() {
762  if (Error || RecursionLevel >= MaxRecursionLevel) {
763  Error = true;
764  return;
765  }
766  SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
767 
768  char C = consume();
769  BasicType Type;
770  if (parseBasicType(C, Type)) {
771  switch (Type) {
772  case BasicType::I8:
773  case BasicType::I16:
774  case BasicType::I32:
775  case BasicType::I64:
776  case BasicType::I128:
777  case BasicType::ISize:
778  case BasicType::U8:
779  case BasicType::U16:
780  case BasicType::U32:
781  case BasicType::U64:
782  case BasicType::U128:
783  case BasicType::USize:
784  demangleConstInt();
785  break;
786  case BasicType::Bool:
787  demangleConstBool();
788  break;
789  case BasicType::Char:
790  demangleConstChar();
791  break;
792  case BasicType::Placeholder:
793  print('_');
794  break;
795  default:
796  Error = true;
797  break;
798  }
799  } else if (C == 'B') {
800  demangleBackref([&] { demangleConst(); });
801  } else {
802  Error = true;
803  }
804 }
805 
806 // <const-data> = ["n"] <hex-number>
807 void Demangler::demangleConstInt() {
808  if (consumeIf('n'))
809  print('-');
810 
811  StringView HexDigits;
812  uint64_t Value = parseHexNumber(HexDigits);
813  if (HexDigits.size() <= 16) {
814  printDecimalNumber(Value);
815  } else {
816  print("0x");
817  print(HexDigits);
818  }
819 }
820 
821 // <const-data> = "0_" // false
822 // | "1_" // true
823 void Demangler::demangleConstBool() {
824  StringView HexDigits;
825  parseHexNumber(HexDigits);
826  if (HexDigits == "0")
827  print("false");
828  else if (HexDigits == "1")
829  print("true");
830  else
831  Error = true;
832 }
833 
834 /// Returns true if CodePoint represents a printable ASCII character.
835 static bool isAsciiPrintable(uint64_t CodePoint) {
836  return 0x20 <= CodePoint && CodePoint <= 0x7e;
837 }
838 
839 // <const-data> = <hex-number>
840 void Demangler::demangleConstChar() {
841  StringView HexDigits;
842  uint64_t CodePoint = parseHexNumber(HexDigits);
843  if (Error || HexDigits.size() > 6) {
844  Error = true;
845  return;
846  }
847 
848  print("'");
849  switch (CodePoint) {
850  case '\t':
851  print(R"(\t)");
852  break;
853  case '\r':
854  print(R"(\r)");
855  break;
856  case '\n':
857  print(R"(\n)");
858  break;
859  case '\\':
860  print(R"(\\)");
861  break;
862  case '"':
863  print(R"(")");
864  break;
865  case '\'':
866  print(R"(\')");
867  break;
868  default:
869  if (isAsciiPrintable(CodePoint)) {
870  char C = CodePoint;
871  print(C);
872  } else {
873  print(R"(\u{)");
874  print(HexDigits);
875  print('}');
876  }
877  break;
878  }
879  print('\'');
880 }
881 
882 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
883 Identifier Demangler::parseIdentifier() {
884  bool Punycode = consumeIf('u');
885  uint64_t Bytes = parseDecimalNumber();
886 
887  // Underscore resolves the ambiguity when identifier starts with a decimal
888  // digit or another underscore.
889  consumeIf('_');
890 
891  if (Error || Bytes > Input.size() - Position) {
892  Error = true;
893  return {};
894  }
895  StringView S = Input.substr(Position, Bytes);
896  Position += Bytes;
897 
898  if (!std::all_of(S.begin(), S.end(), isValid)) {
899  Error = true;
900  return {};
901  }
902 
903  return {S, Punycode};
904 }
905 
906 // Parses optional base 62 number. The presence of a number is determined using
907 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
908 //
909 // This function is indended for parsing disambiguators and binders which when
910 // not present have their value interpreted as 0, and otherwise as decoded
911 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
912 // 2. When "G" is absent value is 0.
913 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
914  if (!consumeIf(Tag))
915  return 0;
916 
917  uint64_t N = parseBase62Number();
918  if (Error || !addAssign(N, 1))
919  return 0;
920 
921  return N;
922 }
923 
924 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
925 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
926 // "1_" encodes 2, etc.
927 //
928 // <base-62-number> = {<0-9a-zA-Z>} "_"
929 uint64_t Demangler::parseBase62Number() {
930  if (consumeIf('_'))
931  return 0;
932 
933  uint64_t Value = 0;
934 
935  while (true) {
936  uint64_t Digit;
937  char C = consume();
938 
939  if (C == '_') {
940  break;
941  } else if (isDigit(C)) {
942  Digit = C - '0';
943  } else if (isLower(C)) {
944  Digit = 10 + (C - 'a');
945  } else if (isUpper(C)) {
946  Digit = 10 + 26 + (C - 'A');
947  } else {
948  Error = true;
949  return 0;
950  }
951 
952  if (!mulAssign(Value, 62))
953  return 0;
954 
955  if (!addAssign(Value, Digit))
956  return 0;
957  }
958 
959  if (!addAssign(Value, 1))
960  return 0;
961 
962  return Value;
963 }
964 
965 // Parses a decimal number that had been encoded without any leading zeros.
966 //
967 // <decimal-number> = "0"
968 // | <1-9> {<0-9>}
969 uint64_t Demangler::parseDecimalNumber() {
970  char C = look();
971  if (!isDigit(C)) {
972  Error = true;
973  return 0;
974  }
975 
976  if (C == '0') {
977  consume();
978  return 0;
979  }
980 
981  uint64_t Value = 0;
982 
983  while (isDigit(look())) {
984  if (!mulAssign(Value, 10)) {
985  Error = true;
986  return 0;
987  }
988 
989  uint64_t D = consume() - '0';
990  if (!addAssign(Value, D))
991  return 0;
992  }
993 
994  return Value;
995 }
996 
997 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
998 // value and stores hex digits in HexDigits. The return value is unspecified if
999 // HexDigits.size() > 16.
1000 //
1001 // <hex-number> = "0_"
1002 // | <1-9a-f> {<0-9a-f>} "_"
1003 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
1004  size_t Start = Position;
1005  uint64_t Value = 0;
1006 
1007  if (!isHexDigit(look()))
1008  Error = true;
1009 
1010  if (consumeIf('0')) {
1011  if (!consumeIf('_'))
1012  Error = true;
1013  } else {
1014  while (!Error && !consumeIf('_')) {
1015  char C = consume();
1016  Value *= 16;
1017  if (isDigit(C))
1018  Value += C - '0';
1019  else if ('a' <= C && C <= 'f')
1020  Value += 10 + (C - 'a');
1021  else
1022  Error = true;
1023  }
1024  }
1025 
1026  if (Error) {
1027  HexDigits = StringView();
1028  return 0;
1029  }
1030 
1031  size_t End = Position - 1;
1032  assert(Start < End);
1033  HexDigits = Input.substr(Start, End - Start);
1034  return Value;
1035 }
1036 
1037 void Demangler::print(char C) {
1038  if (Error || !Print)
1039  return;
1040 
1041  Output += C;
1042 }
1043 
1044 void Demangler::print(StringView S) {
1045  if (Error || !Print)
1046  return;
1047 
1048  Output += S;
1049 }
1050 
1051 void Demangler::printDecimalNumber(uint64_t N) {
1052  if (Error || !Print)
1053  return;
1054 
1055  Output << N;
1056 }
1057 
1058 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1059 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1060 // bound by one of the enclosing binders.
1061 void Demangler::printLifetime(uint64_t Index) {
1062  if (Index == 0) {
1063  print("'_");
1064  return;
1065  }
1066 
1067  if (Index - 1 >= BoundLifetimes) {
1068  Error = true;
1069  return;
1070  }
1071 
1072  uint64_t Depth = BoundLifetimes - Index;
1073  print('\'');
1074  if (Depth < 26) {
1075  char C = 'a' + Depth;
1076  print(C);
1077  } else {
1078  print('z');
1079  printDecimalNumber(Depth - 26 + 1);
1080  }
1081 }
1082 
1083 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1084  if (isLower(C)) {
1085  Value = C - 'a';
1086  return true;
1087  }
1088 
1089  if (isDigit(C)) {
1090  Value = 26 + (C - '0');
1091  return true;
1092  }
1093 
1094  return false;
1095 }
1096 
1097 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1098  char *Buffer = Output.getBuffer();
1099  char *Start = Buffer + StartIdx;
1100  char *End = Buffer + Output.getCurrentPosition();
1101  Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1102 }
1103 
1104 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1105 // CodePoint is not a valid unicode scalar value.
1106 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1107  if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1108  return false;
1109 
1110  if (CodePoint <= 0x7F) {
1111  Output[0] = CodePoint;
1112  return true;
1113  }
1114 
1115  if (CodePoint <= 0x7FF) {
1116  Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1117  Output[1] = 0x80 | (CodePoint & 0x3F);
1118  return true;
1119  }
1120 
1121  if (CodePoint <= 0xFFFF) {
1122  Output[0] = 0xE0 | (CodePoint >> 12);
1123  Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1124  Output[2] = 0x80 | (CodePoint & 0x3F);
1125  return true;
1126  }
1127 
1128  if (CodePoint <= 0x10FFFF) {
1129  Output[0] = 0xF0 | (CodePoint >> 18);
1130  Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1131  Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1132  Output[3] = 0x80 | (CodePoint & 0x3F);
1133  return true;
1134  }
1135 
1136  return false;
1137 }
1138 
1139 // Decodes string encoded using punycode and appends results to Output.
1140 // Returns true if decoding was successful.
1141 static bool decodePunycode(StringView Input, OutputBuffer &Output) {
1142  size_t OutputSize = Output.getCurrentPosition();
1143  size_t InputIdx = 0;
1144 
1145  // Rust uses an underscore as a delimiter.
1146  size_t DelimiterPos = StringView::npos;
1147  for (size_t I = 0; I != Input.size(); ++I)
1148  if (Input[I] == '_')
1149  DelimiterPos = I;
1150 
1151  if (DelimiterPos != StringView::npos) {
1152  // Copy basic code points before the last delimiter to the output.
1153  for (; InputIdx != DelimiterPos; ++InputIdx) {
1154  char C = Input[InputIdx];
1155  if (!isValid(C))
1156  return false;
1157  // Code points are padded with zeros while decoding is in progress.
1158  char UTF8[4] = {C};
1159  Output += StringView(UTF8, UTF8 + 4);
1160  }
1161  // Skip over the delimiter.
1162  ++InputIdx;
1163  }
1164 
1165  size_t Base = 36;
1166  size_t Skew = 38;
1167  size_t Bias = 72;
1168  size_t N = 0x80;
1169  size_t TMin = 1;
1170  size_t TMax = 26;
1171  size_t Damp = 700;
1172 
1173  auto Adapt = [&](size_t Delta, size_t NumPoints) {
1174  Delta /= Damp;
1175  Delta += Delta / NumPoints;
1176  Damp = 2;
1177 
1178  size_t K = 0;
1179  while (Delta > (Base - TMin) * TMax / 2) {
1180  Delta /= Base - TMin;
1181  K += Base;
1182  }
1183  return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1184  };
1185 
1186  // Main decoding loop.
1187  for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1188  size_t OldI = I;
1189  size_t W = 1;
1191  for (size_t K = Base; true; K += Base) {
1192  if (InputIdx == Input.size())
1193  return false;
1194  char C = Input[InputIdx++];
1195  size_t Digit = 0;
1196  if (!decodePunycodeDigit(C, Digit))
1197  return false;
1198 
1199  if (Digit > (Max - I) / W)
1200  return false;
1201  I += Digit * W;
1202 
1203  size_t T;
1204  if (K <= Bias)
1205  T = TMin;
1206  else if (K >= Bias + TMax)
1207  T = TMax;
1208  else
1209  T = K - Bias;
1210 
1211  if (Digit < T)
1212  break;
1213 
1214  if (W > Max / (Base - T))
1215  return false;
1216  W *= (Base - T);
1217  }
1218  size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1219  Bias = Adapt(I - OldI, NumPoints);
1220 
1221  if (I / NumPoints > Max - N)
1222  return false;
1223  N += I / NumPoints;
1224  I = I % NumPoints;
1225 
1226  // Insert N at position I in the output.
1227  char UTF8[4] = {};
1228  if (!encodeUTF8(N, UTF8))
1229  return false;
1230  Output.insert(OutputSize + I * 4, UTF8, 4);
1231  }
1232 
1233  removeNullBytes(Output, OutputSize);
1234  return true;
1235 }
1236 
1237 void Demangler::printIdentifier(Identifier Ident) {
1238  if (Error || !Print)
1239  return;
1240 
1241  if (Ident.Punycode) {
1242  if (!decodePunycode(Ident.Name, Output))
1243  Error = true;
1244  } else {
1245  print(Ident.Name);
1246  }
1247 }
1248 
1249 char Demangler::look() const {
1250  if (Error || Position >= Input.size())
1251  return 0;
1252 
1253  return Input[Position];
1254 }
1255 
1256 char Demangler::consume() {
1257  if (Error || Position >= Input.size()) {
1258  Error = true;
1259  return 0;
1260  }
1261 
1262  return Input[Position++];
1263 }
1264 
1265 bool Demangler::consumeIf(char Prefix) {
1266  if (Error || Position >= Input.size() || Input[Position] != Prefix)
1267  return false;
1268 
1269  Position += 1;
1270  return true;
1271 }
1272 
1273 /// Computes A + B. When computation wraps around sets the error and returns
1274 /// false. Otherwise assigns the result to A and returns true.
1275 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1277  Error = true;
1278  return false;
1279  }
1280 
1281  A += B;
1282  return true;
1283 }
1284 
1285 /// Computes A * B. When computation wraps around sets the error and returns
1286 /// false. Otherwise assigns the result to A and returns true.
1287 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1288  if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1289  Error = true;
1290  return false;
1291  }
1292 
1293  A *= B;
1294  return true;
1295 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::wasm::ValType::I32
@ I32
llvm::lltok::Error
@ Error
Definition: LLToken.h:21
llvm::AMDGPU::HSAMD::ValueType::U16
@ U16
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:148
llvm::pdb::PDB_BuiltinType::Char
@ Char
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:164
T
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:208
U64
instcombine should handle this C2 when and C2 are unsigned Similarly for udiv and signed operands Currently InstCombine avoids this transform but will do it when the signs of the operands and the sign of the divide match See the FIXME in InstructionCombining cpp in the visitSetCondInst method after the switch case for Instruction::UDiv(around line 4447) for more details. The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of this const ruct.[LOOP OPTIMIZATION] SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization opportunities in its double_array_divs_variable function typedef unsigned long long U64
Definition: README.txt:268
llvm::AMDGPU::HSAMD::ValueType::I8
@ I8
llvm::AMDGPU::HSAMD::ValueType::U32
@ U32
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Utility.h
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::dwarf::Tag
Tag
Definition: Dwarf.h:104
Demangler
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
Definition: ItaniumDemangle.cpp:322
encodeUTF8
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
Definition: YAMLParser.cpp:557
llvm::SwiftAsyncFramePointerMode::Never
@ Never
Never set the bit.
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1649
llvm::InlinerFunctionImportStatsOpts::No
@ No
llvm::wasm::ValType::F64
@ F64
parseBasicType
static bool parseBasicType(char C, BasicType &Type)
Definition: RustDemangle.cpp:422
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::wasm::ValType::F32
@ F32
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
StringView::npos
static const size_t npos
Definition: StringView.h:27
isLower
static bool isLower(const char C)
Definition: RustDemangle.cpp:212
llvm::demangle
std::string demangle(const std::string &MangledName)
Attempt to demangle a string using different demangling schemes.
Definition: Demangle.cpp:29
StringView.h
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
isAsciiPrintable
static bool isAsciiPrintable(uint64_t CodePoint)
Returns true if CodePoint represents a printable ASCII character.
Definition: RustDemangle.cpp:835
llvm::wasm::ValType::I64
@ I64
llvm::demangle_memory_alloc_failure
@ demangle_memory_alloc_failure
Definition: Demangle.h:27
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::demangle_success
@ demangle_success
Definition: Demangle.h:28
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AMDGPU::HSAMD::ValueType::I16
@ I16
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::codeview::consume
Error consume(BinaryStreamReader &Reader)
Definition: RecordSerialization.h:46
Status
Definition: SIModeRegister.cpp:28
llvm::sys::fs::remove
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
isDigit
static bool isDigit(const char C)
Definition: RustDemangle.cpp:206
Demangle.h
A
* A
Definition: README_ALTIVEC.txt:89
llvm::demangle_invalid_args
@ demangle_invalid_args
Definition: Demangle.h:25
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::AMDGPU::HSAMD::ValueType::U8
@ U8
llvm::pdb::Bool
@ Bool
Definition: PDBTypes.h:406
llvm::MCID::Variadic
@ Variadic
Definition: MCInstrDesc.h:148
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:309
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
initializeOutputBuffer
bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB, size_t InitSize)
Definition: Utility.h:196
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::object::Identifier
@ Identifier
Definition: COFFModuleDefinition.cpp:36
llvm::PrevailingType::Yes
@ Yes
llvm::demangle_invalid_mangled_name
@ demangle_invalid_mangled_name
Definition: Demangle.h:26
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:217
llvm::pdb::DbgHeaderType::Max
@ Max
N
#define N
isHexDigit
static bool isHexDigit(const char C)
Definition: RustDemangle.cpp:208
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:49
consume
static bool consume(InternalInstruction *insn, T &ptr)
Definition: X86Disassembler.cpp:192
isUpper
static bool isUpper(const char C)
Definition: RustDemangle.cpp:214
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::UTF8
unsigned char UTF8
Definition: ConvertUTF.h:111
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::rustDemangle
char * rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status)
Definition: RustDemangle.cpp:150