LLVM  15.0.0git
DLangDemangle.cpp
Go to the documentation of this file.
1 //===--- DLangDemangle.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines a demangler for the D programming language as specified
11 /// in the ABI specification, available at:
12 /// https://dlang.org/spec/abi.html#name_mangling
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Demangle/Demangle.h"
18 #include "llvm/Demangle/Utility.h"
19 
20 #include <cctype>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 using llvm::itanium_demangle::OutputBuffer;
26 using llvm::itanium_demangle::StringView;
27 
28 namespace {
29 
30 /// Demangle information structure.
31 struct Demangler {
32  /// Initialize the information structure we use to pass around information.
33  ///
34  /// \param Mangled String to demangle.
35  Demangler(const char *Mangled);
36 
37  /// Extract and demangle the mangled symbol and append it to the output
38  /// string.
39  ///
40  /// \param Demangled Output buffer to write the demangled name.
41  ///
42  /// \return The remaining string on success or nullptr on failure.
43  ///
44  /// \see https://dlang.org/spec/abi.html#name_mangling .
45  /// \see https://dlang.org/spec/abi.html#MangledName .
46  const char *parseMangle(OutputBuffer *Demangled);
47 
48 private:
49  /// Extract and demangle a given mangled symbol and append it to the output
50  /// string.
51  ///
52  /// \param Demangled output buffer to write the demangled name.
53  /// \param Mangled mangled symbol to be demangled.
54  ///
55  /// \return The remaining string on success or nullptr on failure.
56  ///
57  /// \see https://dlang.org/spec/abi.html#name_mangling .
58  /// \see https://dlang.org/spec/abi.html#MangledName .
59  const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
60 
61  /// Extract the number from a given string.
62  ///
63  /// \param Mangled string to extract the number.
64  /// \param Ret assigned result value.
65  ///
66  /// \return The remaining string on success or nullptr on failure.
67  ///
68  /// \note A result larger than UINT_MAX is considered a failure.
69  ///
70  /// \see https://dlang.org/spec/abi.html#Number .
71  const char *decodeNumber(const char *Mangled, unsigned long &Ret);
72 
73  /// Extract the back reference position from a given string.
74  ///
75  /// \param Mangled string to extract the back reference position.
76  /// \param Ret assigned result value.
77  ///
78  /// \return the remaining string on success or nullptr on failure.
79  ///
80  /// \note Ret is always >= 0 on success, and unspecified on failure
81  ///
82  /// \see https://dlang.org/spec/abi.html#back_ref .
83  /// \see https://dlang.org/spec/abi.html#NumberBackRef .
84  const char *decodeBackrefPos(const char *Mangled, long &Ret);
85 
86  /// Extract the symbol pointed by the back reference form a given string.
87  ///
88  /// \param Mangled string to extract the back reference position.
89  /// \param Ret assigned result value.
90  ///
91  /// \return the remaining string on success or nullptr on failure.
92  ///
93  /// \see https://dlang.org/spec/abi.html#back_ref .
94  const char *decodeBackref(const char *Mangled, const char *&Ret);
95 
96  /// Extract and demangle backreferenced symbol from a given mangled symbol
97  /// and append it to the output string.
98  ///
99  /// \param Demangled output buffer to write the demangled name.
100  /// \param Mangled mangled symbol to be demangled.
101  ///
102  /// \return the remaining string on success or nullptr on failure.
103  ///
104  /// \see https://dlang.org/spec/abi.html#back_ref .
105  /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
106  const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled);
107 
108  /// Extract and demangle backreferenced type from a given mangled symbol
109  /// and append it to the output string.
110  ///
111  /// \param Mangled mangled symbol to be demangled.
112  ///
113  /// \return the remaining string on success or nullptr on failure.
114  ///
115  /// \see https://dlang.org/spec/abi.html#back_ref .
116  /// \see https://dlang.org/spec/abi.html#TypeBackRef .
117  const char *parseTypeBackref(const char *Mangled);
118 
119  /// Check whether it is the beginning of a symbol name.
120  ///
121  /// \param Mangled string to extract the symbol name.
122  ///
123  /// \return true on success, false otherwise.
124  ///
125  /// \see https://dlang.org/spec/abi.html#SymbolName .
126  bool isSymbolName(const char *Mangled);
127 
128  /// Extract and demangle an identifier from a given mangled symbol append it
129  /// to the output string.
130  ///
131  /// \param Demangled Output buffer to write the demangled name.
132  /// \param Mangled Mangled symbol to be demangled.
133  ///
134  /// \return The remaining string on success or nullptr on failure.
135  ///
136  /// \see https://dlang.org/spec/abi.html#SymbolName .
137  const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
138 
139  /// Extract and demangle the plain identifier from a given mangled symbol and
140  /// prepend/append it to the output string, with a special treatment for some
141  /// magic compiler generated symbols.
142  ///
143  /// \param Demangled Output buffer to write the demangled name.
144  /// \param Mangled Mangled symbol to be demangled.
145  /// \param Len Length of the mangled symbol name.
146  ///
147  /// \return The remaining string on success or nullptr on failure.
148  ///
149  /// \see https://dlang.org/spec/abi.html#LName .
150  const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
151  unsigned long Len);
152 
153  /// Extract and demangle the qualified symbol from a given mangled symbol
154  /// append it to the output string.
155  ///
156  /// \param Demangled Output buffer to write the demangled name.
157  /// \param Mangled Mangled symbol to be demangled.
158  ///
159  /// \return The remaining string on success or nullptr on failure.
160  ///
161  /// \see https://dlang.org/spec/abi.html#QualifiedName .
162  const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
163 
164  /// Extract and demangle a type from a given mangled symbol append it to
165  /// the output string.
166  ///
167  /// \param Mangled mangled symbol to be demangled.
168  ///
169  /// \return the remaining string on success or nullptr on failure.
170  ///
171  /// \see https://dlang.org/spec/abi.html#Type .
172  const char *parseType(const char *Mangled);
173 
174  /// The string we are demangling.
175  const char *Str;
176  /// The index of the last back reference.
177  int LastBackref;
178 };
179 
180 } // namespace
181 
182 const char *Demangler::decodeNumber(const char *Mangled, unsigned long &Ret) {
183  // Return nullptr if trying to extract something that isn't a digit.
184  if (Mangled == nullptr || !std::isdigit(*Mangled))
185  return nullptr;
186 
187  unsigned long Val = 0;
188 
189  do {
190  unsigned long Digit = Mangled[0] - '0';
191 
192  // Check for overflow.
193  if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
194  return nullptr;
195 
196  Val = Val * 10 + Digit;
197  ++Mangled;
198  } while (std::isdigit(*Mangled));
199 
200  if (*Mangled == '\0')
201  return nullptr;
202 
203  Ret = Val;
204  return Mangled;
205 }
206 
207 const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
208  // Return nullptr if trying to extract something that isn't a digit
209  if (Mangled == nullptr || !std::isalpha(*Mangled))
210  return nullptr;
211 
212  // Any identifier or non-basic type that has been emitted to the mangled
213  // symbol before will not be emitted again, but is referenced by a special
214  // sequence encoding the relative position of the original occurrence in the
215  // mangled symbol name.
216  // Numbers in back references are encoded with base 26 by upper case letters
217  // A-Z for higher digits but lower case letters a-z for the last digit.
218  // NumberBackRef:
219  // [a-z]
220  // [A-Z] NumberBackRef
221  // ^
222  unsigned long Val = 0;
223 
224  while (std::isalpha(*Mangled)) {
225  // Check for overflow
226  if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
227  break;
228 
229  Val *= 26;
230 
231  if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
232  Val += Mangled[0] - 'a';
233  if ((long)Val <= 0)
234  break;
235  Ret = Val;
236  return Mangled + 1;
237  }
238 
239  Val += Mangled[0] - 'A';
240  ++Mangled;
241  }
242 
243  return nullptr;
244 }
245 
246 const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) {
247  assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!");
248  Ret = nullptr;
249 
250  // Position of 'Q'
251  const char *Qpos = Mangled;
252  long RefPos;
253  ++Mangled;
254 
255  Mangled = decodeBackrefPos(Mangled, RefPos);
256  if (Mangled == nullptr)
257  return nullptr;
258 
259  if (RefPos > Qpos - Str)
260  return nullptr;
261 
262  // Set the position of the back reference.
263  Ret = Qpos - RefPos;
264 
265  return Mangled;
266 }
267 
268 const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled,
269  const char *Mangled) {
270  // An identifier back reference always points to a digit 0 to 9.
271  // IdentifierBackRef:
272  // Q NumberBackRef
273  // ^
274  const char *Backref;
275  unsigned long Len;
276 
277  // Get position of the back reference
278  Mangled = decodeBackref(Mangled, Backref);
279 
280  // Must point to a simple identifier
281  Backref = decodeNumber(Backref, Len);
282  if (Backref == nullptr || strlen(Backref) < Len)
283  return nullptr;
284 
285  Backref = parseLName(Demangled, Backref, Len);
286  if (Backref == nullptr)
287  return nullptr;
288 
289  return Mangled;
290 }
291 
292 const char *Demangler::parseTypeBackref(const char *Mangled) {
293  // A type back reference always points to a letter.
294  // TypeBackRef:
295  // Q NumberBackRef
296  // ^
297  const char *Backref;
298 
299  // If we appear to be moving backwards through the mangle string, then
300  // bail as this may be a recursive back reference.
301  if (Mangled - Str >= LastBackref)
302  return nullptr;
303 
304  int SaveRefPos = LastBackref;
305  LastBackref = Mangled - Str;
306 
307  // Get position of the back reference.
308  Mangled = decodeBackref(Mangled, Backref);
309 
310  // Can't decode back reference.
311  if (Backref == nullptr)
312  return nullptr;
313 
314  // TODO: Add support for function type back references.
315  Backref = parseType(Backref);
316 
317  LastBackref = SaveRefPos;
318 
319  if (Backref == nullptr)
320  return nullptr;
321 
322  return Mangled;
323 }
324 
325 bool Demangler::isSymbolName(const char *Mangled) {
326  long Ret;
327  const char *Qref = Mangled;
328 
329  if (std::isdigit(*Mangled))
330  return true;
331 
332  // TODO: Handle template instances.
333 
334  if (*Mangled != 'Q')
335  return false;
336 
337  Mangled = decodeBackrefPos(Mangled + 1, Ret);
338  if (Mangled == nullptr || Ret > Qref - Str)
339  return false;
340 
341  return std::isdigit(Qref[-Ret]);
342 }
343 
344 const char *Demangler::parseMangle(OutputBuffer *Demangled,
345  const char *Mangled) {
346  // A D mangled symbol is comprised of both scope and type information.
347  // MangleName:
348  // _D QualifiedName Type
349  // _D QualifiedName Z
350  // ^
351  // The caller should have guaranteed that the start pointer is at the
352  // above location.
353  // Note that type is never a function type, but only the return type of
354  // a function or the type of a variable.
355  Mangled += 2;
356 
357  Mangled = parseQualified(Demangled, Mangled);
358 
359  if (Mangled != nullptr) {
360  // Artificial symbols end with 'Z' and have no type.
361  if (*Mangled == 'Z')
362  ++Mangled;
363  else {
364  Mangled = parseType(Mangled);
365  }
366  }
367 
368  return Mangled;
369 }
370 
371 const char *Demangler::parseQualified(OutputBuffer *Demangled,
372  const char *Mangled) {
373  // Qualified names are identifiers separated by their encoded length.
374  // Nested functions also encode their argument types without specifying
375  // what they return.
376  // QualifiedName:
377  // SymbolFunctionName
378  // SymbolFunctionName QualifiedName
379  // ^
380  // SymbolFunctionName:
381  // SymbolName
382  // SymbolName TypeFunctionNoReturn
383  // SymbolName M TypeFunctionNoReturn
384  // SymbolName M TypeModifiers TypeFunctionNoReturn
385  // The start pointer should be at the above location.
386 
387  // Whether it has more than one symbol
388  size_t NotFirst = false;
389  do {
390  // Skip over anonymous symbols.
391  if (*Mangled == '0') {
392  do
393  ++Mangled;
394  while (*Mangled == '0');
395 
396  continue;
397  }
398 
399  if (NotFirst)
400  *Demangled << '.';
401  NotFirst = true;
402 
403  Mangled = parseIdentifier(Demangled, Mangled);
404 
405  } while (Mangled && isSymbolName(Mangled));
406 
407  return Mangled;
408 }
409 
410 const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
411  const char *Mangled) {
412  unsigned long Len;
413 
414  if (Mangled == nullptr || *Mangled == '\0')
415  return nullptr;
416 
417  if (*Mangled == 'Q')
418  return parseSymbolBackref(Demangled, Mangled);
419 
420  // TODO: Parse lengthless template instances.
421 
422  const char *Endptr = decodeNumber(Mangled, Len);
423 
424  if (Endptr == nullptr || Len == 0)
425  return nullptr;
426 
427  if (strlen(Endptr) < Len)
428  return nullptr;
429 
430  Mangled = Endptr;
431 
432  // TODO: Parse template instances with a length prefix.
433 
434  // There can be multiple different declarations in the same function that
435  // have the same mangled name. To make the mangled names unique, a fake
436  // parent in the form `__Sddd' is added to the symbol.
437  if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
438  const char *NumPtr = Mangled + 3;
439  while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr))
440  ++NumPtr;
441 
442  if (Mangled + Len == NumPtr) {
443  // Skip over the fake parent.
444  Mangled += Len;
445  return parseIdentifier(Demangled, Mangled);
446  }
447 
448  // Else demangle it as a plain identifier.
449  }
450 
451  return parseLName(Demangled, Mangled, Len);
452 }
453 
454 const char *Demangler::parseType(const char *Mangled) {
455  if (*Mangled == '\0')
456  return nullptr;
457 
458  switch (*Mangled) {
459  // TODO: Parse type qualifiers.
460  // TODO: Parse function types.
461  // TODO: Parse compound types.
462  // TODO: Parse delegate types.
463  // TODO: Parse tuple types.
464 
465  // Basic types.
466  case 'i':
467  ++Mangled;
468  // TODO: Add type name dumping
469  return Mangled;
470 
471  // TODO: Add support for the rest of the basic types.
472 
473  // Back referenced type.
474  case 'Q':
475  return parseTypeBackref(Mangled);
476 
477  default: // unhandled.
478  return nullptr;
479  }
480 }
481 
482 const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
483  unsigned long Len) {
484  switch (Len) {
485  case 6:
486  if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
487  // The static initializer for a given symbol.
488  Demangled->prepend("initializer for ");
489  Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
490  Mangled += Len;
491  return Mangled;
492  }
493  if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
494  // The vtable symbol for a given class.
495  Demangled->prepend("vtable for ");
496  Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
497  Mangled += Len;
498  return Mangled;
499  }
500  break;
501 
502  case 7:
503  if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
504  // The classinfo symbol for a given class.
505  Demangled->prepend("ClassInfo for ");
506  Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
507  Mangled += Len;
508  return Mangled;
509  }
510  break;
511 
512  case 11:
513  if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
514  // The interface symbol for a given class.
515  Demangled->prepend("Interface for ");
516  Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
517  Mangled += Len;
518  return Mangled;
519  }
520  break;
521 
522  case 12:
523  if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
524  // The ModuleInfo symbol for a given module.
525  Demangled->prepend("ModuleInfo for ");
526  Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
527  Mangled += Len;
528  return Mangled;
529  }
530  break;
531  }
532 
533  *Demangled << StringView(Mangled, Len);
534  Mangled += Len;
535 
536  return Mangled;
537 }
538 
539 Demangler::Demangler(const char *Mangled)
540  : Str(Mangled), LastBackref(strlen(Mangled)) {}
541 
542 const char *Demangler::parseMangle(OutputBuffer *Demangled) {
543  return parseMangle(Demangled, this->Str);
544 }
545 
546 char *llvm::dlangDemangle(const char *MangledName) {
547  if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
548  return nullptr;
549 
550  OutputBuffer Demangled;
551  if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
552  return nullptr;
553 
554  if (strcmp(MangledName, "_Dmain") == 0) {
555  Demangled << "D main";
556  } else {
557 
558  Demangler D = Demangler(MangledName);
559  MangledName = D.parseMangle(&Demangled);
560 
561  // Check that the entire symbol was successfully demangled.
562  if (MangledName == nullptr || *MangledName != '\0') {
563  std::free(Demangled.getBuffer());
564  return nullptr;
565  }
566  }
567 
568  // OutputBuffer's internal buffer is not null terminated and therefore we need
569  // to add it to comply with C null terminated strings.
570  if (Demangled.getCurrentPosition() > 0) {
571  Demangled << '\0';
572  Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
573  return Demangled.getBuffer();
574  }
575 
576  std::free(Demangled.getBuffer());
577  return nullptr;
578 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
Utility.h
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
Demangler
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
Definition: ItaniumDemangle.cpp:366
llvm::parseType
Type * parseType(StringRef Asm, SMDiagnostic &Err, const Module &M, const SlotMapping *Slots=nullptr)
Parse a type in the given string.
Definition: Parser.cpp:192
StringView.h
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Demangle.h
llvm::dlangDemangle
char * dlangDemangle(const char *MangledName)
Definition: DLangDemangle.cpp:546
initializeOutputBuffer
bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB, size_t InitSize)
Definition: Utility.h:201
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340