1//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a demangler for Rust v0 mangled symbols as specified in
10// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Demangle/Demangle.h"
15#include "llvm/Demangle/StringViewExtras.h"
16#include "llvm/Demangle/Utility.h"
17
18#include <algorithm>
19#include <cassert>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <string_view>
24
25using namespace llvm;
26
27using llvm::itanium_demangle::OutputBuffer;
28using llvm::itanium_demangle::ScopedOverride;
29using llvm::itanium_demangle::starts_with;
30
31namespace {
32
33struct Identifier {
34  std::string_view Name;
35  bool Punycode;
36
37  bool empty() const { return Name.empty(); }
38};
39
40enum class BasicType {
41  Bool,
42  Char,
43  I8,
44  I16,
45  I32,
46  I64,
47  I128,
48  ISize,
49  U8,
50  U16,
51  U32,
52  U64,
53  U128,
54  USize,
55  F32,
56  F64,
57  Str,
58  Placeholder,
59  Unit,
60  Variadic,
61  Never,
62};
63
64enum class IsInType {
65  No,
66  Yes,
67};
68
69enum class LeaveGenericsOpen {
70  No,
71  Yes,
72};
73
74class Demangler {
75  // Maximum recursion level. Used to avoid stack overflow.
76  size_t MaxRecursionLevel;
77  // Current recursion level.
78  size_t RecursionLevel;
79  size_t BoundLifetimes;
80  // Input string that is being demangled with "_R" prefix removed.
81  std::string_view Input;
82  // Position in the input string.
83  size_t Position;
84  // When true, print methods append the output to the stream.
85  // When false, the output is suppressed.
86  bool Print;
87  // True if an error occurred.
88  bool Error;
89
90public:
91  // Demangled output.
92  OutputBuffer Output;
93
94  Demangler(size_t MaxRecursionLevel = 500);
95
96  bool demangle(std::string_view MangledName);
97
98private:
99  bool demanglePath(IsInType Type,
100                    LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
101  void demangleImplPath(IsInType InType);
102  void demangleGenericArg();
103  void demangleType();
104  void demangleFnSig();
105  void demangleDynBounds();
106  void demangleDynTrait();
107  void demangleOptionalBinder();
108  void demangleConst();
109  void demangleConstInt();
110  void demangleConstBool();
111  void demangleConstChar();
112
113  template <typename Callable> void demangleBackref(Callable Demangler) {
114    uint64_t Backref = parseBase62Number();
115    if (Error || Backref >= Position) {
116      Error = true;
117      return;
118    }
119
120    if (!Print)
121      return;
122
123    ScopedOverride<size_t> SavePosition(Position, Position);
124    Position = Backref;
125    Demangler();
126  }
127
128  Identifier parseIdentifier();
129  uint64_t parseOptionalBase62Number(char Tag);
130  uint64_t parseBase62Number();
131  uint64_t parseDecimalNumber();
132  uint64_t parseHexNumber(std::string_view &HexDigits);
133
134  void print(char C);
135  void print(std::string_view S);
136  void printDecimalNumber(uint64_t N);
137  void printBasicType(BasicType);
138  void printLifetime(uint64_t Index);
139  void printIdentifier(Identifier Ident);
140
141  char look() const;
142  char consume();
143  bool consumeIf(char Prefix);
144
145  bool addAssign(uint64_t &A, uint64_t B);
146  bool mulAssign(uint64_t &A, uint64_t B);
147};
148
149} // namespace
150
151char *llvm::rustDemangle(std::string_view MangledName) {
152  // Return early if mangled name doesn't look like a Rust symbol.
153  if (MangledName.empty() || !starts_with(MangledName, "_R"))
154    return nullptr;
155
156  Demangler D;
157  if (!D.demangle(MangledName)) {
158    std::free(D.Output.getBuffer());
159    return nullptr;
160  }
161
162  D.Output += '\0';
163
164  return D.Output.getBuffer();
165}
166
167Demangler::Demangler(size_t MaxRecursionLevel)
168    : MaxRecursionLevel(MaxRecursionLevel) {}
169
170static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
171
172static inline bool isHexDigit(const char C) {
173  return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
174}
175
176static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
177
178static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
179
180/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
181static inline bool isValid(const char C) {
182  return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
183}
184
185// Demangles Rust v0 mangled symbol. Returns true when successful, and false
186// otherwise. The demangled symbol is stored in Output field. It is
187// responsibility of the caller to free the memory behind the output stream.
188//
189// <symbol-name> = "_R" <path> [<instantiating-crate>]
190bool Demangler::demangle(std::string_view Mangled) {
191  Position = 0;
192  Error = false;
193  Print = true;
194  RecursionLevel = 0;
195  BoundLifetimes = 0;
196
197  if (!starts_with(Mangled, "_R")) {
198    Error = true;
199    return false;
200  }
201  Mangled.remove_prefix(2);
202  size_t Dot = Mangled.find('.');
203  Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);
204
205  demanglePath(IsInType::No);
206
207  if (Position != Input.size()) {
208    ScopedOverride<bool> SavePrint(Print, false);
209    demanglePath(IsInType::No);
210  }
211
212  if (Position != Input.size())
213    Error = true;
214
215  if (Dot != std::string_view::npos) {
216    print(" (");
217    print(Mangled.substr(Dot));
218    print(")");
219  }
220
221  return !Error;
222}
223
224// Demangles a path. InType indicates whether a path is inside a type. When
225// LeaveOpen is true, a closing `>` after generic arguments is omitted from the
226// output. Return value indicates whether generics arguments have been left
227// open.
228//
229// <path> = "C" <identifier>               // crate root
230//        | "M" <impl-path> <type>         // <T> (inherent impl)
231//        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
232//        | "Y" <type> <path>              // <T as Trait> (trait definition)
233//        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
234//        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
235//        | <backref>
236// <identifier> = [<disambiguator>] <undisambiguated-identifier>
237// <ns> = "C"      // closure
238//      | "S"      // shim
239//      | <A-Z>    // other special namespaces
240//      | <a-z>    // internal namespaces
241bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
242  if (Error || RecursionLevel >= MaxRecursionLevel) {
243    Error = true;
244    return false;
245  }
246  ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
247
248  switch (consume()) {
249  case 'C': {
250    parseOptionalBase62Number('s');
251    printIdentifier(parseIdentifier());
252    break;
253  }
254  case 'M': {
255    demangleImplPath(InType);
256    print("<");
257    demangleType();
258    print(">");
259    break;
260  }
261  case 'X': {
262    demangleImplPath(InType);
263    print("<");
264    demangleType();
265    print(" as ");
266    demanglePath(IsInType::Yes);
267    print(">");
268    break;
269  }
270  case 'Y': {
271    print("<");
272    demangleType();
273    print(" as ");
274    demanglePath(IsInType::Yes);
275    print(">");
276    break;
277  }
278  case 'N': {
279    char NS = consume();
280    if (!isLower(NS) && !isUpper(NS)) {
281      Error = true;
282      break;
283    }
284    demanglePath(InType);
285
286    uint64_t Disambiguator = parseOptionalBase62Number('s');
287    Identifier Ident = parseIdentifier();
288
289    if (isUpper(NS)) {
290      // Special namespaces
291      print("::{");
292      if (NS == 'C')
293        print("closure");
294      else if (NS == 'S')
295        print("shim");
296      else
297        print(NS);
298      if (!Ident.empty()) {
299        print(":");
300        printIdentifier(Ident);
301      }
302      print('#');
303      printDecimalNumber(Disambiguator);
304      print('}');
305    } else {
306      // Implementation internal namespaces.
307      if (!Ident.empty()) {
308        print("::");
309        printIdentifier(Ident);
310      }
311    }
312    break;
313  }
314  case 'I': {
315    demanglePath(InType);
316    // Omit "::" when in a type, where it is optional.
317    if (InType == IsInType::No)
318      print("::");
319    print("<");
320    for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
321      if (I > 0)
322        print(", ");
323      demangleGenericArg();
324    }
325    if (LeaveOpen == LeaveGenericsOpen::Yes)
326      return true;
327    else
328      print(">");
329    break;
330  }
331  case 'B': {
332    bool IsOpen = false;
333    demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
334    return IsOpen;
335  }
336  default:
337    Error = true;
338    break;
339  }
340
341  return false;
342}
343
344// <impl-path> = [<disambiguator>] <path>
345// <disambiguator> = "s" <base-62-number>
346void Demangler::demangleImplPath(IsInType InType) {
347  ScopedOverride<bool> SavePrint(Print, false);
348  parseOptionalBase62Number('s');
349  demanglePath(InType);
350}
351
352// <generic-arg> = <lifetime>
353//               | <type>
354//               | "K" <const>
355// <lifetime> = "L" <base-62-number>
356void Demangler::demangleGenericArg() {
357  if (consumeIf('L'))
358    printLifetime(parseBase62Number());
359  else if (consumeIf('K'))
360    demangleConst();
361  else
362    demangleType();
363}
364
365// <basic-type> = "a"      // i8
366//              | "b"      // bool
367//              | "c"      // char
368//              | "d"      // f64
369//              | "e"      // str
370//              | "f"      // f32
371//              | "h"      // u8
372//              | "i"      // isize
373//              | "j"      // usize
374//              | "l"      // i32
375//              | "m"      // u32
376//              | "n"      // i128
377//              | "o"      // u128
378//              | "s"      // i16
379//              | "t"      // u16
380//              | "u"      // ()
381//              | "v"      // ...
382//              | "x"      // i64
383//              | "y"      // u64
384//              | "z"      // !
385//              | "p"      // placeholder (e.g. for generic params), shown as _
386static bool parseBasicType(char C, BasicType &Type) {
387  switch (C) {
388  case 'a':
389    Type = BasicType::I8;
390    return true;
391  case 'b':
392    Type = BasicType::Bool;
393    return true;
394  case 'c':
395    Type = BasicType::Char;
396    return true;
397  case 'd':
398    Type = BasicType::F64;
399    return true;
400  case 'e':
401    Type = BasicType::Str;
402    return true;
403  case 'f':
404    Type = BasicType::F32;
405    return true;
406  case 'h':
407    Type = BasicType::U8;
408    return true;
409  case 'i':
410    Type = BasicType::ISize;
411    return true;
412  case 'j':
413    Type = BasicType::USize;
414    return true;
415  case 'l':
416    Type = BasicType::I32;
417    return true;
418  case 'm':
419    Type = BasicType::U32;
420    return true;
421  case 'n':
422    Type = BasicType::I128;
423    return true;
424  case 'o':
425    Type = BasicType::U128;
426    return true;
427  case 'p':
428    Type = BasicType::Placeholder;
429    return true;
430  case 's':
431    Type = BasicType::I16;
432    return true;
433  case 't':
434    Type = BasicType::U16;
435    return true;
436  case 'u':
437    Type = BasicType::Unit;
438    return true;
439  case 'v':
440    Type = BasicType::Variadic;
441    return true;
442  case 'x':
443    Type = BasicType::I64;
444    return true;
445  case 'y':
446    Type = BasicType::U64;
447    return true;
448  case 'z':
449    Type = BasicType::Never;
450    return true;
451  default:
452    return false;
453  }
454}
455
456void Demangler::printBasicType(BasicType Type) {
457  switch (Type) {
458  case BasicType::Bool:
459    print("bool");
460    break;
461  case BasicType::Char:
462    print("char");
463    break;
464  case BasicType::I8:
465    print("i8");
466    break;
467  case BasicType::I16:
468    print("i16");
469    break;
470  case BasicType::I32:
471    print("i32");
472    break;
473  case BasicType::I64:
474    print("i64");
475    break;
476  case BasicType::I128:
477    print("i128");
478    break;
479  case BasicType::ISize:
480    print("isize");
481    break;
482  case BasicType::U8:
483    print("u8");
484    break;
485  case BasicType::U16:
486    print("u16");
487    break;
488  case BasicType::U32:
489    print("u32");
490    break;
491  case BasicType::U64:
492    print("u64");
493    break;
494  case BasicType::U128:
495    print("u128");
496    break;
497  case BasicType::USize:
498    print("usize");
499    break;
500  case BasicType::F32:
501    print("f32");
502    break;
503  case BasicType::F64:
504    print("f64");
505    break;
506  case BasicType::Str:
507    print("str");
508    break;
509  case BasicType::Placeholder:
510    print("_");
511    break;
512  case BasicType::Unit:
513    print("()");
514    break;
515  case BasicType::Variadic:
516    print("...");
517    break;
518  case BasicType::Never:
519    print("!");
520    break;
521  }
522}
523
524// <type> = | <basic-type>
525//          | <path>                      // named type
526//          | "A" <type> <const>          // [T; N]
527//          | "S" <type>                  // [T]
528//          | "T" {<type>} "E"            // (T1, T2, T3, ...)
529//          | "R" [<lifetime>] <type>     // &T
530//          | "Q" [<lifetime>] <type>     // &mut T
531//          | "P" <type>                  // *const T
532//          | "O" <type>                  // *mut T
533//          | "F" <fn-sig>                // fn(...) -> ...
534//          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
535//          | <backref>                   // backref
536void Demangler::demangleType() {
537  if (Error || RecursionLevel >= MaxRecursionLevel) {
538    Error = true;
539    return;
540  }
541  ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
542
543  size_t Start = Position;
544  char C = consume();
545  BasicType Type;
546  if (parseBasicType(C, Type))
547    return printBasicType(Type);
548
549  switch (C) {
550  case 'A':
551    print("[");
552    demangleType();
553    print("; ");
554    demangleConst();
555    print("]");
556    break;
557  case 'S':
558    print("[");
559    demangleType();
560    print("]");
561    break;
562  case 'T': {
563    print("(");
564    size_t I = 0;
565    for (; !Error && !consumeIf('E'); ++I) {
566      if (I > 0)
567        print(", ");
568      demangleType();
569    }
570    if (I == 1)
571      print(",");
572    print(")");
573    break;
574  }
575  case 'R':
576  case 'Q':
577    print('&');
578    if (consumeIf('L')) {
579      if (auto Lifetime = parseBase62Number()) {
580        printLifetime(Lifetime);
581        print(' ');
582      }
583    }
584    if (C == 'Q')
585      print("mut ");
586    demangleType();
587    break;
588  case 'P':
589    print("*const ");
590    demangleType();
591    break;
592  case 'O':
593    print("*mut ");
594    demangleType();
595    break;
596  case 'F':
597    demangleFnSig();
598    break;
599  case 'D':
600    demangleDynBounds();
601    if (consumeIf('L')) {
602      if (auto Lifetime = parseBase62Number()) {
603        print(" + ");
604        printLifetime(Lifetime);
605      }
606    } else {
607      Error = true;
608    }
609    break;
610  case 'B':
611    demangleBackref([&] { demangleType(); });
612    break;
613  default:
614    Position = Start;
615    demanglePath(IsInType::Yes);
616    break;
617  }
618}
619
620// <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
621// <abi> = "C"
622//       | <undisambiguated-identifier>
623void Demangler::demangleFnSig() {
624  ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
625  demangleOptionalBinder();
626
627  if (consumeIf('U'))
628    print("unsafe ");
629
630  if (consumeIf('K')) {
631    print("extern \"");
632    if (consumeIf('C')) {
633      print("C");
634    } else {
635      Identifier Ident = parseIdentifier();
636      if (Ident.Punycode)
637        Error = true;
638      for (char C : Ident.Name) {
639        // When mangling ABI string, the "-" is replaced with "_".
640        if (C == '_')
641          C = '-';
642        print(C);
643      }
644    }
645    print("\" ");
646  }
647
648  print("fn(");
649  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
650    if (I > 0)
651      print(", ");
652    demangleType();
653  }
654  print(")");
655
656  if (consumeIf('u')) {
657    // Skip the unit type from the output.
658  } else {
659    print(" -> ");
660    demangleType();
661  }
662}
663
664// <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
665void Demangler::demangleDynBounds() {
666  ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
667  print("dyn ");
668  demangleOptionalBinder();
669  for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
670    if (I > 0)
671      print(" + ");
672    demangleDynTrait();
673  }
674}
675
676// <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
677// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
678void Demangler::demangleDynTrait() {
679  bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
680  while (!Error && consumeIf('p')) {
681    if (!IsOpen) {
682      IsOpen = true;
683      print('<');
684    } else {
685      print(", ");
686    }
687    print(parseIdentifier().Name);
688    print(" = ");
689    demangleType();
690  }
691  if (IsOpen)
692    print(">");
693}
694
695// Demangles optional binder and updates the number of bound lifetimes.
696//
697// <binder> = "G" <base-62-number>
698void Demangler::demangleOptionalBinder() {
699  uint64_t Binder = parseOptionalBase62Number('G');
700  if (Error || Binder == 0)
701    return;
702
703  // In valid inputs each bound lifetime is referenced later. Referencing a
704  // lifetime requires at least one byte of input. Reject inputs that are too
705  // short to reference all bound lifetimes. Otherwise demangling of invalid
706  // binders could generate excessive amounts of output.
707  if (Binder >= Input.size() - BoundLifetimes) {
708    Error = true;
709    return;
710  }
711
712  print("for<");
713  for (size_t I = 0; I != Binder; ++I) {
714    BoundLifetimes += 1;
715    if (I > 0)
716      print(", ");
717    printLifetime(1);
718  }
719  print("> ");
720}
721
722// <const> = <basic-type> <const-data>
723//         | "p"                          // placeholder
724//         | <backref>
725void Demangler::demangleConst() {
726  if (Error || RecursionLevel >= MaxRecursionLevel) {
727    Error = true;
728    return;
729  }
730  ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
731
732  char C = consume();
733  BasicType Type;
734  if (parseBasicType(C, Type)) {
735    switch (Type) {
736    case BasicType::I8:
737    case BasicType::I16:
738    case BasicType::I32:
739    case BasicType::I64:
740    case BasicType::I128:
741    case BasicType::ISize:
742    case BasicType::U8:
743    case BasicType::U16:
744    case BasicType::U32:
745    case BasicType::U64:
746    case BasicType::U128:
747    case BasicType::USize:
748      demangleConstInt();
749      break;
750    case BasicType::Bool:
751      demangleConstBool();
752      break;
753    case BasicType::Char:
754      demangleConstChar();
755      break;
756    case BasicType::Placeholder:
757      print('_');
758      break;
759    default:
760      Error = true;
761      break;
762    }
763  } else if (C == 'B') {
764    demangleBackref([&] { demangleConst(); });
765  } else {
766    Error = true;
767  }
768}
769
770// <const-data> = ["n"] <hex-number>
771void Demangler::demangleConstInt() {
772  if (consumeIf('n'))
773    print('-');
774
775  std::string_view HexDigits;
776  uint64_t Value = parseHexNumber(HexDigits);
777  if (HexDigits.size() <= 16) {
778    printDecimalNumber(Value);
779  } else {
780    print("0x");
781    print(HexDigits);
782  }
783}
784
785// <const-data> = "0_" // false
786//              | "1_" // true
787void Demangler::demangleConstBool() {
788  std::string_view HexDigits;
789  parseHexNumber(HexDigits);
790  if (HexDigits == "0")
791    print("false");
792  else if (HexDigits == "1")
793    print("true");
794  else
795    Error = true;
796}
797
798/// Returns true if CodePoint represents a printable ASCII character.
799static bool isAsciiPrintable(uint64_t CodePoint) {
800  return 0x20 <= CodePoint && CodePoint <= 0x7e;
801}
802
803// <const-data> = <hex-number>
804void Demangler::demangleConstChar() {
805  std::string_view HexDigits;
806  uint64_t CodePoint = parseHexNumber(HexDigits);
807  if (Error || HexDigits.size() > 6) {
808    Error = true;
809    return;
810  }
811
812  print("'");
813  switch (CodePoint) {
814  case '\t':
815    print(R"(\t)");
816    break;
817  case '\r':
818    print(R"(\r)");
819    break;
820  case '\n':
821    print(R"(\n)");
822    break;
823  case '\\':
824    print(R"(\\)");
825    break;
826  case '"':
827    print(R"(")");
828    break;
829  case '\'':
830    print(R"(\')");
831    break;
832  default:
833    if (isAsciiPrintable(CodePoint)) {
834      char C = CodePoint;
835      print(C);
836    } else {
837      print(R"(\u{)");
838      print(HexDigits);
839      print('}');
840    }
841    break;
842  }
843  print('\'');
844}
845
846// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
847Identifier Demangler::parseIdentifier() {
848  bool Punycode = consumeIf('u');
849  uint64_t Bytes = parseDecimalNumber();
850
851  // Underscore resolves the ambiguity when identifier starts with a decimal
852  // digit or another underscore.
853  consumeIf('_');
854
855  if (Error || Bytes > Input.size() - Position) {
856    Error = true;
857    return {};
858  }
859  std::string_view S = Input.substr(Position, Bytes);
860  Position += Bytes;
861
862  if (!std::all_of(S.begin(), S.end(), isValid)) {
863    Error = true;
864    return {};
865  }
866
867  return {S, Punycode};
868}
869
870// Parses optional base 62 number. The presence of a number is determined using
871// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
872//
873// This function is indended for parsing disambiguators and binders which when
874// not present have their value interpreted as 0, and otherwise as decoded
875// value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
876// 2. When "G" is absent value is 0.
877uint64_t Demangler::parseOptionalBase62Number(char Tag) {
878  if (!consumeIf(Tag))
879    return 0;
880
881  uint64_t N = parseBase62Number();
882  if (Error || !addAssign(N, 1))
883    return 0;
884
885  return N;
886}
887
888// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
889// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
890// "1_" encodes 2, etc.
891//
892// <base-62-number> = {<0-9a-zA-Z>} "_"
893uint64_t Demangler::parseBase62Number() {
894  if (consumeIf('_'))
895    return 0;
896
897  uint64_t Value = 0;
898
899  while (true) {
900    uint64_t Digit;
901    char C = consume();
902
903    if (C == '_') {
904      break;
905    } else if (isDigit(C)) {
906      Digit = C - '0';
907    } else if (isLower(C)) {
908      Digit = 10 + (C - 'a');
909    } else if (isUpper(C)) {
910      Digit = 10 + 26 + (C - 'A');
911    } else {
912      Error = true;
913      return 0;
914    }
915
916    if (!mulAssign(Value, 62))
917      return 0;
918
919    if (!addAssign(Value, Digit))
920      return 0;
921  }
922
923  if (!addAssign(Value, 1))
924    return 0;
925
926  return Value;
927}
928
929// Parses a decimal number that had been encoded without any leading zeros.
930//
931// <decimal-number> = "0"
932//                  | <1-9> {<0-9>}
933uint64_t Demangler::parseDecimalNumber() {
934  char C = look();
935  if (!isDigit(C)) {
936    Error = true;
937    return 0;
938  }
939
940  if (C == '0') {
941    consume();
942    return 0;
943  }
944
945  uint64_t Value = 0;
946
947  while (isDigit(look())) {
948    if (!mulAssign(Value, 10)) {
949      Error = true;
950      return 0;
951    }
952
953    uint64_t D = consume() - '0';
954    if (!addAssign(Value, D))
955      return 0;
956  }
957
958  return Value;
959}
960
961// Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
962// value and stores hex digits in HexDigits. The return value is unspecified if
963// HexDigits.size() > 16.
964//
965// <hex-number> = "0_"
966//              | <1-9a-f> {<0-9a-f>} "_"
967uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
968  size_t Start = Position;
969  uint64_t Value = 0;
970
971  if (!isHexDigit(look()))
972    Error = true;
973
974  if (consumeIf('0')) {
975    if (!consumeIf('_'))
976      Error = true;
977  } else {
978    while (!Error && !consumeIf('_')) {
979      char C = consume();
980      Value *= 16;
981      if (isDigit(C))
982        Value += C - '0';
983      else if ('a' <= C && C <= 'f')
984        Value += 10 + (C - 'a');
985      else
986        Error = true;
987    }
988  }
989
990  if (Error) {
991    HexDigits = std::string_view();
992    return 0;
993  }
994
995  size_t End = Position - 1;
996  assert(Start < End);
997  HexDigits = Input.substr(Start, End - Start);
998  return Value;
999}
1000
1001void Demangler::print(char C) {
1002  if (Error || !Print)
1003    return;
1004
1005  Output += C;
1006}
1007
1008void Demangler::print(std::string_view S) {
1009  if (Error || !Print)
1010    return;
1011
1012  Output += S;
1013}
1014
1015void Demangler::printDecimalNumber(uint64_t N) {
1016  if (Error || !Print)
1017    return;
1018
1019  Output << N;
1020}
1021
1022// Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1023// starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1024// bound by one of the enclosing binders.
1025void Demangler::printLifetime(uint64_t Index) {
1026  if (Index == 0) {
1027    print("'_");
1028    return;
1029  }
1030
1031  if (Index - 1 >= BoundLifetimes) {
1032    Error = true;
1033    return;
1034  }
1035
1036  uint64_t Depth = BoundLifetimes - Index;
1037  print('\'');
1038  if (Depth < 26) {
1039    char C = 'a' + Depth;
1040    print(C);
1041  } else {
1042    print('z');
1043    printDecimalNumber(Depth - 26 + 1);
1044  }
1045}
1046
1047static inline bool decodePunycodeDigit(char C, size_t &Value) {
1048  if (isLower(C)) {
1049    Value = C - 'a';
1050    return true;
1051  }
1052
1053  if (isDigit(C)) {
1054    Value = 26 + (C - '0');
1055    return true;
1056  }
1057
1058  return false;
1059}
1060
1061static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1062  char *Buffer = Output.getBuffer();
1063  char *Start = Buffer + StartIdx;
1064  char *End = Buffer + Output.getCurrentPosition();
1065  Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1066}
1067
1068// Encodes code point as UTF-8 and stores results in Output. Returns false if
1069// CodePoint is not a valid unicode scalar value.
1070static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1071  if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1072    return false;
1073
1074  if (CodePoint <= 0x7F) {
1075    Output[0] = CodePoint;
1076    return true;
1077  }
1078
1079  if (CodePoint <= 0x7FF) {
1080    Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1081    Output[1] = 0x80 | (CodePoint & 0x3F);
1082    return true;
1083  }
1084
1085  if (CodePoint <= 0xFFFF) {
1086    Output[0] = 0xE0 | (CodePoint >> 12);
1087    Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1088    Output[2] = 0x80 | (CodePoint & 0x3F);
1089    return true;
1090  }
1091
1092  if (CodePoint <= 0x10FFFF) {
1093    Output[0] = 0xF0 | (CodePoint >> 18);
1094    Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1095    Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1096    Output[3] = 0x80 | (CodePoint & 0x3F);
1097    return true;
1098  }
1099
1100  return false;
1101}
1102
1103// Decodes string encoded using punycode and appends results to Output.
1104// Returns true if decoding was successful.
1105static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {
1106  size_t OutputSize = Output.getCurrentPosition();
1107  size_t InputIdx = 0;
1108
1109  // Rust uses an underscore as a delimiter.
1110  size_t DelimiterPos = std::string_view::npos;
1111  for (size_t I = 0; I != Input.size(); ++I)
1112    if (Input[I] == '_')
1113      DelimiterPos = I;
1114
1115  if (DelimiterPos != std::string_view::npos) {
1116    // Copy basic code points before the last delimiter to the output.
1117    for (; InputIdx != DelimiterPos; ++InputIdx) {
1118      char C = Input[InputIdx];
1119      if (!isValid(C))
1120        return false;
1121      // Code points are padded with zeros while decoding is in progress.
1122      char UTF8[4] = {C};
1123      Output += std::string_view(UTF8, 4);
1124    }
1125    // Skip over the delimiter.
1126    ++InputIdx;
1127  }
1128
1129  size_t Base = 36;
1130  size_t Skew = 38;
1131  size_t Bias = 72;
1132  size_t N = 0x80;
1133  size_t TMin = 1;
1134  size_t TMax = 26;
1135  size_t Damp = 700;
1136
1137  auto Adapt = [&](size_t Delta, size_t NumPoints) {
1138    Delta /= Damp;
1139    Delta += Delta / NumPoints;
1140    Damp = 2;
1141
1142    size_t K = 0;
1143    while (Delta > (Base - TMin) * TMax / 2) {
1144      Delta /= Base - TMin;
1145      K += Base;
1146    }
1147    return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1148  };
1149
1150  // Main decoding loop.
1151  for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1152    size_t OldI = I;
1153    size_t W = 1;
1154    size_t Max = std::numeric_limits<size_t>::max();
1155    for (size_t K = Base; true; K += Base) {
1156      if (InputIdx == Input.size())
1157        return false;
1158      char C = Input[InputIdx++];
1159      size_t Digit = 0;
1160      if (!decodePunycodeDigit(C, Digit))
1161        return false;
1162
1163      if (Digit > (Max - I) / W)
1164        return false;
1165      I += Digit * W;
1166
1167      size_t T;
1168      if (K <= Bias)
1169        T = TMin;
1170      else if (K >= Bias + TMax)
1171        T = TMax;
1172      else
1173        T = K - Bias;
1174
1175      if (Digit < T)
1176        break;
1177
1178      if (W > Max / (Base - T))
1179        return false;
1180      W *= (Base - T);
1181    }
1182    size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1183    Bias = Adapt(I - OldI, NumPoints);
1184
1185    if (I / NumPoints > Max - N)
1186      return false;
1187    N += I / NumPoints;
1188    I = I % NumPoints;
1189
1190    // Insert N at position I in the output.
1191    char UTF8[4] = {};
1192    if (!encodeUTF8(N, UTF8))
1193      return false;
1194    Output.insert(OutputSize + I * 4, UTF8, 4);
1195  }
1196
1197  removeNullBytes(Output, OutputSize);
1198  return true;
1199}
1200
1201void Demangler::printIdentifier(Identifier Ident) {
1202  if (Error || !Print)
1203    return;
1204
1205  if (Ident.Punycode) {
1206    if (!decodePunycode(Ident.Name, Output))
1207      Error = true;
1208  } else {
1209    print(Ident.Name);
1210  }
1211}
1212
1213char Demangler::look() const {
1214  if (Error || Position >= Input.size())
1215    return 0;
1216
1217  return Input[Position];
1218}
1219
1220char Demangler::consume() {
1221  if (Error || Position >= Input.size()) {
1222    Error = true;
1223    return 0;
1224  }
1225
1226  return Input[Position++];
1227}
1228
1229bool Demangler::consumeIf(char Prefix) {
1230  if (Error || Position >= Input.size() || Input[Position] != Prefix)
1231    return false;
1232
1233  Position += 1;
1234  return true;
1235}
1236
1237/// Computes A + B. When computation wraps around sets the error and returns
1238/// false. Otherwise assigns the result to A and returns true.
1239bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1240  if (A > std::numeric_limits<uint64_t>::max() - B) {
1241    Error = true;
1242    return false;
1243  }
1244
1245  A += B;
1246  return true;
1247}
1248
1249/// Computes A * B. When computation wraps around sets the error and returns
1250/// false. Otherwise assigns the result to A and returns true.
1251bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1252  if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1253    Error = true;
1254    return false;
1255  }
1256
1257  A *= B;
1258  return true;
1259}
1260