1//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines a demangler for Rust v0 mangled symbols as specified in 10// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/Demangle/Demangle.h" 15#include "llvm/Demangle/StringViewExtras.h" 16#include "llvm/Demangle/Utility.h" 17 18#include <algorithm> 19#include <cassert> 20#include <cstdint> 21#include <cstring> 22#include <limits> 23#include <string_view> 24 25using namespace llvm; 26 27using llvm::itanium_demangle::OutputBuffer; 28using llvm::itanium_demangle::ScopedOverride; 29using llvm::itanium_demangle::starts_with; 30 31namespace { 32 33struct Identifier { 34 std::string_view Name; 35 bool Punycode; 36 37 bool empty() const { return Name.empty(); } 38}; 39 40enum class BasicType { 41 Bool, 42 Char, 43 I8, 44 I16, 45 I32, 46 I64, 47 I128, 48 ISize, 49 U8, 50 U16, 51 U32, 52 U64, 53 U128, 54 USize, 55 F32, 56 F64, 57 Str, 58 Placeholder, 59 Unit, 60 Variadic, 61 Never, 62}; 63 64enum class IsInType { 65 No, 66 Yes, 67}; 68 69enum class LeaveGenericsOpen { 70 No, 71 Yes, 72}; 73 74class Demangler { 75 // Maximum recursion level. Used to avoid stack overflow. 76 size_t MaxRecursionLevel; 77 // Current recursion level. 78 size_t RecursionLevel; 79 size_t BoundLifetimes; 80 // Input string that is being demangled with "_R" prefix removed. 81 std::string_view Input; 82 // Position in the input string. 83 size_t Position; 84 // When true, print methods append the output to the stream. 85 // When false, the output is suppressed. 86 bool Print; 87 // True if an error occurred. 88 bool Error; 89 90public: 91 // Demangled output. 92 OutputBuffer Output; 93 94 Demangler(size_t MaxRecursionLevel = 500); 95 96 bool demangle(std::string_view MangledName); 97 98private: 99 bool demanglePath(IsInType Type, 100 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); 101 void demangleImplPath(IsInType InType); 102 void demangleGenericArg(); 103 void demangleType(); 104 void demangleFnSig(); 105 void demangleDynBounds(); 106 void demangleDynTrait(); 107 void demangleOptionalBinder(); 108 void demangleConst(); 109 void demangleConstInt(); 110 void demangleConstBool(); 111 void demangleConstChar(); 112 113 template <typename Callable> void demangleBackref(Callable Demangler) { 114 uint64_t Backref = parseBase62Number(); 115 if (Error || Backref >= Position) { 116 Error = true; 117 return; 118 } 119 120 if (!Print) 121 return; 122 123 ScopedOverride<size_t> SavePosition(Position, Position); 124 Position = Backref; 125 Demangler(); 126 } 127 128 Identifier parseIdentifier(); 129 uint64_t parseOptionalBase62Number(char Tag); 130 uint64_t parseBase62Number(); 131 uint64_t parseDecimalNumber(); 132 uint64_t parseHexNumber(std::string_view &HexDigits); 133 134 void print(char C); 135 void print(std::string_view S); 136 void printDecimalNumber(uint64_t N); 137 void printBasicType(BasicType); 138 void printLifetime(uint64_t Index); 139 void printIdentifier(Identifier Ident); 140 141 char look() const; 142 char consume(); 143 bool consumeIf(char Prefix); 144 145 bool addAssign(uint64_t &A, uint64_t B); 146 bool mulAssign(uint64_t &A, uint64_t B); 147}; 148 149} // namespace 150 151char *llvm::rustDemangle(std::string_view MangledName) { 152 // Return early if mangled name doesn't look like a Rust symbol. 153 if (MangledName.empty() || !starts_with(MangledName, "_R")) 154 return nullptr; 155 156 Demangler D; 157 if (!D.demangle(MangledName)) { 158 std::free(D.Output.getBuffer()); 159 return nullptr; 160 } 161 162 D.Output += '\0'; 163 164 return D.Output.getBuffer(); 165} 166 167Demangler::Demangler(size_t MaxRecursionLevel) 168 : MaxRecursionLevel(MaxRecursionLevel) {} 169 170static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 171 172static inline bool isHexDigit(const char C) { 173 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 174} 175 176static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 177 178static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 179 180/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 181static inline bool isValid(const char C) { 182 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 183} 184 185// Demangles Rust v0 mangled symbol. Returns true when successful, and false 186// otherwise. The demangled symbol is stored in Output field. It is 187// responsibility of the caller to free the memory behind the output stream. 188// 189// <symbol-name> = "_R" <path> [<instantiating-crate>] 190bool Demangler::demangle(std::string_view Mangled) { 191 Position = 0; 192 Error = false; 193 Print = true; 194 RecursionLevel = 0; 195 BoundLifetimes = 0; 196 197 if (!starts_with(Mangled, "_R")) { 198 Error = true; 199 return false; 200 } 201 Mangled.remove_prefix(2); 202 size_t Dot = Mangled.find('.'); 203 Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot); 204 205 demanglePath(IsInType::No); 206 207 if (Position != Input.size()) { 208 ScopedOverride<bool> SavePrint(Print, false); 209 demanglePath(IsInType::No); 210 } 211 212 if (Position != Input.size()) 213 Error = true; 214 215 if (Dot != std::string_view::npos) { 216 print(" ("); 217 print(Mangled.substr(Dot)); 218 print(")"); 219 } 220 221 return !Error; 222} 223 224// Demangles a path. InType indicates whether a path is inside a type. When 225// LeaveOpen is true, a closing `>` after generic arguments is omitted from the 226// output. Return value indicates whether generics arguments have been left 227// open. 228// 229// <path> = "C" <identifier> // crate root 230// | "M" <impl-path> <type> // <T> (inherent impl) 231// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 232// | "Y" <type> <path> // <T as Trait> (trait definition) 233// | "N" <ns> <path> <identifier> // ...::ident (nested path) 234// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 235// | <backref> 236// <identifier> = [<disambiguator>] <undisambiguated-identifier> 237// <ns> = "C" // closure 238// | "S" // shim 239// | <A-Z> // other special namespaces 240// | <a-z> // internal namespaces 241bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { 242 if (Error || RecursionLevel >= MaxRecursionLevel) { 243 Error = true; 244 return false; 245 } 246 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 247 248 switch (consume()) { 249 case 'C': { 250 parseOptionalBase62Number('s'); 251 printIdentifier(parseIdentifier()); 252 break; 253 } 254 case 'M': { 255 demangleImplPath(InType); 256 print("<"); 257 demangleType(); 258 print(">"); 259 break; 260 } 261 case 'X': { 262 demangleImplPath(InType); 263 print("<"); 264 demangleType(); 265 print(" as "); 266 demanglePath(IsInType::Yes); 267 print(">"); 268 break; 269 } 270 case 'Y': { 271 print("<"); 272 demangleType(); 273 print(" as "); 274 demanglePath(IsInType::Yes); 275 print(">"); 276 break; 277 } 278 case 'N': { 279 char NS = consume(); 280 if (!isLower(NS) && !isUpper(NS)) { 281 Error = true; 282 break; 283 } 284 demanglePath(InType); 285 286 uint64_t Disambiguator = parseOptionalBase62Number('s'); 287 Identifier Ident = parseIdentifier(); 288 289 if (isUpper(NS)) { 290 // Special namespaces 291 print("::{"); 292 if (NS == 'C') 293 print("closure"); 294 else if (NS == 'S') 295 print("shim"); 296 else 297 print(NS); 298 if (!Ident.empty()) { 299 print(":"); 300 printIdentifier(Ident); 301 } 302 print('#'); 303 printDecimalNumber(Disambiguator); 304 print('}'); 305 } else { 306 // Implementation internal namespaces. 307 if (!Ident.empty()) { 308 print("::"); 309 printIdentifier(Ident); 310 } 311 } 312 break; 313 } 314 case 'I': { 315 demanglePath(InType); 316 // Omit "::" when in a type, where it is optional. 317 if (InType == IsInType::No) 318 print("::"); 319 print("<"); 320 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 321 if (I > 0) 322 print(", "); 323 demangleGenericArg(); 324 } 325 if (LeaveOpen == LeaveGenericsOpen::Yes) 326 return true; 327 else 328 print(">"); 329 break; 330 } 331 case 'B': { 332 bool IsOpen = false; 333 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); }); 334 return IsOpen; 335 } 336 default: 337 Error = true; 338 break; 339 } 340 341 return false; 342} 343 344// <impl-path> = [<disambiguator>] <path> 345// <disambiguator> = "s" <base-62-number> 346void Demangler::demangleImplPath(IsInType InType) { 347 ScopedOverride<bool> SavePrint(Print, false); 348 parseOptionalBase62Number('s'); 349 demanglePath(InType); 350} 351 352// <generic-arg> = <lifetime> 353// | <type> 354// | "K" <const> 355// <lifetime> = "L" <base-62-number> 356void Demangler::demangleGenericArg() { 357 if (consumeIf('L')) 358 printLifetime(parseBase62Number()); 359 else if (consumeIf('K')) 360 demangleConst(); 361 else 362 demangleType(); 363} 364 365// <basic-type> = "a" // i8 366// | "b" // bool 367// | "c" // char 368// | "d" // f64 369// | "e" // str 370// | "f" // f32 371// | "h" // u8 372// | "i" // isize 373// | "j" // usize 374// | "l" // i32 375// | "m" // u32 376// | "n" // i128 377// | "o" // u128 378// | "s" // i16 379// | "t" // u16 380// | "u" // () 381// | "v" // ... 382// | "x" // i64 383// | "y" // u64 384// | "z" // ! 385// | "p" // placeholder (e.g. for generic params), shown as _ 386static bool parseBasicType(char C, BasicType &Type) { 387 switch (C) { 388 case 'a': 389 Type = BasicType::I8; 390 return true; 391 case 'b': 392 Type = BasicType::Bool; 393 return true; 394 case 'c': 395 Type = BasicType::Char; 396 return true; 397 case 'd': 398 Type = BasicType::F64; 399 return true; 400 case 'e': 401 Type = BasicType::Str; 402 return true; 403 case 'f': 404 Type = BasicType::F32; 405 return true; 406 case 'h': 407 Type = BasicType::U8; 408 return true; 409 case 'i': 410 Type = BasicType::ISize; 411 return true; 412 case 'j': 413 Type = BasicType::USize; 414 return true; 415 case 'l': 416 Type = BasicType::I32; 417 return true; 418 case 'm': 419 Type = BasicType::U32; 420 return true; 421 case 'n': 422 Type = BasicType::I128; 423 return true; 424 case 'o': 425 Type = BasicType::U128; 426 return true; 427 case 'p': 428 Type = BasicType::Placeholder; 429 return true; 430 case 's': 431 Type = BasicType::I16; 432 return true; 433 case 't': 434 Type = BasicType::U16; 435 return true; 436 case 'u': 437 Type = BasicType::Unit; 438 return true; 439 case 'v': 440 Type = BasicType::Variadic; 441 return true; 442 case 'x': 443 Type = BasicType::I64; 444 return true; 445 case 'y': 446 Type = BasicType::U64; 447 return true; 448 case 'z': 449 Type = BasicType::Never; 450 return true; 451 default: 452 return false; 453 } 454} 455 456void Demangler::printBasicType(BasicType Type) { 457 switch (Type) { 458 case BasicType::Bool: 459 print("bool"); 460 break; 461 case BasicType::Char: 462 print("char"); 463 break; 464 case BasicType::I8: 465 print("i8"); 466 break; 467 case BasicType::I16: 468 print("i16"); 469 break; 470 case BasicType::I32: 471 print("i32"); 472 break; 473 case BasicType::I64: 474 print("i64"); 475 break; 476 case BasicType::I128: 477 print("i128"); 478 break; 479 case BasicType::ISize: 480 print("isize"); 481 break; 482 case BasicType::U8: 483 print("u8"); 484 break; 485 case BasicType::U16: 486 print("u16"); 487 break; 488 case BasicType::U32: 489 print("u32"); 490 break; 491 case BasicType::U64: 492 print("u64"); 493 break; 494 case BasicType::U128: 495 print("u128"); 496 break; 497 case BasicType::USize: 498 print("usize"); 499 break; 500 case BasicType::F32: 501 print("f32"); 502 break; 503 case BasicType::F64: 504 print("f64"); 505 break; 506 case BasicType::Str: 507 print("str"); 508 break; 509 case BasicType::Placeholder: 510 print("_"); 511 break; 512 case BasicType::Unit: 513 print("()"); 514 break; 515 case BasicType::Variadic: 516 print("..."); 517 break; 518 case BasicType::Never: 519 print("!"); 520 break; 521 } 522} 523 524// <type> = | <basic-type> 525// | <path> // named type 526// | "A" <type> <const> // [T; N] 527// | "S" <type> // [T] 528// | "T" {<type>} "E" // (T1, T2, T3, ...) 529// | "R" [<lifetime>] <type> // &T 530// | "Q" [<lifetime>] <type> // &mut T 531// | "P" <type> // *const T 532// | "O" <type> // *mut T 533// | "F" <fn-sig> // fn(...) -> ... 534// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 535// | <backref> // backref 536void Demangler::demangleType() { 537 if (Error || RecursionLevel >= MaxRecursionLevel) { 538 Error = true; 539 return; 540 } 541 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 542 543 size_t Start = Position; 544 char C = consume(); 545 BasicType Type; 546 if (parseBasicType(C, Type)) 547 return printBasicType(Type); 548 549 switch (C) { 550 case 'A': 551 print("["); 552 demangleType(); 553 print("; "); 554 demangleConst(); 555 print("]"); 556 break; 557 case 'S': 558 print("["); 559 demangleType(); 560 print("]"); 561 break; 562 case 'T': { 563 print("("); 564 size_t I = 0; 565 for (; !Error && !consumeIf('E'); ++I) { 566 if (I > 0) 567 print(", "); 568 demangleType(); 569 } 570 if (I == 1) 571 print(","); 572 print(")"); 573 break; 574 } 575 case 'R': 576 case 'Q': 577 print('&'); 578 if (consumeIf('L')) { 579 if (auto Lifetime = parseBase62Number()) { 580 printLifetime(Lifetime); 581 print(' '); 582 } 583 } 584 if (C == 'Q') 585 print("mut "); 586 demangleType(); 587 break; 588 case 'P': 589 print("*const "); 590 demangleType(); 591 break; 592 case 'O': 593 print("*mut "); 594 demangleType(); 595 break; 596 case 'F': 597 demangleFnSig(); 598 break; 599 case 'D': 600 demangleDynBounds(); 601 if (consumeIf('L')) { 602 if (auto Lifetime = parseBase62Number()) { 603 print(" + "); 604 printLifetime(Lifetime); 605 } 606 } else { 607 Error = true; 608 } 609 break; 610 case 'B': 611 demangleBackref([&] { demangleType(); }); 612 break; 613 default: 614 Position = Start; 615 demanglePath(IsInType::Yes); 616 break; 617 } 618} 619 620// <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 621// <abi> = "C" 622// | <undisambiguated-identifier> 623void Demangler::demangleFnSig() { 624 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 625 demangleOptionalBinder(); 626 627 if (consumeIf('U')) 628 print("unsafe "); 629 630 if (consumeIf('K')) { 631 print("extern \""); 632 if (consumeIf('C')) { 633 print("C"); 634 } else { 635 Identifier Ident = parseIdentifier(); 636 if (Ident.Punycode) 637 Error = true; 638 for (char C : Ident.Name) { 639 // When mangling ABI string, the "-" is replaced with "_". 640 if (C == '_') 641 C = '-'; 642 print(C); 643 } 644 } 645 print("\" "); 646 } 647 648 print("fn("); 649 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 650 if (I > 0) 651 print(", "); 652 demangleType(); 653 } 654 print(")"); 655 656 if (consumeIf('u')) { 657 // Skip the unit type from the output. 658 } else { 659 print(" -> "); 660 demangleType(); 661 } 662} 663 664// <dyn-bounds> = [<binder>] {<dyn-trait>} "E" 665void Demangler::demangleDynBounds() { 666 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 667 print("dyn "); 668 demangleOptionalBinder(); 669 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 670 if (I > 0) 671 print(" + "); 672 demangleDynTrait(); 673 } 674} 675 676// <dyn-trait> = <path> {<dyn-trait-assoc-binding>} 677// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> 678void Demangler::demangleDynTrait() { 679 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes); 680 while (!Error && consumeIf('p')) { 681 if (!IsOpen) { 682 IsOpen = true; 683 print('<'); 684 } else { 685 print(", "); 686 } 687 print(parseIdentifier().Name); 688 print(" = "); 689 demangleType(); 690 } 691 if (IsOpen) 692 print(">"); 693} 694 695// Demangles optional binder and updates the number of bound lifetimes. 696// 697// <binder> = "G" <base-62-number> 698void Demangler::demangleOptionalBinder() { 699 uint64_t Binder = parseOptionalBase62Number('G'); 700 if (Error || Binder == 0) 701 return; 702 703 // In valid inputs each bound lifetime is referenced later. Referencing a 704 // lifetime requires at least one byte of input. Reject inputs that are too 705 // short to reference all bound lifetimes. Otherwise demangling of invalid 706 // binders could generate excessive amounts of output. 707 if (Binder >= Input.size() - BoundLifetimes) { 708 Error = true; 709 return; 710 } 711 712 print("for<"); 713 for (size_t I = 0; I != Binder; ++I) { 714 BoundLifetimes += 1; 715 if (I > 0) 716 print(", "); 717 printLifetime(1); 718 } 719 print("> "); 720} 721 722// <const> = <basic-type> <const-data> 723// | "p" // placeholder 724// | <backref> 725void Demangler::demangleConst() { 726 if (Error || RecursionLevel >= MaxRecursionLevel) { 727 Error = true; 728 return; 729 } 730 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 731 732 char C = consume(); 733 BasicType Type; 734 if (parseBasicType(C, Type)) { 735 switch (Type) { 736 case BasicType::I8: 737 case BasicType::I16: 738 case BasicType::I32: 739 case BasicType::I64: 740 case BasicType::I128: 741 case BasicType::ISize: 742 case BasicType::U8: 743 case BasicType::U16: 744 case BasicType::U32: 745 case BasicType::U64: 746 case BasicType::U128: 747 case BasicType::USize: 748 demangleConstInt(); 749 break; 750 case BasicType::Bool: 751 demangleConstBool(); 752 break; 753 case BasicType::Char: 754 demangleConstChar(); 755 break; 756 case BasicType::Placeholder: 757 print('_'); 758 break; 759 default: 760 Error = true; 761 break; 762 } 763 } else if (C == 'B') { 764 demangleBackref([&] { demangleConst(); }); 765 } else { 766 Error = true; 767 } 768} 769 770// <const-data> = ["n"] <hex-number> 771void Demangler::demangleConstInt() { 772 if (consumeIf('n')) 773 print('-'); 774 775 std::string_view HexDigits; 776 uint64_t Value = parseHexNumber(HexDigits); 777 if (HexDigits.size() <= 16) { 778 printDecimalNumber(Value); 779 } else { 780 print("0x"); 781 print(HexDigits); 782 } 783} 784 785// <const-data> = "0_" // false 786// | "1_" // true 787void Demangler::demangleConstBool() { 788 std::string_view HexDigits; 789 parseHexNumber(HexDigits); 790 if (HexDigits == "0") 791 print("false"); 792 else if (HexDigits == "1") 793 print("true"); 794 else 795 Error = true; 796} 797 798/// Returns true if CodePoint represents a printable ASCII character. 799static bool isAsciiPrintable(uint64_t CodePoint) { 800 return 0x20 <= CodePoint && CodePoint <= 0x7e; 801} 802 803// <const-data> = <hex-number> 804void Demangler::demangleConstChar() { 805 std::string_view HexDigits; 806 uint64_t CodePoint = parseHexNumber(HexDigits); 807 if (Error || HexDigits.size() > 6) { 808 Error = true; 809 return; 810 } 811 812 print("'"); 813 switch (CodePoint) { 814 case '\t': 815 print(R"(\t)"); 816 break; 817 case '\r': 818 print(R"(\r)"); 819 break; 820 case '\n': 821 print(R"(\n)"); 822 break; 823 case '\\': 824 print(R"(\\)"); 825 break; 826 case '"': 827 print(R"(")"); 828 break; 829 case '\'': 830 print(R"(\')"); 831 break; 832 default: 833 if (isAsciiPrintable(CodePoint)) { 834 char C = CodePoint; 835 print(C); 836 } else { 837 print(R"(\u{)"); 838 print(HexDigits); 839 print('}'); 840 } 841 break; 842 } 843 print('\''); 844} 845 846// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 847Identifier Demangler::parseIdentifier() { 848 bool Punycode = consumeIf('u'); 849 uint64_t Bytes = parseDecimalNumber(); 850 851 // Underscore resolves the ambiguity when identifier starts with a decimal 852 // digit or another underscore. 853 consumeIf('_'); 854 855 if (Error || Bytes > Input.size() - Position) { 856 Error = true; 857 return {}; 858 } 859 std::string_view S = Input.substr(Position, Bytes); 860 Position += Bytes; 861 862 if (!std::all_of(S.begin(), S.end(), isValid)) { 863 Error = true; 864 return {}; 865 } 866 867 return {S, Punycode}; 868} 869 870// Parses optional base 62 number. The presence of a number is determined using 871// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise 872// 873// This function is indended for parsing disambiguators and binders which when 874// not present have their value interpreted as 0, and otherwise as decoded 875// value + 1. For example for binders, value for "G_" is 1, for "G0_" value is 876// 2. When "G" is absent value is 0. 877uint64_t Demangler::parseOptionalBase62Number(char Tag) { 878 if (!consumeIf(Tag)) 879 return 0; 880 881 uint64_t N = parseBase62Number(); 882 if (Error || !addAssign(N, 1)) 883 return 0; 884 885 return N; 886} 887 888// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 889// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 890// "1_" encodes 2, etc. 891// 892// <base-62-number> = {<0-9a-zA-Z>} "_" 893uint64_t Demangler::parseBase62Number() { 894 if (consumeIf('_')) 895 return 0; 896 897 uint64_t Value = 0; 898 899 while (true) { 900 uint64_t Digit; 901 char C = consume(); 902 903 if (C == '_') { 904 break; 905 } else if (isDigit(C)) { 906 Digit = C - '0'; 907 } else if (isLower(C)) { 908 Digit = 10 + (C - 'a'); 909 } else if (isUpper(C)) { 910 Digit = 10 + 26 + (C - 'A'); 911 } else { 912 Error = true; 913 return 0; 914 } 915 916 if (!mulAssign(Value, 62)) 917 return 0; 918 919 if (!addAssign(Value, Digit)) 920 return 0; 921 } 922 923 if (!addAssign(Value, 1)) 924 return 0; 925 926 return Value; 927} 928 929// Parses a decimal number that had been encoded without any leading zeros. 930// 931// <decimal-number> = "0" 932// | <1-9> {<0-9>} 933uint64_t Demangler::parseDecimalNumber() { 934 char C = look(); 935 if (!isDigit(C)) { 936 Error = true; 937 return 0; 938 } 939 940 if (C == '0') { 941 consume(); 942 return 0; 943 } 944 945 uint64_t Value = 0; 946 947 while (isDigit(look())) { 948 if (!mulAssign(Value, 10)) { 949 Error = true; 950 return 0; 951 } 952 953 uint64_t D = consume() - '0'; 954 if (!addAssign(Value, D)) 955 return 0; 956 } 957 958 return Value; 959} 960 961// Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 962// value and stores hex digits in HexDigits. The return value is unspecified if 963// HexDigits.size() > 16. 964// 965// <hex-number> = "0_" 966// | <1-9a-f> {<0-9a-f>} "_" 967uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) { 968 size_t Start = Position; 969 uint64_t Value = 0; 970 971 if (!isHexDigit(look())) 972 Error = true; 973 974 if (consumeIf('0')) { 975 if (!consumeIf('_')) 976 Error = true; 977 } else { 978 while (!Error && !consumeIf('_')) { 979 char C = consume(); 980 Value *= 16; 981 if (isDigit(C)) 982 Value += C - '0'; 983 else if ('a' <= C && C <= 'f') 984 Value += 10 + (C - 'a'); 985 else 986 Error = true; 987 } 988 } 989 990 if (Error) { 991 HexDigits = std::string_view(); 992 return 0; 993 } 994 995 size_t End = Position - 1; 996 assert(Start < End); 997 HexDigits = Input.substr(Start, End - Start); 998 return Value; 999} 1000 1001void Demangler::print(char C) { 1002 if (Error || !Print) 1003 return; 1004 1005 Output += C; 1006} 1007 1008void Demangler::print(std::string_view S) { 1009 if (Error || !Print) 1010 return; 1011 1012 Output += S; 1013} 1014 1015void Demangler::printDecimalNumber(uint64_t N) { 1016 if (Error || !Print) 1017 return; 1018 1019 Output << N; 1020} 1021 1022// Prints a lifetime. An index 0 always represents an erased lifetime. Indices 1023// starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes 1024// bound by one of the enclosing binders. 1025void Demangler::printLifetime(uint64_t Index) { 1026 if (Index == 0) { 1027 print("'_"); 1028 return; 1029 } 1030 1031 if (Index - 1 >= BoundLifetimes) { 1032 Error = true; 1033 return; 1034 } 1035 1036 uint64_t Depth = BoundLifetimes - Index; 1037 print('\''); 1038 if (Depth < 26) { 1039 char C = 'a' + Depth; 1040 print(C); 1041 } else { 1042 print('z'); 1043 printDecimalNumber(Depth - 26 + 1); 1044 } 1045} 1046 1047static inline bool decodePunycodeDigit(char C, size_t &Value) { 1048 if (isLower(C)) { 1049 Value = C - 'a'; 1050 return true; 1051 } 1052 1053 if (isDigit(C)) { 1054 Value = 26 + (C - '0'); 1055 return true; 1056 } 1057 1058 return false; 1059} 1060 1061static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { 1062 char *Buffer = Output.getBuffer(); 1063 char *Start = Buffer + StartIdx; 1064 char *End = Buffer + Output.getCurrentPosition(); 1065 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); 1066} 1067 1068// Encodes code point as UTF-8 and stores results in Output. Returns false if 1069// CodePoint is not a valid unicode scalar value. 1070static inline bool encodeUTF8(size_t CodePoint, char *Output) { 1071 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) 1072 return false; 1073 1074 if (CodePoint <= 0x7F) { 1075 Output[0] = CodePoint; 1076 return true; 1077 } 1078 1079 if (CodePoint <= 0x7FF) { 1080 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F); 1081 Output[1] = 0x80 | (CodePoint & 0x3F); 1082 return true; 1083 } 1084 1085 if (CodePoint <= 0xFFFF) { 1086 Output[0] = 0xE0 | (CodePoint >> 12); 1087 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F); 1088 Output[2] = 0x80 | (CodePoint & 0x3F); 1089 return true; 1090 } 1091 1092 if (CodePoint <= 0x10FFFF) { 1093 Output[0] = 0xF0 | (CodePoint >> 18); 1094 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F); 1095 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F); 1096 Output[3] = 0x80 | (CodePoint & 0x3F); 1097 return true; 1098 } 1099 1100 return false; 1101} 1102 1103// Decodes string encoded using punycode and appends results to Output. 1104// Returns true if decoding was successful. 1105static bool decodePunycode(std::string_view Input, OutputBuffer &Output) { 1106 size_t OutputSize = Output.getCurrentPosition(); 1107 size_t InputIdx = 0; 1108 1109 // Rust uses an underscore as a delimiter. 1110 size_t DelimiterPos = std::string_view::npos; 1111 for (size_t I = 0; I != Input.size(); ++I) 1112 if (Input[I] == '_') 1113 DelimiterPos = I; 1114 1115 if (DelimiterPos != std::string_view::npos) { 1116 // Copy basic code points before the last delimiter to the output. 1117 for (; InputIdx != DelimiterPos; ++InputIdx) { 1118 char C = Input[InputIdx]; 1119 if (!isValid(C)) 1120 return false; 1121 // Code points are padded with zeros while decoding is in progress. 1122 char UTF8[4] = {C}; 1123 Output += std::string_view(UTF8, 4); 1124 } 1125 // Skip over the delimiter. 1126 ++InputIdx; 1127 } 1128 1129 size_t Base = 36; 1130 size_t Skew = 38; 1131 size_t Bias = 72; 1132 size_t N = 0x80; 1133 size_t TMin = 1; 1134 size_t TMax = 26; 1135 size_t Damp = 700; 1136 1137 auto Adapt = [&](size_t Delta, size_t NumPoints) { 1138 Delta /= Damp; 1139 Delta += Delta / NumPoints; 1140 Damp = 2; 1141 1142 size_t K = 0; 1143 while (Delta > (Base - TMin) * TMax / 2) { 1144 Delta /= Base - TMin; 1145 K += Base; 1146 } 1147 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); 1148 }; 1149 1150 // Main decoding loop. 1151 for (size_t I = 0; InputIdx != Input.size(); I += 1) { 1152 size_t OldI = I; 1153 size_t W = 1; 1154 size_t Max = std::numeric_limits<size_t>::max(); 1155 for (size_t K = Base; true; K += Base) { 1156 if (InputIdx == Input.size()) 1157 return false; 1158 char C = Input[InputIdx++]; 1159 size_t Digit = 0; 1160 if (!decodePunycodeDigit(C, Digit)) 1161 return false; 1162 1163 if (Digit > (Max - I) / W) 1164 return false; 1165 I += Digit * W; 1166 1167 size_t T; 1168 if (K <= Bias) 1169 T = TMin; 1170 else if (K >= Bias + TMax) 1171 T = TMax; 1172 else 1173 T = K - Bias; 1174 1175 if (Digit < T) 1176 break; 1177 1178 if (W > Max / (Base - T)) 1179 return false; 1180 W *= (Base - T); 1181 } 1182 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; 1183 Bias = Adapt(I - OldI, NumPoints); 1184 1185 if (I / NumPoints > Max - N) 1186 return false; 1187 N += I / NumPoints; 1188 I = I % NumPoints; 1189 1190 // Insert N at position I in the output. 1191 char UTF8[4] = {}; 1192 if (!encodeUTF8(N, UTF8)) 1193 return false; 1194 Output.insert(OutputSize + I * 4, UTF8, 4); 1195 } 1196 1197 removeNullBytes(Output, OutputSize); 1198 return true; 1199} 1200 1201void Demangler::printIdentifier(Identifier Ident) { 1202 if (Error || !Print) 1203 return; 1204 1205 if (Ident.Punycode) { 1206 if (!decodePunycode(Ident.Name, Output)) 1207 Error = true; 1208 } else { 1209 print(Ident.Name); 1210 } 1211} 1212 1213char Demangler::look() const { 1214 if (Error || Position >= Input.size()) 1215 return 0; 1216 1217 return Input[Position]; 1218} 1219 1220char Demangler::consume() { 1221 if (Error || Position >= Input.size()) { 1222 Error = true; 1223 return 0; 1224 } 1225 1226 return Input[Position++]; 1227} 1228 1229bool Demangler::consumeIf(char Prefix) { 1230 if (Error || Position >= Input.size() || Input[Position] != Prefix) 1231 return false; 1232 1233 Position += 1; 1234 return true; 1235} 1236 1237/// Computes A + B. When computation wraps around sets the error and returns 1238/// false. Otherwise assigns the result to A and returns true. 1239bool Demangler::addAssign(uint64_t &A, uint64_t B) { 1240 if (A > std::numeric_limits<uint64_t>::max() - B) { 1241 Error = true; 1242 return false; 1243 } 1244 1245 A += B; 1246 return true; 1247} 1248 1249/// Computes A * B. When computation wraps around sets the error and returns 1250/// false. Otherwise assigns the result to A and returns true. 1251bool Demangler::mulAssign(uint64_t &A, uint64_t B) { 1252 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { 1253 Error = true; 1254 return false; 1255 } 1256 1257 A *= B; 1258 return true; 1259} 1260