1254562Scy/* Demangler for the Rust programming language 2254562Scy Copyright (C) 2016-2022 Free Software Foundation, Inc. 3254562Scy Written by David Tolnay (dtolnay@gmail.com). 4254562Scy Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support. 5254562Scy 6254562ScyThis file is part of the libiberty library. 7254562ScyLibiberty is free software; you can redistribute it and/or 8254562Scymodify it under the terms of the GNU Library General Public 9254562ScyLicense as published by the Free Software Foundation; either 10254562Scyversion 2 of the License, or (at your option) any later version. 11254562Scy 12254562ScyIn addition to the permissions in the GNU Library General Public 13254562ScyLicense, the Free Software Foundation gives you unlimited permission 14254562Scyto link the compiled version of this file into combinations with other 15254562Scyprograms, and to distribute those combinations without any restriction 16254562Scycoming from the use of this file. (The Library Public License 17254562Scyrestrictions do apply in other respects; for example, they cover 18254562Scymodification of the file, and distribution when not linked into a 19254562Scycombined executable.) 20254562Scy 21254562ScyLibiberty is distributed in the hope that it will be useful, 22254562Scybut WITHOUT ANY WARRANTY; without even the implied warranty of 23254562ScyMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 24254562ScyLibrary General Public License for more details. 25254562Scy 26254562ScyYou should have received a copy of the GNU Library General Public 27254562ScyLicense along with libiberty; see the file COPYING.LIB. 28254562ScyIf not, see <http://www.gnu.org/licenses/>. */ 29254562Scy 30254562Scy 31254562Scy#ifdef HAVE_CONFIG_H 32254562Scy#include "config.h" 33254562Scy#endif 34254562Scy 35254562Scy#include "safe-ctype.h" 36254562Scy 37254562Scy#include <inttypes.h> 38254562Scy#include <sys/types.h> 39254562Scy#include <string.h> 40254562Scy#include <stdio.h> 41254562Scy#include <stdlib.h> 42254562Scy 43254562Scy#ifdef HAVE_STRING_H 44254562Scy#include <string.h> 45254562Scy#else 46254562Scyextern size_t strlen(const char *s); 47254562Scyextern int strncmp(const char *s1, const char *s2, size_t n); 48254562Scyextern void *memset(void *s, int c, size_t n); 49254562Scy#endif 50254562Scy 51254562Scy#include <demangle.h> 52254562Scy#include "libiberty.h" 53254562Scy 54254562Scystruct rust_demangler 55254562Scy{ 56254562Scy const char *sym; 57254562Scy size_t sym_len; 58254562Scy 59254562Scy void *callback_opaque; 60254562Scy demangle_callbackref callback; 61254562Scy 62254562Scy /* Position of the next character to read from the symbol. */ 63254562Scy size_t next; 64254562Scy 65254562Scy /* Non-zero if any error occurred. */ 66254562Scy int errored; 67254562Scy 68254562Scy /* Non-zero if nothing should be printed. */ 69254562Scy int skipping_printing; 70254562Scy 71254562Scy /* Non-zero if printing should be verbose (e.g. include hashes). */ 72254562Scy int verbose; 73254562Scy 74254562Scy /* Rust mangling version, with legacy mangling being -1. */ 75254562Scy int version; 76254562Scy 77254562Scy /* Recursion depth. */ 78254562Scy unsigned int recursion; 79254562Scy /* Maximum number of times demangle_path may be called recursively. */ 80254562Scy#define RUST_MAX_RECURSION_COUNT 1024 81254562Scy#define RUST_NO_RECURSION_LIMIT ((unsigned int) -1) 82254562Scy 83254562Scy uint64_t bound_lifetime_depth; 84254562Scy}; 85254562Scy 86254562Scy/* Parsing functions. */ 87254562Scy 88254562Scystatic char 89254562Scypeek (const struct rust_demangler *rdm) 90254562Scy{ 91254562Scy if (rdm->next < rdm->sym_len) 92254562Scy return rdm->sym[rdm->next]; 93254562Scy return 0; 94254562Scy} 95254562Scy 96254562Scystatic int 97254562Scyeat (struct rust_demangler *rdm, char c) 98254562Scy{ 99254562Scy if (peek (rdm) == c) 100254562Scy { 101254562Scy rdm->next++; 102254562Scy return 1; 103254562Scy } 104254562Scy else 105254562Scy return 0; 106254562Scy} 107254562Scy 108254562Scystatic char 109254562Scynext (struct rust_demangler *rdm) 110254562Scy{ 111254562Scy char c = peek (rdm); 112254562Scy if (!c) 113254562Scy rdm->errored = 1; 114254562Scy else 115254562Scy rdm->next++; 116254562Scy return c; 117254562Scy} 118254562Scy 119254562Scystatic uint64_t 120254562Scyparse_integer_62 (struct rust_demangler *rdm) 121254562Scy{ 122254562Scy char c; 123254562Scy uint64_t x; 124254562Scy 125254562Scy if (eat (rdm, '_')) 126254562Scy return 0; 127254562Scy 128254562Scy x = 0; 129254562Scy while (!eat (rdm, '_') && !rdm->errored) 130254562Scy { 131254562Scy c = next (rdm); 132254562Scy x *= 62; 133254562Scy if (ISDIGIT (c)) 134254562Scy x += c - '0'; 135254562Scy else if (ISLOWER (c)) 136254562Scy x += 10 + (c - 'a'); 137254562Scy else if (ISUPPER (c)) 138254562Scy x += 10 + 26 + (c - 'A'); 139254562Scy else 140254562Scy { 141254562Scy rdm->errored = 1; 142254562Scy return 0; 143254562Scy } 144254562Scy } 145254562Scy return x + 1; 146254562Scy} 147254562Scy 148254562Scystatic uint64_t 149254562Scyparse_opt_integer_62 (struct rust_demangler *rdm, char tag) 150254562Scy{ 151254562Scy if (!eat (rdm, tag)) 152254562Scy return 0; 153254562Scy return 1 + parse_integer_62 (rdm); 154254562Scy} 155254562Scy 156254562Scystatic uint64_t 157254562Scyparse_disambiguator (struct rust_demangler *rdm) 158254562Scy{ 159254562Scy return parse_opt_integer_62 (rdm, 's'); 160254562Scy} 161254562Scy 162254562Scystatic size_t 163254562Scyparse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value) 164254562Scy{ 165254562Scy char c; 166254562Scy size_t hex_len; 167254562Scy 168254562Scy hex_len = 0; 169254562Scy *value = 0; 170254562Scy 171254562Scy while (!eat (rdm, '_')) 172254562Scy { 173254562Scy *value <<= 4; 174254562Scy 175254562Scy c = next (rdm); 176254562Scy if (ISDIGIT (c)) 177254562Scy *value |= c - '0'; 178254562Scy else if (c >= 'a' && c <= 'f') 179254562Scy *value |= 10 + (c - 'a'); 180254562Scy else 181254562Scy { 182254562Scy rdm->errored = 1; 183254562Scy return 0; 184254562Scy } 185254562Scy hex_len++; 186254562Scy } 187254562Scy 188254562Scy return hex_len; 189254562Scy} 190254562Scy 191254562Scystruct rust_mangled_ident 192254562Scy{ 193254562Scy /* ASCII part of the identifier. */ 194254562Scy const char *ascii; 195254562Scy size_t ascii_len; 196254562Scy 197254562Scy /* Punycode insertion codes for Unicode codepoints, if any. */ 198254562Scy const char *punycode; 199254562Scy size_t punycode_len; 200254562Scy}; 201254562Scy 202254562Scystatic struct rust_mangled_ident 203254562Scyparse_ident (struct rust_demangler *rdm) 204254562Scy{ 205254562Scy char c; 206254562Scy size_t start, len; 207254562Scy int is_punycode = 0; 208254562Scy struct rust_mangled_ident ident; 209254562Scy 210254562Scy ident.ascii = NULL; 211254562Scy ident.ascii_len = 0; 212254562Scy ident.punycode = NULL; 213254562Scy ident.punycode_len = 0; 214254562Scy 215254562Scy if (rdm->version != -1) 216254562Scy is_punycode = eat (rdm, 'u'); 217254562Scy 218254562Scy c = next (rdm); 219254562Scy if (!ISDIGIT (c)) 220254562Scy { 221254562Scy rdm->errored = 1; 222254562Scy return ident; 223254562Scy } 224254562Scy len = c - '0'; 225254562Scy 226254562Scy if (c != '0') 227254562Scy while (ISDIGIT (peek (rdm))) 228254562Scy len = len * 10 + (next (rdm) - '0'); 229254562Scy 230254562Scy /* Skip past the optional `_` separator (v0). */ 231254562Scy if (rdm->version != -1) 232254562Scy eat (rdm, '_'); 233254562Scy 234254562Scy start = rdm->next; 235254562Scy rdm->next += len; 236254562Scy /* Check for overflows. */ 237254562Scy if ((start > rdm->next) || (rdm->next > rdm->sym_len)) 238254562Scy { 239254562Scy rdm->errored = 1; 240254562Scy return ident; 241254562Scy } 242254562Scy 243254562Scy ident.ascii = rdm->sym + start; 244254562Scy ident.ascii_len = len; 245254562Scy 246254562Scy if (is_punycode) 247254562Scy { 248254562Scy ident.punycode_len = 0; 249254562Scy while (ident.ascii_len > 0) 250254562Scy { 251254562Scy ident.ascii_len--; 252254562Scy 253254562Scy /* The last '_' is a separator between ascii & punycode. */ 254254562Scy if (ident.ascii[ident.ascii_len] == '_') 255254562Scy break; 256254562Scy 257254562Scy ident.punycode_len++; 258254562Scy } 259254562Scy if (!ident.punycode_len) 260254562Scy { 261254562Scy rdm->errored = 1; 262254562Scy return ident; 263254562Scy } 264254562Scy ident.punycode = ident.ascii + (len - ident.punycode_len); 265254562Scy } 266254562Scy 267254562Scy if (ident.ascii_len == 0) 268254562Scy ident.ascii = NULL; 269254562Scy 270254562Scy return ident; 271254562Scy} 272254562Scy 273254562Scy/* Printing functions. */ 274254562Scy 275254562Scystatic void 276254562Scyprint_str (struct rust_demangler *rdm, const char *data, size_t len) 277254562Scy{ 278254562Scy if (!rdm->errored && !rdm->skipping_printing) 279254562Scy rdm->callback (data, len, rdm->callback_opaque); 280254562Scy} 281254562Scy 282254562Scy#define PRINT(s) print_str (rdm, s, strlen (s)) 283254562Scy 284254562Scystatic void 285254562Scyprint_uint64 (struct rust_demangler *rdm, uint64_t x) 286254562Scy{ 287254562Scy char s[21]; 288254562Scy snprintf (s, 21, "%" PRIu64, x); 289254562Scy PRINT (s); 290254562Scy} 291254562Scy 292254562Scystatic void 293254562Scyprint_uint64_hex (struct rust_demangler *rdm, uint64_t x) 294254562Scy{ 295254562Scy char s[17]; 296254562Scy snprintf (s, 17, "%" PRIx64, x); 297254562Scy PRINT (s); 298254562Scy} 299254562Scy 300254562Scy/* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */ 301254562Scystatic int 302254562Scydecode_lower_hex_nibble (char nibble) 303254562Scy{ 304254562Scy if ('0' <= nibble && nibble <= '9') 305254562Scy return nibble - '0'; 306254562Scy if ('a' <= nibble && nibble <= 'f') 307254562Scy return 0xa + (nibble - 'a'); 308254562Scy return -1; 309254562Scy} 310254562Scy 311254562Scy/* Return the unescaped character for a "$...$" escape, or 0 if invalid. */ 312254562Scystatic char 313254562Scydecode_legacy_escape (const char *e, size_t len, size_t *out_len) 314254562Scy{ 315254562Scy char c = 0; 316254562Scy size_t escape_len = 0; 317254562Scy int lo_nibble = -1, hi_nibble = -1; 318254562Scy 319254562Scy if (len < 3 || e[0] != '$') 320254562Scy return 0; 321254562Scy 322254562Scy e++; 323254562Scy len--; 324254562Scy 325254562Scy if (e[0] == 'C') 326254562Scy { 327254562Scy escape_len = 1; 328254562Scy 329254562Scy c = ','; 330254562Scy } 331254562Scy else if (len > 2) 332254562Scy { 333254562Scy escape_len = 2; 334254562Scy 335254562Scy if (e[0] == 'S' && e[1] == 'P') 336254562Scy c = '@'; 337254562Scy else if (e[0] == 'B' && e[1] == 'P') 338254562Scy c = '*'; 339254562Scy else if (e[0] == 'R' && e[1] == 'F') 340254562Scy c = '&'; 341254562Scy else if (e[0] == 'L' && e[1] == 'T') 342254562Scy c = '<'; 343254562Scy else if (e[0] == 'G' && e[1] == 'T') 344254562Scy c = '>'; 345254562Scy else if (e[0] == 'L' && e[1] == 'P') 346254562Scy c = '('; 347254562Scy else if (e[0] == 'R' && e[1] == 'P') 348254562Scy c = ')'; 349254562Scy else if (e[0] == 'u' && len > 3) 350254562Scy { 351254562Scy escape_len = 3; 352254562Scy 353254562Scy hi_nibble = decode_lower_hex_nibble (e[1]); 354254562Scy if (hi_nibble < 0) 355254562Scy return 0; 356254562Scy lo_nibble = decode_lower_hex_nibble (e[2]); 357254562Scy if (lo_nibble < 0) 358254562Scy return 0; 359254562Scy 360254562Scy /* Only allow non-control ASCII characters. */ 361254562Scy if (hi_nibble > 7) 362254562Scy return 0; 363254562Scy c = (hi_nibble << 4) | lo_nibble; 364254562Scy if (c < 0x20) 365 return 0; 366 } 367 } 368 369 if (!c || len <= escape_len || e[escape_len] != '$') 370 return 0; 371 372 *out_len = 2 + escape_len; 373 return c; 374} 375 376static void 377print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) 378{ 379 char unescaped; 380 uint8_t *out, *p, d; 381 size_t len, cap, punycode_pos, j; 382 /* Punycode parameters and state. */ 383 uint32_t c; 384 size_t base, t_min, t_max, skew, damp, bias, i; 385 size_t delta, w, k, t; 386 387 if (rdm->errored || rdm->skipping_printing) 388 return; 389 390 if (rdm->version == -1) 391 { 392 /* Ignore leading underscores preceding escape sequences. 393 The mangler inserts an underscore to make sure the 394 identifier begins with a XID_Start character. */ 395 if (ident.ascii_len >= 2 && ident.ascii[0] == '_' 396 && ident.ascii[1] == '$') 397 { 398 ident.ascii++; 399 ident.ascii_len--; 400 } 401 402 while (ident.ascii_len > 0) 403 { 404 /* Handle legacy escape sequences ("$...$", ".." or "."). */ 405 if (ident.ascii[0] == '$') 406 { 407 unescaped 408 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len); 409 if (unescaped) 410 print_str (rdm, &unescaped, 1); 411 else 412 { 413 /* Unexpected escape sequence, print the rest verbatim. */ 414 print_str (rdm, ident.ascii, ident.ascii_len); 415 return; 416 } 417 } 418 else if (ident.ascii[0] == '.') 419 { 420 if (ident.ascii_len >= 2 && ident.ascii[1] == '.') 421 { 422 /* ".." becomes "::" */ 423 PRINT ("::"); 424 len = 2; 425 } 426 else 427 { 428 PRINT ("."); 429 len = 1; 430 } 431 } 432 else 433 { 434 /* Print everything before the next escape sequence, at once. */ 435 for (len = 0; len < ident.ascii_len; len++) 436 if (ident.ascii[len] == '$' || ident.ascii[len] == '.') 437 break; 438 439 print_str (rdm, ident.ascii, len); 440 } 441 442 ident.ascii += len; 443 ident.ascii_len -= len; 444 } 445 446 return; 447 } 448 449 if (!ident.punycode) 450 { 451 print_str (rdm, ident.ascii, ident.ascii_len); 452 return; 453 } 454 455 len = 0; 456 cap = 4; 457 while (cap < ident.ascii_len) 458 { 459 cap *= 2; 460 /* Check for overflows. */ 461 if ((cap * 4) / 4 != cap) 462 { 463 rdm->errored = 1; 464 return; 465 } 466 } 467 468 /* Store the output codepoints as groups of 4 UTF-8 bytes. */ 469 out = (uint8_t *)malloc (cap * 4); 470 if (!out) 471 { 472 rdm->errored = 1; 473 return; 474 } 475 476 /* Populate initial output from ASCII fragment. */ 477 for (len = 0; len < ident.ascii_len; len++) 478 { 479 p = out + 4 * len; 480 p[0] = 0; 481 p[1] = 0; 482 p[2] = 0; 483 p[3] = ident.ascii[len]; 484 } 485 486 /* Punycode parameters and initial state. */ 487 base = 36; 488 t_min = 1; 489 t_max = 26; 490 skew = 38; 491 damp = 700; 492 bias = 72; 493 i = 0; 494 c = 0x80; 495 496 punycode_pos = 0; 497 while (punycode_pos < ident.punycode_len) 498 { 499 /* Read one delta value. */ 500 delta = 0; 501 w = 1; 502 k = 0; 503 do 504 { 505 k += base; 506 t = k < bias ? 0 : (k - bias); 507 if (t < t_min) 508 t = t_min; 509 if (t > t_max) 510 t = t_max; 511 512 if (punycode_pos >= ident.punycode_len) 513 goto cleanup; 514 d = ident.punycode[punycode_pos++]; 515 516 if (ISLOWER (d)) 517 d = d - 'a'; 518 else if (ISDIGIT (d)) 519 d = 26 + (d - '0'); 520 else 521 { 522 rdm->errored = 1; 523 goto cleanup; 524 } 525 526 delta += d * w; 527 w *= base - t; 528 } 529 while (d >= t); 530 531 /* Compute the new insert position and character. */ 532 len++; 533 i += delta; 534 c += i / len; 535 i %= len; 536 537 /* Ensure enough space is available. */ 538 if (cap < len) 539 { 540 cap *= 2; 541 /* Check for overflows. */ 542 if ((cap * 4) / 4 != cap || cap < len) 543 { 544 rdm->errored = 1; 545 goto cleanup; 546 } 547 } 548 p = (uint8_t *)realloc (out, cap * 4); 549 if (!p) 550 { 551 rdm->errored = 1; 552 goto cleanup; 553 } 554 out = p; 555 556 /* Move the characters after the insert position. */ 557 p = out + i * 4; 558 memmove (p + 4, p, (len - i - 1) * 4); 559 560 /* Insert the new character, as UTF-8 bytes. */ 561 p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0; 562 p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0; 563 p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f); 564 p[3] = 0x80 | (c & 0x3f); 565 566 /* If there are no more deltas, decoding is complete. */ 567 if (punycode_pos == ident.punycode_len) 568 break; 569 570 i++; 571 572 /* Perform bias adaptation. */ 573 delta /= damp; 574 damp = 2; 575 576 delta += delta / len; 577 k = 0; 578 while (delta > ((base - t_min) * t_max) / 2) 579 { 580 delta /= base - t_min; 581 k += base; 582 } 583 bias = k + ((base - t_min + 1) * delta) / (delta + skew); 584 } 585 586 /* Remove all the 0 bytes to leave behind an UTF-8 string. */ 587 for (i = 0, j = 0; i < len * 4; i++) 588 if (out[i] != 0) 589 out[j++] = out[i]; 590 591 print_str (rdm, (const char *)out, j); 592 593cleanup: 594 free (out); 595} 596 597/* Print the lifetime according to the previously decoded index. 598 An index of `0` always refers to `'_`, but starting with `1`, 599 indices refer to late-bound lifetimes introduced by a binder. */ 600static void 601print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt) 602{ 603 char c; 604 uint64_t depth; 605 606 PRINT ("'"); 607 if (lt == 0) 608 { 609 PRINT ("_"); 610 return; 611 } 612 613 depth = rdm->bound_lifetime_depth - lt; 614 /* Try to print lifetimes alphabetically first. */ 615 if (depth < 26) 616 { 617 c = 'a' + depth; 618 print_str (rdm, &c, 1); 619 } 620 else 621 { 622 /* Use `'_123` after running out of letters. */ 623 PRINT ("_"); 624 print_uint64 (rdm, depth); 625 } 626} 627 628/* Demangling functions. */ 629 630static void demangle_binder (struct rust_demangler *rdm); 631static void demangle_path (struct rust_demangler *rdm, int in_value); 632static void demangle_generic_arg (struct rust_demangler *rdm); 633static void demangle_type (struct rust_demangler *rdm); 634static int demangle_path_maybe_open_generics (struct rust_demangler *rdm); 635static void demangle_dyn_trait (struct rust_demangler *rdm); 636static void demangle_const (struct rust_demangler *rdm); 637static void demangle_const_uint (struct rust_demangler *rdm); 638static void demangle_const_int (struct rust_demangler *rdm); 639static void demangle_const_bool (struct rust_demangler *rdm); 640static void demangle_const_char (struct rust_demangler *rdm); 641 642/* Optionally enter a binder ('G') for late-bound lifetimes, 643 printing e.g. `for<'a, 'b> `, and make those lifetimes visible 644 to the caller (via depth level, which the caller should reset). */ 645static void 646demangle_binder (struct rust_demangler *rdm) 647{ 648 uint64_t i, bound_lifetimes; 649 650 if (rdm->errored) 651 return; 652 653 bound_lifetimes = parse_opt_integer_62 (rdm, 'G'); 654 if (bound_lifetimes > 0) 655 { 656 PRINT ("for<"); 657 for (i = 0; i < bound_lifetimes; i++) 658 { 659 if (i > 0) 660 PRINT (", "); 661 rdm->bound_lifetime_depth++; 662 print_lifetime_from_index (rdm, 1); 663 } 664 PRINT ("> "); 665 } 666} 667 668static void 669demangle_path (struct rust_demangler *rdm, int in_value) 670{ 671 char tag, ns; 672 int was_skipping_printing; 673 size_t i, backref, old_next; 674 uint64_t dis; 675 struct rust_mangled_ident name; 676 677 if (rdm->errored) 678 return; 679 680 if (rdm->recursion != RUST_NO_RECURSION_LIMIT) 681 { 682 ++ rdm->recursion; 683 if (rdm->recursion > RUST_MAX_RECURSION_COUNT) 684 /* FIXME: There ought to be a way to report 685 that the recursion limit has been reached. */ 686 goto fail_return; 687 } 688 689 switch (tag = next (rdm)) 690 { 691 case 'C': 692 dis = parse_disambiguator (rdm); 693 name = parse_ident (rdm); 694 695 print_ident (rdm, name); 696 if (rdm->verbose) 697 { 698 PRINT ("["); 699 print_uint64_hex (rdm, dis); 700 PRINT ("]"); 701 } 702 break; 703 case 'N': 704 ns = next (rdm); 705 if (!ISLOWER (ns) && !ISUPPER (ns)) 706 goto fail_return; 707 708 demangle_path (rdm, in_value); 709 710 dis = parse_disambiguator (rdm); 711 name = parse_ident (rdm); 712 713 if (ISUPPER (ns)) 714 { 715 /* Special namespaces, like closures and shims. */ 716 PRINT ("::{"); 717 switch (ns) 718 { 719 case 'C': 720 PRINT ("closure"); 721 break; 722 case 'S': 723 PRINT ("shim"); 724 break; 725 default: 726 print_str (rdm, &ns, 1); 727 } 728 if (name.ascii || name.punycode) 729 { 730 PRINT (":"); 731 print_ident (rdm, name); 732 } 733 PRINT ("#"); 734 print_uint64 (rdm, dis); 735 PRINT ("}"); 736 } 737 else 738 { 739 /* Implementation-specific/unspecified namespaces. */ 740 741 if (name.ascii || name.punycode) 742 { 743 PRINT ("::"); 744 print_ident (rdm, name); 745 } 746 } 747 break; 748 case 'M': 749 case 'X': 750 /* Ignore the `impl`'s own path.*/ 751 parse_disambiguator (rdm); 752 was_skipping_printing = rdm->skipping_printing; 753 rdm->skipping_printing = 1; 754 demangle_path (rdm, in_value); 755 rdm->skipping_printing = was_skipping_printing; 756 /* fallthrough */ 757 case 'Y': 758 PRINT ("<"); 759 demangle_type (rdm); 760 if (tag != 'M') 761 { 762 PRINT (" as "); 763 demangle_path (rdm, 0); 764 } 765 PRINT (">"); 766 break; 767 case 'I': 768 demangle_path (rdm, in_value); 769 if (in_value) 770 PRINT ("::"); 771 PRINT ("<"); 772 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) 773 { 774 if (i > 0) 775 PRINT (", "); 776 demangle_generic_arg (rdm); 777 } 778 PRINT (">"); 779 break; 780 case 'B': 781 backref = parse_integer_62 (rdm); 782 if (!rdm->skipping_printing) 783 { 784 old_next = rdm->next; 785 rdm->next = backref; 786 demangle_path (rdm, in_value); 787 rdm->next = old_next; 788 } 789 break; 790 default: 791 goto fail_return; 792 } 793 goto pass_return; 794 795 fail_return: 796 rdm->errored = 1; 797 pass_return: 798 if (rdm->recursion != RUST_NO_RECURSION_LIMIT) 799 -- rdm->recursion; 800} 801 802static void 803demangle_generic_arg (struct rust_demangler *rdm) 804{ 805 uint64_t lt; 806 if (eat (rdm, 'L')) 807 { 808 lt = parse_integer_62 (rdm); 809 print_lifetime_from_index (rdm, lt); 810 } 811 else if (eat (rdm, 'K')) 812 demangle_const (rdm); 813 else 814 demangle_type (rdm); 815} 816 817static const char * 818basic_type (char tag) 819{ 820 switch (tag) 821 { 822 case 'b': 823 return "bool"; 824 case 'c': 825 return "char"; 826 case 'e': 827 return "str"; 828 case 'u': 829 return "()"; 830 case 'a': 831 return "i8"; 832 case 's': 833 return "i16"; 834 case 'l': 835 return "i32"; 836 case 'x': 837 return "i64"; 838 case 'n': 839 return "i128"; 840 case 'i': 841 return "isize"; 842 case 'h': 843 return "u8"; 844 case 't': 845 return "u16"; 846 case 'm': 847 return "u32"; 848 case 'y': 849 return "u64"; 850 case 'o': 851 return "u128"; 852 case 'j': 853 return "usize"; 854 case 'f': 855 return "f32"; 856 case 'd': 857 return "f64"; 858 case 'z': 859 return "!"; 860 case 'p': 861 return "_"; 862 case 'v': 863 return "..."; 864 865 default: 866 return NULL; 867 } 868} 869 870static void 871demangle_type (struct rust_demangler *rdm) 872{ 873 char tag; 874 size_t i, old_next, backref; 875 uint64_t lt, old_bound_lifetime_depth; 876 const char *basic; 877 struct rust_mangled_ident abi; 878 879 if (rdm->errored) 880 return; 881 882 tag = next (rdm); 883 884 basic = basic_type (tag); 885 if (basic) 886 { 887 PRINT (basic); 888 return; 889 } 890 891 if (rdm->recursion != RUST_NO_RECURSION_LIMIT) 892 { 893 ++ rdm->recursion; 894 if (rdm->recursion > RUST_MAX_RECURSION_COUNT) 895 /* FIXME: There ought to be a way to report 896 that the recursion limit has been reached. */ 897 { 898 rdm->errored = 1; 899 -- rdm->recursion; 900 return; 901 } 902 } 903 904 switch (tag) 905 { 906 case 'R': 907 case 'Q': 908 PRINT ("&"); 909 if (eat (rdm, 'L')) 910 { 911 lt = parse_integer_62 (rdm); 912 if (lt) 913 { 914 print_lifetime_from_index (rdm, lt); 915 PRINT (" "); 916 } 917 } 918 if (tag != 'R') 919 PRINT ("mut "); 920 demangle_type (rdm); 921 break; 922 case 'P': 923 case 'O': 924 PRINT ("*"); 925 if (tag != 'P') 926 PRINT ("mut "); 927 else 928 PRINT ("const "); 929 demangle_type (rdm); 930 break; 931 case 'A': 932 case 'S': 933 PRINT ("["); 934 demangle_type (rdm); 935 if (tag == 'A') 936 { 937 PRINT ("; "); 938 demangle_const (rdm); 939 } 940 PRINT ("]"); 941 break; 942 case 'T': 943 PRINT ("("); 944 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) 945 { 946 if (i > 0) 947 PRINT (", "); 948 demangle_type (rdm); 949 } 950 if (i == 1) 951 PRINT (","); 952 PRINT (")"); 953 break; 954 case 'F': 955 old_bound_lifetime_depth = rdm->bound_lifetime_depth; 956 demangle_binder (rdm); 957 958 if (eat (rdm, 'U')) 959 PRINT ("unsafe "); 960 961 if (eat (rdm, 'K')) 962 { 963 if (eat (rdm, 'C')) 964 { 965 abi.ascii = "C"; 966 abi.ascii_len = 1; 967 } 968 else 969 { 970 abi = parse_ident (rdm); 971 if (!abi.ascii || abi.punycode) 972 { 973 rdm->errored = 1; 974 goto restore; 975 } 976 } 977 978 PRINT ("extern \""); 979 980 /* If the ABI had any `-`, they were replaced with `_`, 981 so the parts between `_` have to be re-joined with `-`. */ 982 for (i = 0; i < abi.ascii_len; i++) 983 { 984 if (abi.ascii[i] == '_') 985 { 986 print_str (rdm, abi.ascii, i); 987 PRINT ("-"); 988 abi.ascii += i + 1; 989 abi.ascii_len -= i + 1; 990 i = 0; 991 } 992 } 993 print_str (rdm, abi.ascii, abi.ascii_len); 994 995 PRINT ("\" "); 996 } 997 998 PRINT ("fn("); 999 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) 1000 { 1001 if (i > 0) 1002 PRINT (", "); 1003 demangle_type (rdm); 1004 } 1005 PRINT (")"); 1006 1007 if (eat (rdm, 'u')) 1008 { 1009 /* Skip printing the return type if it's 'u', i.e. `()`. */ 1010 } 1011 else 1012 { 1013 PRINT (" -> "); 1014 demangle_type (rdm); 1015 } 1016 1017 /* Restore `bound_lifetime_depth` to outside the binder. */ 1018 restore: 1019 rdm->bound_lifetime_depth = old_bound_lifetime_depth; 1020 break; 1021 case 'D': 1022 PRINT ("dyn "); 1023 1024 old_bound_lifetime_depth = rdm->bound_lifetime_depth; 1025 demangle_binder (rdm); 1026 1027 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) 1028 { 1029 if (i > 0) 1030 PRINT (" + "); 1031 demangle_dyn_trait (rdm); 1032 } 1033 1034 /* Restore `bound_lifetime_depth` to outside the binder. */ 1035 rdm->bound_lifetime_depth = old_bound_lifetime_depth; 1036 1037 if (!eat (rdm, 'L')) 1038 { 1039 rdm->errored = 1; 1040 return; 1041 } 1042 lt = parse_integer_62 (rdm); 1043 if (lt) 1044 { 1045 PRINT (" + "); 1046 print_lifetime_from_index (rdm, lt); 1047 } 1048 break; 1049 case 'B': 1050 backref = parse_integer_62 (rdm); 1051 if (!rdm->skipping_printing) 1052 { 1053 old_next = rdm->next; 1054 rdm->next = backref; 1055 demangle_type (rdm); 1056 rdm->next = old_next; 1057 } 1058 break; 1059 default: 1060 /* Go back to the tag, so `demangle_path` also sees it. */ 1061 rdm->next--; 1062 demangle_path (rdm, 0); 1063 } 1064 1065 if (rdm->recursion != RUST_NO_RECURSION_LIMIT) 1066 -- rdm->recursion; 1067} 1068 1069/* A trait in a trait object may have some "existential projections" 1070 (i.e. associated type bindings) after it, which should be printed 1071 in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`. 1072 To this end, this method will keep the `<...>` of an 'I' path 1073 open, by omitting the `>`, and return `Ok(true)` in that case. */ 1074static int 1075demangle_path_maybe_open_generics (struct rust_demangler *rdm) 1076{ 1077 int open; 1078 size_t i, old_next, backref; 1079 1080 open = 0; 1081 1082 if (rdm->errored) 1083 return open; 1084 1085 if (rdm->recursion != RUST_NO_RECURSION_LIMIT) 1086 { 1087 ++ rdm->recursion; 1088 if (rdm->recursion > RUST_MAX_RECURSION_COUNT) 1089 { 1090 /* FIXME: There ought to be a way to report 1091 that the recursion limit has been reached. */ 1092 rdm->errored = 1; 1093 goto end_of_func; 1094 } 1095 } 1096 1097 if (eat (rdm, 'B')) 1098 { 1099 backref = parse_integer_62 (rdm); 1100 if (!rdm->skipping_printing) 1101 { 1102 old_next = rdm->next; 1103 rdm->next = backref; 1104 open = demangle_path_maybe_open_generics (rdm); 1105 rdm->next = old_next; 1106 } 1107 } 1108 else if (eat (rdm, 'I')) 1109 { 1110 demangle_path (rdm, 0); 1111 PRINT ("<"); 1112 open = 1; 1113 for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++) 1114 { 1115 if (i > 0) 1116 PRINT (", "); 1117 demangle_generic_arg (rdm); 1118 } 1119 } 1120 else 1121 demangle_path (rdm, 0); 1122 1123 end_of_func: 1124 if (rdm->recursion != RUST_NO_RECURSION_LIMIT) 1125 -- rdm->recursion; 1126 1127 return open; 1128} 1129 1130static void 1131demangle_dyn_trait (struct rust_demangler *rdm) 1132{ 1133 int open; 1134 struct rust_mangled_ident name; 1135 1136 if (rdm->errored) 1137 return; 1138 1139 open = demangle_path_maybe_open_generics (rdm); 1140 1141 while (eat (rdm, 'p')) 1142 { 1143 if (!open) 1144 PRINT ("<"); 1145 else 1146 PRINT (", "); 1147 open = 1; 1148 1149 name = parse_ident (rdm); 1150 print_ident (rdm, name); 1151 PRINT (" = "); 1152 demangle_type (rdm); 1153 } 1154 1155 if (open) 1156 PRINT (">"); 1157} 1158 1159static void 1160demangle_const (struct rust_demangler *rdm) 1161{ 1162 char ty_tag; 1163 size_t old_next, backref; 1164 1165 if (rdm->errored) 1166 return; 1167 1168 if (rdm->recursion != RUST_NO_RECURSION_LIMIT) 1169 { 1170 ++ rdm->recursion; 1171 if (rdm->recursion > RUST_MAX_RECURSION_COUNT) 1172 /* FIXME: There ought to be a way to report 1173 that the recursion limit has been reached. */ 1174 goto fail_return; 1175 } 1176 1177 if (eat (rdm, 'B')) 1178 { 1179 backref = parse_integer_62 (rdm); 1180 if (!rdm->skipping_printing) 1181 { 1182 old_next = rdm->next; 1183 rdm->next = backref; 1184 demangle_const (rdm); 1185 rdm->next = old_next; 1186 } 1187 goto pass_return; 1188 } 1189 1190 ty_tag = next (rdm); 1191 switch (ty_tag) 1192 { 1193 /* Placeholder. */ 1194 case 'p': 1195 PRINT ("_"); 1196 goto pass_return; 1197 1198 /* Unsigned integer types. */ 1199 case 'h': 1200 case 't': 1201 case 'm': 1202 case 'y': 1203 case 'o': 1204 case 'j': 1205 demangle_const_uint (rdm); 1206 break; 1207 1208 /* Signed integer types. */ 1209 case 'a': 1210 case 's': 1211 case 'l': 1212 case 'x': 1213 case 'n': 1214 case 'i': 1215 demangle_const_int (rdm); 1216 break; 1217 1218 /* Boolean. */ 1219 case 'b': 1220 demangle_const_bool (rdm); 1221 break; 1222 1223 /* Character. */ 1224 case 'c': 1225 demangle_const_char (rdm); 1226 break; 1227 1228 default: 1229 goto fail_return; 1230 } 1231 1232 if (!rdm->errored && rdm->verbose) 1233 { 1234 PRINT (": "); 1235 PRINT (basic_type (ty_tag)); 1236 } 1237 goto pass_return; 1238 1239 fail_return: 1240 rdm->errored = 1; 1241 pass_return: 1242 if (rdm->recursion != RUST_NO_RECURSION_LIMIT) 1243 -- rdm->recursion; 1244} 1245 1246static void 1247demangle_const_uint (struct rust_demangler *rdm) 1248{ 1249 size_t hex_len; 1250 uint64_t value; 1251 1252 if (rdm->errored) 1253 return; 1254 1255 hex_len = parse_hex_nibbles (rdm, &value); 1256 1257 if (hex_len > 16) 1258 { 1259 /* Print anything that doesn't fit in `uint64_t` verbatim. */ 1260 PRINT ("0x"); 1261 print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len); 1262 } 1263 else if (hex_len > 0) 1264 print_uint64 (rdm, value); 1265 else 1266 rdm->errored = 1; 1267} 1268 1269static void 1270demangle_const_int (struct rust_demangler *rdm) 1271{ 1272 if (eat (rdm, 'n')) 1273 PRINT ("-"); 1274 demangle_const_uint (rdm); 1275} 1276 1277static void 1278demangle_const_bool (struct rust_demangler *rdm) 1279{ 1280 uint64_t value; 1281 1282 if (parse_hex_nibbles (rdm, &value) != 1) 1283 { 1284 rdm->errored = 1; 1285 return; 1286 } 1287 1288 if (value == 0) 1289 PRINT ("false"); 1290 else if (value == 1) 1291 PRINT ("true"); 1292 else 1293 rdm->errored = 1; 1294} 1295 1296static void 1297demangle_const_char (struct rust_demangler *rdm) 1298{ 1299 size_t hex_len; 1300 uint64_t value; 1301 1302 hex_len = parse_hex_nibbles (rdm, &value); 1303 1304 if (hex_len == 0 || hex_len > 8) 1305 { 1306 rdm->errored = 1; 1307 return; 1308 } 1309 1310 /* Match Rust's character "debug" output as best as we can. */ 1311 PRINT ("'"); 1312 if (value == '\t') 1313 PRINT ("\\t"); 1314 else if (value == '\r') 1315 PRINT ("\\r"); 1316 else if (value == '\n') 1317 PRINT ("\\n"); 1318 else if (value > ' ' && value < '~') 1319 { 1320 /* Rust also considers many non-ASCII codepoints to be printable, but 1321 that logic is not easily ported to C. */ 1322 char c = value; 1323 print_str (rdm, &c, 1); 1324 } 1325 else 1326 { 1327 PRINT ("\\u{"); 1328 print_uint64_hex (rdm, value); 1329 PRINT ("}"); 1330 } 1331 PRINT ("'"); 1332} 1333 1334/* A legacy hash is the prefix "h" followed by 16 lowercase hex digits. 1335 The hex digits must contain at least 5 distinct digits. */ 1336static int 1337is_legacy_prefixed_hash (struct rust_mangled_ident ident) 1338{ 1339 uint16_t seen; 1340 int nibble; 1341 size_t i, count; 1342 1343 if (ident.ascii_len != 17 || ident.ascii[0] != 'h') 1344 return 0; 1345 1346 seen = 0; 1347 for (i = 0; i < 16; i++) 1348 { 1349 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]); 1350 if (nibble < 0) 1351 return 0; 1352 seen |= (uint16_t)1 << nibble; 1353 } 1354 1355 /* Count how many distinct digits were seen. */ 1356 count = 0; 1357 while (seen) 1358 { 1359 if (seen & 1) 1360 count++; 1361 seen >>= 1; 1362 } 1363 1364 return count >= 5; 1365} 1366 1367int 1368rust_demangle_callback (const char *mangled, int options, 1369 demangle_callbackref callback, void *opaque) 1370{ 1371 const char *p; 1372 struct rust_demangler rdm; 1373 struct rust_mangled_ident ident; 1374 1375 rdm.sym = mangled; 1376 rdm.sym_len = 0; 1377 1378 rdm.callback_opaque = opaque; 1379 rdm.callback = callback; 1380 1381 rdm.next = 0; 1382 rdm.errored = 0; 1383 rdm.skipping_printing = 0; 1384 rdm.verbose = (options & DMGL_VERBOSE) != 0; 1385 rdm.version = 0; 1386 rdm.recursion = (options & DMGL_NO_RECURSE_LIMIT) ? RUST_NO_RECURSION_LIMIT : 0; 1387 rdm.bound_lifetime_depth = 0; 1388 1389 /* Rust symbols always start with _R (v0) or _ZN (legacy). */ 1390 if (rdm.sym[0] == '_' && rdm.sym[1] == 'R') 1391 rdm.sym += 2; 1392 else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N') 1393 { 1394 rdm.sym += 3; 1395 rdm.version = -1; 1396 } 1397 else 1398 return 0; 1399 1400 /* Paths (v0) always start with uppercase characters. */ 1401 if (rdm.version != -1 && !ISUPPER (rdm.sym[0])) 1402 return 0; 1403 1404 /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */ 1405 for (p = rdm.sym; *p; p++) 1406 { 1407 /* Rust v0 symbols can have '.' suffixes, ignore those. */ 1408 if (rdm.version == 0 && *p == '.') 1409 break; 1410 1411 rdm.sym_len++; 1412 1413 if (*p == '_' || ISALNUM (*p)) 1414 continue; 1415 1416 /* Legacy Rust symbols can also contain [.:$] characters. 1417 Or @ in the .suffix (which will be skipped, see below). */ 1418 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':' 1419 || *p == '@')) 1420 continue; 1421 1422 return 0; 1423 } 1424 1425 /* Legacy Rust symbols need to be handled separately. */ 1426 if (rdm.version == -1) 1427 { 1428 /* Legacy Rust symbols always end with E. But can be followed by a 1429 .suffix (which we want to ignore). */ 1430 int dot_suffix = 1; 1431 while (rdm.sym_len > 0 && 1432 !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E')) 1433 { 1434 dot_suffix = rdm.sym[rdm.sym_len - 1] == '.'; 1435 rdm.sym_len--; 1436 } 1437 1438 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E')) 1439 return 0; 1440 rdm.sym_len--; 1441 1442 /* Legacy Rust symbols also always end with a path segment 1443 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'. 1444 This early check, before any parse_ident calls, should 1445 quickly filter out most C++ symbols unrelated to Rust. */ 1446 if (!(rdm.sym_len > 19 1447 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3))) 1448 return 0; 1449 1450 do 1451 { 1452 ident = parse_ident (&rdm); 1453 if (rdm.errored || !ident.ascii) 1454 return 0; 1455 } 1456 while (rdm.next < rdm.sym_len); 1457 1458 /* The last path segment should be the hash. */ 1459 if (!is_legacy_prefixed_hash (ident)) 1460 return 0; 1461 1462 /* Reset the state for a second pass, to print the symbol. */ 1463 rdm.next = 0; 1464 if (!rdm.verbose && rdm.sym_len > 19) 1465 { 1466 /* Hide the last segment, containing the hash, if not verbose. */ 1467 rdm.sym_len -= 19; 1468 } 1469 1470 do 1471 { 1472 if (rdm.next > 0) 1473 print_str (&rdm, "::", 2); 1474 1475 ident = parse_ident (&rdm); 1476 print_ident (&rdm, ident); 1477 } 1478 while (rdm.next < rdm.sym_len); 1479 } 1480 else 1481 { 1482 demangle_path (&rdm, 1); 1483 1484 /* Skip instantiating crate. */ 1485 if (!rdm.errored && rdm.next < rdm.sym_len) 1486 { 1487 rdm.skipping_printing = 1; 1488 demangle_path (&rdm, 0); 1489 } 1490 1491 /* It's an error to not reach the end. */ 1492 rdm.errored |= rdm.next != rdm.sym_len; 1493 } 1494 1495 return !rdm.errored; 1496} 1497 1498/* Growable string buffers. */ 1499struct str_buf 1500{ 1501 char *ptr; 1502 size_t len; 1503 size_t cap; 1504 int errored; 1505}; 1506 1507static void 1508str_buf_reserve (struct str_buf *buf, size_t extra) 1509{ 1510 size_t available, min_new_cap, new_cap; 1511 char *new_ptr; 1512 1513 /* Allocation failed before. */ 1514 if (buf->errored) 1515 return; 1516 1517 available = buf->cap - buf->len; 1518 1519 if (extra <= available) 1520 return; 1521 1522 min_new_cap = buf->cap + (extra - available); 1523 1524 /* Check for overflows. */ 1525 if (min_new_cap < buf->cap) 1526 { 1527 buf->errored = 1; 1528 return; 1529 } 1530 1531 new_cap = buf->cap; 1532 1533 if (new_cap == 0) 1534 new_cap = 4; 1535 1536 /* Double capacity until sufficiently large. */ 1537 while (new_cap < min_new_cap) 1538 { 1539 new_cap *= 2; 1540 1541 /* Check for overflows. */ 1542 if (new_cap < buf->cap) 1543 { 1544 buf->errored = 1; 1545 return; 1546 } 1547 } 1548 1549 new_ptr = (char *)realloc (buf->ptr, new_cap); 1550 if (new_ptr == NULL) 1551 { 1552 free (buf->ptr); 1553 buf->ptr = NULL; 1554 buf->len = 0; 1555 buf->cap = 0; 1556 buf->errored = 1; 1557 } 1558 else 1559 { 1560 buf->ptr = new_ptr; 1561 buf->cap = new_cap; 1562 } 1563} 1564 1565static void 1566str_buf_append (struct str_buf *buf, const char *data, size_t len) 1567{ 1568 str_buf_reserve (buf, len); 1569 if (buf->errored) 1570 return; 1571 1572 memcpy (buf->ptr + buf->len, data, len); 1573 buf->len += len; 1574} 1575 1576static void 1577str_buf_demangle_callback (const char *data, size_t len, void *opaque) 1578{ 1579 str_buf_append ((struct str_buf *)opaque, data, len); 1580} 1581 1582char * 1583rust_demangle (const char *mangled, int options) 1584{ 1585 struct str_buf out; 1586 int success; 1587 1588 out.ptr = NULL; 1589 out.len = 0; 1590 out.cap = 0; 1591 out.errored = 0; 1592 1593 success = rust_demangle_callback (mangled, options, 1594 str_buf_demangle_callback, &out); 1595 1596 if (!success) 1597 { 1598 free (out.ptr); 1599 return NULL; 1600 } 1601 1602 str_buf_append (&out, "\0", 1); 1603 return out.ptr; 1604} 1605