1254562Scy/* Demangler for the Rust programming language
2254562Scy   Copyright (C) 2016-2022 Free Software Foundation, Inc.
3254562Scy   Written by David Tolnay (dtolnay@gmail.com).
4254562Scy   Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
5254562Scy
6254562ScyThis file is part of the libiberty library.
7254562ScyLibiberty is free software; you can redistribute it and/or
8254562Scymodify it under the terms of the GNU Library General Public
9254562ScyLicense as published by the Free Software Foundation; either
10254562Scyversion 2 of the License, or (at your option) any later version.
11254562Scy
12254562ScyIn addition to the permissions in the GNU Library General Public
13254562ScyLicense, the Free Software Foundation gives you unlimited permission
14254562Scyto link the compiled version of this file into combinations with other
15254562Scyprograms, and to distribute those combinations without any restriction
16254562Scycoming from the use of this file.  (The Library Public License
17254562Scyrestrictions do apply in other respects; for example, they cover
18254562Scymodification of the file, and distribution when not linked into a
19254562Scycombined executable.)
20254562Scy
21254562ScyLibiberty is distributed in the hope that it will be useful,
22254562Scybut WITHOUT ANY WARRANTY; without even the implied warranty of
23254562ScyMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24254562ScyLibrary General Public License for more details.
25254562Scy
26254562ScyYou should have received a copy of the GNU Library General Public
27254562ScyLicense along with libiberty; see the file COPYING.LIB.
28254562ScyIf not, see <http://www.gnu.org/licenses/>.  */
29254562Scy
30254562Scy
31254562Scy#ifdef HAVE_CONFIG_H
32254562Scy#include "config.h"
33254562Scy#endif
34254562Scy
35254562Scy#include "safe-ctype.h"
36254562Scy
37254562Scy#include <inttypes.h>
38254562Scy#include <sys/types.h>
39254562Scy#include <string.h>
40254562Scy#include <stdio.h>
41254562Scy#include <stdlib.h>
42254562Scy
43254562Scy#ifdef HAVE_STRING_H
44254562Scy#include <string.h>
45254562Scy#else
46254562Scyextern size_t strlen(const char *s);
47254562Scyextern int strncmp(const char *s1, const char *s2, size_t n);
48254562Scyextern void *memset(void *s, int c, size_t n);
49254562Scy#endif
50254562Scy
51254562Scy#include <demangle.h>
52254562Scy#include "libiberty.h"
53254562Scy
54254562Scystruct rust_demangler
55254562Scy{
56254562Scy  const char *sym;
57254562Scy  size_t sym_len;
58254562Scy
59254562Scy  void *callback_opaque;
60254562Scy  demangle_callbackref callback;
61254562Scy
62254562Scy  /* Position of the next character to read from the symbol. */
63254562Scy  size_t next;
64254562Scy
65254562Scy  /* Non-zero if any error occurred. */
66254562Scy  int errored;
67254562Scy
68254562Scy  /* Non-zero if nothing should be printed. */
69254562Scy  int skipping_printing;
70254562Scy
71254562Scy  /* Non-zero if printing should be verbose (e.g. include hashes). */
72254562Scy  int verbose;
73254562Scy
74254562Scy  /* Rust mangling version, with legacy mangling being -1. */
75254562Scy  int version;
76254562Scy
77254562Scy  /* Recursion depth.  */
78254562Scy  unsigned int recursion;
79254562Scy  /* Maximum number of times demangle_path may be called recursively.  */
80254562Scy#define RUST_MAX_RECURSION_COUNT  1024
81254562Scy#define RUST_NO_RECURSION_LIMIT   ((unsigned int) -1)
82254562Scy
83254562Scy  uint64_t bound_lifetime_depth;
84254562Scy};
85254562Scy
86254562Scy/* Parsing functions. */
87254562Scy
88254562Scystatic char
89254562Scypeek (const struct rust_demangler *rdm)
90254562Scy{
91254562Scy  if (rdm->next < rdm->sym_len)
92254562Scy    return rdm->sym[rdm->next];
93254562Scy  return 0;
94254562Scy}
95254562Scy
96254562Scystatic int
97254562Scyeat (struct rust_demangler *rdm, char c)
98254562Scy{
99254562Scy  if (peek (rdm) == c)
100254562Scy    {
101254562Scy      rdm->next++;
102254562Scy      return 1;
103254562Scy    }
104254562Scy  else
105254562Scy    return 0;
106254562Scy}
107254562Scy
108254562Scystatic char
109254562Scynext (struct rust_demangler *rdm)
110254562Scy{
111254562Scy  char c = peek (rdm);
112254562Scy  if (!c)
113254562Scy    rdm->errored = 1;
114254562Scy  else
115254562Scy    rdm->next++;
116254562Scy  return c;
117254562Scy}
118254562Scy
119254562Scystatic uint64_t
120254562Scyparse_integer_62 (struct rust_demangler *rdm)
121254562Scy{
122254562Scy  char c;
123254562Scy  uint64_t x;
124254562Scy
125254562Scy  if (eat (rdm, '_'))
126254562Scy    return 0;
127254562Scy
128254562Scy  x = 0;
129254562Scy  while (!eat (rdm, '_') && !rdm->errored)
130254562Scy    {
131254562Scy      c = next (rdm);
132254562Scy      x *= 62;
133254562Scy      if (ISDIGIT (c))
134254562Scy        x += c - '0';
135254562Scy      else if (ISLOWER (c))
136254562Scy        x += 10 + (c - 'a');
137254562Scy      else if (ISUPPER (c))
138254562Scy        x += 10 + 26 + (c - 'A');
139254562Scy      else
140254562Scy        {
141254562Scy          rdm->errored = 1;
142254562Scy          return 0;
143254562Scy        }
144254562Scy    }
145254562Scy  return x + 1;
146254562Scy}
147254562Scy
148254562Scystatic uint64_t
149254562Scyparse_opt_integer_62 (struct rust_demangler *rdm, char tag)
150254562Scy{
151254562Scy  if (!eat (rdm, tag))
152254562Scy    return 0;
153254562Scy  return 1 + parse_integer_62 (rdm);
154254562Scy}
155254562Scy
156254562Scystatic uint64_t
157254562Scyparse_disambiguator (struct rust_demangler *rdm)
158254562Scy{
159254562Scy  return parse_opt_integer_62 (rdm, 's');
160254562Scy}
161254562Scy
162254562Scystatic size_t
163254562Scyparse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
164254562Scy{
165254562Scy  char c;
166254562Scy  size_t hex_len;
167254562Scy
168254562Scy  hex_len = 0;
169254562Scy  *value = 0;
170254562Scy
171254562Scy  while (!eat (rdm, '_'))
172254562Scy    {
173254562Scy      *value <<= 4;
174254562Scy
175254562Scy      c = next (rdm);
176254562Scy      if (ISDIGIT (c))
177254562Scy        *value |= c - '0';
178254562Scy      else if (c >= 'a' && c <= 'f')
179254562Scy        *value |= 10 + (c - 'a');
180254562Scy      else
181254562Scy        {
182254562Scy          rdm->errored = 1;
183254562Scy          return 0;
184254562Scy        }
185254562Scy      hex_len++;
186254562Scy    }
187254562Scy
188254562Scy  return hex_len;
189254562Scy}
190254562Scy
191254562Scystruct rust_mangled_ident
192254562Scy{
193254562Scy  /* ASCII part of the identifier. */
194254562Scy  const char *ascii;
195254562Scy  size_t ascii_len;
196254562Scy
197254562Scy  /* Punycode insertion codes for Unicode codepoints, if any. */
198254562Scy  const char *punycode;
199254562Scy  size_t punycode_len;
200254562Scy};
201254562Scy
202254562Scystatic struct rust_mangled_ident
203254562Scyparse_ident (struct rust_demangler *rdm)
204254562Scy{
205254562Scy  char c;
206254562Scy  size_t start, len;
207254562Scy  int is_punycode = 0;
208254562Scy  struct rust_mangled_ident ident;
209254562Scy
210254562Scy  ident.ascii = NULL;
211254562Scy  ident.ascii_len = 0;
212254562Scy  ident.punycode = NULL;
213254562Scy  ident.punycode_len = 0;
214254562Scy
215254562Scy  if (rdm->version != -1)
216254562Scy    is_punycode = eat (rdm, 'u');
217254562Scy
218254562Scy  c = next (rdm);
219254562Scy  if (!ISDIGIT (c))
220254562Scy    {
221254562Scy      rdm->errored = 1;
222254562Scy      return ident;
223254562Scy    }
224254562Scy  len = c - '0';
225254562Scy
226254562Scy  if (c != '0')
227254562Scy    while (ISDIGIT (peek (rdm)))
228254562Scy      len = len * 10 + (next (rdm) - '0');
229254562Scy
230254562Scy  /* Skip past the optional `_` separator (v0). */
231254562Scy  if (rdm->version != -1)
232254562Scy    eat (rdm, '_');
233254562Scy
234254562Scy  start = rdm->next;
235254562Scy  rdm->next += len;
236254562Scy  /* Check for overflows. */
237254562Scy  if ((start > rdm->next) || (rdm->next > rdm->sym_len))
238254562Scy    {
239254562Scy      rdm->errored = 1;
240254562Scy      return ident;
241254562Scy    }
242254562Scy
243254562Scy  ident.ascii = rdm->sym + start;
244254562Scy  ident.ascii_len = len;
245254562Scy
246254562Scy  if (is_punycode)
247254562Scy    {
248254562Scy      ident.punycode_len = 0;
249254562Scy      while (ident.ascii_len > 0)
250254562Scy        {
251254562Scy          ident.ascii_len--;
252254562Scy
253254562Scy          /* The last '_' is a separator between ascii & punycode. */
254254562Scy          if (ident.ascii[ident.ascii_len] == '_')
255254562Scy            break;
256254562Scy
257254562Scy          ident.punycode_len++;
258254562Scy        }
259254562Scy      if (!ident.punycode_len)
260254562Scy        {
261254562Scy          rdm->errored = 1;
262254562Scy          return ident;
263254562Scy        }
264254562Scy      ident.punycode = ident.ascii + (len - ident.punycode_len);
265254562Scy    }
266254562Scy
267254562Scy  if (ident.ascii_len == 0)
268254562Scy    ident.ascii = NULL;
269254562Scy
270254562Scy  return ident;
271254562Scy}
272254562Scy
273254562Scy/* Printing functions. */
274254562Scy
275254562Scystatic void
276254562Scyprint_str (struct rust_demangler *rdm, const char *data, size_t len)
277254562Scy{
278254562Scy  if (!rdm->errored && !rdm->skipping_printing)
279254562Scy    rdm->callback (data, len, rdm->callback_opaque);
280254562Scy}
281254562Scy
282254562Scy#define PRINT(s) print_str (rdm, s, strlen (s))
283254562Scy
284254562Scystatic void
285254562Scyprint_uint64 (struct rust_demangler *rdm, uint64_t x)
286254562Scy{
287254562Scy  char s[21];
288254562Scy  snprintf (s, 21, "%" PRIu64, x);
289254562Scy  PRINT (s);
290254562Scy}
291254562Scy
292254562Scystatic void
293254562Scyprint_uint64_hex (struct rust_demangler *rdm, uint64_t x)
294254562Scy{
295254562Scy  char s[17];
296254562Scy  snprintf (s, 17, "%" PRIx64, x);
297254562Scy  PRINT (s);
298254562Scy}
299254562Scy
300254562Scy/* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
301254562Scystatic int
302254562Scydecode_lower_hex_nibble (char nibble)
303254562Scy{
304254562Scy  if ('0' <= nibble && nibble <= '9')
305254562Scy    return nibble - '0';
306254562Scy  if ('a' <= nibble && nibble <= 'f')
307254562Scy    return 0xa + (nibble - 'a');
308254562Scy  return -1;
309254562Scy}
310254562Scy
311254562Scy/* Return the unescaped character for a "$...$" escape, or 0 if invalid. */
312254562Scystatic char
313254562Scydecode_legacy_escape (const char *e, size_t len, size_t *out_len)
314254562Scy{
315254562Scy  char c = 0;
316254562Scy  size_t escape_len = 0;
317254562Scy  int lo_nibble = -1, hi_nibble = -1;
318254562Scy
319254562Scy  if (len < 3 || e[0] != '$')
320254562Scy    return 0;
321254562Scy
322254562Scy  e++;
323254562Scy  len--;
324254562Scy
325254562Scy  if (e[0] == 'C')
326254562Scy    {
327254562Scy      escape_len = 1;
328254562Scy
329254562Scy      c = ',';
330254562Scy    }
331254562Scy  else if (len > 2)
332254562Scy    {
333254562Scy      escape_len = 2;
334254562Scy
335254562Scy      if (e[0] == 'S' && e[1] == 'P')
336254562Scy        c = '@';
337254562Scy      else if (e[0] == 'B' && e[1] == 'P')
338254562Scy        c = '*';
339254562Scy      else if (e[0] == 'R' && e[1] == 'F')
340254562Scy        c = '&';
341254562Scy      else if (e[0] == 'L' && e[1] == 'T')
342254562Scy        c = '<';
343254562Scy      else if (e[0] == 'G' && e[1] == 'T')
344254562Scy        c = '>';
345254562Scy      else if (e[0] == 'L' && e[1] == 'P')
346254562Scy        c = '(';
347254562Scy      else if (e[0] == 'R' && e[1] == 'P')
348254562Scy        c = ')';
349254562Scy      else if (e[0] == 'u' && len > 3)
350254562Scy        {
351254562Scy          escape_len = 3;
352254562Scy
353254562Scy          hi_nibble = decode_lower_hex_nibble (e[1]);
354254562Scy          if (hi_nibble < 0)
355254562Scy            return 0;
356254562Scy          lo_nibble = decode_lower_hex_nibble (e[2]);
357254562Scy          if (lo_nibble < 0)
358254562Scy            return 0;
359254562Scy
360254562Scy          /* Only allow non-control ASCII characters. */
361254562Scy          if (hi_nibble > 7)
362254562Scy            return 0;
363254562Scy          c = (hi_nibble << 4) | lo_nibble;
364254562Scy          if (c < 0x20)
365            return 0;
366        }
367    }
368
369  if (!c || len <= escape_len || e[escape_len] != '$')
370    return 0;
371
372  *out_len = 2 + escape_len;
373  return c;
374}
375
376static void
377print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
378{
379  char unescaped;
380  uint8_t *out, *p, d;
381  size_t len, cap, punycode_pos, j;
382  /* Punycode parameters and state. */
383  uint32_t c;
384  size_t base, t_min, t_max, skew, damp, bias, i;
385  size_t delta, w, k, t;
386
387  if (rdm->errored || rdm->skipping_printing)
388    return;
389
390  if (rdm->version == -1)
391    {
392      /* Ignore leading underscores preceding escape sequences.
393         The mangler inserts an underscore to make sure the
394         identifier begins with a XID_Start character. */
395      if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
396          && ident.ascii[1] == '$')
397        {
398          ident.ascii++;
399          ident.ascii_len--;
400        }
401
402      while (ident.ascii_len > 0)
403        {
404          /* Handle legacy escape sequences ("$...$", ".." or "."). */
405          if (ident.ascii[0] == '$')
406            {
407              unescaped
408                  = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
409              if (unescaped)
410                print_str (rdm, &unescaped, 1);
411              else
412                {
413                  /* Unexpected escape sequence, print the rest verbatim. */
414                  print_str (rdm, ident.ascii, ident.ascii_len);
415                  return;
416                }
417            }
418          else if (ident.ascii[0] == '.')
419            {
420              if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
421                {
422                  /* ".." becomes "::" */
423                  PRINT ("::");
424                  len = 2;
425                }
426              else
427                {
428                  PRINT (".");
429                  len = 1;
430                }
431            }
432          else
433            {
434              /* Print everything before the next escape sequence, at once. */
435              for (len = 0; len < ident.ascii_len; len++)
436                if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
437                  break;
438
439              print_str (rdm, ident.ascii, len);
440            }
441
442          ident.ascii += len;
443          ident.ascii_len -= len;
444        }
445
446      return;
447    }
448
449  if (!ident.punycode)
450    {
451      print_str (rdm, ident.ascii, ident.ascii_len);
452      return;
453    }
454
455  len = 0;
456  cap = 4;
457  while (cap < ident.ascii_len)
458    {
459      cap *= 2;
460      /* Check for overflows. */
461      if ((cap * 4) / 4 != cap)
462        {
463          rdm->errored = 1;
464          return;
465        }
466    }
467
468  /* Store the output codepoints as groups of 4 UTF-8 bytes. */
469  out = (uint8_t *)malloc (cap * 4);
470  if (!out)
471    {
472      rdm->errored = 1;
473      return;
474    }
475
476  /* Populate initial output from ASCII fragment. */
477  for (len = 0; len < ident.ascii_len; len++)
478    {
479      p = out + 4 * len;
480      p[0] = 0;
481      p[1] = 0;
482      p[2] = 0;
483      p[3] = ident.ascii[len];
484    }
485
486  /* Punycode parameters and initial state. */
487  base = 36;
488  t_min = 1;
489  t_max = 26;
490  skew = 38;
491  damp = 700;
492  bias = 72;
493  i = 0;
494  c = 0x80;
495
496  punycode_pos = 0;
497  while (punycode_pos < ident.punycode_len)
498    {
499      /* Read one delta value. */
500      delta = 0;
501      w = 1;
502      k = 0;
503      do
504        {
505          k += base;
506          t = k < bias ? 0 : (k - bias);
507          if (t < t_min)
508            t = t_min;
509          if (t > t_max)
510            t = t_max;
511
512          if (punycode_pos >= ident.punycode_len)
513            goto cleanup;
514          d = ident.punycode[punycode_pos++];
515
516          if (ISLOWER (d))
517            d = d - 'a';
518          else if (ISDIGIT (d))
519            d = 26 + (d - '0');
520          else
521            {
522              rdm->errored = 1;
523              goto cleanup;
524            }
525
526          delta += d * w;
527          w *= base - t;
528        }
529      while (d >= t);
530
531      /* Compute the new insert position and character. */
532      len++;
533      i += delta;
534      c += i / len;
535      i %= len;
536
537      /* Ensure enough space is available. */
538      if (cap < len)
539        {
540          cap *= 2;
541          /* Check for overflows. */
542          if ((cap * 4) / 4 != cap || cap < len)
543            {
544              rdm->errored = 1;
545              goto cleanup;
546            }
547        }
548      p = (uint8_t *)realloc (out, cap * 4);
549      if (!p)
550        {
551          rdm->errored = 1;
552          goto cleanup;
553        }
554      out = p;
555
556      /* Move the characters after the insert position. */
557      p = out + i * 4;
558      memmove (p + 4, p, (len - i - 1) * 4);
559
560      /* Insert the new character, as UTF-8 bytes. */
561      p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
562      p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
563      p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
564      p[3] = 0x80 | (c & 0x3f);
565
566      /* If there are no more deltas, decoding is complete. */
567      if (punycode_pos == ident.punycode_len)
568        break;
569
570      i++;
571
572      /* Perform bias adaptation. */
573      delta /= damp;
574      damp = 2;
575
576      delta += delta / len;
577      k = 0;
578      while (delta > ((base - t_min) * t_max) / 2)
579        {
580          delta /= base - t_min;
581          k += base;
582        }
583      bias = k + ((base - t_min + 1) * delta) / (delta + skew);
584    }
585
586  /* Remove all the 0 bytes to leave behind an UTF-8 string. */
587  for (i = 0, j = 0; i < len * 4; i++)
588    if (out[i] != 0)
589      out[j++] = out[i];
590
591  print_str (rdm, (const char *)out, j);
592
593cleanup:
594  free (out);
595}
596
597/* Print the lifetime according to the previously decoded index.
598   An index of `0` always refers to `'_`, but starting with `1`,
599   indices refer to late-bound lifetimes introduced by a binder. */
600static void
601print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
602{
603  char c;
604  uint64_t depth;
605
606  PRINT ("'");
607  if (lt == 0)
608    {
609      PRINT ("_");
610      return;
611    }
612
613  depth = rdm->bound_lifetime_depth - lt;
614  /* Try to print lifetimes alphabetically first. */
615  if (depth < 26)
616    {
617      c = 'a' + depth;
618      print_str (rdm, &c, 1);
619    }
620  else
621    {
622      /* Use `'_123` after running out of letters. */
623      PRINT ("_");
624      print_uint64 (rdm, depth);
625    }
626}
627
628/* Demangling functions. */
629
630static void demangle_binder (struct rust_demangler *rdm);
631static void demangle_path (struct rust_demangler *rdm, int in_value);
632static void demangle_generic_arg (struct rust_demangler *rdm);
633static void demangle_type (struct rust_demangler *rdm);
634static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
635static void demangle_dyn_trait (struct rust_demangler *rdm);
636static void demangle_const (struct rust_demangler *rdm);
637static void demangle_const_uint (struct rust_demangler *rdm);
638static void demangle_const_int (struct rust_demangler *rdm);
639static void demangle_const_bool (struct rust_demangler *rdm);
640static void demangle_const_char (struct rust_demangler *rdm);
641
642/* Optionally enter a binder ('G') for late-bound lifetimes,
643   printing e.g. `for<'a, 'b> `, and make those lifetimes visible
644   to the caller (via depth level, which the caller should reset). */
645static void
646demangle_binder (struct rust_demangler *rdm)
647{
648  uint64_t i, bound_lifetimes;
649
650  if (rdm->errored)
651    return;
652
653  bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
654  if (bound_lifetimes > 0)
655    {
656      PRINT ("for<");
657      for (i = 0; i < bound_lifetimes; i++)
658        {
659          if (i > 0)
660            PRINT (", ");
661          rdm->bound_lifetime_depth++;
662          print_lifetime_from_index (rdm, 1);
663        }
664      PRINT ("> ");
665    }
666}
667
668static void
669demangle_path (struct rust_demangler *rdm, int in_value)
670{
671  char tag, ns;
672  int was_skipping_printing;
673  size_t i, backref, old_next;
674  uint64_t dis;
675  struct rust_mangled_ident name;
676
677  if (rdm->errored)
678    return;
679
680  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
681    {
682      ++ rdm->recursion;
683      if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
684	/* FIXME: There ought to be a way to report
685	   that the recursion limit has been reached.  */
686	goto fail_return;
687    }
688
689  switch (tag = next (rdm))
690    {
691    case 'C':
692      dis = parse_disambiguator (rdm);
693      name = parse_ident (rdm);
694
695      print_ident (rdm, name);
696      if (rdm->verbose)
697        {
698          PRINT ("[");
699          print_uint64_hex (rdm, dis);
700          PRINT ("]");
701        }
702      break;
703    case 'N':
704      ns = next (rdm);
705      if (!ISLOWER (ns) && !ISUPPER (ns))
706	goto fail_return;
707
708      demangle_path (rdm, in_value);
709
710      dis = parse_disambiguator (rdm);
711      name = parse_ident (rdm);
712
713      if (ISUPPER (ns))
714        {
715          /* Special namespaces, like closures and shims. */
716          PRINT ("::{");
717          switch (ns)
718            {
719            case 'C':
720              PRINT ("closure");
721              break;
722            case 'S':
723              PRINT ("shim");
724              break;
725            default:
726              print_str (rdm, &ns, 1);
727            }
728          if (name.ascii || name.punycode)
729            {
730              PRINT (":");
731              print_ident (rdm, name);
732            }
733          PRINT ("#");
734          print_uint64 (rdm, dis);
735          PRINT ("}");
736        }
737      else
738        {
739          /* Implementation-specific/unspecified namespaces. */
740
741          if (name.ascii || name.punycode)
742            {
743              PRINT ("::");
744              print_ident (rdm, name);
745            }
746        }
747      break;
748    case 'M':
749    case 'X':
750      /* Ignore the `impl`'s own path.*/
751      parse_disambiguator (rdm);
752      was_skipping_printing = rdm->skipping_printing;
753      rdm->skipping_printing = 1;
754      demangle_path (rdm, in_value);
755      rdm->skipping_printing = was_skipping_printing;
756      /* fallthrough */
757    case 'Y':
758      PRINT ("<");
759      demangle_type (rdm);
760      if (tag != 'M')
761        {
762          PRINT (" as ");
763          demangle_path (rdm, 0);
764        }
765      PRINT (">");
766      break;
767    case 'I':
768      demangle_path (rdm, in_value);
769      if (in_value)
770        PRINT ("::");
771      PRINT ("<");
772      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
773        {
774          if (i > 0)
775            PRINT (", ");
776          demangle_generic_arg (rdm);
777        }
778      PRINT (">");
779      break;
780    case 'B':
781      backref = parse_integer_62 (rdm);
782      if (!rdm->skipping_printing)
783        {
784          old_next = rdm->next;
785          rdm->next = backref;
786          demangle_path (rdm, in_value);
787          rdm->next = old_next;
788        }
789      break;
790    default:
791      goto fail_return;
792    }
793  goto pass_return;
794
795 fail_return:
796  rdm->errored = 1;
797 pass_return:
798  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
799    -- rdm->recursion;
800}
801
802static void
803demangle_generic_arg (struct rust_demangler *rdm)
804{
805  uint64_t lt;
806  if (eat (rdm, 'L'))
807    {
808      lt = parse_integer_62 (rdm);
809      print_lifetime_from_index (rdm, lt);
810    }
811  else if (eat (rdm, 'K'))
812    demangle_const (rdm);
813  else
814    demangle_type (rdm);
815}
816
817static const char *
818basic_type (char tag)
819{
820  switch (tag)
821    {
822    case 'b':
823      return "bool";
824    case 'c':
825      return "char";
826    case 'e':
827      return "str";
828    case 'u':
829      return "()";
830    case 'a':
831      return "i8";
832    case 's':
833      return "i16";
834    case 'l':
835      return "i32";
836    case 'x':
837      return "i64";
838    case 'n':
839      return "i128";
840    case 'i':
841      return "isize";
842    case 'h':
843      return "u8";
844    case 't':
845      return "u16";
846    case 'm':
847      return "u32";
848    case 'y':
849      return "u64";
850    case 'o':
851      return "u128";
852    case 'j':
853      return "usize";
854    case 'f':
855      return "f32";
856    case 'd':
857      return "f64";
858    case 'z':
859      return "!";
860    case 'p':
861      return "_";
862    case 'v':
863      return "...";
864
865    default:
866      return NULL;
867    }
868}
869
870static void
871demangle_type (struct rust_demangler *rdm)
872{
873  char tag;
874  size_t i, old_next, backref;
875  uint64_t lt, old_bound_lifetime_depth;
876  const char *basic;
877  struct rust_mangled_ident abi;
878
879  if (rdm->errored)
880    return;
881
882  tag = next (rdm);
883
884  basic = basic_type (tag);
885  if (basic)
886    {
887      PRINT (basic);
888      return;
889    }
890
891   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
892    {
893      ++ rdm->recursion;
894      if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
895	/* FIXME: There ought to be a way to report
896	   that the recursion limit has been reached.  */
897	{
898	  rdm->errored = 1;
899	  -- rdm->recursion;
900	  return;
901	}
902    }
903
904  switch (tag)
905    {
906    case 'R':
907    case 'Q':
908      PRINT ("&");
909      if (eat (rdm, 'L'))
910        {
911          lt = parse_integer_62 (rdm);
912          if (lt)
913            {
914              print_lifetime_from_index (rdm, lt);
915              PRINT (" ");
916            }
917        }
918      if (tag != 'R')
919        PRINT ("mut ");
920      demangle_type (rdm);
921      break;
922    case 'P':
923    case 'O':
924      PRINT ("*");
925      if (tag != 'P')
926        PRINT ("mut ");
927      else
928        PRINT ("const ");
929      demangle_type (rdm);
930      break;
931    case 'A':
932    case 'S':
933      PRINT ("[");
934      demangle_type (rdm);
935      if (tag == 'A')
936        {
937          PRINT ("; ");
938          demangle_const (rdm);
939        }
940      PRINT ("]");
941      break;
942    case 'T':
943      PRINT ("(");
944      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
945        {
946          if (i > 0)
947            PRINT (", ");
948          demangle_type (rdm);
949        }
950      if (i == 1)
951        PRINT (",");
952      PRINT (")");
953      break;
954    case 'F':
955      old_bound_lifetime_depth = rdm->bound_lifetime_depth;
956      demangle_binder (rdm);
957
958      if (eat (rdm, 'U'))
959        PRINT ("unsafe ");
960
961      if (eat (rdm, 'K'))
962        {
963          if (eat (rdm, 'C'))
964            {
965              abi.ascii = "C";
966              abi.ascii_len = 1;
967            }
968          else
969            {
970              abi = parse_ident (rdm);
971              if (!abi.ascii || abi.punycode)
972                {
973                  rdm->errored = 1;
974                  goto restore;
975                }
976            }
977
978          PRINT ("extern \"");
979
980          /* If the ABI had any `-`, they were replaced with `_`,
981             so the parts between `_` have to be re-joined with `-`. */
982          for (i = 0; i < abi.ascii_len; i++)
983            {
984              if (abi.ascii[i] == '_')
985                {
986                  print_str (rdm, abi.ascii, i);
987                  PRINT ("-");
988                  abi.ascii += i + 1;
989                  abi.ascii_len -= i + 1;
990                  i = 0;
991                }
992            }
993          print_str (rdm, abi.ascii, abi.ascii_len);
994
995          PRINT ("\" ");
996        }
997
998      PRINT ("fn(");
999      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1000        {
1001          if (i > 0)
1002            PRINT (", ");
1003          demangle_type (rdm);
1004        }
1005      PRINT (")");
1006
1007      if (eat (rdm, 'u'))
1008        {
1009          /* Skip printing the return type if it's 'u', i.e. `()`. */
1010        }
1011      else
1012        {
1013          PRINT (" -> ");
1014          demangle_type (rdm);
1015        }
1016
1017    /* Restore `bound_lifetime_depth` to outside the binder. */
1018    restore:
1019      rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1020      break;
1021    case 'D':
1022      PRINT ("dyn ");
1023
1024      old_bound_lifetime_depth = rdm->bound_lifetime_depth;
1025      demangle_binder (rdm);
1026
1027      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1028        {
1029          if (i > 0)
1030            PRINT (" + ");
1031          demangle_dyn_trait (rdm);
1032        }
1033
1034      /* Restore `bound_lifetime_depth` to outside the binder. */
1035      rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1036
1037      if (!eat (rdm, 'L'))
1038        {
1039          rdm->errored = 1;
1040          return;
1041        }
1042      lt = parse_integer_62 (rdm);
1043      if (lt)
1044        {
1045          PRINT (" + ");
1046          print_lifetime_from_index (rdm, lt);
1047        }
1048      break;
1049    case 'B':
1050      backref = parse_integer_62 (rdm);
1051      if (!rdm->skipping_printing)
1052        {
1053          old_next = rdm->next;
1054          rdm->next = backref;
1055          demangle_type (rdm);
1056          rdm->next = old_next;
1057        }
1058      break;
1059    default:
1060      /* Go back to the tag, so `demangle_path` also sees it. */
1061      rdm->next--;
1062      demangle_path (rdm, 0);
1063    }
1064
1065  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1066    -- rdm->recursion;
1067}
1068
1069/* A trait in a trait object may have some "existential projections"
1070   (i.e. associated type bindings) after it, which should be printed
1071   in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1072   To this end, this method will keep the `<...>` of an 'I' path
1073   open, by omitting the `>`, and return `Ok(true)` in that case. */
1074static int
1075demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1076{
1077  int open;
1078  size_t i, old_next, backref;
1079
1080  open = 0;
1081
1082  if (rdm->errored)
1083    return open;
1084
1085  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1086    {
1087      ++ rdm->recursion;
1088      if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
1089	{
1090	  /* FIXME: There ought to be a way to report
1091	     that the recursion limit has been reached.  */
1092	  rdm->errored = 1;
1093	  goto end_of_func;
1094	}
1095    }
1096
1097  if (eat (rdm, 'B'))
1098    {
1099      backref = parse_integer_62 (rdm);
1100      if (!rdm->skipping_printing)
1101        {
1102          old_next = rdm->next;
1103          rdm->next = backref;
1104          open = demangle_path_maybe_open_generics (rdm);
1105          rdm->next = old_next;
1106        }
1107    }
1108  else if (eat (rdm, 'I'))
1109    {
1110      demangle_path (rdm, 0);
1111      PRINT ("<");
1112      open = 1;
1113      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1114        {
1115          if (i > 0)
1116            PRINT (", ");
1117          demangle_generic_arg (rdm);
1118        }
1119    }
1120  else
1121    demangle_path (rdm, 0);
1122
1123 end_of_func:
1124  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1125    -- rdm->recursion;
1126
1127  return open;
1128}
1129
1130static void
1131demangle_dyn_trait (struct rust_demangler *rdm)
1132{
1133  int open;
1134  struct rust_mangled_ident name;
1135
1136  if (rdm->errored)
1137    return;
1138
1139  open = demangle_path_maybe_open_generics (rdm);
1140
1141  while (eat (rdm, 'p'))
1142    {
1143      if (!open)
1144        PRINT ("<");
1145      else
1146        PRINT (", ");
1147      open = 1;
1148
1149      name = parse_ident (rdm);
1150      print_ident (rdm, name);
1151      PRINT (" = ");
1152      demangle_type (rdm);
1153    }
1154
1155  if (open)
1156    PRINT (">");
1157}
1158
1159static void
1160demangle_const (struct rust_demangler *rdm)
1161{
1162  char ty_tag;
1163  size_t old_next, backref;
1164
1165  if (rdm->errored)
1166    return;
1167
1168  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1169    {
1170      ++ rdm->recursion;
1171      if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
1172	/* FIXME: There ought to be a way to report
1173	   that the recursion limit has been reached.  */
1174	goto fail_return;
1175    }
1176
1177  if (eat (rdm, 'B'))
1178    {
1179      backref = parse_integer_62 (rdm);
1180      if (!rdm->skipping_printing)
1181        {
1182          old_next = rdm->next;
1183          rdm->next = backref;
1184          demangle_const (rdm);
1185          rdm->next = old_next;
1186        }
1187      goto pass_return;
1188    }
1189
1190  ty_tag = next (rdm);
1191  switch (ty_tag)
1192    {
1193    /* Placeholder. */
1194    case 'p':
1195      PRINT ("_");
1196      goto pass_return;
1197
1198    /* Unsigned integer types. */
1199    case 'h':
1200    case 't':
1201    case 'm':
1202    case 'y':
1203    case 'o':
1204    case 'j':
1205      demangle_const_uint (rdm);
1206      break;
1207
1208    /* Signed integer types. */
1209    case 'a':
1210    case 's':
1211    case 'l':
1212    case 'x':
1213    case 'n':
1214    case 'i':
1215      demangle_const_int (rdm);
1216      break;
1217
1218    /* Boolean. */
1219    case 'b':
1220      demangle_const_bool (rdm);
1221      break;
1222
1223    /* Character. */
1224    case 'c':
1225      demangle_const_char (rdm);
1226      break;
1227
1228    default:
1229      goto fail_return;
1230    }
1231
1232  if (!rdm->errored && rdm->verbose)
1233    {
1234      PRINT (": ");
1235      PRINT (basic_type (ty_tag));
1236    }
1237  goto pass_return;
1238
1239 fail_return:
1240  rdm->errored = 1;
1241 pass_return:
1242  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1243    -- rdm->recursion;
1244}
1245
1246static void
1247demangle_const_uint (struct rust_demangler *rdm)
1248{
1249  size_t hex_len;
1250  uint64_t value;
1251
1252  if (rdm->errored)
1253    return;
1254
1255  hex_len = parse_hex_nibbles (rdm, &value);
1256
1257  if (hex_len > 16)
1258    {
1259      /* Print anything that doesn't fit in `uint64_t` verbatim. */
1260      PRINT ("0x");
1261      print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1262    }
1263  else if (hex_len > 0)
1264    print_uint64 (rdm, value);
1265  else
1266    rdm->errored = 1;
1267}
1268
1269static void
1270demangle_const_int (struct rust_demangler *rdm)
1271{
1272  if (eat (rdm, 'n'))
1273    PRINT ("-");
1274  demangle_const_uint (rdm);
1275}
1276
1277static void
1278demangle_const_bool (struct rust_demangler *rdm)
1279{
1280  uint64_t value;
1281
1282  if (parse_hex_nibbles (rdm, &value) != 1)
1283    {
1284      rdm->errored = 1;
1285      return;
1286    }
1287
1288  if (value == 0)
1289    PRINT ("false");
1290  else if (value == 1)
1291    PRINT ("true");
1292  else
1293    rdm->errored = 1;
1294}
1295
1296static void
1297demangle_const_char (struct rust_demangler *rdm)
1298{
1299  size_t hex_len;
1300  uint64_t value;
1301
1302  hex_len = parse_hex_nibbles (rdm, &value);
1303
1304  if (hex_len == 0 || hex_len > 8)
1305    {
1306      rdm->errored = 1;
1307      return;
1308    }
1309
1310  /* Match Rust's character "debug" output as best as we can. */
1311  PRINT ("'");
1312  if (value == '\t')
1313    PRINT ("\\t");
1314  else if (value == '\r')
1315    PRINT ("\\r");
1316  else if (value == '\n')
1317    PRINT ("\\n");
1318  else if (value > ' ' && value < '~')
1319    {
1320      /* Rust also considers many non-ASCII codepoints to be printable, but
1321	 that logic is not easily ported to C. */
1322      char c = value;
1323      print_str (rdm, &c, 1);
1324    }
1325  else
1326    {
1327      PRINT ("\\u{");
1328      print_uint64_hex (rdm, value);
1329      PRINT ("}");
1330    }
1331  PRINT ("'");
1332}
1333
1334/* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1335   The hex digits must contain at least 5 distinct digits. */
1336static int
1337is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1338{
1339  uint16_t seen;
1340  int nibble;
1341  size_t i, count;
1342
1343  if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1344    return 0;
1345
1346  seen = 0;
1347  for (i = 0; i < 16; i++)
1348    {
1349      nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1350      if (nibble < 0)
1351        return 0;
1352      seen |= (uint16_t)1 << nibble;
1353    }
1354
1355  /* Count how many distinct digits were seen. */
1356  count = 0;
1357  while (seen)
1358    {
1359      if (seen & 1)
1360        count++;
1361      seen >>= 1;
1362    }
1363
1364  return count >= 5;
1365}
1366
1367int
1368rust_demangle_callback (const char *mangled, int options,
1369                        demangle_callbackref callback, void *opaque)
1370{
1371  const char *p;
1372  struct rust_demangler rdm;
1373  struct rust_mangled_ident ident;
1374
1375  rdm.sym = mangled;
1376  rdm.sym_len = 0;
1377
1378  rdm.callback_opaque = opaque;
1379  rdm.callback = callback;
1380
1381  rdm.next = 0;
1382  rdm.errored = 0;
1383  rdm.skipping_printing = 0;
1384  rdm.verbose = (options & DMGL_VERBOSE) != 0;
1385  rdm.version = 0;
1386  rdm.recursion = (options & DMGL_NO_RECURSE_LIMIT) ? RUST_NO_RECURSION_LIMIT : 0;
1387  rdm.bound_lifetime_depth = 0;
1388
1389  /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1390  if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1391    rdm.sym += 2;
1392  else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1393    {
1394      rdm.sym += 3;
1395      rdm.version = -1;
1396    }
1397  else
1398    return 0;
1399
1400  /* Paths (v0) always start with uppercase characters. */
1401  if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1402    return 0;
1403
1404  /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1405  for (p = rdm.sym; *p; p++)
1406    {
1407      /* Rust v0 symbols can have '.' suffixes, ignore those.  */
1408      if (rdm.version == 0 && *p == '.')
1409        break;
1410
1411      rdm.sym_len++;
1412
1413      if (*p == '_' || ISALNUM (*p))
1414        continue;
1415
1416      /* Legacy Rust symbols can also contain [.:$] characters.
1417         Or @ in the .suffix (which will be skipped, see below). */
1418      if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'
1419                                || *p == '@'))
1420        continue;
1421
1422      return 0;
1423    }
1424
1425  /* Legacy Rust symbols need to be handled separately. */
1426  if (rdm.version == -1)
1427    {
1428      /* Legacy Rust symbols always end with E.  But can be followed by a
1429         .suffix (which we want to ignore).  */
1430      int dot_suffix = 1;
1431      while (rdm.sym_len > 0 &&
1432             !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E'))
1433        {
1434          dot_suffix = rdm.sym[rdm.sym_len - 1] == '.';
1435          rdm.sym_len--;
1436        }
1437
1438      if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1439        return 0;
1440      rdm.sym_len--;
1441
1442      /* Legacy Rust symbols also always end with a path segment
1443         that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1444         This early check, before any parse_ident calls, should
1445         quickly filter out most C++ symbols unrelated to Rust. */
1446      if (!(rdm.sym_len > 19
1447            && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1448        return 0;
1449
1450      do
1451        {
1452          ident = parse_ident (&rdm);
1453          if (rdm.errored || !ident.ascii)
1454            return 0;
1455        }
1456      while (rdm.next < rdm.sym_len);
1457
1458      /* The last path segment should be the hash. */
1459      if (!is_legacy_prefixed_hash (ident))
1460        return 0;
1461
1462      /* Reset the state for a second pass, to print the symbol. */
1463      rdm.next = 0;
1464      if (!rdm.verbose && rdm.sym_len > 19)
1465        {
1466          /* Hide the last segment, containing the hash, if not verbose. */
1467          rdm.sym_len -= 19;
1468        }
1469
1470      do
1471        {
1472          if (rdm.next > 0)
1473            print_str (&rdm, "::", 2);
1474
1475          ident = parse_ident (&rdm);
1476          print_ident (&rdm, ident);
1477        }
1478      while (rdm.next < rdm.sym_len);
1479    }
1480  else
1481    {
1482      demangle_path (&rdm, 1);
1483
1484      /* Skip instantiating crate. */
1485      if (!rdm.errored && rdm.next < rdm.sym_len)
1486        {
1487          rdm.skipping_printing = 1;
1488          demangle_path (&rdm, 0);
1489        }
1490
1491      /* It's an error to not reach the end. */
1492      rdm.errored |= rdm.next != rdm.sym_len;
1493    }
1494
1495  return !rdm.errored;
1496}
1497
1498/* Growable string buffers. */
1499struct str_buf
1500{
1501  char *ptr;
1502  size_t len;
1503  size_t cap;
1504  int errored;
1505};
1506
1507static void
1508str_buf_reserve (struct str_buf *buf, size_t extra)
1509{
1510  size_t available, min_new_cap, new_cap;
1511  char *new_ptr;
1512
1513  /* Allocation failed before. */
1514  if (buf->errored)
1515    return;
1516
1517  available = buf->cap - buf->len;
1518
1519  if (extra <= available)
1520    return;
1521
1522  min_new_cap = buf->cap + (extra - available);
1523
1524  /* Check for overflows. */
1525  if (min_new_cap < buf->cap)
1526    {
1527      buf->errored = 1;
1528      return;
1529    }
1530
1531  new_cap = buf->cap;
1532
1533  if (new_cap == 0)
1534    new_cap = 4;
1535
1536  /* Double capacity until sufficiently large. */
1537  while (new_cap < min_new_cap)
1538    {
1539      new_cap *= 2;
1540
1541      /* Check for overflows. */
1542      if (new_cap < buf->cap)
1543        {
1544          buf->errored = 1;
1545          return;
1546        }
1547    }
1548
1549  new_ptr = (char *)realloc (buf->ptr, new_cap);
1550  if (new_ptr == NULL)
1551    {
1552      free (buf->ptr);
1553      buf->ptr = NULL;
1554      buf->len = 0;
1555      buf->cap = 0;
1556      buf->errored = 1;
1557    }
1558  else
1559    {
1560      buf->ptr = new_ptr;
1561      buf->cap = new_cap;
1562    }
1563}
1564
1565static void
1566str_buf_append (struct str_buf *buf, const char *data, size_t len)
1567{
1568  str_buf_reserve (buf, len);
1569  if (buf->errored)
1570    return;
1571
1572  memcpy (buf->ptr + buf->len, data, len);
1573  buf->len += len;
1574}
1575
1576static void
1577str_buf_demangle_callback (const char *data, size_t len, void *opaque)
1578{
1579  str_buf_append ((struct str_buf *)opaque, data, len);
1580}
1581
1582char *
1583rust_demangle (const char *mangled, int options)
1584{
1585  struct str_buf out;
1586  int success;
1587
1588  out.ptr = NULL;
1589  out.len = 0;
1590  out.cap = 0;
1591  out.errored = 0;
1592
1593  success = rust_demangle_callback (mangled, options,
1594                                    str_buf_demangle_callback, &out);
1595
1596  if (!success)
1597    {
1598      free (out.ptr);
1599      return NULL;
1600    }
1601
1602  str_buf_append (&out, "\0", 1);
1603  return out.ptr;
1604}
1605