macroexp.c revision 1.8
1/* C preprocessor macro expansion for GDB.
2   Copyright (C) 2002-2019 Free Software Foundation, Inc.
3   Contributed by Red Hat, Inc.
4
5   This file is part of GDB.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19
20#include "defs.h"
21#include "gdb_obstack.h"
22#include "bcache.h"
23#include "macrotab.h"
24#include "macroexp.h"
25#include "c-lang.h"
26
27
28
29/* A resizeable, substringable string type.  */
30
31
32/* A string type that we can resize, quickly append to, and use to
33   refer to substrings of other strings.  */
34struct macro_buffer
35{
36  /* An array of characters.  The first LEN bytes are the real text,
37     but there are SIZE bytes allocated to the array.  If SIZE is
38     zero, then this doesn't point to a malloc'ed block.  If SHARED is
39     non-zero, then this buffer is actually a pointer into some larger
40     string, and we shouldn't append characters to it, etc.  Because
41     of sharing, we can't assume in general that the text is
42     null-terminated.  */
43  char *text;
44
45  /* The number of characters in the string.  */
46  int len;
47
48  /* The number of characters allocated to the string.  If SHARED is
49     non-zero, this is meaningless; in this case, we set it to zero so
50     that any "do we have room to append something?" tests will fail,
51     so we don't always have to check SHARED before using this field.  */
52  int size;
53
54  /* Zero if TEXT can be safely realloc'ed (i.e., it's its own malloc
55     block).  Non-zero if TEXT is actually pointing into the middle of
56     some other block, or to a string literal, and we shouldn't
57     reallocate it.  */
58  bool shared;
59
60  /* For detecting token splicing.
61
62     This is the index in TEXT of the first character of the token
63     that abuts the end of TEXT.  If TEXT contains no tokens, then we
64     set this equal to LEN.  If TEXT ends in whitespace, then there is
65     no token abutting the end of TEXT (it's just whitespace), and
66     again, we set this equal to LEN.  We set this to -1 if we don't
67     know the nature of TEXT.  */
68  int last_token = -1;
69
70  /* If this buffer is holding the result from get_token, then this
71     is non-zero if it is an identifier token, zero otherwise.  */
72  int is_identifier = 0;
73
74
75  macro_buffer ()
76    : text (NULL),
77      len (0),
78      size (0),
79      shared (false)
80  {
81  }
82
83  /* Set the macro buffer to the empty string, guessing that its
84     final contents will fit in N bytes.  (It'll get resized if it
85     doesn't, so the guess doesn't have to be right.)  Allocate the
86     initial storage with xmalloc.  */
87  explicit macro_buffer (int n)
88    : len (0),
89      size (n),
90      shared (false)
91  {
92    if (n > 0)
93      text = (char *) xmalloc (n);
94    else
95      text = NULL;
96  }
97
98  /* Set the macro buffer to refer to the LEN bytes at ADDR, as a
99     shared substring.  */
100  macro_buffer (const char *addr, int len)
101  {
102    set_shared (addr, len);
103  }
104
105  /* Set the macro buffer to refer to the LEN bytes at ADDR, as a
106     shared substring.  */
107  void set_shared (const char *addr, int len_)
108  {
109    text = (char *) addr;
110    len = len_;
111    size = 0;
112    shared = true;
113  }
114
115  macro_buffer& operator= (const macro_buffer &src)
116  {
117    gdb_assert (src.shared);
118    gdb_assert (shared);
119    set_shared (src.text, src.len);
120    last_token = src.last_token;
121    is_identifier = src.is_identifier;
122    return *this;
123  }
124
125  ~macro_buffer ()
126  {
127    if (! shared && size)
128      xfree (text);
129  }
130
131  /* Release the text of the buffer to the caller, which is now
132     responsible for freeing it.  */
133  char *release ()
134  {
135    gdb_assert (! shared);
136    gdb_assert (size);
137    char *result = text;
138    text = NULL;
139    return result;
140  }
141
142  /* Resize the buffer to be at least N bytes long.  Raise an error if
143     the buffer shouldn't be resized.  */
144  void resize_buffer (int n)
145  {
146    /* We shouldn't be trying to resize shared strings.  */
147    gdb_assert (! shared);
148
149    if (size == 0)
150      size = n;
151    else
152      while (size <= n)
153	size *= 2;
154
155    text = (char *) xrealloc (text, size);
156  }
157
158  /* Append the character C to the buffer.  */
159  void appendc (int c)
160  {
161    int new_len = len + 1;
162
163    if (new_len > size)
164      resize_buffer (new_len);
165
166    text[len] = c;
167    len = new_len;
168  }
169
170  /* Append the COUNT bytes at ADDR to the buffer.  */
171  void appendmem (const char *addr, int count)
172  {
173    int new_len = len + count;
174
175    if (new_len > size)
176      resize_buffer (new_len);
177
178    memcpy (text + len, addr, count);
179    len = new_len;
180  }
181};
182
183
184
185/* Recognizing preprocessor tokens.  */
186
187
/* Return non-zero if C is one of the characters the macro lexer
   treats as whitespace: space, horizontal tab, newline, vertical tab
   or form feed.  Return zero for anything else.  */
int
macro_is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
      return 1;

    default:
      return 0;
    }
}
197
198
/* Return non-zero if C is one of the decimal digits '0' through '9',
   and zero otherwise.  */
int
macro_is_digit (int c)
{
  if (c < '0')
    return 0;

  return c <= '9';
}
204
205
/* Return non-zero if C may appear in an identifier other than as a
   digit: an underscore, or a letter in 'a'..'z' or 'A'..'Z'.  Return
   zero otherwise.  */
int
macro_is_identifier_nondigit (int c)
{
  if (c == '_')
    return 1;

  if (c >= 'a' && c <= 'z')
    return 1;

  return c >= 'A' && c <= 'Z';
}
213
214
215static void
216set_token (struct macro_buffer *tok, char *start, char *end)
217{
218  tok->set_shared (start, end - start);
219  tok->last_token = 0;
220
221  /* Presumed; get_identifier may overwrite this.  */
222  tok->is_identifier = 0;
223}
224
225
226static int
227get_comment (struct macro_buffer *tok, char *p, char *end)
228{
229  if (p + 2 > end)
230    return 0;
231  else if (p[0] == '/'
232           && p[1] == '*')
233    {
234      char *tok_start = p;
235
236      p += 2;
237
238      for (; p < end; p++)
239        if (p + 2 <= end
240            && p[0] == '*'
241            && p[1] == '/')
242          {
243            p += 2;
244            set_token (tok, tok_start, p);
245            return 1;
246          }
247
248      error (_("Unterminated comment in macro expansion."));
249    }
250  else if (p[0] == '/'
251           && p[1] == '/')
252    {
253      char *tok_start = p;
254
255      p += 2;
256      for (; p < end; p++)
257        if (*p == '\n')
258          break;
259
260      set_token (tok, tok_start, p);
261      return 1;
262    }
263  else
264    return 0;
265}
266
267
268static int
269get_identifier (struct macro_buffer *tok, char *p, char *end)
270{
271  if (p < end
272      && macro_is_identifier_nondigit (*p))
273    {
274      char *tok_start = p;
275
276      while (p < end
277             && (macro_is_identifier_nondigit (*p)
278                 || macro_is_digit (*p)))
279        p++;
280
281      set_token (tok, tok_start, p);
282      tok->is_identifier = 1;
283      return 1;
284    }
285  else
286    return 0;
287}
288
289
290static int
291get_pp_number (struct macro_buffer *tok, char *p, char *end)
292{
293  if (p < end
294      && (macro_is_digit (*p)
295          || (*p == '.'
296	      && p + 2 <= end
297	      && macro_is_digit (p[1]))))
298    {
299      char *tok_start = p;
300
301      while (p < end)
302        {
303	  if (p + 2 <= end
304	      && strchr ("eEpP", *p)
305	      && (p[1] == '+' || p[1] == '-'))
306            p += 2;
307          else if (macro_is_digit (*p)
308		   || macro_is_identifier_nondigit (*p)
309		   || *p == '.')
310            p++;
311          else
312            break;
313        }
314
315      set_token (tok, tok_start, p);
316      return 1;
317    }
318  else
319    return 0;
320}
321
322
323
324/* If the text starting at P going up to (but not including) END
325   starts with a character constant, set *TOK to point to that
326   character constant, and return 1.  Otherwise, return zero.
327   Signal an error if it contains a malformed or incomplete character
328   constant.  */
329static int
330get_character_constant (struct macro_buffer *tok, char *p, char *end)
331{
332  /* ISO/IEC 9899:1999 (E)  Section 6.4.4.4  paragraph 1
333     But of course, what really matters is that we handle it the same
334     way GDB's C/C++ lexer does.  So we call parse_escape in utils.c
335     to handle escape sequences.  */
336  if ((p + 1 <= end && *p == '\'')
337      || (p + 2 <= end
338	  && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
339	  && p[1] == '\''))
340    {
341      char *tok_start = p;
342      int char_count = 0;
343
344      if (*p == '\'')
345        p++;
346      else if (*p == 'L' || *p == 'u' || *p == 'U')
347        p += 2;
348      else
349        gdb_assert_not_reached ("unexpected character constant");
350
351      for (;;)
352        {
353          if (p >= end)
354            error (_("Unmatched single quote."));
355          else if (*p == '\'')
356            {
357              if (!char_count)
358                error (_("A character constant must contain at least one "
359                       "character."));
360              p++;
361              break;
362            }
363          else if (*p == '\\')
364            {
365	      const char *s, *o;
366
367	      s = o = ++p;
368	      char_count += c_parse_escape (&s, NULL);
369	      p += s - o;
370            }
371          else
372	    {
373	      p++;
374	      char_count++;
375	    }
376        }
377
378      set_token (tok, tok_start, p);
379      return 1;
380    }
381  else
382    return 0;
383}
384
385
386/* If the text starting at P going up to (but not including) END
387   starts with a string literal, set *TOK to point to that string
388   literal, and return 1.  Otherwise, return zero.  Signal an error if
389   it contains a malformed or incomplete string literal.  */
390static int
391get_string_literal (struct macro_buffer *tok, char *p, char *end)
392{
393  if ((p + 1 <= end
394       && *p == '"')
395      || (p + 2 <= end
396          && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
397          && p[1] == '"'))
398    {
399      char *tok_start = p;
400
401      if (*p == '"')
402        p++;
403      else if (*p == 'L' || *p == 'u' || *p == 'U')
404        p += 2;
405      else
406        gdb_assert_not_reached ("unexpected string literal");
407
408      for (;;)
409        {
410          if (p >= end)
411            error (_("Unterminated string in expression."));
412          else if (*p == '"')
413            {
414              p++;
415              break;
416            }
417          else if (*p == '\n')
418            error (_("Newline characters may not appear in string "
419                   "constants."));
420          else if (*p == '\\')
421            {
422	      const char *s, *o;
423
424	      s = o = ++p;
425	      c_parse_escape (&s, NULL);
426	      p += s - o;
427            }
428          else
429            p++;
430        }
431
432      set_token (tok, tok_start, p);
433      return 1;
434    }
435  else
436    return 0;
437}
438
439
440static int
441get_punctuator (struct macro_buffer *tok, char *p, char *end)
442{
443  /* Here, speed is much less important than correctness and clarity.  */
444
445  /* ISO/IEC 9899:1999 (E)  Section 6.4.6  Paragraph 1.
446     Note that this table is ordered in a special way.  A punctuator
447     which is a prefix of another punctuator must appear after its
448     "extension".  Otherwise, the wrong token will be returned.  */
449  static const char * const punctuators[] = {
450    "[", "]", "(", ")", "{", "}", "?", ";", ",", "~",
451    "...", ".",
452    "->", "--", "-=", "-",
453    "++", "+=", "+",
454    "*=", "*",
455    "!=", "!",
456    "&&", "&=", "&",
457    "/=", "/",
458    "%>", "%:%:", "%:", "%=", "%",
459    "^=", "^",
460    "##", "#",
461    ":>", ":",
462    "||", "|=", "|",
463    "<<=", "<<", "<=", "<:", "<%", "<",
464    ">>=", ">>", ">=", ">",
465    "==", "=",
466    0
467  };
468
469  int i;
470
471  if (p + 1 <= end)
472    {
473      for (i = 0; punctuators[i]; i++)
474        {
475          const char *punctuator = punctuators[i];
476
477          if (p[0] == punctuator[0])
478            {
479              int len = strlen (punctuator);
480
481              if (p + len <= end
482                  && ! memcmp (p, punctuator, len))
483                {
484                  set_token (tok, p, p + len);
485                  return 1;
486                }
487            }
488        }
489    }
490
491  return 0;
492}
493
494
495/* Peel the next preprocessor token off of SRC, and put it in TOK.
496   Mutate TOK to refer to the first token in SRC, and mutate SRC to
497   refer to the text after that token.  SRC must be a shared buffer;
498   the resulting TOK will be shared, pointing into the same string SRC
499   does.  Initialize TOK's last_token field.  Return non-zero if we
500   succeed, or 0 if we didn't find any more tokens in SRC.  */
501static int
502get_token (struct macro_buffer *tok,
503           struct macro_buffer *src)
504{
505  char *p = src->text;
506  char *end = p + src->len;
507
508  gdb_assert (src->shared);
509
510  /* From the ISO C standard, ISO/IEC 9899:1999 (E), section 6.4:
511
512     preprocessing-token:
513         header-name
514         identifier
515         pp-number
516         character-constant
517         string-literal
518         punctuator
519         each non-white-space character that cannot be one of the above
520
521     We don't have to deal with header-name tokens, since those can
522     only occur after a #include, which we will never see.  */
523
524  while (p < end)
525    if (macro_is_whitespace (*p))
526      p++;
527    else if (get_comment (tok, p, end))
528      p += tok->len;
529    else if (get_pp_number (tok, p, end)
530             || get_character_constant (tok, p, end)
531             || get_string_literal (tok, p, end)
532             /* Note: the grammar in the standard seems to be
533                ambiguous: L'x' can be either a wide character
534                constant, or an identifier followed by a normal
535                character constant.  By trying `get_identifier' after
536                we try get_character_constant and get_string_literal,
537                we give the wide character syntax precedence.  Now,
538                since GDB doesn't handle wide character constants
539                anyway, is this the right thing to do?  */
540             || get_identifier (tok, p, end)
541             || get_punctuator (tok, p, end))
542      {
543        /* How many characters did we consume, including whitespace?  */
544        int consumed = p - src->text + tok->len;
545
546        src->text += consumed;
547        src->len -= consumed;
548        return 1;
549      }
550    else
551      {
552        /* We have found a "non-whitespace character that cannot be
553           one of the above."  Make a token out of it.  */
554        int consumed;
555
556        set_token (tok, p, p + 1);
557        consumed = p - src->text + tok->len;
558        src->text += consumed;
559        src->len -= consumed;
560        return 1;
561      }
562
563  return 0;
564}
565
566
567
568/* Appending token strings, with and without splicing  */
569
570
571/* Append the macro buffer SRC to the end of DEST, and ensure that
572   doing so doesn't splice the token at the end of SRC with the token
573   at the beginning of DEST.  SRC and DEST must have their last_token
574   fields set.  Upon return, DEST's last_token field is set correctly.
575
576   For example:
577
578   If DEST is "(" and SRC is "y", then we can return with
579   DEST set to "(y" --- we've simply appended the two buffers.
580
581   However, if DEST is "x" and SRC is "y", then we must not return
582   with DEST set to "xy" --- that would splice the two tokens "x" and
583   "y" together to make a single token "xy".  However, it would be
584   fine to return with DEST set to "x y".  Similarly, "<" and "<" must
585   yield "< <", not "<<", etc.  */
586static void
587append_tokens_without_splicing (struct macro_buffer *dest,
588                                struct macro_buffer *src)
589{
590  int original_dest_len = dest->len;
591  struct macro_buffer dest_tail, new_token;
592
593  gdb_assert (src->last_token != -1);
594  gdb_assert (dest->last_token != -1);
595
596  /* First, just try appending the two, and call get_token to see if
597     we got a splice.  */
598  dest->appendmem (src->text, src->len);
599
600  /* If DEST originally had no token abutting its end, then we can't
601     have spliced anything, so we're done.  */
602  if (dest->last_token == original_dest_len)
603    {
604      dest->last_token = original_dest_len + src->last_token;
605      return;
606    }
607
608  /* Set DEST_TAIL to point to the last token in DEST, followed by
609     all the stuff we just appended.  */
610  dest_tail.set_shared (dest->text + dest->last_token,
611			dest->len - dest->last_token);
612
613  /* Re-parse DEST's last token.  We know that DEST used to contain
614     at least one token, so if it doesn't contain any after the
615     append, then we must have spliced "/" and "*" or "/" and "/" to
616     make a comment start.  (Just for the record, I got this right
617     the first time.  This is not a bug fix.)  */
618  if (get_token (&new_token, &dest_tail)
619      && (new_token.text + new_token.len
620          == dest->text + original_dest_len))
621    {
622      /* No splice, so we're done.  */
623      dest->last_token = original_dest_len + src->last_token;
624      return;
625    }
626
627  /* Okay, a simple append caused a splice.  Let's chop dest back to
628     its original length and try again, but separate the texts with a
629     space.  */
630  dest->len = original_dest_len;
631  dest->appendc (' ');
632  dest->appendmem (src->text, src->len);
633
634  dest_tail.set_shared (dest->text + dest->last_token,
635			dest->len - dest->last_token);
636
637  /* Try to re-parse DEST's last token, as above.  */
638  if (get_token (&new_token, &dest_tail)
639      && (new_token.text + new_token.len
640          == dest->text + original_dest_len))
641    {
642      /* No splice, so we're done.  */
643      dest->last_token = original_dest_len + 1 + src->last_token;
644      return;
645    }
646
647  /* As far as I know, there's no case where inserting a space isn't
648     enough to prevent a splice.  */
649  internal_error (__FILE__, __LINE__,
650                  _("unable to avoid splicing tokens during macro expansion"));
651}
652
653/* Stringify an argument, and insert it into DEST.  ARG is the text to
654   stringify; it is LEN bytes long.  */
655
656static void
657stringify (struct macro_buffer *dest, const char *arg, int len)
658{
659  /* Trim initial whitespace from ARG.  */
660  while (len > 0 && macro_is_whitespace (*arg))
661    {
662      ++arg;
663      --len;
664    }
665
666  /* Trim trailing whitespace from ARG.  */
667  while (len > 0 && macro_is_whitespace (arg[len - 1]))
668    --len;
669
670  /* Insert the string.  */
671  dest->appendc ('"');
672  while (len > 0)
673    {
674      /* We could try to handle strange cases here, like control
675	 characters, but there doesn't seem to be much point.  */
676      if (macro_is_whitespace (*arg))
677	{
678	  /* Replace a sequence of whitespace with a single space.  */
679	  dest->appendc (' ');
680	  while (len > 1 && macro_is_whitespace (arg[1]))
681	    {
682	      ++arg;
683	      --len;
684	    }
685	}
686      else if (*arg == '\\' || *arg == '"')
687	{
688	  dest->appendc ('\\');
689	  dest->appendc (*arg);
690	}
691      else
692	dest->appendc (*arg);
693      ++arg;
694      --len;
695    }
696  dest->appendc ('"');
697  dest->last_token = dest->len;
698}
699
700/* See macroexp.h.  */
701
702char *
703macro_stringify (const char *str)
704{
705  int len = strlen (str);
706  struct macro_buffer buffer (len);
707
708  stringify (&buffer, str, len);
709  buffer.appendc ('\0');
710
711  return buffer.release ();
712}
713
714
715/* Expanding macros!  */
716
717
/* A singly-linked list of the names of the macros we are currently
   expanding --- for detecting expansion loops.  */
struct macro_name_list
{
  /* The name of one macro on the list.  */
  const char *name;

  /* The rest of the list, or null at the end.  */
  struct macro_name_list *next;
};


/* Return non-zero if we are currently expanding the macro named NAME,
   according to LIST; otherwise, return zero.  An empty (null) LIST
   contains no names, so the result is zero.

   You know, it would be possible to get rid of all the NO_LOOP
   arguments to these functions by simply generating a new lookup
   function and baton which refuses to find the definition for a
   particular macro, and otherwise delegates the decision to another
   function/baton pair.  But that makes the linked list of excluded
   macros chained through untyped baton pointers, which will make it
   harder to debug.  :(  */
static int
currently_rescanning (struct macro_name_list *list, const char *name)
{
  struct macro_name_list *node = list;

  while (node != NULL)
    {
      if (strcmp (node->name, name) == 0)
        return 1;

      node = node->next;
    }

  return 0;
}
745
746
747/* Gather the arguments to a macro expansion.
748
749   NAME is the name of the macro being invoked.  (It's only used for
750   printing error messages.)
751
752   Assume that SRC is the text of the macro invocation immediately
753   following the macro name.  For example, if we're processing the
754   text foo(bar, baz), then NAME would be foo and SRC will be (bar,
755   baz).
756
757   If SRC doesn't start with an open paren ( token at all, return
758   false, leave SRC unchanged, and don't set *ARGS_PTR to anything.
759
760   If SRC doesn't contain a properly terminated argument list, then
761   raise an error.
762
763   For a variadic macro, NARGS holds the number of formal arguments to
764   the macro.  For a GNU-style variadic macro, this should be the
765   number of named arguments.  For a non-variadic macro, NARGS should
766   be -1.
767
768   Otherwise, return true and set *ARGS_PTR to a vector of macro
769   buffers referring to the argument texts.  The macro buffers share
770   their text with SRC, and their last_token fields are initialized.
771
772   NOTE WELL: if SRC starts with a open paren ( token followed
773   immediately by a close paren ) token (e.g., the invocation looks
774   like "foo()"), we treat that as one argument, which happens to be
775   the empty list of tokens.  The caller should keep in mind that such
776   a sequence of tokens is a valid way to invoke one-parameter
777   function-like macros, but also a valid way to invoke zero-parameter
778   function-like macros.  Eeew.
779
780   Consume the tokens from SRC; after this call, SRC contains the text
781   following the invocation.  */
782
783static bool
784gather_arguments (const char *name, struct macro_buffer *src, int nargs,
785		  std::vector<struct macro_buffer> *args_ptr)
786{
787  struct macro_buffer tok;
788  std::vector<struct macro_buffer> args;
789
790  /* Does SRC start with an opening paren token?  Read from a copy of
791     SRC, so SRC itself is unaffected if we don't find an opening
792     paren.  */
793  {
794    struct macro_buffer temp (src->text, src->len);
795
796    if (! get_token (&tok, &temp)
797        || tok.len != 1
798        || tok.text[0] != '(')
799      return false;
800  }
801
802  /* Consume SRC's opening paren.  */
803  get_token (&tok, src);
804
805  for (;;)
806    {
807      struct macro_buffer *arg;
808      int depth;
809
810      /* Initialize the next argument.  */
811      args.emplace_back ();
812      arg = &args.back ();
813      set_token (arg, src->text, src->text);
814
815      /* Gather the argument's tokens.  */
816      depth = 0;
817      for (;;)
818        {
819          if (! get_token (&tok, src))
820            error (_("Malformed argument list for macro `%s'."), name);
821
822          /* Is tok an opening paren?  */
823          if (tok.len == 1 && tok.text[0] == '(')
824            depth++;
825
826          /* Is tok is a closing paren?  */
827          else if (tok.len == 1 && tok.text[0] == ')')
828            {
829              /* If it's a closing paren at the top level, then that's
830                 the end of the argument list.  */
831              if (depth == 0)
832                {
833		  /* In the varargs case, the last argument may be
834		     missing.  Add an empty argument in this case.  */
835		  if (nargs != -1 && args.size () == nargs - 1)
836		    {
837		      args.emplace_back ();
838		      arg = &args.back ();
839		      set_token (arg, src->text, src->text);
840		    }
841
842		  *args_ptr = std::move (args);
843		  return true;
844                }
845
846              depth--;
847            }
848
849          /* If tok is a comma at top level, then that's the end of
850             the current argument.  However, if we are handling a
851             variadic macro and we are computing the last argument, we
852             want to include the comma and remaining tokens.  */
853          else if (tok.len == 1 && tok.text[0] == ',' && depth == 0
854		   && (nargs == -1 || args.size () < nargs))
855            break;
856
857          /* Extend the current argument to enclose this token.  If
858             this is the current argument's first token, leave out any
859             leading whitespace, just for aesthetics.  */
860          if (arg->len == 0)
861            {
862              arg->text = tok.text;
863              arg->len = tok.len;
864              arg->last_token = 0;
865            }
866          else
867            {
868              arg->len = (tok.text + tok.len) - arg->text;
869              arg->last_token = tok.text - arg->text;
870            }
871        }
872    }
873}
874
875
876/* The `expand' and `substitute_args' functions both invoke `scan'
877   recursively, so we need a forward declaration somewhere.  */
878static void scan (struct macro_buffer *dest,
879                  struct macro_buffer *src,
880                  struct macro_name_list *no_loop,
881                  macro_lookup_ftype *lookup_func,
882                  void *lookup_baton);
883
884
885/* A helper function for substitute_args.
886
887   ARGV is a vector of all the arguments; ARGC is the number of
888   arguments.  IS_VARARGS is true if the macro being substituted is a
889   varargs macro; in this case VA_ARG_NAME is the name of the
890   "variable" argument.  VA_ARG_NAME is ignored if IS_VARARGS is
891   false.
892
893   If the token TOK is the name of a parameter, return the parameter's
894   index.  If TOK is not an argument, return -1.  */
895
896static int
897find_parameter (const struct macro_buffer *tok,
898		int is_varargs, const struct macro_buffer *va_arg_name,
899		int argc, const char * const *argv)
900{
901  int i;
902
903  if (! tok->is_identifier)
904    return -1;
905
906  for (i = 0; i < argc; ++i)
907    if (tok->len == strlen (argv[i])
908	&& !memcmp (tok->text, argv[i], tok->len))
909      return i;
910
911  if (is_varargs && tok->len == va_arg_name->len
912      && ! memcmp (tok->text, va_arg_name->text, tok->len))
913    return argc - 1;
914
915  return -1;
916}
917
918/* Helper function for substitute_args that gets the next token and
919   updates the passed-in state variables.  */
920
921static void
922get_next_token_for_substitution (struct macro_buffer *replacement_list,
923				 struct macro_buffer *token,
924				 char **start,
925				 struct macro_buffer *lookahead,
926				 char **lookahead_start,
927				 int *lookahead_valid,
928				 bool *keep_going)
929{
930  if (!*lookahead_valid)
931    *keep_going = false;
932  else
933    {
934      *keep_going = true;
935      *token = *lookahead;
936      *start = *lookahead_start;
937      *lookahead_start = replacement_list->text;
938      *lookahead_valid = get_token (lookahead, replacement_list);
939    }
940}
941
942/* Given the macro definition DEF, being invoked with the actual
943   arguments given by ARGV, substitute the arguments into the
944   replacement list, and store the result in DEST.
945
946   IS_VARARGS should be true if DEF is a varargs macro.  In this case,
947   VA_ARG_NAME should be the name of the "variable" argument -- either
948   __VA_ARGS__ for c99-style varargs, or the final argument name, for
949   GNU-style varargs.  If IS_VARARGS is false, this parameter is
950   ignored.
951
952   If it is necessary to expand macro invocations in one of the
953   arguments, use LOOKUP_FUNC and LOOKUP_BATON to find the macro
954   definitions, and don't expand invocations of the macros listed in
955   NO_LOOP.  */
956
957static void
958substitute_args (struct macro_buffer *dest,
959                 struct macro_definition *def,
960		 int is_varargs, const struct macro_buffer *va_arg_name,
961		 const std::vector<struct macro_buffer> &argv,
962                 struct macro_name_list *no_loop,
963                 macro_lookup_ftype *lookup_func,
964                 void *lookup_baton)
965{
966  /* The token we are currently considering.  */
967  struct macro_buffer tok;
968  /* The replacement list's pointer from just before TOK was lexed.  */
969  char *original_rl_start;
970  /* We have a single lookahead token to handle token splicing.  */
971  struct macro_buffer lookahead;
972  /* The lookahead token might not be valid.  */
973  int lookahead_valid;
974  /* The replacement list's pointer from just before LOOKAHEAD was
975     lexed.  */
976  char *lookahead_rl_start;
977
978  /* A macro buffer for the macro's replacement list.  */
979  struct macro_buffer replacement_list (def->replacement,
980					strlen (def->replacement));
981
982  gdb_assert (dest->len == 0);
983  dest->last_token = 0;
984
985  original_rl_start = replacement_list.text;
986  if (! get_token (&tok, &replacement_list))
987    return;
988  lookahead_rl_start = replacement_list.text;
989  lookahead_valid = get_token (&lookahead, &replacement_list);
990
991  /* __VA_OPT__ state variable.  The states are:
992     0 - nothing happening
993     1 - saw __VA_OPT__
994     >= 2 in __VA_OPT__, the value encodes the parenthesis depth.  */
995  unsigned vaopt_state = 0;
996
997  for (bool keep_going = true;
998       keep_going;
999       get_next_token_for_substitution (&replacement_list,
1000					&tok,
1001					&original_rl_start,
1002					&lookahead,
1003					&lookahead_rl_start,
1004					&lookahead_valid,
1005					&keep_going))
1006    {
1007      bool token_is_vaopt = (tok.len == 10
1008			     && strncmp (tok.text, "__VA_OPT__", 10) == 0);
1009
1010      if (vaopt_state > 0)
1011	{
1012	  if (token_is_vaopt)
1013	    error (_("__VA_OPT__ cannot appear inside __VA_OPT__"));
1014	  else if (tok.len == 1 && tok.text[0] == '(')
1015	    {
1016	      ++vaopt_state;
1017	      /* We just entered __VA_OPT__, so don't emit this
1018		 token.  */
1019	      continue;
1020	    }
1021	  else if (vaopt_state == 1)
1022	    error (_("__VA_OPT__ must be followed by an open parenthesis"));
1023	  else if (tok.len == 1 && tok.text[0] == ')')
1024	    {
1025	      --vaopt_state;
1026	      if (vaopt_state == 1)
1027		{
1028		  /* Done with __VA_OPT__.  */
1029		  vaopt_state = 0;
1030		  /* Don't emit.  */
1031		  continue;
1032		}
1033	    }
1034
1035	  /* If __VA_ARGS__ is empty, then drop the contents of
1036	     __VA_OPT__.  */
1037	  if (argv.back ().len == 0)
1038	    continue;
1039	}
1040      else if (token_is_vaopt)
1041	{
1042	  if (!is_varargs)
1043	    error (_("__VA_OPT__ is only valid in a variadic macro"));
1044	  vaopt_state = 1;
1045	  /* Don't emit this token.  */
1046	  continue;
1047	}
1048
1049      /* Just for aesthetics.  If we skipped some whitespace, copy
1050         that to DEST.  */
1051      if (tok.text > original_rl_start)
1052        {
1053          dest->appendmem (original_rl_start, tok.text - original_rl_start);
1054          dest->last_token = dest->len;
1055        }
1056
1057      /* Is this token the stringification operator?  */
1058      if (tok.len == 1
1059          && tok.text[0] == '#')
1060	{
1061	  int arg;
1062
1063	  if (!lookahead_valid)
1064	    error (_("Stringification operator requires an argument."));
1065
1066	  arg = find_parameter (&lookahead, is_varargs, va_arg_name,
1067				def->argc, def->argv);
1068	  if (arg == -1)
1069	    error (_("Argument to stringification operator must name "
1070		     "a macro parameter."));
1071
1072	  stringify (dest, argv[arg].text, argv[arg].len);
1073
1074	  /* Read one token and let the loop iteration code handle the
1075	     rest.  */
1076	  lookahead_rl_start = replacement_list.text;
1077	  lookahead_valid = get_token (&lookahead, &replacement_list);
1078	}
1079      /* Is this token the splicing operator?  */
1080      else if (tok.len == 2
1081	       && tok.text[0] == '#'
1082	       && tok.text[1] == '#')
1083	error (_("Stray splicing operator"));
1084      /* Is the next token the splicing operator?  */
1085      else if (lookahead_valid
1086	       && lookahead.len == 2
1087	       && lookahead.text[0] == '#'
1088	       && lookahead.text[1] == '#')
1089	{
1090	  int finished = 0;
1091	  int prev_was_comma = 0;
1092
1093	  /* Note that GCC warns if the result of splicing is not a
1094	     token.  In the debugger there doesn't seem to be much
1095	     benefit from doing this.  */
1096
1097	  /* Insert the first token.  */
1098	  if (tok.len == 1 && tok.text[0] == ',')
1099	    prev_was_comma = 1;
1100	  else
1101	    {
1102	      int arg = find_parameter (&tok, is_varargs, va_arg_name,
1103					def->argc, def->argv);
1104
1105	      if (arg != -1)
1106		dest->appendmem (argv[arg].text, argv[arg].len);
1107	      else
1108		dest->appendmem (tok.text, tok.len);
1109	    }
1110
1111	  /* Apply a possible sequence of ## operators.  */
1112	  for (;;)
1113	    {
1114	      if (! get_token (&tok, &replacement_list))
1115		error (_("Splicing operator at end of macro"));
1116
1117	      /* Handle a comma before a ##.  If we are handling
1118		 varargs, and the token on the right hand side is the
1119		 varargs marker, and the final argument is empty or
1120		 missing, then drop the comma.  This is a GNU
1121		 extension.  There is one ambiguous case here,
1122		 involving pedantic behavior with an empty argument,
1123		 but we settle that in favor of GNU-style (GCC uses an
1124		 option).  If we aren't dealing with varargs, we
1125		 simply insert the comma.  */
1126	      if (prev_was_comma)
1127		{
1128		  if (! (is_varargs
1129			 && tok.len == va_arg_name->len
1130			 && !memcmp (tok.text, va_arg_name->text, tok.len)
1131			 && argv.back ().len == 0))
1132		    dest->appendmem (",", 1);
1133		  prev_was_comma = 0;
1134		}
1135
1136	      /* Insert the token.  If it is a parameter, insert the
1137		 argument.  If it is a comma, treat it specially.  */
1138	      if (tok.len == 1 && tok.text[0] == ',')
1139		prev_was_comma = 1;
1140	      else
1141		{
1142		  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1143					    def->argc, def->argv);
1144
1145		  if (arg != -1)
1146		    dest->appendmem (argv[arg].text, argv[arg].len);
1147		  else
1148		    dest->appendmem (tok.text, tok.len);
1149		}
1150
1151	      /* Now read another token.  If it is another splice, we
1152		 loop.  */
1153	      original_rl_start = replacement_list.text;
1154	      if (! get_token (&tok, &replacement_list))
1155		{
1156		  finished = 1;
1157		  break;
1158		}
1159
1160	      if (! (tok.len == 2
1161		     && tok.text[0] == '#'
1162		     && tok.text[1] == '#'))
1163		break;
1164	    }
1165
1166	  if (prev_was_comma)
1167	    {
1168	      /* We saw a comma.  Insert it now.  */
1169	      dest->appendmem (",", 1);
1170	    }
1171
1172          dest->last_token = dest->len;
1173	  if (finished)
1174	    lookahead_valid = 0;
1175	  else
1176	    {
1177	      /* Set up for the loop iterator.  */
1178	      lookahead = tok;
1179	      lookahead_rl_start = original_rl_start;
1180	      lookahead_valid = 1;
1181	    }
1182	}
1183      else
1184	{
1185	  /* Is this token an identifier?  */
1186	  int substituted = 0;
1187	  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1188				    def->argc, def->argv);
1189
1190	  if (arg != -1)
1191	    {
1192	      /* Expand any macro invocations in the argument text,
1193		 and append the result to dest.  Remember that scan
1194		 mutates its source, so we need to scan a new buffer
1195		 referring to the argument's text, not the argument
1196		 itself.  */
1197	      struct macro_buffer arg_src (argv[arg].text, argv[arg].len);
1198	      scan (dest, &arg_src, no_loop, lookup_func, lookup_baton);
1199	      substituted = 1;
1200	    }
1201
1202	  /* If it wasn't a parameter, then just copy it across.  */
1203	  if (! substituted)
1204	    append_tokens_without_splicing (dest, &tok);
1205	}
1206    }
1207
1208  if (vaopt_state > 0)
1209    error (_("Unterminated __VA_OPT__"));
1210}
1211
1212
1213/* Expand a call to a macro named ID, whose definition is DEF.  Append
1214   its expansion to DEST.  SRC is the input text following the ID
1215   token.  We are currently rescanning the expansions of the macros
1216   named in NO_LOOP; don't re-expand them.  Use LOOKUP_FUNC and
1217   LOOKUP_BATON to find definitions for any nested macro references.
1218
1219   Return 1 if we decided to expand it, zero otherwise.  (If it's a
1220   function-like macro name that isn't followed by an argument list,
1221   we don't expand it.)  If we return zero, leave SRC unchanged.  */
1222static int
1223expand (const char *id,
1224        struct macro_definition *def,
1225        struct macro_buffer *dest,
1226        struct macro_buffer *src,
1227        struct macro_name_list *no_loop,
1228        macro_lookup_ftype *lookup_func,
1229        void *lookup_baton)
1230{
1231  struct macro_name_list new_no_loop;
1232
1233  /* Create a new node to be added to the front of the no-expand list.
1234     This list is appropriate for re-scanning replacement lists, but
1235     it is *not* appropriate for scanning macro arguments; invocations
1236     of the macro whose arguments we are gathering *do* get expanded
1237     there.  */
1238  new_no_loop.name = id;
1239  new_no_loop.next = no_loop;
1240
1241  /* What kind of macro are we expanding?  */
1242  if (def->kind == macro_object_like)
1243    {
1244      struct macro_buffer replacement_list (def->replacement,
1245					    strlen (def->replacement));
1246
1247      scan (dest, &replacement_list, &new_no_loop, lookup_func, lookup_baton);
1248      return 1;
1249    }
1250  else if (def->kind == macro_function_like)
1251    {
1252      struct macro_buffer va_arg_name;
1253      int is_varargs = 0;
1254
1255      if (def->argc >= 1)
1256	{
1257	  if (strcmp (def->argv[def->argc - 1], "...") == 0)
1258	    {
1259	      /* In C99-style varargs, substitution is done using
1260		 __VA_ARGS__.  */
1261	      va_arg_name.set_shared ("__VA_ARGS__", strlen ("__VA_ARGS__"));
1262	      is_varargs = 1;
1263	    }
1264	  else
1265	    {
1266	      int len = strlen (def->argv[def->argc - 1]);
1267
1268	      if (len > 3
1269		  && strcmp (def->argv[def->argc - 1] + len - 3, "...") == 0)
1270		{
1271		  /* In GNU-style varargs, the name of the
1272		     substitution parameter is the name of the formal
1273		     argument without the "...".  */
1274		  va_arg_name.set_shared (def->argv[def->argc - 1], len - 3);
1275		  is_varargs = 1;
1276		}
1277	    }
1278	}
1279
1280      std::vector<struct macro_buffer> argv;
1281      /* If we couldn't find any argument list, then we don't expand
1282         this macro.  */
1283      if (!gather_arguments (id, src, is_varargs ? def->argc : -1,
1284			     &argv))
1285	return 0;
1286
1287      /* Check that we're passing an acceptable number of arguments for
1288         this macro.  */
1289      if (argv.size () != def->argc)
1290        {
1291	  if (is_varargs && argv.size () >= def->argc - 1)
1292	    {
1293	      /* Ok.  */
1294	    }
1295          /* Remember that a sequence of tokens like "foo()" is a
1296             valid invocation of a macro expecting either zero or one
1297             arguments.  */
1298          else if (! (argv.size () == 1
1299		      && argv[0].len == 0
1300		      && def->argc == 0))
1301            error (_("Wrong number of arguments to macro `%s' "
1302                   "(expected %d, got %d)."),
1303                   id, def->argc, int (argv.size ()));
1304        }
1305
1306      /* Note that we don't expand macro invocations in the arguments
1307         yet --- we let subst_args take care of that.  Parameters that
1308         appear as operands of the stringifying operator "#" or the
1309         splicing operator "##" don't get macro references expanded,
1310         so we can't really tell whether it's appropriate to macro-
1311         expand an argument until we see how it's being used.  */
1312      struct macro_buffer substituted (0);
1313      substitute_args (&substituted, def, is_varargs, &va_arg_name,
1314		       argv, no_loop, lookup_func, lookup_baton);
1315
1316      /* Now `substituted' is the macro's replacement list, with all
1317         argument values substituted into it properly.  Re-scan it for
1318         macro references, but don't expand invocations of this macro.
1319
1320         We create a new buffer, `substituted_src', which points into
1321         `substituted', and scan that.  We can't scan `substituted'
1322         itself, since the tokenization process moves the buffer's
1323         text pointer around, and we still need to be able to find
1324         `substituted's original text buffer after scanning it so we
1325         can free it.  */
1326      struct macro_buffer substituted_src (substituted.text, substituted.len);
1327      scan (dest, &substituted_src, &new_no_loop, lookup_func, lookup_baton);
1328
1329      return 1;
1330    }
1331  else
1332    internal_error (__FILE__, __LINE__, _("bad macro definition kind"));
1333}
1334
1335
1336/* If the single token in SRC_FIRST followed by the tokens in SRC_REST
1337   constitute a macro invokation not forbidden in NO_LOOP, append its
1338   expansion to DEST and return non-zero.  Otherwise, return zero, and
1339   leave DEST unchanged.
1340
1341   SRC_FIRST and SRC_REST must be shared buffers; DEST must not be one.
1342   SRC_FIRST must be a string built by get_token.  */
1343static int
1344maybe_expand (struct macro_buffer *dest,
1345              struct macro_buffer *src_first,
1346              struct macro_buffer *src_rest,
1347              struct macro_name_list *no_loop,
1348              macro_lookup_ftype *lookup_func,
1349              void *lookup_baton)
1350{
1351  gdb_assert (src_first->shared);
1352  gdb_assert (src_rest->shared);
1353  gdb_assert (! dest->shared);
1354
1355  /* Is this token an identifier?  */
1356  if (src_first->is_identifier)
1357    {
1358      /* Make a null-terminated copy of it, since that's what our
1359         lookup function expects.  */
1360      std::string id (src_first->text, src_first->len);
1361
1362      /* If we're currently re-scanning the result of expanding
1363         this macro, don't expand it again.  */
1364      if (! currently_rescanning (no_loop, id.c_str ()))
1365        {
1366          /* Does this identifier have a macro definition in scope?  */
1367          struct macro_definition *def = lookup_func (id.c_str (),
1368						      lookup_baton);
1369
1370          if (def && expand (id.c_str (), def, dest, src_rest, no_loop,
1371                             lookup_func, lookup_baton))
1372	    return 1;
1373        }
1374    }
1375
1376  return 0;
1377}
1378
1379
1380/* Expand macro references in SRC, appending the results to DEST.
1381   Assume we are re-scanning the result of expanding the macros named
1382   in NO_LOOP, and don't try to re-expand references to them.
1383
1384   SRC must be a shared buffer; DEST must not be one.  */
1385static void
1386scan (struct macro_buffer *dest,
1387      struct macro_buffer *src,
1388      struct macro_name_list *no_loop,
1389      macro_lookup_ftype *lookup_func,
1390      void *lookup_baton)
1391{
1392  gdb_assert (src->shared);
1393  gdb_assert (! dest->shared);
1394
1395  for (;;)
1396    {
1397      struct macro_buffer tok;
1398      char *original_src_start = src->text;
1399
1400      /* Find the next token in SRC.  */
1401      if (! get_token (&tok, src))
1402        break;
1403
1404      /* Just for aesthetics.  If we skipped some whitespace, copy
1405         that to DEST.  */
1406      if (tok.text > original_src_start)
1407        {
1408          dest->appendmem (original_src_start, tok.text - original_src_start);
1409          dest->last_token = dest->len;
1410        }
1411
1412      if (! maybe_expand (dest, &tok, src, no_loop, lookup_func, lookup_baton))
1413        /* We didn't end up expanding tok as a macro reference, so
1414           simply append it to dest.  */
1415        append_tokens_without_splicing (dest, &tok);
1416    }
1417
1418  /* Just for aesthetics.  If there was any trailing whitespace in
1419     src, copy it to dest.  */
1420  if (src->len)
1421    {
1422      dest->appendmem (src->text, src->len);
1423      dest->last_token = dest->len;
1424    }
1425}
1426
1427
1428gdb::unique_xmalloc_ptr<char>
1429macro_expand (const char *source,
1430              macro_lookup_ftype *lookup_func,
1431              void *lookup_func_baton)
1432{
1433  struct macro_buffer src (source, strlen (source));
1434
1435  struct macro_buffer dest (0);
1436  dest.last_token = 0;
1437
1438  scan (&dest, &src, 0, lookup_func, lookup_func_baton);
1439
1440  dest.appendc ('\0');
1441
1442  return gdb::unique_xmalloc_ptr<char> (dest.release ());
1443}
1444
1445
1446gdb::unique_xmalloc_ptr<char>
1447macro_expand_once (const char *source,
1448                   macro_lookup_ftype *lookup_func,
1449                   void *lookup_func_baton)
1450{
1451  error (_("Expand-once not implemented yet."));
1452}
1453
1454
1455char *
1456macro_expand_next (const char **lexptr,
1457                   macro_lookup_ftype *lookup_func,
1458                   void *lookup_baton)
1459{
1460  struct macro_buffer tok;
1461
1462  /* Set up SRC to refer to the input text, pointed to by *lexptr.  */
1463  struct macro_buffer src (*lexptr, strlen (*lexptr));
1464
1465  /* Set up DEST to receive the expansion, if there is one.  */
1466  struct macro_buffer dest (0);
1467  dest.last_token = 0;
1468
1469  /* Get the text's first preprocessing token.  */
1470  if (! get_token (&tok, &src))
1471    return 0;
1472
1473  /* If it's a macro invocation, expand it.  */
1474  if (maybe_expand (&dest, &tok, &src, 0, lookup_func, lookup_baton))
1475    {
1476      /* It was a macro invocation!  Package up the expansion as a
1477         null-terminated string and return it.  Set *lexptr to the
1478         start of the next token in the input.  */
1479      dest.appendc ('\0');
1480      *lexptr = src.text;
1481      return dest.release ();
1482    }
1483  else
1484    {
1485      /* It wasn't a macro invocation.  */
1486      return 0;
1487    }
1488}
1489