macroexp.c revision 1.7
1/* C preprocessor macro expansion for GDB.
2   Copyright (C) 2002-2017 Free Software Foundation, Inc.
3   Contributed by Red Hat, Inc.
4
5   This file is part of GDB.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19
20#include "defs.h"
21#include "gdb_obstack.h"
22#include "bcache.h"
23#include "macrotab.h"
24#include "macroexp.h"
25#include "c-lang.h"
26
27
28
29/* A resizeable, substringable string type.  */
30
31
/* A growable character buffer that can also serve as a view onto a
   substring of some other string.  It supports cheap appends when it
   owns its storage.  */
struct macro_buffer
{
  /* The characters.  Only the first LEN bytes are meaningful, although
     SIZE bytes are allocated.  A SIZE of zero means TEXT is not a
     malloc'ed block.  When SHARED is set, TEXT points into a larger
     string owned by someone else, so nothing may be appended to it.
     Because of sharing, the text is not, in general,
     null-terminated.  */
  char *text;

  /* How many characters of TEXT are in use.  */
  int len;

  /* How many bytes are allocated for TEXT.  This has no meaning for a
     shared buffer; there it is kept at zero so that every "is there
     room to append?" test fails without having to consult SHARED
     first.  */
  int size;

  /* False when TEXT is its own malloc block and may be realloc'ed.
     True when TEXT points into the middle of another block, or at a
     string literal, and must never be reallocated.  */
  bool shared;

  /* Used to detect token splicing.

     The index within TEXT of the first character of the token that
     touches the end of TEXT.  When TEXT holds no tokens, or ends in
     whitespace (so no token touches the end), this equals LEN.  It is
     -1 when nothing is known about the contents of TEXT.  */
  int last_token;

  /* Meaningful only when this buffer holds a result from get_token:
     non-zero for an identifier token, zero for any other token.  */
  int is_identifier;
};
74
75
76/* Set the macro buffer *B to the empty string, guessing that its
77   final contents will fit in N bytes.  (It'll get resized if it
78   doesn't, so the guess doesn't have to be right.)  Allocate the
79   initial storage with xmalloc.  */
80static void
81init_buffer (struct macro_buffer *b, int n)
82{
83  b->size = n;
84  if (n > 0)
85    b->text = (char *) xmalloc (n);
86  else
87    b->text = NULL;
88  b->len = 0;
89  b->shared = false;
90  b->last_token = -1;
91}
92
93
94/* Set the macro buffer *BUF to refer to the LEN bytes at ADDR, as a
95   shared substring.  */
96
97static void
98init_shared_buffer (struct macro_buffer *buf, const char *addr, int len)
99{
100  /* The function accept a "const char *" addr so that clients can
101     pass in string literals without casts.  */
102  buf->text = (char *) addr;
103  buf->len = len;
104  buf->shared = true;
105  buf->size = 0;
106  buf->last_token = -1;
107}
108
109
110/* Free the text of the buffer B.  Raise an error if B is shared.  */
111static void
112free_buffer (struct macro_buffer *b)
113{
114  gdb_assert (! b->shared);
115  if (b->size)
116    xfree (b->text);
117}
118
119/* Like free_buffer, but return the text as an xstrdup()d string.
120   This only exists to try to make the API relatively clean.  */
121
122static char *
123free_buffer_return_text (struct macro_buffer *b)
124{
125  gdb_assert (! b->shared);
126  gdb_assert (b->size);
127  /* Nothing to do.  */
128  return b->text;
129}
130
/* Cleanup callback for macro buffers: UNTYPED_BUF is really a
   struct macro_buffer *, which is handed to free_buffer.  */
static void
cleanup_macro_buffer (void *untyped_buf)
{
  struct macro_buffer *buf = (struct macro_buffer *) untyped_buf;

  free_buffer (buf);
}
137
138
139/* Resize the buffer B to be at least N bytes long.  Raise an error if
140   B shouldn't be resized.  */
141static void
142resize_buffer (struct macro_buffer *b, int n)
143{
144  /* We shouldn't be trying to resize shared strings.  */
145  gdb_assert (! b->shared);
146
147  if (b->size == 0)
148    b->size = n;
149  else
150    while (b->size <= n)
151      b->size *= 2;
152
153  b->text = (char *) xrealloc (b->text, b->size);
154}
155
156
157/* Append the character C to the buffer B.  */
158static void
159appendc (struct macro_buffer *b, int c)
160{
161  int new_len = b->len + 1;
162
163  if (new_len > b->size)
164    resize_buffer (b, new_len);
165
166  b->text[b->len] = c;
167  b->len = new_len;
168}
169
170
171/* Append the LEN bytes at ADDR to the buffer B.  */
172static void
173appendmem (struct macro_buffer *b, const char *addr, int len)
174{
175  int new_len = b->len + len;
176
177  if (new_len > b->size)
178    resize_buffer (b, new_len);
179
180  memcpy (b->text + b->len, addr, len);
181  b->len = new_len;
182}
183
184
185
186/* Recognizing preprocessor tokens.  */
187
188
/* Return 1 if C is one of the characters the macro lexer treats as
   whitespace (space, tab, newline, vertical tab, form feed), and 0
   otherwise.  */
int
macro_is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
      return 1;

    default:
      return 0;
    }
}
198
199
/* Return 1 if C is a decimal digit '0' through '9', and 0
   otherwise.  */
int
macro_is_digit (int c)
{
  if (c < '0')
    return 0;

  return c <= '9';
}
205
206
/* Return 1 if C may appear in an identifier without being a digit,
   i.e. it is an underscore or an ASCII letter of either case; return
   0 otherwise.  */
int
macro_is_identifier_nondigit (int c)
{
  if (c == '_')
    return 1;

  if (c >= 'a' && c <= 'z')
    return 1;

  return c >= 'A' && c <= 'Z';
}
214
215
216static void
217set_token (struct macro_buffer *tok, char *start, char *end)
218{
219  init_shared_buffer (tok, start, end - start);
220  tok->last_token = 0;
221
222  /* Presumed; get_identifier may overwrite this.  */
223  tok->is_identifier = 0;
224}
225
226
/* If the text from P up to (but not including) END begins with a
   comment, set *TOK to span that comment and return 1; otherwise
   return 0.  Both block and line comments are recognized.  A line
   comment stops before the terminating newline (or at END).  An
   unterminated block comment raises an error.  */
static int
get_comment (struct macro_buffer *tok, char *p, char *end)
{
  char *cursor;

  /* Either comment opener needs two characters, the first a slash.  */
  if (end - p < 2 || p[0] != '/')
    return 0;

  if (p[1] == '*')
    {
      /* Block comment: look for the closing star-slash pair.  */
      for (cursor = p + 2; cursor < end; cursor++)
	if (end - cursor >= 2
	    && cursor[0] == '*'
	    && cursor[1] == '/')
	  {
	    set_token (tok, p, cursor + 2);
	    return 1;
	  }

      error (_("Unterminated comment in macro expansion."));
    }

  if (p[1] == '/')
    {
      /* Line comment: everything up to the newline belongs to it.  */
      cursor = p + 2;
      while (cursor < end && *cursor != '\n')
	cursor++;

      set_token (tok, p, cursor);
      return 1;
    }

  return 0;
}
267
268
269static int
270get_identifier (struct macro_buffer *tok, char *p, char *end)
271{
272  if (p < end
273      && macro_is_identifier_nondigit (*p))
274    {
275      char *tok_start = p;
276
277      while (p < end
278             && (macro_is_identifier_nondigit (*p)
279                 || macro_is_digit (*p)))
280        p++;
281
282      set_token (tok, tok_start, p);
283      tok->is_identifier = 1;
284      return 1;
285    }
286  else
287    return 0;
288}
289
290
/* If the text from P up to (but not including) END begins with a
   preprocessing number, set *TOK to span it and return 1; otherwise
   return 0.  A pp-number starts with a digit, or with a period
   followed by a digit, and continues through digits, identifier
   characters, periods, and exponent markers (e, E, p, P) followed by
   a sign.  */
static int
get_pp_number (struct macro_buffer *tok, char *p, char *end)
{
  char *cursor = p;

  if (p >= end)
    return 0;

  if (! macro_is_digit (*p)
      && ! (*p == '.'
	    && p + 2 <= end
	    && macro_is_digit (p[1])))
    return 0;

  while (cursor < end)
    {
      /* An exponent marker takes its sign along with it.  */
      if (cursor + 2 <= end
	  && strchr ("eEpP", *cursor)
	  && (cursor[1] == '+' || cursor[1] == '-'))
	{
	  cursor += 2;
	  continue;
	}

      if (! (macro_is_digit (*cursor)
	     || macro_is_identifier_nondigit (*cursor)
	     || *cursor == '.'))
	break;

      cursor++;
    }

  set_token (tok, p, cursor);
  return 1;
}
322
323
324
/* If the text from P up to (but not including) END begins with a
   character constant --- optionally prefixed by L, u or U --- set
   *TOK to span that constant and return 1.  Otherwise return 0.
   Raise an error for an empty constant or one with no closing
   quote.  */
static int
get_character_constant (struct macro_buffer *tok, char *p, char *end)
{
  /* ISO/IEC 9899:1999 (E)  Section 6.4.4.4  paragraph 1
     What really matters, though, is agreeing with GDB's own C/C++
     lexer, so escape sequences are handed to c_parse_escape.  */
  char *cursor;
  int char_count = 0;

  if (! ((p + 1 <= end && *p == '\'')
	 || (p + 2 <= end
	     && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
	     && p[1] == '\'')))
    return 0;

  /* Step over the opening quote, and the prefix if there is one.  */
  switch (*p)
    {
    case '\'':
      cursor = p + 1;
      break;

    case 'L':
    case 'u':
    case 'U':
      cursor = p + 2;
      break;

    default:
      gdb_assert_not_reached ("unexpected character constant");
    }

  for (;;)
    {
      if (cursor >= end)
	error (_("Unmatched single quote."));

      if (*cursor == '\'')
	{
	  if (char_count == 0)
	    error (_("A character constant must contain at least one "
		     "character."));
	  cursor++;
	  break;
	}

      if (*cursor == '\\')
	{
	  /* Let c_parse_escape consume whatever follows the
	     backslash, then advance by the backslash plus that.  */
	  const char *esc_begin = cursor + 1;
	  const char *esc_end = esc_begin;

	  char_count += c_parse_escape (&esc_end, NULL);
	  cursor += 1 + (esc_end - esc_begin);
	}
      else
	{
	  cursor++;
	  char_count++;
	}
    }

  set_token (tok, p, cursor);
  return 1;
}
385
386
/* If the text from P up to (but not including) END begins with a
   string literal --- optionally prefixed by L, u or U --- set *TOK to
   span that literal and return 1.  Otherwise return 0.  Raise an
   error if the literal has no closing quote or contains a
   newline.  */
static int
get_string_literal (struct macro_buffer *tok, char *p, char *end)
{
  char *cursor;

  if (! ((p + 1 <= end
	  && *p == '"')
	 || (p + 2 <= end
	     && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
	     && p[1] == '"')))
    return 0;

  /* Step over the opening quote, and the prefix if there is one.  */
  switch (*p)
    {
    case '"':
      cursor = p + 1;
      break;

    case 'L':
    case 'u':
    case 'U':
      cursor = p + 2;
      break;

    default:
      gdb_assert_not_reached ("unexpected string literal");
    }

  for (;;)
    {
      if (cursor >= end)
	error (_("Unterminated string in expression."));

      if (*cursor == '"')
	{
	  cursor++;
	  break;
	}

      if (*cursor == '\n')
	error (_("Newline characters may not appear in string "
		 "constants."));

      if (*cursor == '\\')
	{
	  /* Let c_parse_escape consume whatever follows the
	     backslash, then advance by the backslash plus that.  */
	  const char *esc_begin = cursor + 1;
	  const char *esc_end = esc_begin;

	  c_parse_escape (&esc_end, NULL);
	  cursor += 1 + (esc_end - esc_begin);
	}
      else
	cursor++;
    }

  set_token (tok, p, cursor);
  return 1;
}
439
440
/* If the text from P up to (but not including) END begins with a
   punctuator, set *TOK to span it and return 1; otherwise return
   0.  */
static int
get_punctuator (struct macro_buffer *tok, char *p, char *end)
{
  /* Correctness and clarity matter far more than speed here.  */

  /* ISO/IEC 9899:1999 (E)  Section 6.4.6  Paragraph 1.
     The order of this table is significant: a punctuator that is a
     prefix of another must come after its longer "extension", since
     the first entry that matches wins.  */
  static const char * const punctuators[] = {
    "[", "]", "(", ")", "{", "}", "?", ";", ",", "~",
    "...", ".",
    "->", "--", "-=", "-",
    "++", "+=", "+",
    "*=", "*",
    "!=", "!",
    "&&", "&=", "&",
    "/=", "/",
    "%>", "%:%:", "%:", "%=", "%",
    "^=", "^",
    "##", "#",
    ":>", ":",
    "||", "|=", "|",
    "<<=", "<<", "<=", "<:", "<%", "<",
    ">>=", ">>", ">=", ">",
    "==", "=",
    0
  };

  const char * const *entry;

  /* An empty input holds no punctuator.  */
  if (p + 1 > end)
    return 0;

  for (entry = punctuators; *entry != 0; entry++)
    {
      const char *candidate = *entry;
      int len;

      /* Cheap first-character filter before the full comparison.  */
      if (p[0] != candidate[0])
	continue;

      len = strlen (candidate);
      if (p + len <= end
	  && memcmp (p, candidate, len) == 0)
	{
	  set_token (tok, p, p + len);
	  return 1;
	}
    }

  return 0;
}
494
495
496/* Peel the next preprocessor token off of SRC, and put it in TOK.
497   Mutate TOK to refer to the first token in SRC, and mutate SRC to
498   refer to the text after that token.  SRC must be a shared buffer;
499   the resulting TOK will be shared, pointing into the same string SRC
500   does.  Initialize TOK's last_token field.  Return non-zero if we
501   succeed, or 0 if we didn't find any more tokens in SRC.  */
502static int
503get_token (struct macro_buffer *tok,
504           struct macro_buffer *src)
505{
506  char *p = src->text;
507  char *end = p + src->len;
508
509  gdb_assert (src->shared);
510
511  /* From the ISO C standard, ISO/IEC 9899:1999 (E), section 6.4:
512
513     preprocessing-token:
514         header-name
515         identifier
516         pp-number
517         character-constant
518         string-literal
519         punctuator
520         each non-white-space character that cannot be one of the above
521
522     We don't have to deal with header-name tokens, since those can
523     only occur after a #include, which we will never see.  */
524
525  while (p < end)
526    if (macro_is_whitespace (*p))
527      p++;
528    else if (get_comment (tok, p, end))
529      p += tok->len;
530    else if (get_pp_number (tok, p, end)
531             || get_character_constant (tok, p, end)
532             || get_string_literal (tok, p, end)
533             /* Note: the grammar in the standard seems to be
534                ambiguous: L'x' can be either a wide character
535                constant, or an identifier followed by a normal
536                character constant.  By trying `get_identifier' after
537                we try get_character_constant and get_string_literal,
538                we give the wide character syntax precedence.  Now,
539                since GDB doesn't handle wide character constants
540                anyway, is this the right thing to do?  */
541             || get_identifier (tok, p, end)
542             || get_punctuator (tok, p, end))
543      {
544        /* How many characters did we consume, including whitespace?  */
545        int consumed = p - src->text + tok->len;
546
547        src->text += consumed;
548        src->len -= consumed;
549        return 1;
550      }
551    else
552      {
553        /* We have found a "non-whitespace character that cannot be
554           one of the above."  Make a token out of it.  */
555        int consumed;
556
557        set_token (tok, p, p + 1);
558        consumed = p - src->text + tok->len;
559        src->text += consumed;
560        src->len -= consumed;
561        return 1;
562      }
563
564  return 0;
565}
566
567
568
569/* Appending token strings, with and without splicing  */
570
571
572/* Append the macro buffer SRC to the end of DEST, and ensure that
573   doing so doesn't splice the token at the end of SRC with the token
574   at the beginning of DEST.  SRC and DEST must have their last_token
575   fields set.  Upon return, DEST's last_token field is set correctly.
576
577   For example:
578
579   If DEST is "(" and SRC is "y", then we can return with
580   DEST set to "(y" --- we've simply appended the two buffers.
581
582   However, if DEST is "x" and SRC is "y", then we must not return
583   with DEST set to "xy" --- that would splice the two tokens "x" and
584   "y" together to make a single token "xy".  However, it would be
585   fine to return with DEST set to "x y".  Similarly, "<" and "<" must
586   yield "< <", not "<<", etc.  */
587static void
588append_tokens_without_splicing (struct macro_buffer *dest,
589                                struct macro_buffer *src)
590{
591  int original_dest_len = dest->len;
592  struct macro_buffer dest_tail, new_token;
593
594  gdb_assert (src->last_token != -1);
595  gdb_assert (dest->last_token != -1);
596
597  /* First, just try appending the two, and call get_token to see if
598     we got a splice.  */
599  appendmem (dest, src->text, src->len);
600
601  /* If DEST originally had no token abutting its end, then we can't
602     have spliced anything, so we're done.  */
603  if (dest->last_token == original_dest_len)
604    {
605      dest->last_token = original_dest_len + src->last_token;
606      return;
607    }
608
609  /* Set DEST_TAIL to point to the last token in DEST, followed by
610     all the stuff we just appended.  */
611  init_shared_buffer (&dest_tail,
612                      dest->text + dest->last_token,
613                      dest->len - dest->last_token);
614
615  /* Re-parse DEST's last token.  We know that DEST used to contain
616     at least one token, so if it doesn't contain any after the
617     append, then we must have spliced "/" and "*" or "/" and "/" to
618     make a comment start.  (Just for the record, I got this right
619     the first time.  This is not a bug fix.)  */
620  if (get_token (&new_token, &dest_tail)
621      && (new_token.text + new_token.len
622          == dest->text + original_dest_len))
623    {
624      /* No splice, so we're done.  */
625      dest->last_token = original_dest_len + src->last_token;
626      return;
627    }
628
629  /* Okay, a simple append caused a splice.  Let's chop dest back to
630     its original length and try again, but separate the texts with a
631     space.  */
632  dest->len = original_dest_len;
633  appendc (dest, ' ');
634  appendmem (dest, src->text, src->len);
635
636  init_shared_buffer (&dest_tail,
637                      dest->text + dest->last_token,
638                      dest->len - dest->last_token);
639
640  /* Try to re-parse DEST's last token, as above.  */
641  if (get_token (&new_token, &dest_tail)
642      && (new_token.text + new_token.len
643          == dest->text + original_dest_len))
644    {
645      /* No splice, so we're done.  */
646      dest->last_token = original_dest_len + 1 + src->last_token;
647      return;
648    }
649
650  /* As far as I know, there's no case where inserting a space isn't
651     enough to prevent a splice.  */
652  internal_error (__FILE__, __LINE__,
653                  _("unable to avoid splicing tokens during macro expansion"));
654}
655
656/* Stringify an argument, and insert it into DEST.  ARG is the text to
657   stringify; it is LEN bytes long.  */
658
659static void
660stringify (struct macro_buffer *dest, const char *arg, int len)
661{
662  /* Trim initial whitespace from ARG.  */
663  while (len > 0 && macro_is_whitespace (*arg))
664    {
665      ++arg;
666      --len;
667    }
668
669  /* Trim trailing whitespace from ARG.  */
670  while (len > 0 && macro_is_whitespace (arg[len - 1]))
671    --len;
672
673  /* Insert the string.  */
674  appendc (dest, '"');
675  while (len > 0)
676    {
677      /* We could try to handle strange cases here, like control
678	 characters, but there doesn't seem to be much point.  */
679      if (macro_is_whitespace (*arg))
680	{
681	  /* Replace a sequence of whitespace with a single space.  */
682	  appendc (dest, ' ');
683	  while (len > 1 && macro_is_whitespace (arg[1]))
684	    {
685	      ++arg;
686	      --len;
687	    }
688	}
689      else if (*arg == '\\' || *arg == '"')
690	{
691	  appendc (dest, '\\');
692	  appendc (dest, *arg);
693	}
694      else
695	appendc (dest, *arg);
696      ++arg;
697      --len;
698    }
699  appendc (dest, '"');
700  dest->last_token = dest->len;
701}
702
703/* See macroexp.h.  */
704
705char *
706macro_stringify (const char *str)
707{
708  struct macro_buffer buffer;
709  int len = strlen (str);
710
711  init_buffer (&buffer, len);
712  stringify (&buffer, str, len);
713  appendc (&buffer, '\0');
714
715  return free_buffer_return_text (&buffer);
716}
717
718
719/* Expanding macros!  */
720
721
/* One node in a singly-linked list naming the macros that are
   currently being expanded.  The list is used to detect expansion
   loops.  */
struct macro_name_list
{
  /* Name of a macro currently under expansion.  */
  const char *name;

  /* The rest of the list, or a null pointer at the end.  */
  struct macro_name_list *next;
};
728
729
730/* Return non-zero if we are currently expanding the macro named NAME,
731   according to LIST; otherwise, return zero.
732
733   You know, it would be possible to get rid of all the NO_LOOP
734   arguments to these functions by simply generating a new lookup
735   function and baton which refuses to find the definition for a
736   particular macro, and otherwise delegates the decision to another
737   function/baton pair.  But that makes the linked list of excluded
738   macros chained through untyped baton pointers, which will make it
739   harder to debug.  :(  */
740static int
741currently_rescanning (struct macro_name_list *list, const char *name)
742{
743  for (; list; list = list->next)
744    if (strcmp (name, list->name) == 0)
745      return 1;
746
747  return 0;
748}
749
750
751/* Gather the arguments to a macro expansion.
752
753   NAME is the name of the macro being invoked.  (It's only used for
754   printing error messages.)
755
756   Assume that SRC is the text of the macro invocation immediately
757   following the macro name.  For example, if we're processing the
758   text foo(bar, baz), then NAME would be foo and SRC will be (bar,
759   baz).
760
761   If SRC doesn't start with an open paren ( token at all, return
762   zero, leave SRC unchanged, and don't set *ARGC_P to anything.
763
764   If SRC doesn't contain a properly terminated argument list, then
765   raise an error.
766
767   For a variadic macro, NARGS holds the number of formal arguments to
768   the macro.  For a GNU-style variadic macro, this should be the
769   number of named arguments.  For a non-variadic macro, NARGS should
770   be -1.
771
772   Otherwise, return a pointer to the first element of an array of
773   macro buffers referring to the argument texts, and set *ARGC_P to
774   the number of arguments we found --- the number of elements in the
775   array.  The macro buffers share their text with SRC, and their
776   last_token fields are initialized.  The array is allocated with
777   xmalloc, and the caller is responsible for freeing it.
778
779   NOTE WELL: if SRC starts with a open paren ( token followed
780   immediately by a close paren ) token (e.g., the invocation looks
781   like "foo()"), we treat that as one argument, which happens to be
782   the empty list of tokens.  The caller should keep in mind that such
783   a sequence of tokens is a valid way to invoke one-parameter
784   function-like macros, but also a valid way to invoke zero-parameter
785   function-like macros.  Eeew.
786
787   Consume the tokens from SRC; after this call, SRC contains the text
788   following the invocation.  */
789
790static struct macro_buffer *
791gather_arguments (const char *name, struct macro_buffer *src,
792		  int nargs, int *argc_p)
793{
794  struct macro_buffer tok;
795  int args_len, args_size;
796  struct macro_buffer *args = NULL;
797  struct cleanup *back_to = make_cleanup (free_current_contents, &args);
798
799  /* Does SRC start with an opening paren token?  Read from a copy of
800     SRC, so SRC itself is unaffected if we don't find an opening
801     paren.  */
802  {
803    struct macro_buffer temp;
804
805    init_shared_buffer (&temp, src->text, src->len);
806
807    if (! get_token (&tok, &temp)
808        || tok.len != 1
809        || tok.text[0] != '(')
810      {
811        discard_cleanups (back_to);
812        return 0;
813      }
814  }
815
816  /* Consume SRC's opening paren.  */
817  get_token (&tok, src);
818
819  args_len = 0;
820  args_size = 6;
821  args = XNEWVEC (struct macro_buffer, args_size);
822
823  for (;;)
824    {
825      struct macro_buffer *arg;
826      int depth;
827
828      /* Make sure we have room for the next argument.  */
829      if (args_len >= args_size)
830        {
831          args_size *= 2;
832          args = XRESIZEVEC (struct macro_buffer, args, args_size);
833        }
834
835      /* Initialize the next argument.  */
836      arg = &args[args_len++];
837      set_token (arg, src->text, src->text);
838
839      /* Gather the argument's tokens.  */
840      depth = 0;
841      for (;;)
842        {
843          if (! get_token (&tok, src))
844            error (_("Malformed argument list for macro `%s'."), name);
845
846          /* Is tok an opening paren?  */
847          if (tok.len == 1 && tok.text[0] == '(')
848            depth++;
849
850          /* Is tok is a closing paren?  */
851          else if (tok.len == 1 && tok.text[0] == ')')
852            {
853              /* If it's a closing paren at the top level, then that's
854                 the end of the argument list.  */
855              if (depth == 0)
856                {
857		  /* In the varargs case, the last argument may be
858		     missing.  Add an empty argument in this case.  */
859		  if (nargs != -1 && args_len == nargs - 1)
860		    {
861		      /* Make sure we have room for the argument.  */
862		      if (args_len >= args_size)
863			{
864			  args_size++;
865			  args = XRESIZEVEC (struct macro_buffer, args,
866					     args_size);
867			}
868		      arg = &args[args_len++];
869		      set_token (arg, src->text, src->text);
870		    }
871
872                  discard_cleanups (back_to);
873                  *argc_p = args_len;
874                  return args;
875                }
876
877              depth--;
878            }
879
880          /* If tok is a comma at top level, then that's the end of
881             the current argument.  However, if we are handling a
882             variadic macro and we are computing the last argument, we
883             want to include the comma and remaining tokens.  */
884          else if (tok.len == 1 && tok.text[0] == ',' && depth == 0
885		   && (nargs == -1 || args_len < nargs))
886            break;
887
888          /* Extend the current argument to enclose this token.  If
889             this is the current argument's first token, leave out any
890             leading whitespace, just for aesthetics.  */
891          if (arg->len == 0)
892            {
893              arg->text = tok.text;
894              arg->len = tok.len;
895              arg->last_token = 0;
896            }
897          else
898            {
899              arg->len = (tok.text + tok.len) - arg->text;
900              arg->last_token = tok.text - arg->text;
901            }
902        }
903    }
904}
905
906
907/* The `expand' and `substitute_args' functions both invoke `scan'
908   recursively, so we need a forward declaration somewhere.  */
909static void scan (struct macro_buffer *dest,
910                  struct macro_buffer *src,
911                  struct macro_name_list *no_loop,
912                  macro_lookup_ftype *lookup_func,
913                  void *lookup_baton);
914
915
916/* A helper function for substitute_args.
917
918   ARGV is a vector of all the arguments; ARGC is the number of
919   arguments.  IS_VARARGS is true if the macro being substituted is a
920   varargs macro; in this case VA_ARG_NAME is the name of the
921   "variable" argument.  VA_ARG_NAME is ignored if IS_VARARGS is
922   false.
923
924   If the token TOK is the name of a parameter, return the parameter's
925   index.  If TOK is not an argument, return -1.  */
926
927static int
928find_parameter (const struct macro_buffer *tok,
929		int is_varargs, const struct macro_buffer *va_arg_name,
930		int argc, const char * const *argv)
931{
932  int i;
933
934  if (! tok->is_identifier)
935    return -1;
936
937  for (i = 0; i < argc; ++i)
938    if (tok->len == strlen (argv[i])
939	&& !memcmp (tok->text, argv[i], tok->len))
940      return i;
941
942  if (is_varargs && tok->len == va_arg_name->len
943      && ! memcmp (tok->text, va_arg_name->text, tok->len))
944    return argc - 1;
945
946  return -1;
947}
948
949/* Given the macro definition DEF, being invoked with the actual
950   arguments given by ARGC and ARGV, substitute the arguments into the
951   replacement list, and store the result in DEST.
952
953   IS_VARARGS should be true if DEF is a varargs macro.  In this case,
954   VA_ARG_NAME should be the name of the "variable" argument -- either
955   __VA_ARGS__ for c99-style varargs, or the final argument name, for
956   GNU-style varargs.  If IS_VARARGS is false, this parameter is
957   ignored.
958
959   If it is necessary to expand macro invocations in one of the
960   arguments, use LOOKUP_FUNC and LOOKUP_BATON to find the macro
961   definitions, and don't expand invocations of the macros listed in
962   NO_LOOP.  */
963
964static void
965substitute_args (struct macro_buffer *dest,
966                 struct macro_definition *def,
967		 int is_varargs, const struct macro_buffer *va_arg_name,
968                 int argc, struct macro_buffer *argv,
969                 struct macro_name_list *no_loop,
970                 macro_lookup_ftype *lookup_func,
971                 void *lookup_baton)
972{
973  /* A macro buffer for the macro's replacement list.  */
974  struct macro_buffer replacement_list;
975  /* The token we are currently considering.  */
976  struct macro_buffer tok;
977  /* The replacement list's pointer from just before TOK was lexed.  */
978  char *original_rl_start;
979  /* We have a single lookahead token to handle token splicing.  */
980  struct macro_buffer lookahead;
981  /* The lookahead token might not be valid.  */
982  int lookahead_valid;
983  /* The replacement list's pointer from just before LOOKAHEAD was
984     lexed.  */
985  char *lookahead_rl_start;
986
987  init_shared_buffer (&replacement_list, def->replacement,
988                      strlen (def->replacement));
989
990  gdb_assert (dest->len == 0);
991  dest->last_token = 0;
992
993  original_rl_start = replacement_list.text;
994  if (! get_token (&tok, &replacement_list))
995    return;
996  lookahead_rl_start = replacement_list.text;
997  lookahead_valid = get_token (&lookahead, &replacement_list);
998
999  for (;;)
1000    {
1001      /* Just for aesthetics.  If we skipped some whitespace, copy
1002         that to DEST.  */
1003      if (tok.text > original_rl_start)
1004        {
1005          appendmem (dest, original_rl_start, tok.text - original_rl_start);
1006          dest->last_token = dest->len;
1007        }
1008
1009      /* Is this token the stringification operator?  */
1010      if (tok.len == 1
1011          && tok.text[0] == '#')
1012	{
1013	  int arg;
1014
1015	  if (!lookahead_valid)
1016	    error (_("Stringification operator requires an argument."));
1017
1018	  arg = find_parameter (&lookahead, is_varargs, va_arg_name,
1019				def->argc, def->argv);
1020	  if (arg == -1)
1021	    error (_("Argument to stringification operator must name "
1022		     "a macro parameter."));
1023
1024	  stringify (dest, argv[arg].text, argv[arg].len);
1025
1026	  /* Read one token and let the loop iteration code handle the
1027	     rest.  */
1028	  lookahead_rl_start = replacement_list.text;
1029	  lookahead_valid = get_token (&lookahead, &replacement_list);
1030	}
1031      /* Is this token the splicing operator?  */
1032      else if (tok.len == 2
1033	       && tok.text[0] == '#'
1034	       && tok.text[1] == '#')
1035	error (_("Stray splicing operator"));
1036      /* Is the next token the splicing operator?  */
1037      else if (lookahead_valid
1038	       && lookahead.len == 2
1039	       && lookahead.text[0] == '#'
1040	       && lookahead.text[1] == '#')
1041	{
1042	  int finished = 0;
1043	  int prev_was_comma = 0;
1044
1045	  /* Note that GCC warns if the result of splicing is not a
1046	     token.  In the debugger there doesn't seem to be much
1047	     benefit from doing this.  */
1048
1049	  /* Insert the first token.  */
1050	  if (tok.len == 1 && tok.text[0] == ',')
1051	    prev_was_comma = 1;
1052	  else
1053	    {
1054	      int arg = find_parameter (&tok, is_varargs, va_arg_name,
1055					def->argc, def->argv);
1056
1057	      if (arg != -1)
1058		appendmem (dest, argv[arg].text, argv[arg].len);
1059	      else
1060		appendmem (dest, tok.text, tok.len);
1061	    }
1062
1063	  /* Apply a possible sequence of ## operators.  */
1064	  for (;;)
1065	    {
1066	      if (! get_token (&tok, &replacement_list))
1067		error (_("Splicing operator at end of macro"));
1068
1069	      /* Handle a comma before a ##.  If we are handling
1070		 varargs, and the token on the right hand side is the
1071		 varargs marker, and the final argument is empty or
1072		 missing, then drop the comma.  This is a GNU
1073		 extension.  There is one ambiguous case here,
1074		 involving pedantic behavior with an empty argument,
1075		 but we settle that in favor of GNU-style (GCC uses an
1076		 option).  If we aren't dealing with varargs, we
1077		 simply insert the comma.  */
1078	      if (prev_was_comma)
1079		{
1080		  if (! (is_varargs
1081			 && tok.len == va_arg_name->len
1082			 && !memcmp (tok.text, va_arg_name->text, tok.len)
1083			 && argv[argc - 1].len == 0))
1084		    appendmem (dest, ",", 1);
1085		  prev_was_comma = 0;
1086		}
1087
1088	      /* Insert the token.  If it is a parameter, insert the
1089		 argument.  If it is a comma, treat it specially.  */
1090	      if (tok.len == 1 && tok.text[0] == ',')
1091		prev_was_comma = 1;
1092	      else
1093		{
1094		  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1095					    def->argc, def->argv);
1096
1097		  if (arg != -1)
1098		    appendmem (dest, argv[arg].text, argv[arg].len);
1099		  else
1100		    appendmem (dest, tok.text, tok.len);
1101		}
1102
1103	      /* Now read another token.  If it is another splice, we
1104		 loop.  */
1105	      original_rl_start = replacement_list.text;
1106	      if (! get_token (&tok, &replacement_list))
1107		{
1108		  finished = 1;
1109		  break;
1110		}
1111
1112	      if (! (tok.len == 2
1113		     && tok.text[0] == '#'
1114		     && tok.text[1] == '#'))
1115		break;
1116	    }
1117
1118	  if (prev_was_comma)
1119	    {
1120	      /* We saw a comma.  Insert it now.  */
1121	      appendmem (dest, ",", 1);
1122	    }
1123
1124          dest->last_token = dest->len;
1125	  if (finished)
1126	    lookahead_valid = 0;
1127	  else
1128	    {
1129	      /* Set up for the loop iterator.  */
1130	      lookahead = tok;
1131	      lookahead_rl_start = original_rl_start;
1132	      lookahead_valid = 1;
1133	    }
1134	}
1135      else
1136	{
1137	  /* Is this token an identifier?  */
1138	  int substituted = 0;
1139	  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1140				    def->argc, def->argv);
1141
1142	  if (arg != -1)
1143	    {
1144	      struct macro_buffer arg_src;
1145
1146	      /* Expand any macro invocations in the argument text,
1147		 and append the result to dest.  Remember that scan
1148		 mutates its source, so we need to scan a new buffer
1149		 referring to the argument's text, not the argument
1150		 itself.  */
1151	      init_shared_buffer (&arg_src, argv[arg].text, argv[arg].len);
1152	      scan (dest, &arg_src, no_loop, lookup_func, lookup_baton);
1153	      substituted = 1;
1154	    }
1155
1156	  /* If it wasn't a parameter, then just copy it across.  */
1157	  if (! substituted)
1158	    append_tokens_without_splicing (dest, &tok);
1159	}
1160
1161      if (! lookahead_valid)
1162	break;
1163
1164      tok = lookahead;
1165      original_rl_start = lookahead_rl_start;
1166
1167      lookahead_rl_start = replacement_list.text;
1168      lookahead_valid = get_token (&lookahead, &replacement_list);
1169    }
1170}
1171
1172
1173/* Expand a call to a macro named ID, whose definition is DEF.  Append
1174   its expansion to DEST.  SRC is the input text following the ID
1175   token.  We are currently rescanning the expansions of the macros
1176   named in NO_LOOP; don't re-expand them.  Use LOOKUP_FUNC and
1177   LOOKUP_BATON to find definitions for any nested macro references.
1178
1179   Return 1 if we decided to expand it, zero otherwise.  (If it's a
1180   function-like macro name that isn't followed by an argument list,
1181   we don't expand it.)  If we return zero, leave SRC unchanged.  */
1182static int
1183expand (const char *id,
1184        struct macro_definition *def,
1185        struct macro_buffer *dest,
1186        struct macro_buffer *src,
1187        struct macro_name_list *no_loop,
1188        macro_lookup_ftype *lookup_func,
1189        void *lookup_baton)
1190{
1191  struct macro_name_list new_no_loop;
1192
1193  /* Create a new node to be added to the front of the no-expand list.
1194     This list is appropriate for re-scanning replacement lists, but
1195     it is *not* appropriate for scanning macro arguments; invocations
1196     of the macro whose arguments we are gathering *do* get expanded
1197     there.  */
1198  new_no_loop.name = id;
1199  new_no_loop.next = no_loop;
1200
1201  /* What kind of macro are we expanding?  */
1202  if (def->kind == macro_object_like)
1203    {
1204      struct macro_buffer replacement_list;
1205
1206      init_shared_buffer (&replacement_list, def->replacement,
1207                          strlen (def->replacement));
1208
1209      scan (dest, &replacement_list, &new_no_loop, lookup_func, lookup_baton);
1210      return 1;
1211    }
1212  else if (def->kind == macro_function_like)
1213    {
1214      struct cleanup *back_to = make_cleanup (null_cleanup, 0);
1215      int argc = 0;
1216      struct macro_buffer *argv = NULL;
1217      struct macro_buffer substituted;
1218      struct macro_buffer substituted_src;
1219      struct macro_buffer va_arg_name = {0};
1220      int is_varargs = 0;
1221
1222      if (def->argc >= 1)
1223	{
1224	  if (strcmp (def->argv[def->argc - 1], "...") == 0)
1225	    {
1226	      /* In C99-style varargs, substitution is done using
1227		 __VA_ARGS__.  */
1228	      init_shared_buffer (&va_arg_name, "__VA_ARGS__",
1229				  strlen ("__VA_ARGS__"));
1230	      is_varargs = 1;
1231	    }
1232	  else
1233	    {
1234	      int len = strlen (def->argv[def->argc - 1]);
1235
1236	      if (len > 3
1237		  && strcmp (def->argv[def->argc - 1] + len - 3, "...") == 0)
1238		{
1239		  /* In GNU-style varargs, the name of the
1240		     substitution parameter is the name of the formal
1241		     argument without the "...".  */
1242		  init_shared_buffer (&va_arg_name,
1243				      def->argv[def->argc - 1],
1244				      len - 3);
1245		  is_varargs = 1;
1246		}
1247	    }
1248	}
1249
1250      make_cleanup (free_current_contents, &argv);
1251      argv = gather_arguments (id, src, is_varargs ? def->argc : -1,
1252			       &argc);
1253
1254      /* If we couldn't find any argument list, then we don't expand
1255         this macro.  */
1256      if (! argv)
1257        {
1258          do_cleanups (back_to);
1259          return 0;
1260        }
1261
1262      /* Check that we're passing an acceptable number of arguments for
1263         this macro.  */
1264      if (argc != def->argc)
1265        {
1266	  if (is_varargs && argc >= def->argc - 1)
1267	    {
1268	      /* Ok.  */
1269	    }
1270          /* Remember that a sequence of tokens like "foo()" is a
1271             valid invocation of a macro expecting either zero or one
1272             arguments.  */
1273          else if (! (argc == 1
1274		      && argv[0].len == 0
1275		      && def->argc == 0))
1276            error (_("Wrong number of arguments to macro `%s' "
1277                   "(expected %d, got %d)."),
1278                   id, def->argc, argc);
1279        }
1280
1281      /* Note that we don't expand macro invocations in the arguments
1282         yet --- we let subst_args take care of that.  Parameters that
1283         appear as operands of the stringifying operator "#" or the
1284         splicing operator "##" don't get macro references expanded,
1285         so we can't really tell whether it's appropriate to macro-
1286         expand an argument until we see how it's being used.  */
1287      init_buffer (&substituted, 0);
1288      make_cleanup (cleanup_macro_buffer, &substituted);
1289      substitute_args (&substituted, def, is_varargs, &va_arg_name,
1290		       argc, argv, no_loop, lookup_func, lookup_baton);
1291
1292      /* Now `substituted' is the macro's replacement list, with all
1293         argument values substituted into it properly.  Re-scan it for
1294         macro references, but don't expand invocations of this macro.
1295
1296         We create a new buffer, `substituted_src', which points into
1297         `substituted', and scan that.  We can't scan `substituted'
1298         itself, since the tokenization process moves the buffer's
1299         text pointer around, and we still need to be able to find
1300         `substituted's original text buffer after scanning it so we
1301         can free it.  */
1302      init_shared_buffer (&substituted_src, substituted.text, substituted.len);
1303      scan (dest, &substituted_src, &new_no_loop, lookup_func, lookup_baton);
1304
1305      do_cleanups (back_to);
1306
1307      return 1;
1308    }
1309  else
1310    internal_error (__FILE__, __LINE__, _("bad macro definition kind"));
1311}
1312
1313
1314/* If the single token in SRC_FIRST followed by the tokens in SRC_REST
1315   constitute a macro invokation not forbidden in NO_LOOP, append its
1316   expansion to DEST and return non-zero.  Otherwise, return zero, and
1317   leave DEST unchanged.
1318
1319   SRC_FIRST and SRC_REST must be shared buffers; DEST must not be one.
1320   SRC_FIRST must be a string built by get_token.  */
1321static int
1322maybe_expand (struct macro_buffer *dest,
1323              struct macro_buffer *src_first,
1324              struct macro_buffer *src_rest,
1325              struct macro_name_list *no_loop,
1326              macro_lookup_ftype *lookup_func,
1327              void *lookup_baton)
1328{
1329  gdb_assert (src_first->shared);
1330  gdb_assert (src_rest->shared);
1331  gdb_assert (! dest->shared);
1332
1333  /* Is this token an identifier?  */
1334  if (src_first->is_identifier)
1335    {
1336      /* Make a null-terminated copy of it, since that's what our
1337         lookup function expects.  */
1338      char *id = (char *) xmalloc (src_first->len + 1);
1339      struct cleanup *back_to = make_cleanup (xfree, id);
1340
1341      memcpy (id, src_first->text, src_first->len);
1342      id[src_first->len] = 0;
1343
1344      /* If we're currently re-scanning the result of expanding
1345         this macro, don't expand it again.  */
1346      if (! currently_rescanning (no_loop, id))
1347        {
1348          /* Does this identifier have a macro definition in scope?  */
1349          struct macro_definition *def = lookup_func (id, lookup_baton);
1350
1351          if (def && expand (id, def, dest, src_rest, no_loop,
1352                             lookup_func, lookup_baton))
1353            {
1354              do_cleanups (back_to);
1355              return 1;
1356            }
1357        }
1358
1359      do_cleanups (back_to);
1360    }
1361
1362  return 0;
1363}
1364
1365
1366/* Expand macro references in SRC, appending the results to DEST.
1367   Assume we are re-scanning the result of expanding the macros named
1368   in NO_LOOP, and don't try to re-expand references to them.
1369
1370   SRC must be a shared buffer; DEST must not be one.  */
1371static void
1372scan (struct macro_buffer *dest,
1373      struct macro_buffer *src,
1374      struct macro_name_list *no_loop,
1375      macro_lookup_ftype *lookup_func,
1376      void *lookup_baton)
1377{
1378  gdb_assert (src->shared);
1379  gdb_assert (! dest->shared);
1380
1381  for (;;)
1382    {
1383      struct macro_buffer tok;
1384      char *original_src_start = src->text;
1385
1386      /* Find the next token in SRC.  */
1387      if (! get_token (&tok, src))
1388        break;
1389
1390      /* Just for aesthetics.  If we skipped some whitespace, copy
1391         that to DEST.  */
1392      if (tok.text > original_src_start)
1393        {
1394          appendmem (dest, original_src_start, tok.text - original_src_start);
1395          dest->last_token = dest->len;
1396        }
1397
1398      if (! maybe_expand (dest, &tok, src, no_loop, lookup_func, lookup_baton))
1399        /* We didn't end up expanding tok as a macro reference, so
1400           simply append it to dest.  */
1401        append_tokens_without_splicing (dest, &tok);
1402    }
1403
1404  /* Just for aesthetics.  If there was any trailing whitespace in
1405     src, copy it to dest.  */
1406  if (src->len)
1407    {
1408      appendmem (dest, src->text, src->len);
1409      dest->last_token = dest->len;
1410    }
1411}
1412
1413
1414char *
1415macro_expand (const char *source,
1416              macro_lookup_ftype *lookup_func,
1417              void *lookup_func_baton)
1418{
1419  struct macro_buffer src, dest;
1420  struct cleanup *back_to;
1421
1422  init_shared_buffer (&src, source, strlen (source));
1423
1424  init_buffer (&dest, 0);
1425  dest.last_token = 0;
1426  back_to = make_cleanup (cleanup_macro_buffer, &dest);
1427
1428  scan (&dest, &src, 0, lookup_func, lookup_func_baton);
1429
1430  appendc (&dest, '\0');
1431
1432  discard_cleanups (back_to);
1433  return dest.text;
1434}
1435
1436
1437char *
1438macro_expand_once (const char *source,
1439                   macro_lookup_ftype *lookup_func,
1440                   void *lookup_func_baton)
1441{
1442  error (_("Expand-once not implemented yet."));
1443}
1444
1445
1446char *
1447macro_expand_next (const char **lexptr,
1448                   macro_lookup_ftype *lookup_func,
1449                   void *lookup_baton)
1450{
1451  struct macro_buffer src, dest, tok;
1452  struct cleanup *back_to;
1453
1454  /* Set up SRC to refer to the input text, pointed to by *lexptr.  */
1455  init_shared_buffer (&src, *lexptr, strlen (*lexptr));
1456
1457  /* Set up DEST to receive the expansion, if there is one.  */
1458  init_buffer (&dest, 0);
1459  dest.last_token = 0;
1460  back_to = make_cleanup (cleanup_macro_buffer, &dest);
1461
1462  /* Get the text's first preprocessing token.  */
1463  if (! get_token (&tok, &src))
1464    {
1465      do_cleanups (back_to);
1466      return 0;
1467    }
1468
1469  /* If it's a macro invocation, expand it.  */
1470  if (maybe_expand (&dest, &tok, &src, 0, lookup_func, lookup_baton))
1471    {
1472      /* It was a macro invocation!  Package up the expansion as a
1473         null-terminated string and return it.  Set *lexptr to the
1474         start of the next token in the input.  */
1475      appendc (&dest, '\0');
1476      discard_cleanups (back_to);
1477      *lexptr = src.text;
1478      return dest.text;
1479    }
1480  else
1481    {
1482      /* It wasn't a macro invocation.  */
1483      do_cleanups (back_to);
1484      return 0;
1485    }
1486}
1487