macroexp.c revision 1.5
1/* C preprocessor macro expansion for GDB.
2   Copyright (C) 2002-2015 Free Software Foundation, Inc.
3   Contributed by Red Hat, Inc.
4
5   This file is part of GDB.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19
20#include "defs.h"
21#include "gdb_obstack.h"
22#include "bcache.h"
23#include "macrotab.h"
24#include "macroexp.h"
25#include "c-lang.h"
26
27
28
29/* A resizeable, substringable string type.  */
30
31
/* A growable text buffer that can also serve as a window onto part
   of a string owned by someone else.  */
struct macro_buffer
{
  /* The characters.  Only the first LEN bytes are meaningful, though
     SIZE bytes are allocated.  A SIZE of zero means TEXT is not a
     malloc'ed block.  When SHARED is non-zero, TEXT points into some
     larger string, so nothing may be appended to it.  Because a
     buffer may be such a window, the text is not necessarily
     null-terminated.  */
  char *text;

  /* How many characters of TEXT are in use.  */
  int len;

  /* How many characters are allocated for TEXT.  This has no meaning
     for a shared buffer; there it is kept at zero so that every "is
     there room to append?" test fails without having to look at
     SHARED first.  */
  int size;

  /* Zero when TEXT is its own malloc block and may be realloc'ed.
     Non-zero when TEXT points into the middle of another block and
     must not be reallocated.  */
  int shared;

  /* Used to detect token splicing.

     The index within TEXT of the first character of the token that
     touches the end of TEXT.  It equals LEN when TEXT holds no tokens
     at all, and also when TEXT ends in whitespace (so that no token
     touches the end).  It is -1 when nothing is known about TEXT.  */
  int last_token;

  /* Meaningful only when this buffer holds a result from get_token:
     non-zero for an identifier token, zero for any other token.  */
  int is_identifier;
};
73
74
75/* Set the macro buffer *B to the empty string, guessing that its
76   final contents will fit in N bytes.  (It'll get resized if it
77   doesn't, so the guess doesn't have to be right.)  Allocate the
78   initial storage with xmalloc.  */
79static void
80init_buffer (struct macro_buffer *b, int n)
81{
82  b->size = n;
83  if (n > 0)
84    b->text = (char *) xmalloc (n);
85  else
86    b->text = NULL;
87  b->len = 0;
88  b->shared = 0;
89  b->last_token = -1;
90}
91
92
93/* Set the macro buffer *BUF to refer to the LEN bytes at ADDR, as a
94   shared substring.  */
95static void
96init_shared_buffer (struct macro_buffer *buf, char *addr, int len)
97{
98  buf->text = addr;
99  buf->len = len;
100  buf->shared = 1;
101  buf->size = 0;
102  buf->last_token = -1;
103}
104
105
106/* Free the text of the buffer B.  Raise an error if B is shared.  */
107static void
108free_buffer (struct macro_buffer *b)
109{
110  gdb_assert (! b->shared);
111  if (b->size)
112    xfree (b->text);
113}
114
115/* Like free_buffer, but return the text as an xstrdup()d string.
116   This only exists to try to make the API relatively clean.  */
117
118static char *
119free_buffer_return_text (struct macro_buffer *b)
120{
121  gdb_assert (! b->shared);
122  gdb_assert (b->size);
123  /* Nothing to do.  */
124  return b->text;
125}
126
/* Cleanup callback for macro buffers.  UNTYPED_BUF is a pointer to
   the struct macro_buffer whose text should be freed.  */
static void
cleanup_macro_buffer (void *untyped_buf)
{
  struct macro_buffer *buf = (struct macro_buffer *) untyped_buf;

  free_buffer (buf);
}
133
134
135/* Resize the buffer B to be at least N bytes long.  Raise an error if
136   B shouldn't be resized.  */
137static void
138resize_buffer (struct macro_buffer *b, int n)
139{
140  /* We shouldn't be trying to resize shared strings.  */
141  gdb_assert (! b->shared);
142
143  if (b->size == 0)
144    b->size = n;
145  else
146    while (b->size <= n)
147      b->size *= 2;
148
149  b->text = xrealloc (b->text, b->size);
150}
151
152
153/* Append the character C to the buffer B.  */
154static void
155appendc (struct macro_buffer *b, int c)
156{
157  int new_len = b->len + 1;
158
159  if (new_len > b->size)
160    resize_buffer (b, new_len);
161
162  b->text[b->len] = c;
163  b->len = new_len;
164}
165
166
167/* Append the LEN bytes at ADDR to the buffer B.  */
168static void
169appendmem (struct macro_buffer *b, char *addr, int len)
170{
171  int new_len = b->len + len;
172
173  if (new_len > b->size)
174    resize_buffer (b, new_len);
175
176  memcpy (b->text + b->len, addr, len);
177  b->len = new_len;
178}
179
180
181
182/* Recognizing preprocessor tokens.  */
183
184
/* Return 1 if C is a space, horizontal tab, newline, vertical tab,
   or form feed; return 0 for any other value.  */
int
macro_is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
      return 1;

    default:
      return 0;
    }
}
194
195
/* Return 1 if C is one of the decimal digits '0' through '9';
   return 0 otherwise.  */
int
macro_is_digit (int c)
{
  if (c < '0')
    return 0;

  return c <= '9';
}
201
202
/* Return 1 if C is an underscore or a letter in the ranges 'a'-'z'
   or 'A'-'Z'; return 0 otherwise.  */
int
macro_is_identifier_nondigit (int c)
{
  if (c == '_')
    return 1;

  if (c >= 'a' && c <= 'z')
    return 1;

  return c >= 'A' && c <= 'Z';
}
210
211
212static void
213set_token (struct macro_buffer *tok, char *start, char *end)
214{
215  init_shared_buffer (tok, start, end - start);
216  tok->last_token = 0;
217
218  /* Presumed; get_identifier may overwrite this.  */
219  tok->is_identifier = 0;
220}
221
222
/* If the text starting at P going up to (but not including) END
   starts with a comment, set *TOK to refer to that comment and
   return 1.  Otherwise, return zero.

   A block comment runs through its closing delimiter; signal an
   error if there is none before END.  A line comment runs up to, but
   does not include, the next newline, or to END if there is no
   newline.  */
static int
get_comment (struct macro_buffer *tok, char *p, char *end)
{
  char *start = p;
  char *scan;

  /* Both comment openers are two characters starting with '/'.  */
  if (end - p < 2 || p[0] != '/')
    return 0;

  if (p[1] == '*')
    {
      /* Look for the closing delimiter, which itself needs two
         characters.  */
      for (scan = start + 2; end - scan >= 2; scan++)
        if (scan[0] == '*' && scan[1] == '/')
          {
            set_token (tok, start, scan + 2);
            return 1;
          }

      error (_("Unterminated comment in macro expansion."));
    }
  else if (p[1] == '/')
    {
      scan = start + 2;
      while (scan < end && *scan != '\n')
        scan++;

      set_token (tok, start, scan);
      return 1;
    }

  return 0;
}
263
264
265static int
266get_identifier (struct macro_buffer *tok, char *p, char *end)
267{
268  if (p < end
269      && macro_is_identifier_nondigit (*p))
270    {
271      char *tok_start = p;
272
273      while (p < end
274             && (macro_is_identifier_nondigit (*p)
275                 || macro_is_digit (*p)))
276        p++;
277
278      set_token (tok, tok_start, p);
279      tok->is_identifier = 1;
280      return 1;
281    }
282  else
283    return 0;
284}
285
286
/* If the text starting at P going up to (but not including) END
   starts with a preprocessing number, set *TOK to refer to it and
   return 1.  Otherwise, return zero.

   A preprocessing number starts with a digit, or with a '.' followed
   by a digit.  It continues through digits, identifier characters,
   '.' characters, and exponent letters ('e', 'E', 'p', 'P')
   immediately followed by a sign.  */
static int
get_pp_number (struct macro_buffer *tok, char *p, char *end)
{
  char *tok_start = p;

  if (p >= end)
    return 0;

  if (! macro_is_digit (*p)
      && ! (*p == '.'
            && p + 2 <= end
            && macro_is_digit (p[1])))
    return 0;

  while (p < end)
    {
      /* Compare against the exponent letters one by one.  A strchr
         lookup would also match a NUL byte, because strchr treats
         the terminator as part of the searched string, and so a NUL
         followed by a sign would be swallowed as an exponent.  */
      if (p + 2 <= end
          && (*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P')
          && (p[1] == '+' || p[1] == '-'))
        p += 2;
      else if (macro_is_digit (*p)
               || macro_is_identifier_nondigit (*p)
               || *p == '.')
        p++;
      else
        break;
    }

  set_token (tok, tok_start, p);
  return 1;
}
318
319
320
/* If the text starting at P going up to (but not including) END
   starts with a character constant, optionally prefixed by 'L', 'u'
   or 'U', set *TOK to point to that character constant, and return
   1.  Otherwise, return zero.  Signal an error if the constant is
   empty or has no closing quote.  */
static int
get_character_constant (struct macro_buffer *tok, char *p, char *end)
{
  /* ISO/IEC 9899:1999 (E)  Section 6.4.4.4  paragraph 1
     But of course, what really matters is that we handle it the same
     way GDB's C/C++ lexer does.  So escape sequences are handed to
     c_parse_escape.  */
  char *start = p;
  int n_chars = 0;

  /* Step over the opening quote, along with its prefix if any.  */
  if (p < end && *p == '\'')
    p += 1;
  else if (end - p >= 2
           && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
           && p[1] == '\'')
    p += 2;
  else
    return 0;

  for (;;)
    {
      if (p >= end)
        error (_("Unmatched single quote."));

      if (*p == '\'')
        {
          if (n_chars == 0)
            error (_("A character constant must contain at least one "
                   "character."));
          p++;
          break;
        }

      if (*p == '\\')
        {
          /* Let c_parse_escape advance past the escape's body, then
             move P by the backslash plus however far it went.  */
          const char *esc_start = p + 1;
          const char *esc_end = esc_start;

          n_chars += c_parse_escape (&esc_end, NULL);
          p += 1 + (esc_end - esc_start);
        }
      else
        {
          n_chars++;
          p++;
        }
    }

  set_token (tok, start, p);
  return 1;
}
381
382
/* If the text starting at P going up to (but not including) END
   starts with a string literal, optionally prefixed by 'L', 'u' or
   'U', set *TOK to point to that string literal, and return 1.
   Otherwise, return zero.  Signal an error if the literal has no
   closing quote or contains a newline.  */
static int
get_string_literal (struct macro_buffer *tok, char *p, char *end)
{
  char *start = p;

  /* Step over the opening quote, along with its prefix if any.  */
  if (p < end && *p == '"')
    p += 1;
  else if (end - p >= 2
           && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
           && p[1] == '"')
    p += 2;
  else
    return 0;

  for (;;)
    {
      if (p >= end)
        error (_("Unterminated string in expression."));

      if (*p == '"')
        {
          p++;
          break;
        }

      if (*p == '\n')
        error (_("Newline characters may not appear in string "
               "constants."));

      if (*p == '\\')
        {
          /* Let c_parse_escape advance past the escape's body, then
             move P by the backslash plus however far it went.  */
          const char *esc_start = p + 1;
          const char *esc_end = esc_start;

          c_parse_escape (&esc_end, NULL);
          p += 1 + (esc_end - esc_start);
        }
      else
        p++;
    }

  set_token (tok, start, p);
  return 1;
}
435
436
/* If the text starting at P going up to (but not including) END
   starts with a punctuator, set *TOK to refer to it and return 1.
   Otherwise, return zero.  */
static int
get_punctuator (struct macro_buffer *tok, char *p, char *end)
{
  /* Here, speed is much less important than correctness and clarity.  */

  /* ISO/IEC 9899:1999 (E)  Section 6.4.6  Paragraph 1.
     The order of this table matters.  The first entry that matches
     wins, so a punctuator that is a prefix of a longer one must come
     after that longer one; otherwise the short form would be
     returned in its place.  */
  static const char * const punctuators[] = {
    "[", "]", "(", ")", "{", "}", "?", ";", ",", "~",
    "...", ".",
    "->", "--", "-=", "-",
    "++", "+=", "+",
    "*=", "*",
    "!=", "!",
    "&&", "&=", "&",
    "/=", "/",
    "%>", "%:%:", "%:", "%=", "%",
    "^=", "^",
    "##", "#",
    ":>", ":",
    "||", "|=", "|",
    "<<=", "<<", "<=", "<:", "<%", "<",
    ">>=", ">>", ">=", ">",
    "==", "=",
    0
  };

  const char * const *entry;

  if (p >= end)
    return 0;

  for (entry = punctuators; *entry; entry++)
    {
      int len;

      /* Cheap rejection before measuring and comparing.  */
      if ((*entry)[0] != p[0])
        continue;

      len = strlen (*entry);
      if (p + len <= end && memcmp (p, *entry, len) == 0)
        {
          set_token (tok, p, p + len);
          return 1;
        }
    }

  return 0;
}
490
491
492/* Peel the next preprocessor token off of SRC, and put it in TOK.
493   Mutate TOK to refer to the first token in SRC, and mutate SRC to
494   refer to the text after that token.  SRC must be a shared buffer;
495   the resulting TOK will be shared, pointing into the same string SRC
496   does.  Initialize TOK's last_token field.  Return non-zero if we
497   succeed, or 0 if we didn't find any more tokens in SRC.  */
498static int
499get_token (struct macro_buffer *tok,
500           struct macro_buffer *src)
501{
502  char *p = src->text;
503  char *end = p + src->len;
504
505  gdb_assert (src->shared);
506
507  /* From the ISO C standard, ISO/IEC 9899:1999 (E), section 6.4:
508
509     preprocessing-token:
510         header-name
511         identifier
512         pp-number
513         character-constant
514         string-literal
515         punctuator
516         each non-white-space character that cannot be one of the above
517
518     We don't have to deal with header-name tokens, since those can
519     only occur after a #include, which we will never see.  */
520
521  while (p < end)
522    if (macro_is_whitespace (*p))
523      p++;
524    else if (get_comment (tok, p, end))
525      p += tok->len;
526    else if (get_pp_number (tok, p, end)
527             || get_character_constant (tok, p, end)
528             || get_string_literal (tok, p, end)
529             /* Note: the grammar in the standard seems to be
530                ambiguous: L'x' can be either a wide character
531                constant, or an identifier followed by a normal
532                character constant.  By trying `get_identifier' after
533                we try get_character_constant and get_string_literal,
534                we give the wide character syntax precedence.  Now,
535                since GDB doesn't handle wide character constants
536                anyway, is this the right thing to do?  */
537             || get_identifier (tok, p, end)
538             || get_punctuator (tok, p, end))
539      {
540        /* How many characters did we consume, including whitespace?  */
541        int consumed = p - src->text + tok->len;
542
543        src->text += consumed;
544        src->len -= consumed;
545        return 1;
546      }
547    else
548      {
549        /* We have found a "non-whitespace character that cannot be
550           one of the above."  Make a token out of it.  */
551        int consumed;
552
553        set_token (tok, p, p + 1);
554        consumed = p - src->text + tok->len;
555        src->text += consumed;
556        src->len -= consumed;
557        return 1;
558      }
559
560  return 0;
561}
562
563
564
/* Appending token strings, with and without splicing.  */
566
567
568/* Append the macro buffer SRC to the end of DEST, and ensure that
569   doing so doesn't splice the token at the end of SRC with the token
570   at the beginning of DEST.  SRC and DEST must have their last_token
571   fields set.  Upon return, DEST's last_token field is set correctly.
572
573   For example:
574
575   If DEST is "(" and SRC is "y", then we can return with
576   DEST set to "(y" --- we've simply appended the two buffers.
577
578   However, if DEST is "x" and SRC is "y", then we must not return
579   with DEST set to "xy" --- that would splice the two tokens "x" and
580   "y" together to make a single token "xy".  However, it would be
581   fine to return with DEST set to "x y".  Similarly, "<" and "<" must
582   yield "< <", not "<<", etc.  */
583static void
584append_tokens_without_splicing (struct macro_buffer *dest,
585                                struct macro_buffer *src)
586{
587  int original_dest_len = dest->len;
588  struct macro_buffer dest_tail, new_token;
589
590  gdb_assert (src->last_token != -1);
591  gdb_assert (dest->last_token != -1);
592
593  /* First, just try appending the two, and call get_token to see if
594     we got a splice.  */
595  appendmem (dest, src->text, src->len);
596
597  /* If DEST originally had no token abutting its end, then we can't
598     have spliced anything, so we're done.  */
599  if (dest->last_token == original_dest_len)
600    {
601      dest->last_token = original_dest_len + src->last_token;
602      return;
603    }
604
605  /* Set DEST_TAIL to point to the last token in DEST, followed by
606     all the stuff we just appended.  */
607  init_shared_buffer (&dest_tail,
608                      dest->text + dest->last_token,
609                      dest->len - dest->last_token);
610
611  /* Re-parse DEST's last token.  We know that DEST used to contain
612     at least one token, so if it doesn't contain any after the
613     append, then we must have spliced "/" and "*" or "/" and "/" to
614     make a comment start.  (Just for the record, I got this right
615     the first time.  This is not a bug fix.)  */
616  if (get_token (&new_token, &dest_tail)
617      && (new_token.text + new_token.len
618          == dest->text + original_dest_len))
619    {
620      /* No splice, so we're done.  */
621      dest->last_token = original_dest_len + src->last_token;
622      return;
623    }
624
625  /* Okay, a simple append caused a splice.  Let's chop dest back to
626     its original length and try again, but separate the texts with a
627     space.  */
628  dest->len = original_dest_len;
629  appendc (dest, ' ');
630  appendmem (dest, src->text, src->len);
631
632  init_shared_buffer (&dest_tail,
633                      dest->text + dest->last_token,
634                      dest->len - dest->last_token);
635
636  /* Try to re-parse DEST's last token, as above.  */
637  if (get_token (&new_token, &dest_tail)
638      && (new_token.text + new_token.len
639          == dest->text + original_dest_len))
640    {
641      /* No splice, so we're done.  */
642      dest->last_token = original_dest_len + 1 + src->last_token;
643      return;
644    }
645
646  /* As far as I know, there's no case where inserting a space isn't
647     enough to prevent a splice.  */
648  internal_error (__FILE__, __LINE__,
649                  _("unable to avoid splicing tokens during macro expansion"));
650}
651
652/* Stringify an argument, and insert it into DEST.  ARG is the text to
653   stringify; it is LEN bytes long.  */
654
655static void
656stringify (struct macro_buffer *dest, const char *arg, int len)
657{
658  /* Trim initial whitespace from ARG.  */
659  while (len > 0 && macro_is_whitespace (*arg))
660    {
661      ++arg;
662      --len;
663    }
664
665  /* Trim trailing whitespace from ARG.  */
666  while (len > 0 && macro_is_whitespace (arg[len - 1]))
667    --len;
668
669  /* Insert the string.  */
670  appendc (dest, '"');
671  while (len > 0)
672    {
673      /* We could try to handle strange cases here, like control
674	 characters, but there doesn't seem to be much point.  */
675      if (macro_is_whitespace (*arg))
676	{
677	  /* Replace a sequence of whitespace with a single space.  */
678	  appendc (dest, ' ');
679	  while (len > 1 && macro_is_whitespace (arg[1]))
680	    {
681	      ++arg;
682	      --len;
683	    }
684	}
685      else if (*arg == '\\' || *arg == '"')
686	{
687	  appendc (dest, '\\');
688	  appendc (dest, *arg);
689	}
690      else
691	appendc (dest, *arg);
692      ++arg;
693      --len;
694    }
695  appendc (dest, '"');
696  dest->last_token = dest->len;
697}
698
699/* See macroexp.h.  */
700
701char *
702macro_stringify (const char *str)
703{
704  struct macro_buffer buffer;
705  int len = strlen (str);
706
707  init_buffer (&buffer, len);
708  stringify (&buffer, str, len);
709  appendc (&buffer, '\0');
710
711  return free_buffer_return_text (&buffer);
712}
713
714
715/* Expanding macros!  */
716
717
/* One node in a singly-linked list of the names of the macros we are
   currently expanding --- for detecting expansion loops.  */
struct macro_name_list
{
  /* The name of a macro being expanded.  */
  const char *name;

  /* The rest of the list; null at the end.  */
  struct macro_name_list *next;
};
724
725
726/* Return non-zero if we are currently expanding the macro named NAME,
727   according to LIST; otherwise, return zero.
728
729   You know, it would be possible to get rid of all the NO_LOOP
730   arguments to these functions by simply generating a new lookup
731   function and baton which refuses to find the definition for a
732   particular macro, and otherwise delegates the decision to another
733   function/baton pair.  But that makes the linked list of excluded
734   macros chained through untyped baton pointers, which will make it
735   harder to debug.  :(  */
736static int
737currently_rescanning (struct macro_name_list *list, const char *name)
738{
739  for (; list; list = list->next)
740    if (strcmp (name, list->name) == 0)
741      return 1;
742
743  return 0;
744}
745
746
747/* Gather the arguments to a macro expansion.
748
749   NAME is the name of the macro being invoked.  (It's only used for
750   printing error messages.)
751
752   Assume that SRC is the text of the macro invocation immediately
753   following the macro name.  For example, if we're processing the
754   text foo(bar, baz), then NAME would be foo and SRC will be (bar,
755   baz).
756
757   If SRC doesn't start with an open paren ( token at all, return
758   zero, leave SRC unchanged, and don't set *ARGC_P to anything.
759
760   If SRC doesn't contain a properly terminated argument list, then
761   raise an error.
762
763   For a variadic macro, NARGS holds the number of formal arguments to
764   the macro.  For a GNU-style variadic macro, this should be the
765   number of named arguments.  For a non-variadic macro, NARGS should
766   be -1.
767
768   Otherwise, return a pointer to the first element of an array of
769   macro buffers referring to the argument texts, and set *ARGC_P to
770   the number of arguments we found --- the number of elements in the
771   array.  The macro buffers share their text with SRC, and their
772   last_token fields are initialized.  The array is allocated with
773   xmalloc, and the caller is responsible for freeing it.
774
775   NOTE WELL: if SRC starts with a open paren ( token followed
776   immediately by a close paren ) token (e.g., the invocation looks
777   like "foo()"), we treat that as one argument, which happens to be
778   the empty list of tokens.  The caller should keep in mind that such
779   a sequence of tokens is a valid way to invoke one-parameter
780   function-like macros, but also a valid way to invoke zero-parameter
781   function-like macros.  Eeew.
782
783   Consume the tokens from SRC; after this call, SRC contains the text
784   following the invocation.  */
785
786static struct macro_buffer *
787gather_arguments (const char *name, struct macro_buffer *src,
788		  int nargs, int *argc_p)
789{
790  struct macro_buffer tok;
791  int args_len, args_size;
792  struct macro_buffer *args = NULL;
793  struct cleanup *back_to = make_cleanup (free_current_contents, &args);
794
795  /* Does SRC start with an opening paren token?  Read from a copy of
796     SRC, so SRC itself is unaffected if we don't find an opening
797     paren.  */
798  {
799    struct macro_buffer temp;
800
801    init_shared_buffer (&temp, src->text, src->len);
802
803    if (! get_token (&tok, &temp)
804        || tok.len != 1
805        || tok.text[0] != '(')
806      {
807        discard_cleanups (back_to);
808        return 0;
809      }
810  }
811
812  /* Consume SRC's opening paren.  */
813  get_token (&tok, src);
814
815  args_len = 0;
816  args_size = 6;
817  args = (struct macro_buffer *) xmalloc (sizeof (*args) * args_size);
818
819  for (;;)
820    {
821      struct macro_buffer *arg;
822      int depth;
823
824      /* Make sure we have room for the next argument.  */
825      if (args_len >= args_size)
826        {
827          args_size *= 2;
828          args = xrealloc (args, sizeof (*args) * args_size);
829        }
830
831      /* Initialize the next argument.  */
832      arg = &args[args_len++];
833      set_token (arg, src->text, src->text);
834
835      /* Gather the argument's tokens.  */
836      depth = 0;
837      for (;;)
838        {
839          if (! get_token (&tok, src))
840            error (_("Malformed argument list for macro `%s'."), name);
841
842          /* Is tok an opening paren?  */
843          if (tok.len == 1 && tok.text[0] == '(')
844            depth++;
845
846          /* Is tok is a closing paren?  */
847          else if (tok.len == 1 && tok.text[0] == ')')
848            {
849              /* If it's a closing paren at the top level, then that's
850                 the end of the argument list.  */
851              if (depth == 0)
852                {
853		  /* In the varargs case, the last argument may be
854		     missing.  Add an empty argument in this case.  */
855		  if (nargs != -1 && args_len == nargs - 1)
856		    {
857		      /* Make sure we have room for the argument.  */
858		      if (args_len >= args_size)
859			{
860			  args_size++;
861			  args = xrealloc (args, sizeof (*args) * args_size);
862			}
863		      arg = &args[args_len++];
864		      set_token (arg, src->text, src->text);
865		    }
866
867                  discard_cleanups (back_to);
868                  *argc_p = args_len;
869                  return args;
870                }
871
872              depth--;
873            }
874
875          /* If tok is a comma at top level, then that's the end of
876             the current argument.  However, if we are handling a
877             variadic macro and we are computing the last argument, we
878             want to include the comma and remaining tokens.  */
879          else if (tok.len == 1 && tok.text[0] == ',' && depth == 0
880		   && (nargs == -1 || args_len < nargs))
881            break;
882
883          /* Extend the current argument to enclose this token.  If
884             this is the current argument's first token, leave out any
885             leading whitespace, just for aesthetics.  */
886          if (arg->len == 0)
887            {
888              arg->text = tok.text;
889              arg->len = tok.len;
890              arg->last_token = 0;
891            }
892          else
893            {
894              arg->len = (tok.text + tok.len) - arg->text;
895              arg->last_token = tok.text - arg->text;
896            }
897        }
898    }
899}
900
901
902/* The `expand' and `substitute_args' functions both invoke `scan'
903   recursively, so we need a forward declaration somewhere.  */
904static void scan (struct macro_buffer *dest,
905                  struct macro_buffer *src,
906                  struct macro_name_list *no_loop,
907                  macro_lookup_ftype *lookup_func,
908                  void *lookup_baton);
909
910
911/* A helper function for substitute_args.
912
913   ARGV is a vector of all the arguments; ARGC is the number of
914   arguments.  IS_VARARGS is true if the macro being substituted is a
915   varargs macro; in this case VA_ARG_NAME is the name of the
916   "variable" argument.  VA_ARG_NAME is ignored if IS_VARARGS is
917   false.
918
919   If the token TOK is the name of a parameter, return the parameter's
920   index.  If TOK is not an argument, return -1.  */
921
922static int
923find_parameter (const struct macro_buffer *tok,
924		int is_varargs, const struct macro_buffer *va_arg_name,
925		int argc, const char * const *argv)
926{
927  int i;
928
929  if (! tok->is_identifier)
930    return -1;
931
932  for (i = 0; i < argc; ++i)
933    if (tok->len == strlen (argv[i])
934	&& !memcmp (tok->text, argv[i], tok->len))
935      return i;
936
937  if (is_varargs && tok->len == va_arg_name->len
938      && ! memcmp (tok->text, va_arg_name->text, tok->len))
939    return argc - 1;
940
941  return -1;
942}
943
944/* Given the macro definition DEF, being invoked with the actual
945   arguments given by ARGC and ARGV, substitute the arguments into the
946   replacement list, and store the result in DEST.
947
948   IS_VARARGS should be true if DEF is a varargs macro.  In this case,
949   VA_ARG_NAME should be the name of the "variable" argument -- either
950   __VA_ARGS__ for c99-style varargs, or the final argument name, for
951   GNU-style varargs.  If IS_VARARGS is false, this parameter is
952   ignored.
953
954   If it is necessary to expand macro invocations in one of the
955   arguments, use LOOKUP_FUNC and LOOKUP_BATON to find the macro
956   definitions, and don't expand invocations of the macros listed in
957   NO_LOOP.  */
958
959static void
960substitute_args (struct macro_buffer *dest,
961                 struct macro_definition *def,
962		 int is_varargs, const struct macro_buffer *va_arg_name,
963                 int argc, struct macro_buffer *argv,
964                 struct macro_name_list *no_loop,
965                 macro_lookup_ftype *lookup_func,
966                 void *lookup_baton)
967{
968  /* A macro buffer for the macro's replacement list.  */
969  struct macro_buffer replacement_list;
970  /* The token we are currently considering.  */
971  struct macro_buffer tok;
972  /* The replacement list's pointer from just before TOK was lexed.  */
973  char *original_rl_start;
974  /* We have a single lookahead token to handle token splicing.  */
975  struct macro_buffer lookahead;
976  /* The lookahead token might not be valid.  */
977  int lookahead_valid;
978  /* The replacement list's pointer from just before LOOKAHEAD was
979     lexed.  */
980  char *lookahead_rl_start;
981
982  init_shared_buffer (&replacement_list, (char *) def->replacement,
983                      strlen (def->replacement));
984
985  gdb_assert (dest->len == 0);
986  dest->last_token = 0;
987
988  original_rl_start = replacement_list.text;
989  if (! get_token (&tok, &replacement_list))
990    return;
991  lookahead_rl_start = replacement_list.text;
992  lookahead_valid = get_token (&lookahead, &replacement_list);
993
994  for (;;)
995    {
996      /* Just for aesthetics.  If we skipped some whitespace, copy
997         that to DEST.  */
998      if (tok.text > original_rl_start)
999        {
1000          appendmem (dest, original_rl_start, tok.text - original_rl_start);
1001          dest->last_token = dest->len;
1002        }
1003
1004      /* Is this token the stringification operator?  */
1005      if (tok.len == 1
1006          && tok.text[0] == '#')
1007	{
1008	  int arg;
1009
1010	  if (!lookahead_valid)
1011	    error (_("Stringification operator requires an argument."));
1012
1013	  arg = find_parameter (&lookahead, is_varargs, va_arg_name,
1014				def->argc, def->argv);
1015	  if (arg == -1)
1016	    error (_("Argument to stringification operator must name "
1017		     "a macro parameter."));
1018
1019	  stringify (dest, argv[arg].text, argv[arg].len);
1020
1021	  /* Read one token and let the loop iteration code handle the
1022	     rest.  */
1023	  lookahead_rl_start = replacement_list.text;
1024	  lookahead_valid = get_token (&lookahead, &replacement_list);
1025	}
1026      /* Is this token the splicing operator?  */
1027      else if (tok.len == 2
1028	       && tok.text[0] == '#'
1029	       && tok.text[1] == '#')
1030	error (_("Stray splicing operator"));
1031      /* Is the next token the splicing operator?  */
1032      else if (lookahead_valid
1033	       && lookahead.len == 2
1034	       && lookahead.text[0] == '#'
1035	       && lookahead.text[1] == '#')
1036	{
1037	  int finished = 0;
1038	  int prev_was_comma = 0;
1039
1040	  /* Note that GCC warns if the result of splicing is not a
1041	     token.  In the debugger there doesn't seem to be much
1042	     benefit from doing this.  */
1043
1044	  /* Insert the first token.  */
1045	  if (tok.len == 1 && tok.text[0] == ',')
1046	    prev_was_comma = 1;
1047	  else
1048	    {
1049	      int arg = find_parameter (&tok, is_varargs, va_arg_name,
1050					def->argc, def->argv);
1051
1052	      if (arg != -1)
1053		appendmem (dest, argv[arg].text, argv[arg].len);
1054	      else
1055		appendmem (dest, tok.text, tok.len);
1056	    }
1057
1058	  /* Apply a possible sequence of ## operators.  */
1059	  for (;;)
1060	    {
1061	      if (! get_token (&tok, &replacement_list))
1062		error (_("Splicing operator at end of macro"));
1063
1064	      /* Handle a comma before a ##.  If we are handling
1065		 varargs, and the token on the right hand side is the
1066		 varargs marker, and the final argument is empty or
1067		 missing, then drop the comma.  This is a GNU
1068		 extension.  There is one ambiguous case here,
1069		 involving pedantic behavior with an empty argument,
1070		 but we settle that in favor of GNU-style (GCC uses an
1071		 option).  If we aren't dealing with varargs, we
1072		 simply insert the comma.  */
1073	      if (prev_was_comma)
1074		{
1075		  if (! (is_varargs
1076			 && tok.len == va_arg_name->len
1077			 && !memcmp (tok.text, va_arg_name->text, tok.len)
1078			 && argv[argc - 1].len == 0))
1079		    appendmem (dest, ",", 1);
1080		  prev_was_comma = 0;
1081		}
1082
1083	      /* Insert the token.  If it is a parameter, insert the
1084		 argument.  If it is a comma, treat it specially.  */
1085	      if (tok.len == 1 && tok.text[0] == ',')
1086		prev_was_comma = 1;
1087	      else
1088		{
1089		  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1090					    def->argc, def->argv);
1091
1092		  if (arg != -1)
1093		    appendmem (dest, argv[arg].text, argv[arg].len);
1094		  else
1095		    appendmem (dest, tok.text, tok.len);
1096		}
1097
1098	      /* Now read another token.  If it is another splice, we
1099		 loop.  */
1100	      original_rl_start = replacement_list.text;
1101	      if (! get_token (&tok, &replacement_list))
1102		{
1103		  finished = 1;
1104		  break;
1105		}
1106
1107	      if (! (tok.len == 2
1108		     && tok.text[0] == '#'
1109		     && tok.text[1] == '#'))
1110		break;
1111	    }
1112
1113	  if (prev_was_comma)
1114	    {
1115	      /* We saw a comma.  Insert it now.  */
1116	      appendmem (dest, ",", 1);
1117	    }
1118
1119          dest->last_token = dest->len;
1120	  if (finished)
1121	    lookahead_valid = 0;
1122	  else
1123	    {
1124	      /* Set up for the loop iterator.  */
1125	      lookahead = tok;
1126	      lookahead_rl_start = original_rl_start;
1127	      lookahead_valid = 1;
1128	    }
1129	}
1130      else
1131	{
1132	  /* Is this token an identifier?  */
1133	  int substituted = 0;
1134	  int arg = find_parameter (&tok, is_varargs, va_arg_name,
1135				    def->argc, def->argv);
1136
1137	  if (arg != -1)
1138	    {
1139	      struct macro_buffer arg_src;
1140
1141	      /* Expand any macro invocations in the argument text,
1142		 and append the result to dest.  Remember that scan
1143		 mutates its source, so we need to scan a new buffer
1144		 referring to the argument's text, not the argument
1145		 itself.  */
1146	      init_shared_buffer (&arg_src, argv[arg].text, argv[arg].len);
1147	      scan (dest, &arg_src, no_loop, lookup_func, lookup_baton);
1148	      substituted = 1;
1149	    }
1150
1151	  /* If it wasn't a parameter, then just copy it across.  */
1152	  if (! substituted)
1153	    append_tokens_without_splicing (dest, &tok);
1154	}
1155
1156      if (! lookahead_valid)
1157	break;
1158
1159      tok = lookahead;
1160      original_rl_start = lookahead_rl_start;
1161
1162      lookahead_rl_start = replacement_list.text;
1163      lookahead_valid = get_token (&lookahead, &replacement_list);
1164    }
1165}
1166
1167
1168/* Expand a call to a macro named ID, whose definition is DEF.  Append
1169   its expansion to DEST.  SRC is the input text following the ID
1170   token.  We are currently rescanning the expansions of the macros
1171   named in NO_LOOP; don't re-expand them.  Use LOOKUP_FUNC and
1172   LOOKUP_BATON to find definitions for any nested macro references.
1173
1174   Return 1 if we decided to expand it, zero otherwise.  (If it's a
1175   function-like macro name that isn't followed by an argument list,
1176   we don't expand it.)  If we return zero, leave SRC unchanged.  */
1177static int
1178expand (const char *id,
1179        struct macro_definition *def,
1180        struct macro_buffer *dest,
1181        struct macro_buffer *src,
1182        struct macro_name_list *no_loop,
1183        macro_lookup_ftype *lookup_func,
1184        void *lookup_baton)
1185{
1186  struct macro_name_list new_no_loop;
1187
1188  /* Create a new node to be added to the front of the no-expand list.
1189     This list is appropriate for re-scanning replacement lists, but
1190     it is *not* appropriate for scanning macro arguments; invocations
1191     of the macro whose arguments we are gathering *do* get expanded
1192     there.  */
1193  new_no_loop.name = id;
1194  new_no_loop.next = no_loop;
1195
1196  /* What kind of macro are we expanding?  */
1197  if (def->kind == macro_object_like)
1198    {
1199      struct macro_buffer replacement_list;
1200
1201      init_shared_buffer (&replacement_list, (char *) def->replacement,
1202                          strlen (def->replacement));
1203
1204      scan (dest, &replacement_list, &new_no_loop, lookup_func, lookup_baton);
1205      return 1;
1206    }
1207  else if (def->kind == macro_function_like)
1208    {
1209      struct cleanup *back_to = make_cleanup (null_cleanup, 0);
1210      int argc = 0;
1211      struct macro_buffer *argv = NULL;
1212      struct macro_buffer substituted;
1213      struct macro_buffer substituted_src;
1214      struct macro_buffer va_arg_name = {0};
1215      int is_varargs = 0;
1216
1217      if (def->argc >= 1)
1218	{
1219	  if (strcmp (def->argv[def->argc - 1], "...") == 0)
1220	    {
1221	      /* In C99-style varargs, substitution is done using
1222		 __VA_ARGS__.  */
1223	      init_shared_buffer (&va_arg_name, "__VA_ARGS__",
1224				  strlen ("__VA_ARGS__"));
1225	      is_varargs = 1;
1226	    }
1227	  else
1228	    {
1229	      int len = strlen (def->argv[def->argc - 1]);
1230
1231	      if (len > 3
1232		  && strcmp (def->argv[def->argc - 1] + len - 3, "...") == 0)
1233		{
1234		  /* In GNU-style varargs, the name of the
1235		     substitution parameter is the name of the formal
1236		     argument without the "...".  */
1237		  init_shared_buffer (&va_arg_name,
1238				      (char *) def->argv[def->argc - 1],
1239				      len - 3);
1240		  is_varargs = 1;
1241		}
1242	    }
1243	}
1244
1245      make_cleanup (free_current_contents, &argv);
1246      argv = gather_arguments (id, src, is_varargs ? def->argc : -1,
1247			       &argc);
1248
1249      /* If we couldn't find any argument list, then we don't expand
1250         this macro.  */
1251      if (! argv)
1252        {
1253          do_cleanups (back_to);
1254          return 0;
1255        }
1256
1257      /* Check that we're passing an acceptable number of arguments for
1258         this macro.  */
1259      if (argc != def->argc)
1260        {
1261	  if (is_varargs && argc >= def->argc - 1)
1262	    {
1263	      /* Ok.  */
1264	    }
1265          /* Remember that a sequence of tokens like "foo()" is a
1266             valid invocation of a macro expecting either zero or one
1267             arguments.  */
1268          else if (! (argc == 1
1269		      && argv[0].len == 0
1270		      && def->argc == 0))
1271            error (_("Wrong number of arguments to macro `%s' "
1272                   "(expected %d, got %d)."),
1273                   id, def->argc, argc);
1274        }
1275
1276      /* Note that we don't expand macro invocations in the arguments
1277         yet --- we let subst_args take care of that.  Parameters that
1278         appear as operands of the stringifying operator "#" or the
1279         splicing operator "##" don't get macro references expanded,
1280         so we can't really tell whether it's appropriate to macro-
1281         expand an argument until we see how it's being used.  */
1282      init_buffer (&substituted, 0);
1283      make_cleanup (cleanup_macro_buffer, &substituted);
1284      substitute_args (&substituted, def, is_varargs, &va_arg_name,
1285		       argc, argv, no_loop, lookup_func, lookup_baton);
1286
1287      /* Now `substituted' is the macro's replacement list, with all
1288         argument values substituted into it properly.  Re-scan it for
1289         macro references, but don't expand invocations of this macro.
1290
1291         We create a new buffer, `substituted_src', which points into
1292         `substituted', and scan that.  We can't scan `substituted'
1293         itself, since the tokenization process moves the buffer's
1294         text pointer around, and we still need to be able to find
1295         `substituted's original text buffer after scanning it so we
1296         can free it.  */
1297      init_shared_buffer (&substituted_src, substituted.text, substituted.len);
1298      scan (dest, &substituted_src, &new_no_loop, lookup_func, lookup_baton);
1299
1300      do_cleanups (back_to);
1301
1302      return 1;
1303    }
1304  else
1305    internal_error (__FILE__, __LINE__, _("bad macro definition kind"));
1306}
1307
1308
1309/* If the single token in SRC_FIRST followed by the tokens in SRC_REST
1310   constitute a macro invokation not forbidden in NO_LOOP, append its
1311   expansion to DEST and return non-zero.  Otherwise, return zero, and
1312   leave DEST unchanged.
1313
1314   SRC_FIRST and SRC_REST must be shared buffers; DEST must not be one.
1315   SRC_FIRST must be a string built by get_token.  */
1316static int
1317maybe_expand (struct macro_buffer *dest,
1318              struct macro_buffer *src_first,
1319              struct macro_buffer *src_rest,
1320              struct macro_name_list *no_loop,
1321              macro_lookup_ftype *lookup_func,
1322              void *lookup_baton)
1323{
1324  gdb_assert (src_first->shared);
1325  gdb_assert (src_rest->shared);
1326  gdb_assert (! dest->shared);
1327
1328  /* Is this token an identifier?  */
1329  if (src_first->is_identifier)
1330    {
1331      /* Make a null-terminated copy of it, since that's what our
1332         lookup function expects.  */
1333      char *id = xmalloc (src_first->len + 1);
1334      struct cleanup *back_to = make_cleanup (xfree, id);
1335
1336      memcpy (id, src_first->text, src_first->len);
1337      id[src_first->len] = 0;
1338
1339      /* If we're currently re-scanning the result of expanding
1340         this macro, don't expand it again.  */
1341      if (! currently_rescanning (no_loop, id))
1342        {
1343          /* Does this identifier have a macro definition in scope?  */
1344          struct macro_definition *def = lookup_func (id, lookup_baton);
1345
1346          if (def && expand (id, def, dest, src_rest, no_loop,
1347                             lookup_func, lookup_baton))
1348            {
1349              do_cleanups (back_to);
1350              return 1;
1351            }
1352        }
1353
1354      do_cleanups (back_to);
1355    }
1356
1357  return 0;
1358}
1359
1360
1361/* Expand macro references in SRC, appending the results to DEST.
1362   Assume we are re-scanning the result of expanding the macros named
1363   in NO_LOOP, and don't try to re-expand references to them.
1364
1365   SRC must be a shared buffer; DEST must not be one.  */
1366static void
1367scan (struct macro_buffer *dest,
1368      struct macro_buffer *src,
1369      struct macro_name_list *no_loop,
1370      macro_lookup_ftype *lookup_func,
1371      void *lookup_baton)
1372{
1373  gdb_assert (src->shared);
1374  gdb_assert (! dest->shared);
1375
1376  for (;;)
1377    {
1378      struct macro_buffer tok;
1379      char *original_src_start = src->text;
1380
1381      /* Find the next token in SRC.  */
1382      if (! get_token (&tok, src))
1383        break;
1384
1385      /* Just for aesthetics.  If we skipped some whitespace, copy
1386         that to DEST.  */
1387      if (tok.text > original_src_start)
1388        {
1389          appendmem (dest, original_src_start, tok.text - original_src_start);
1390          dest->last_token = dest->len;
1391        }
1392
1393      if (! maybe_expand (dest, &tok, src, no_loop, lookup_func, lookup_baton))
1394        /* We didn't end up expanding tok as a macro reference, so
1395           simply append it to dest.  */
1396        append_tokens_without_splicing (dest, &tok);
1397    }
1398
1399  /* Just for aesthetics.  If there was any trailing whitespace in
1400     src, copy it to dest.  */
1401  if (src->len)
1402    {
1403      appendmem (dest, src->text, src->len);
1404      dest->last_token = dest->len;
1405    }
1406}
1407
1408
1409char *
1410macro_expand (const char *source,
1411              macro_lookup_ftype *lookup_func,
1412              void *lookup_func_baton)
1413{
1414  struct macro_buffer src, dest;
1415  struct cleanup *back_to;
1416
1417  init_shared_buffer (&src, (char *) source, strlen (source));
1418
1419  init_buffer (&dest, 0);
1420  dest.last_token = 0;
1421  back_to = make_cleanup (cleanup_macro_buffer, &dest);
1422
1423  scan (&dest, &src, 0, lookup_func, lookup_func_baton);
1424
1425  appendc (&dest, '\0');
1426
1427  discard_cleanups (back_to);
1428  return dest.text;
1429}
1430
1431
1432char *
1433macro_expand_once (const char *source,
1434                   macro_lookup_ftype *lookup_func,
1435                   void *lookup_func_baton)
1436{
1437  error (_("Expand-once not implemented yet."));
1438}
1439
1440
1441char *
1442macro_expand_next (const char **lexptr,
1443                   macro_lookup_ftype *lookup_func,
1444                   void *lookup_baton)
1445{
1446  struct macro_buffer src, dest, tok;
1447  struct cleanup *back_to;
1448
1449  /* Set up SRC to refer to the input text, pointed to by *lexptr.  */
1450  init_shared_buffer (&src, (char *) *lexptr, strlen (*lexptr));
1451
1452  /* Set up DEST to receive the expansion, if there is one.  */
1453  init_buffer (&dest, 0);
1454  dest.last_token = 0;
1455  back_to = make_cleanup (cleanup_macro_buffer, &dest);
1456
1457  /* Get the text's first preprocessing token.  */
1458  if (! get_token (&tok, &src))
1459    {
1460      do_cleanups (back_to);
1461      return 0;
1462    }
1463
1464  /* If it's a macro invocation, expand it.  */
1465  if (maybe_expand (&dest, &tok, &src, 0, lookup_func, lookup_baton))
1466    {
1467      /* It was a macro invocation!  Package up the expansion as a
1468         null-terminated string and return it.  Set *lexptr to the
1469         start of the next token in the input.  */
1470      appendc (&dest, '\0');
1471      discard_cleanups (back_to);
1472      *lexptr = src.text;
1473      return dest.text;
1474    }
1475  else
1476    {
1477      /* It wasn't a macro invocation.  */
1478      do_cleanups (back_to);
1479      return 0;
1480    }
1481}
1482