1/* CPP Library - traditional lexical analysis and macro expansion.
2   Copyright (C) 2002-2020 Free Software Foundation, Inc.
3   Contributed by Neil Booth, May 2002
4
5This program is free software; you can redistribute it and/or modify it
6under the terms of the GNU General Public License as published by the
7Free Software Foundation; either version 3, or (at your option) any
8later version.
9
10This program is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program; see the file COPYING3.  If not see
17<http://www.gnu.org/licenses/>.  */
18
19#include "config.h"
20#include "system.h"
21#include "cpplib.h"
22#include "internal.h"
23
24/* The replacement text of a function-like macro is stored as a
25   contiguous sequence of aligned blocks, each representing the text
26   between subsequent parameters.
27
28   Each block comprises the text between its surrounding parameters,
29   the length of that text, and the one-based index of the following
30   parameter.  The final block in the replacement text is easily
31   recognizable as it has an argument index of zero.  */
32
struct block
{
  /* Number of bytes of literal text stored in TEXT.  */
  unsigned int text_len;
  /* One-based index of the parameter that follows TEXT, or zero if
     this is the final block of the replacement text.  */
  unsigned short arg_index;
  /* The literal text.  Declared with a single element, but TEXT_LEN
     bytes are read from it and consecutive blocks are BLOCK_LEN
     (text_len) bytes apart, so the storage extends past the
     declared size.  */
  uchar text[1];
};

/* Number of bytes in a block that precede its text.  */
#define BLOCK_HEADER_LEN offsetof (struct block, text)
/* Aligned size of a whole block holding TEXT_LEN bytes of text; this
   is the distance from one block to the next.  */
#define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
42
/* Structure holding information about a function-like macro
   invocation while its argument list is being scanned.  */
struct fun_macro
{
  /* Memory buffer providing the storage for the ARGS array.  */
  _cpp_buff *buff;

  /* An array of size the number of macro parameters + 1, containing
     the offsets of the start of each macro argument in the output
     buffer.  The argument continues until the character before the
     start of the next one.  */
  size_t *args;

  /* The hashnode of the macro.  */
  cpp_hashnode *node;

  /* The offset of the macro name in the output buffer.  */
  size_t offset;

  /* The line the macro name appeared on.  */
  location_t line;

  /* Number of parameters.  */
  unsigned int paramc;

  /* Zero-based index of argument being currently lexed.  */
  unsigned int argc;
};
71
/* Lexing state.  It is mostly used to prevent macro expansion: the
   scanner only expands a macro name while in ls_none or
   ls_fun_open.  */
enum ls {ls_none = 0,		/* Normal state.  */
	 ls_fun_open,		/* When looking for '('.  */
	 ls_fun_close,		/* When looking for ')'.  */
	 ls_defined,		/* After defined.  */
	 ls_defined_close,	/* Looking for ')' of defined().  */
	 ls_hash,		/* After # in preprocessor conditional.  */
	 ls_predicate,		/* After the predicate, maybe paren?  */
	 ls_answer		/* In answer to predicate.  */
};
82
83/* Lexing TODO: Maybe handle space in escaped newlines.  Stop lex.c
84   from recognizing comments and directives during its lexing pass.  */
85
/* Forward declarations of the file-local helpers.  */
static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
static const uchar *copy_comment (cpp_reader *, const uchar *, int);
static void check_output_buffer (cpp_reader *, size_t);
static void push_replacement_text (cpp_reader *, cpp_hashnode *);
static bool scan_parameters (cpp_reader *, unsigned *);
static bool recursive_macro (cpp_reader *, cpp_hashnode *);
static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
				 struct fun_macro *);
static void save_argument (struct fun_macro *, size_t);
static void replace_args_and_push (cpp_reader *, struct fun_macro *);
static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
99
100/* Ensures we have N bytes' space in the output buffer, and
101   reallocates it if not.  */
102static void
103check_output_buffer (cpp_reader *pfile, size_t n)
104{
105  /* We might need two bytes to terminate an unterminated comment, and
106     one more to terminate the line with a NUL.  */
107  n += 2 + 1;
108
109  if (n > (size_t) (pfile->out.limit - pfile->out.cur))
110    {
111      size_t size = pfile->out.cur - pfile->out.base;
112      size_t new_size = (size + n) * 3 / 2;
113
114      pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
115      pfile->out.limit = pfile->out.base + new_size;
116      pfile->out.cur = pfile->out.base + size;
117    }
118}
119
120/* Skip a C-style block comment in a macro as a result of -CC.
121   PFILE->buffer->cur points to the initial asterisk of the comment,
122   change it to point to after the '*' and '/' characters that terminate it.
123   Return true if the macro has not been termined, in that case set
124   PFILE->buffer->cur to the end of the buffer.  */
125static bool
126skip_macro_block_comment (cpp_reader *pfile)
127{
128  const uchar *cur = pfile->buffer->cur;
129
130  cur++;
131  if (*cur == '/')
132    cur++;
133
134  /* People like decorating comments with '*', so check for '/'
135     instead for efficiency.  */
136  while (! (*cur++ == '/' && cur[-2] == '*'))
137    if (cur[-1] == '\n')
138      {
139	pfile->buffer->cur = cur - 1;
140	return true;
141      }
142
143  pfile->buffer->cur = cur;
144  return false;
145}
146
147/* CUR points to the asterisk introducing a comment in the current
148   context.  IN_DEFINE is true if we are in the replacement text of a
149   macro.
150
151   The asterisk and following comment is copied to the buffer pointed
152   to by pfile->out.cur, which must be of sufficient size.
153   Unterminated comments are diagnosed, and correctly terminated in
154   the output.  pfile->out.cur is updated depending upon IN_DEFINE,
155   -C, -CC and pfile->state.in_directive.
156
157   Returns a pointer to the first character after the comment in the
158   input buffer.  */
159static const uchar *
160copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
161{
162  bool unterminated, copy = false;
163  location_t src_loc = pfile->line_table->highest_line;
164  cpp_buffer *buffer = pfile->buffer;
165
166  buffer->cur = cur;
167  if (pfile->context->prev)
168    unterminated = skip_macro_block_comment (pfile);
169  else
170    unterminated = _cpp_skip_block_comment (pfile);
171
172  if (unterminated)
173    cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
174			 "unterminated comment");
175
176  /* Comments in directives become spaces so that tokens are properly
177     separated when the ISO preprocessor re-lexes the line.  The
178     exception is #define.  */
179  if (pfile->state.in_directive)
180    {
181      if (in_define)
182	{
183	  if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
184	    pfile->out.cur--;
185	  else
186	    copy = true;
187	}
188      else
189	pfile->out.cur[-1] = ' ';
190    }
191  else if (CPP_OPTION (pfile, discard_comments))
192    pfile->out.cur--;
193  else
194    copy = true;
195
196  if (copy)
197    {
198      size_t len = (size_t) (buffer->cur - cur);
199      memcpy (pfile->out.cur, cur, len);
200      pfile->out.cur += len;
201      if (unterminated)
202	{
203	  *pfile->out.cur++ = '*';
204	  *pfile->out.cur++ = '/';
205	}
206    }
207
208  return buffer->cur;
209}
210
211/* CUR points to any character in the input buffer.  Skips over all
212   contiguous horizontal white space and NULs, including comments if
213   SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
214   character or the end of the current context.  Escaped newlines are
215   removed.
216
217   The whitespace is copied verbatim to the output buffer, except that
218   comments are handled as described in copy_comment().
219   pfile->out.cur is updated.
220
221   Returns a pointer to the first character after the whitespace in
222   the input buffer.  */
223static const uchar *
224skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
225{
226  uchar *out = pfile->out.cur;
227
228  for (;;)
229    {
230      unsigned int c = *cur++;
231      *out++ = c;
232
233      if (is_nvspace (c))
234	continue;
235
236      if (c == '/' && *cur == '*' && skip_comments)
237	{
238	  pfile->out.cur = out;
239	  cur = copy_comment (pfile, cur, false /* in_define */);
240	  out = pfile->out.cur;
241	  continue;
242	}
243
244      out--;
245      break;
246    }
247
248  pfile->out.cur = out;
249  return cur - 1;
250}
251
252/* Lexes and outputs an identifier starting at CUR, which is assumed
253   to point to a valid first character of an identifier.  Returns
254   the hashnode, and updates out.cur.  */
255static cpp_hashnode *
256lex_identifier (cpp_reader *pfile, const uchar *cur)
257{
258  size_t len;
259  uchar *out = pfile->out.cur;
260  cpp_hashnode *result;
261
262  do
263    *out++ = *cur++;
264  while (is_numchar (*cur));
265
266  CUR (pfile->context) = cur;
267  len = out - pfile->out.cur;
268  result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
269				    len, HT_ALLOC));
270  pfile->out.cur = out;
271  return result;
272}
273
274/* Overlays the true file buffer temporarily with text of length LEN
275   starting at START.  The true buffer is restored upon calling
276   restore_buff().  */
277void
278_cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
279{
280  cpp_buffer *buffer = pfile->buffer;
281
282  pfile->overlaid_buffer = buffer;
283  pfile->saved_cur = buffer->cur;
284  pfile->saved_rlimit = buffer->rlimit;
285  pfile->saved_line_base = buffer->next_line;
286  buffer->need_line = false;
287
288  buffer->cur = start;
289  buffer->line_base = start;
290  buffer->rlimit = start + len;
291}
292
293/* Restores a buffer overlaid by _cpp_overlay_buffer().  */
294void
295_cpp_remove_overlay (cpp_reader *pfile)
296{
297  cpp_buffer *buffer = pfile->overlaid_buffer;
298
299  buffer->cur = pfile->saved_cur;
300  buffer->rlimit = pfile->saved_rlimit;
301  buffer->line_base = pfile->saved_line_base;
302  buffer->need_line = true;
303
304  pfile->overlaid_buffer = NULL;
305}
306
307/* Reads a logical line into the output buffer.  Returns TRUE if there
308   is more text left in the buffer.  */
309bool
310_cpp_read_logical_line_trad (cpp_reader *pfile)
311{
312  do
313    {
314      if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
315	return false;
316    }
317  while (!_cpp_scan_out_logical_line (pfile, NULL, false)
318	 || pfile->state.skipping);
319
320  return pfile->buffer != NULL;
321}
322
323/* Return true if NODE is a fun_like macro.  */
324static inline bool
325fun_like_macro (cpp_hashnode *node)
326{
327  if (cpp_builtin_macro_p (node))
328    return (node->value.builtin == BT_HAS_ATTRIBUTE
329	    || node->value.builtin == BT_HAS_BUILTIN
330	    || node->value.builtin == BT_HAS_INCLUDE
331	    || node->value.builtin == BT_HAS_INCLUDE_NEXT);
332  return node->value.macro->fun_like;
333}
334
335/* Set up state for finding the opening '(' of a function-like
336   macro.  */
337static void
338maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start,
339		     struct fun_macro *macro)
340{
341  unsigned int n;
342  if (cpp_builtin_macro_p (node))
343    n = 1;
344  else
345    n = node->value.macro->paramc;
346
347  if (macro->buff)
348    _cpp_release_buff (pfile, macro->buff);
349  macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t));
350  macro->args = (size_t *) BUFF_FRONT (macro->buff);
351  macro->node = node;
352  macro->offset = start - pfile->out.base;
353  macro->paramc = n;
354  macro->argc = 0;
355}
356
357/* Save the OFFSET of the start of the next argument to MACRO.  */
358static void
359save_argument (struct fun_macro *macro, size_t offset)
360{
361  macro->argc++;
362  if (macro->argc <= macro->paramc)
363    macro->args[macro->argc] = offset;
364}
365
/* Copies the next logical line in the current buffer (starting at
   buffer->cur) to the output buffer.  The output is guaranteed to
   terminate with a NUL character.  buffer->cur is updated.

   If MACRO is non-NULL, then we are scanning the replacement list of
   MACRO, and we call save_replacement_text() every time we meet an
   argument.

   If BUILTIN_MACRO_ARG is true, this is called to macro expand
   arguments of builtin function-like macros.

   Returns false if the line was a directive (including a null
   directive) that has been dealt with here, and true otherwise.  */
bool
_cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro,
			    bool builtin_macro_arg)
{
  bool result = true;
  cpp_context *context;
  const uchar *cur;
  uchar *out;
  struct fun_macro fmacro;
  unsigned int c, paren_depth = 0, quote;
  enum ls lex_state = ls_none;
  bool header_ok;
  const uchar *start_of_input_line;

  /* No function-like macro invocation is pending yet.  */
  fmacro.buff = NULL;
  fmacro.args = NULL;
  fmacro.node = NULL;
  fmacro.offset = 0;
  fmacro.line = 0;
  fmacro.paramc = 0;
  fmacro.argc = 0;

  quote = 0;
  header_ok = pfile->state.angled_headers;
  CUR (pfile->context) = pfile->buffer->cur;
  RLIMIT (pfile->context) = pfile->buffer->rlimit;
  /* A normal scan starts a fresh output line.  When rescanning the
     argument of a builtin, output instead continues from the current
     pfile->out.cur.  */
  if (!builtin_macro_arg)
    {
      pfile->out.cur = pfile->out.base;
      pfile->out.first_line = pfile->line_table->highest_line;
    }
  /* start_of_input_line is needed to make sure that directives really,
     really start at the first character of the line.  */
  start_of_input_line = pfile->buffer->cur;
  /* (Re)load the scanning state from the current context.  Control
     comes back here whenever a context has been pushed or popped.  */
 new_context:
  context = pfile->context;
  cur = CUR (context);
  /* Make room for everything left in this context.  */
  check_output_buffer (pfile, RLIMIT (context) - cur);
  out = pfile->out.cur;

  for (;;)
    {
      /* Line notes are only processed for the base context, and not
	 while rescanning a builtin's argument.  */
      if (!context->prev
	  && !builtin_macro_arg
	  && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
	{
	  pfile->buffer->cur = cur;
	  _cpp_process_line_notes (pfile, false);
	}
      /* Every character is copied to the output first; the cases
	 below undo or rewrite that where necessary.  */
      c = *cur++;
      *out++ = c;

      /* Whitespace should "continue" out of the switch,
	 non-whitespace should "break" out of it.  */
      switch (c)
	{
	case ' ':
	case '\t':
	case '\f':
	case '\v':
	case '\0':
	  continue;

	case '\n':
	  /* If this is a macro's expansion, pop it.  */
	  if (context->prev)
	    {
	      pfile->out.cur = out - 1;
	      _cpp_pop_context (pfile);
	      goto new_context;
	    }

	  /* Omit the newline from the output buffer.  */
	  pfile->out.cur = out - 1;
	  pfile->buffer->cur = cur;
	  if (builtin_macro_arg)
	    goto done;
	  pfile->buffer->need_line = true;
	  CPP_INCREMENT_LINE (pfile, 0);

	  /* Outside directives, a pending function-like macro
	     invocation may continue on the next line, if there is
	     one.  */
	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
	      && !pfile->state.in_directive
	      && _cpp_get_fresh_line (pfile))
	    {
	      /* Newlines in arguments become a space, but we don't
		 clear any in-progress quote.  */
	      if (lex_state == ls_fun_close)
		out[-1] = ' ';
	      cur = pfile->buffer->cur;
	      continue;
	    }
	  goto done;

	case '<':
	  if (header_ok)
	    quote = '>';
	  break;
	case '>':
	  if (c == quote)
	    quote = 0;
	  break;

	case '"':
	case '\'':
	  if (c == quote)
	    quote = 0;
	  else if (!quote)
	    quote = c;
	  break;

	case '\\':
	  /* Skip escaped quotes here, it's easier than above.  */
	  if (*cur == '\\' || *cur == '"' || *cur == '\'')
	    *out++ = *cur++;
	  break;

	case '/':
	  /* Traditional CPP does not recognize comments within
	     literals.  */
	  if (!quote && *cur == '*')
	    {
	      pfile->out.cur = out;
	      cur = copy_comment (pfile, cur, macro != 0);
	      out = pfile->out.cur;
	      continue;
	    }
	  break;

	case '_':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	  /* Identifiers inside quotes are only looked at when scanning
	     a macro definition.  */
	  if (!pfile->state.skipping && (quote == 0 || macro))
	    {
	      cpp_hashnode *node;
	      uchar *out_start = out - 1;

	      /* Re-lex from the identifier's first character, which was
		 already copied above.  */
	      pfile->out.cur = out_start;
	      node = lex_identifier (pfile, cur - 1);
	      out = pfile->out.cur;
	      cur = CUR (context);

	      if (cpp_macro_p (node)
		  /* Should we expand for ls_answer?  */
		  && (lex_state == ls_none || lex_state == ls_fun_open)
		  && !pfile->state.prevent_expansion)
		{
		  /* Macros invalidate MI optimization.  */
		  pfile->mi_valid = false;
		  if (fun_like_macro (node))
		    {
		      /* Whether this really is an invocation is only
			 known once the next non-whitespace character
			 has been seen.  */
		      maybe_start_funlike (pfile, node, out_start, &fmacro);
		      lex_state = ls_fun_open;
		      fmacro.line = pfile->line_table->highest_line;
		      continue;
		    }
		  else if (!recursive_macro (pfile, node))
		    {
		      /* Remove the object-like macro's name from the
			 output, and push its replacement text.  */
		      pfile->out.cur = out_start;
		      push_replacement_text (pfile, node);
		      lex_state = ls_none;
		      goto new_context;
		    }
		}
	      else if (macro && node->type == NT_MACRO_ARG)
		{
		  /* Found a parameter in the replacement text of a
		     #define.  Remove its name from the output.  */
		  pfile->out.cur = out_start;
		  save_replacement_text (pfile, macro, node->value.arg_index);
		  out = pfile->out.base;
		}
	      else if (lex_state == ls_hash)
		{
		  lex_state = ls_predicate;
		  continue;
		}
	      else if (pfile->state.in_expression
		       && node == pfile->spec_nodes.n_defined)
		{
		  lex_state = ls_defined;
		  continue;
		}
	    }
	  break;

	case '(':
	  if (quote == 0)
	    {
	      paren_depth++;
	      if (lex_state == ls_fun_open)
		{
		  if (recursive_macro (pfile, fmacro.node))
		    lex_state = ls_none;
		  else
		    {
		      /* Start collecting arguments.  OUT is rewound to
			 where the macro name was written, so the
			 argument text overwrites the name and the '('
			 in the output buffer; that position is also
			 the start of the first argument.  */
		      lex_state = ls_fun_close;
		      paren_depth = 1;
		      out = pfile->out.base + fmacro.offset;
		      fmacro.args[0] = fmacro.offset;
		    }
		}
	      else if (lex_state == ls_predicate)
		lex_state = ls_answer;
	      else if (lex_state == ls_defined)
		lex_state = ls_defined_close;
	    }
	  break;

	case ',':
	  /* A comma at the outermost paren level of an invocation
	     starts the next argument.  */
	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
	    save_argument (&fmacro, out - pfile->out.base);
	  break;

	case ')':
	  if (quote == 0)
	    {
	      /* NOTE(review): paren_depth is unsigned, so an unmatched
		 ')' seen at depth zero wraps it around.  It is reset
		 to 1 whenever an argument list is opened.  */
	      paren_depth--;
	      if (lex_state == ls_fun_close && paren_depth == 0)
		{
		  if (cpp_builtin_macro_p (fmacro.node))
		    {
		      /* Handle builtin function-like macros like
			 __has_attribute.  The already parsed arguments
			 are put into a buffer, which is then preprocessed
			 and the result is fed to _cpp_push_text_context
			 with disabled expansion, where the ISO preprocessor
			 parses it.  While in traditional preprocessing
			 macro arguments aren't immediately expanded, they in
			 the end are because the macro with replaced arguments
			 is preprocessed again.  For the builtin function-like
			 macros we need the argument immediately though,
			 if we don't preprocess them, they would behave
			 very differently from ISO preprocessor handling
			 of those builtin macros.  So, this handling is
			 more similar to traditional preprocessing of
			 #if directives, where we also keep preprocessing
			 until everything is expanded, and then feed the
			 result with disabled expansion to ISO preprocessor
			 for handling the directives.  */
		      lex_state = ls_none;
		      save_argument (&fmacro, out - pfile->out.base);
		      /* A dummy macro that only carries the parameter
			 count, for the argument count check.  */
		      cpp_macro m;
		      memset (&m, '\0', sizeof (m));
		      m.paramc = fmacro.paramc;
		      if (_cpp_arguments_ok (pfile, &m, fmacro.node,
					     fmacro.argc))
			{
			  size_t len = fmacro.args[1] - fmacro.args[0];
			  uchar *buf;

			  /* Remove the macro's invocation from the
			     output, and push its replacement text.  */
			  pfile->out.cur = pfile->out.base + fmacro.offset;
			  CUR (context) = cur;
			  /* Copy the argument text, prefixed with '(' and
			     terminated by a newline, out of the output
			     buffer so that it can be rescanned.  */
			  buf = _cpp_unaligned_alloc (pfile, len + 2);
			  buf[0] = '(';
			  memcpy (buf + 1, pfile->out.base + fmacro.args[0],
				  len);
			  buf[len + 1] = '\n';

			  /* Save the buffer and context state that is
			     overridden for the nested scan, and point
			     the buffer at the copy.  */
			  const unsigned char *ctx_rlimit = RLIMIT (context);
			  const unsigned char *saved_cur = pfile->buffer->cur;
			  const unsigned char *saved_rlimit
			    = pfile->buffer->rlimit;
			  const unsigned char *saved_line_base
			    = pfile->buffer->line_base;
			  bool saved_need_line = pfile->buffer->need_line;
			  cpp_buffer *saved_overlaid_buffer
			    = pfile->overlaid_buffer;
			  pfile->buffer->cur = buf;
			  pfile->buffer->line_base = buf;
			  pfile->buffer->rlimit = buf + len + 1;
			  pfile->buffer->need_line = false;
			  pfile->overlaid_buffer = pfile->buffer;
			  bool saved_in_directive = pfile->state.in_directive;
			  pfile->state.in_directive = true;
			  cpp_context *saved_prev_context = context->prev;
			  context->prev = NULL;

			  /* Expand the argument; the result is appended
			     to the output buffer.  */
			  _cpp_scan_out_logical_line (pfile, NULL, true);

			  /* Newline-terminate the expanded text and make
			     it the buffer contents that
			     _cpp_builtin_macro_text reads.  */
			  pfile->state.in_directive = saved_in_directive;
			  check_output_buffer (pfile, 1);
			  *pfile->out.cur = '\n';
			  pfile->buffer->cur = pfile->out.base + fmacro.offset;
			  pfile->buffer->line_base = pfile->buffer->cur;
			  pfile->buffer->rlimit = pfile->out.cur;
			  CUR (context) = pfile->buffer->cur;
			  RLIMIT (context) = pfile->buffer->rlimit;

			  pfile->state.prevent_expansion++;
			  const uchar *text
			    = _cpp_builtin_macro_text (pfile, fmacro.node);
			  pfile->state.prevent_expansion--;

			  /* Put back everything saved above and discard
			     the expanded argument from the output.  */
			  context->prev = saved_prev_context;
			  pfile->buffer->cur = saved_cur;
			  pfile->buffer->rlimit = saved_rlimit;
			  pfile->buffer->line_base = saved_line_base;
			  pfile->buffer->need_line = saved_need_line;
			  pfile->overlaid_buffer = saved_overlaid_buffer;
			  pfile->out.cur = pfile->out.base + fmacro.offset;
			  CUR (context) = cur;
			  RLIMIT (context) = ctx_rlimit;
			  /* Push a newline-terminated copy of the
			     builtin's result as a new context.  */
			  len = ustrlen (text);
			  buf = _cpp_unaligned_alloc (pfile, len + 1);
			  memcpy (buf, text, len);
			  buf[len] = '\n';
			  text = buf;
			  _cpp_push_text_context (pfile, fmacro.node,
						  text, len);
			  goto new_context;
			}
		      break;
		    }

		  cpp_macro *m = fmacro.node->value.macro;

		  m->used = 1;
		  lex_state = ls_none;
		  save_argument (&fmacro, out - pfile->out.base);

		  /* A single zero-length argument is no argument.  */
		  if (fmacro.argc == 1
		      && m->paramc == 0
		      && out == pfile->out.base + fmacro.offset + 1)
		    fmacro.argc = 0;

		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
		    {
		      /* Remove the macro's invocation from the
			 output, and push its replacement text.  */
		      pfile->out.cur = pfile->out.base + fmacro.offset;
		      CUR (context) = cur;
		      replace_args_and_push (pfile, &fmacro);
		      goto new_context;
		    }
		}
	      else if (lex_state == ls_answer || lex_state == ls_defined_close)
		lex_state = ls_none;
	    }
	  break;

	case '#':
	  if (cur - 1 == start_of_input_line
	      /* A '#' from a macro doesn't start a directive.  */
	      && !pfile->context->prev
	      && !pfile->state.in_directive)
	    {
	      /* A directive.  With the way _cpp_handle_directive
		 currently works, we only want to call it if either we
		 know the directive is OK, or we want it to fail and
		 be removed from the output.  If we want it to be
		 passed through (the assembler case) then we must not
		 call _cpp_handle_directive.  */
	      pfile->out.cur = out;
	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
	      out = pfile->out.cur;

	      if (*cur == '\n')
		{
		  /* Null directive.  Ignore it and don't invalidate
		     the MI optimization.  */
		  pfile->buffer->need_line = true;
		  CPP_INCREMENT_LINE (pfile, 0);
		  result = false;
		  goto done;
		}
	      else
		{
		  bool do_it = false;

		  if (is_numstart (*cur)
		      && CPP_OPTION (pfile, lang) != CLK_ASM)
		    do_it = true;
		  else if (is_idstart (*cur))
		    /* Check whether we know this directive, but don't
		       advance.  */
		    do_it = lex_identifier (pfile, cur)->is_directive;

		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
		    {
		      /* This is a kludge.  We want to have the ISO
			 preprocessor lex the next token.  */
		      pfile->buffer->cur = cur;
		      _cpp_handle_directive (pfile, false /* indented */);
		      result = false;
		      goto done;
		    }
		}
	    }

	  if (pfile->state.in_expression)
	    {
	      lex_state = ls_hash;
	      continue;
	    }
	  break;

	default:
	  break;
	}

      /* Non-whitespace disables MI optimization and stops treating
	 '<' as a quote in #include.  */
      header_ok = false;
      if (!pfile->state.in_directive)
	pfile->mi_valid = false;

      if (lex_state == ls_none)
	continue;

      /* Some of these transitions of state are syntax errors.  The
	 ISO preprocessor will issue errors later.  */
      if (lex_state == ls_fun_open)
	/* Missing '('.  */
	lex_state = ls_none;
      else if (lex_state == ls_hash
	       || lex_state == ls_predicate
	       || lex_state == ls_defined)
	lex_state = ls_none;

      /* ls_answer and ls_defined_close keep going until ')'.  */
    }

  /* Common exit: release the argument offset buffer, and diagnose an
     invocation whose closing ')' was never found.  */
 done:
  if (fmacro.buff)
    _cpp_release_buff (pfile, fmacro.buff);

  if (lex_state == ls_fun_close)
    cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
			 "unterminated argument list invoking macro \"%s\"",
			 NODE_NAME (fmacro.node));
  return result;
}
821
822/* Push a context holding the replacement text of the macro NODE on
823   the context stack.  NODE is either object-like, or a function-like
824   macro with no arguments.  */
825static void
826push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
827{
828  size_t len;
829  const uchar *text;
830  uchar *buf;
831
832  if (cpp_builtin_macro_p (node))
833    {
834      text = _cpp_builtin_macro_text (pfile, node);
835      len = ustrlen (text);
836      buf = _cpp_unaligned_alloc (pfile, len + 1);
837      memcpy (buf, text, len);
838      buf[len] = '\n';
839      text = buf;
840    }
841  else
842    {
843      cpp_macro *macro = node->value.macro;
844      macro->used = 1;
845      text = macro->exp.text;
846      len = macro->count;
847    }
848
849  _cpp_push_text_context (pfile, node, text, len);
850}
851
852/* Returns TRUE if traditional macro recursion is detected.  */
853static bool
854recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
855{
856  bool recursing = !!(node->flags & NODE_DISABLED);
857
858  /* Object-like macros that are already expanding are necessarily
859     recursive.
860
861     However, it is possible to have traditional function-like macros
862     that are not infinitely recursive but recurse to any given depth.
863     Further, it is easy to construct examples that get ever longer
864     until the point they stop recursing.  So there is no easy way to
865     detect true recursion; instead we assume any expansion more than
866     20 deep since the first invocation of this macro must be
867     recursing.  */
868  if (recursing && fun_like_macro (node))
869    {
870      size_t depth = 0;
871      cpp_context *context = pfile->context;
872
873      do
874	{
875	  depth++;
876	  if (context->c.macro == node && depth > 20)
877	    break;
878	  context = context->prev;
879	}
880      while (context);
881      recursing = context != NULL;
882    }
883
884  if (recursing)
885    cpp_error (pfile, CPP_DL_ERROR,
886	       "detected recursion whilst expanding macro \"%s\"",
887	       NODE_NAME (node));
888
889  return recursing;
890}
891
892/* Return the length of the replacement text of a function-like or
893   object-like non-builtin macro.  */
894size_t
895_cpp_replacement_text_len (const cpp_macro *macro)
896{
897  size_t len;
898
899  if (macro->fun_like && (macro->paramc != 0))
900    {
901      const uchar *exp;
902
903      len = 0;
904      for (exp = macro->exp.text;;)
905	{
906	  struct block *b = (struct block *) exp;
907
908	  len += b->text_len;
909	  if (b->arg_index == 0)
910	    break;
911	  len += NODE_LEN (macro->parm.params[b->arg_index - 1]);
912	  exp += BLOCK_LEN (b->text_len);
913	}
914    }
915  else
916    len = macro->count;
917
918  return len;
919}
920
921/* Copy the replacement text of MACRO to DEST, which must be of
922   sufficient size.  It is not NUL-terminated.  The next character is
923   returned.  */
924uchar *
925_cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
926{
927  if (macro->fun_like && (macro->paramc != 0))
928    {
929      const uchar *exp;
930
931      for (exp = macro->exp.text;;)
932	{
933	  struct block *b = (struct block *) exp;
934	  cpp_hashnode *param;
935
936	  memcpy (dest, b->text, b->text_len);
937	  dest += b->text_len;
938	  if (b->arg_index == 0)
939	    break;
940	  param = macro->parm.params[b->arg_index - 1];
941	  memcpy (dest, NODE_NAME (param), NODE_LEN (param));
942	  dest += NODE_LEN (param);
943	  exp += BLOCK_LEN (b->text_len);
944	}
945    }
946  else
947    {
948      memcpy (dest, macro->exp.text, macro->count);
949      dest += macro->count;
950    }
951
952  return dest;
953}
954
955/* Push a context holding the replacement text of the macro NODE on
956   the context stack.  NODE is either object-like, or a function-like
957   macro with no arguments.  */
958static void
959replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
960{
961  cpp_macro *macro = fmacro->node->value.macro;
962
963  if (macro->paramc == 0)
964    push_replacement_text (pfile, fmacro->node);
965  else
966    {
967      const uchar *exp;
968      uchar *p;
969      _cpp_buff *buff;
970      size_t len = 0;
971      int cxtquote = 0;
972
973      /* Get an estimate of the length of the argument-replaced text.
974	 This is a worst case estimate, assuming that every replacement
975	 text character needs quoting.  */
976      for (exp = macro->exp.text;;)
977	{
978	  struct block *b = (struct block *) exp;
979
980	  len += b->text_len;
981	  if (b->arg_index == 0)
982	    break;
983	  len += 2 * (fmacro->args[b->arg_index]
984		      - fmacro->args[b->arg_index - 1] - 1);
985	  exp += BLOCK_LEN (b->text_len);
986	}
987
988      /* Allocate room for the expansion plus \n.  */
989      buff = _cpp_get_buff (pfile, len + 1);
990
991      /* Copy the expansion and replace arguments.  */
992      /* Accumulate actual length, including quoting as necessary */
993      p = BUFF_FRONT (buff);
994      len = 0;
995      for (exp = macro->exp.text;;)
996	{
997	  struct block *b = (struct block *) exp;
998	  size_t arglen;
999	  int argquote;
1000	  uchar *base;
1001	  uchar *in;
1002
1003	  len += b->text_len;
1004	  /* Copy the non-argument text literally, keeping
1005	     track of whether matching quotes have been seen. */
1006	  for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
1007	    {
1008	      if (*in == '"')
1009		cxtquote = ! cxtquote;
1010	      *p++ = *in++;
1011	    }
1012	  /* Done if no more arguments */
1013	  if (b->arg_index == 0)
1014	    break;
1015	  arglen = (fmacro->args[b->arg_index]
1016		    - fmacro->args[b->arg_index - 1] - 1);
1017	  base = pfile->out.base + fmacro->args[b->arg_index - 1];
1018	  in = base;
1019#if 0
1020	  /* Skip leading whitespace in the text for the argument to
1021	     be substituted. To be compatible with gcc 2.95, we would
1022	     also need to trim trailing whitespace. Gcc 2.95 trims
1023	     leading and trailing whitespace, which may be a bug.  The
1024	     current gcc testsuite explicitly checks that this leading
1025	     and trailing whitespace in actual arguments is
1026	     preserved. */
1027	  while (arglen > 0 && is_space (*in))
1028	    {
1029	      in++;
1030	      arglen--;
1031	    }
1032#endif
1033	  for (argquote = 0; arglen > 0; arglen--)
1034	    {
1035	      if (cxtquote && *in == '"')
1036		{
1037		  if (in > base && *(in-1) != '\\')
1038		    argquote = ! argquote;
1039		  /* Always add backslash before double quote if argument
1040		     is expanded in a quoted context */
1041		  *p++ = '\\';
1042		  len++;
1043		}
1044	      else if (cxtquote && argquote && *in == '\\')
1045		{
1046		  /* Always add backslash before a backslash in an argument
1047		     that is expanded in a quoted context and also in the
1048		     range of a quoted context in the argument itself. */
1049		  *p++ = '\\';
1050		  len++;
1051		}
1052	      *p++ = *in++;
1053	      len++;
1054	    }
1055	  exp += BLOCK_LEN (b->text_len);
1056	}
1057
1058      /* \n-terminate.  */
1059      *p = '\n';
1060      _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
1061
1062      /* So we free buffer allocation when macro is left.  */
1063      pfile->context->buff = buff;
1064    }
1065}
1066
1067/* Read and record the parameters, if any, of a function-like macro
1068   definition.  Destroys pfile->out.cur.
1069
1070   Returns true on success, false on failure (syntax error or a
1071   duplicate parameter).  On success, CUR (pfile->context) is just
1072   past the closing parenthesis.  */
1073static bool
1074scan_parameters (cpp_reader *pfile, unsigned *n_ptr)
1075{
1076  const uchar *cur = CUR (pfile->context) + 1;
1077  bool ok;
1078
1079  unsigned nparms = 0;
1080  for (;;)
1081    {
1082      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
1083
1084      if (is_idstart (*cur))
1085	{
1086	  struct cpp_hashnode *id = lex_identifier (pfile, cur);
1087	  ok = false;
1088	  if (!_cpp_save_parameter (pfile, nparms, id, id))
1089	    break;
1090	  nparms++;
1091	  cur = skip_whitespace (pfile, CUR (pfile->context),
1092				 true /* skip_comments */);
1093	  if (*cur == ',')
1094	    {
1095	      cur++;
1096	      continue;
1097	    }
1098	  ok = (*cur == ')');
1099	  break;
1100	}
1101
1102      ok = (*cur == ')' && !nparms);
1103      break;
1104    }
1105
1106  *n_ptr = nparms;
1107
1108  if (!ok)
1109    cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
1110
1111  CUR (pfile->context) = cur + (*cur == ')');
1112
1113  return ok;
1114}
1115
1116/* Save the text from pfile->out.base to pfile->out.cur as
1117   the replacement text for the current macro, followed by argument
1118   ARG_INDEX, with zero indicating the end of the replacement
1119   text.  */
1120static void
1121save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
1122		       unsigned int arg_index)
1123{
1124  size_t len = pfile->out.cur - pfile->out.base;
1125  uchar *exp;
1126
1127  if (macro->paramc == 0)
1128    {
1129      /* Object-like and function-like macros without parameters
1130	 simply store their \n-terminated replacement text.  */
1131      exp = _cpp_unaligned_alloc (pfile, len + 1);
1132      memcpy (exp, pfile->out.base, len);
1133      exp[len] = '\n';
1134      macro->exp.text = exp;
1135      macro->count = len;
1136    }
1137  else
1138    {
1139      /* Store the text's length (unsigned int), the argument index
1140	 (unsigned short, base 1) and then the text.  */
1141      size_t blen = BLOCK_LEN (len);
1142      struct block *block;
1143
1144      if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1145	_cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1146
1147      exp = BUFF_FRONT (pfile->a_buff);
1148      block = (struct block *) (exp + macro->count);
1149      macro->exp.text = exp;
1150
1151      /* Write out the block information.  */
1152      block->text_len = len;
1153      block->arg_index = arg_index;
1154      memcpy (block->text, pfile->out.base, len);
1155
1156      /* Lex the rest into the start of the output buffer.  */
1157      pfile->out.cur = pfile->out.base;
1158
1159      macro->count += blen;
1160
1161      /* If we've finished, commit the memory.  */
1162      if (arg_index == 0)
1163	BUFF_FRONT (pfile->a_buff) += macro->count;
1164    }
1165}
1166
1167/* Analyze and save the replacement text of a macro.  Returns true on
1168   success.  */
1169cpp_macro *
1170_cpp_create_trad_definition (cpp_reader *pfile)
1171{
1172  const uchar *cur;
1173  uchar *limit;
1174  cpp_context *context = pfile->context;
1175  unsigned nparms = 0;
1176  int fun_like = 0;
1177  cpp_hashnode **params = NULL;
1178
1179  /* The context has not been set up for command line defines, and CUR
1180     has not been updated for the macro name for in-file defines.  */
1181  pfile->out.cur = pfile->out.base;
1182  CUR (context) = pfile->buffer->cur;
1183  RLIMIT (context) = pfile->buffer->rlimit;
1184  check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1185
1186  /* Is this a function-like macro?  */
1187  if (* CUR (context) == '(')
1188    {
1189      fun_like = +1;
1190      if (scan_parameters (pfile, &nparms))
1191	params = (cpp_hashnode **)_cpp_commit_buff
1192	  (pfile, sizeof (cpp_hashnode *) * nparms);
1193      else
1194	fun_like = -1;
1195    }
1196
1197  cpp_macro *macro = NULL;
1198
1199  if (fun_like >= 0)
1200    {
1201      macro = _cpp_new_macro (pfile, cmk_traditional,
1202			      _cpp_aligned_alloc (pfile, sizeof (cpp_macro)));
1203      macro->parm.params = params;
1204      macro->paramc = nparms;
1205      macro->fun_like = fun_like != 0;
1206    }
1207
1208  /* Skip leading whitespace in the replacement text.  */
1209  pfile->buffer->cur
1210    = skip_whitespace (pfile, CUR (context),
1211		       CPP_OPTION (pfile, discard_comments_in_macro_exp));
1212
1213  pfile->state.prevent_expansion++;
1214  _cpp_scan_out_logical_line (pfile, macro, false);
1215  pfile->state.prevent_expansion--;
1216
1217  _cpp_unsave_parameters (pfile, nparms);
1218
1219  if (macro)
1220    {
1221      /* Skip trailing white space.  */
1222      cur = pfile->out.base;
1223      limit = pfile->out.cur;
1224      while (limit > cur && is_space (limit[-1]))
1225	limit--;
1226      pfile->out.cur = limit;
1227      save_replacement_text (pfile, macro, 0);
1228    }
1229
1230  return macro;
1231}
1232
1233/* Copy SRC of length LEN to DEST, but convert all contiguous
1234   whitespace to a single space, provided it is not in quotes.  The
1235   quote currently in effect is pointed to by PQUOTE, and is updated
1236   by the function.  Returns the number of bytes copied.  */
1237static size_t
1238canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1239{
1240  uchar *orig_dest = dest;
1241  uchar quote = *pquote;
1242
1243  while (len)
1244    {
1245      if (is_space (*src) && !quote)
1246	{
1247	  do
1248	    src++, len--;
1249	  while (len && is_space (*src));
1250	  *dest++ = ' ';
1251	}
1252      else
1253	{
1254	  if (*src == '\'' || *src == '"')
1255	    {
1256	      if (!quote)
1257		quote = *src;
1258	      else if (quote == *src)
1259		quote = 0;
1260	    }
1261	  *dest++ = *src++, len--;
1262	}
1263    }
1264
1265  *pquote = quote;
1266  return dest - orig_dest;
1267}
1268
1269/* Returns true if MACRO1 and MACRO2 have expansions different other
1270   than in the form of their whitespace.  */
1271bool
1272_cpp_expansions_different_trad (const cpp_macro *macro1,
1273				const cpp_macro *macro2)
1274{
1275  uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1276  uchar *p2 = p1 + macro1->count;
1277  uchar quote1 = 0, quote2 = 0;
1278  bool mismatch;
1279  size_t len1, len2;
1280
1281  if (macro1->paramc > 0)
1282    {
1283      const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1284
1285      mismatch = true;
1286      for (;;)
1287	{
1288	  struct block *b1 = (struct block *) exp1;
1289	  struct block *b2 = (struct block *) exp2;
1290
1291	  if (b1->arg_index != b2->arg_index)
1292	    break;
1293
1294	  len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1295	  len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1296	  if (len1 != len2 || memcmp (p1, p2, len1))
1297	    break;
1298	  if (b1->arg_index == 0)
1299	    {
1300	      mismatch = false;
1301	      break;
1302	    }
1303	  exp1 += BLOCK_LEN (b1->text_len);
1304	  exp2 += BLOCK_LEN (b2->text_len);
1305	}
1306    }
1307  else
1308    {
1309      len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1310      len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1311      mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1312    }
1313
1314  free (p1);
1315  return mismatch;
1316}
1317