1/* CPP Library - lexical analysis.
2   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3   Contributed by Per Bothner, 1994-95.
4   Based on CCCP program by Paul Rubin, June 1986
5   Adapted to ANSI C, Richard Stallman, Jan 1987
6   Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21
22#include "config.h"
23#include "system.h"
24#include "cpplib.h"
25#include "internal.h"
26
/* Spelling category of a token type.  One of these is stored in the
   category field of every token_spellings entry.  */
enum spell_type
{
  /* Used by every OP() entry of the table: the entry's name is the
     operator's literal spelling.  */
  SPELL_OPERATOR = 0,
  /* The remaining categories are selected by TK() table entries via
     token pasting (SPELL_ ## s).  NOTE(review): which token types use
     which category is decided by TTYPE_TABLE, defined elsewhere.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
/* One entry of the token_spellings table, indexed by token type.  */
struct token_spelling
{
  /* Spelling category of the token type.  */
  enum spell_type category;
  /* For OP() entries the operator's spelling; for TK() entries the
     stringified enumerator name.  */
  const unsigned char *name;
};
40
/* Spellings of the six digraph forms, as unsigned character strings.
   NOTE(review): the index order presumably matches how callers map
   digraph tokens onto this table -- confirm against the users of the
   table before reordering.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
43
/* Build the per-token-type spelling table by expanding TTYPE_TABLE
   with local definitions of OP and TK:
     OP(e, s) yields an operator entry whose name is the string S;
     TK(e, s) yields an entry of category SPELL_<S> whose name is the
	      stringified enumerator E.
   Both helper macros are undefined again once the table is built.  */
#define OP(e, s) { SPELL_OPERATOR, U s  },
#define TK(e, s) { SPELL_ ## s,    U #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Accessors for the spelling category and name of TOKEN's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
/* Forward declarations of file-local helpers.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
			    unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

/* Not defined in the portion of the file that precedes its uses.  */
static _cpp_buff *new_buff (size_t);
65
66
67/* Utility routine:
68
69   Compares, the token TOKEN to the NUL-terminated string STRING.
70   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
71int
72cpp_ideq (const cpp_token *token, const char *string)
73{
74  if (token->type != CPP_NAME)
75    return 0;
76
77  return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78}
79
80/* Record a note TYPE at byte POS into the current cleaned logical
81   line.  */
82static void
83add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84{
85  if (buffer->notes_used == buffer->notes_cap)
86    {
87      buffer->notes_cap = buffer->notes_cap * 2 + 200;
88      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89                                  buffer->notes_cap);
90    }
91
92  buffer->notes[buffer->notes_used].pos = pos;
93  buffer->notes[buffer->notes_used].type = type;
94  buffer->notes_used++;
95}
96
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   The line starting at buffer->next_line is cleaned in place.  Line
   notes are recorded (via add_line_note) for every escaped newline and
   every trigraph encountered, followed by one sentinel note.  On exit
   buffer->cur and buffer->line_base point at the start of the line,
   the cleaned line is terminated by a '\n', and buffer->next_line
   points just past the raw text that was consumed.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  const uchar *s;	/* Read position in the raw text.  */
  uchar c, *d, *p;	/* D is the write position in the cleaned line.  */

  buffer = pfile->buffer;
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  /* Start one before the line; both loops below pre-increment S.  */
  s = buffer->next_line - 1;

  if (!buffer->from_stage3)
    {
      /* Most recent backslash seen on the fast path, if any.  */
      const uchar *pbackslash = NULL;

      /* Short circuit for the common case of an un-escaped line with
	 no trigraphs.  The primary win here is by not writing any
	 data back to memory until we have to.  */
      for (;;)
	{
	  c = *++s;
	  if (__builtin_expect (c == '\n', false)
	      || __builtin_expect (c == '\r', false))
	    {
	      d = (uchar *) s;

	      /* Reached the end of the buffer: nothing more to examine.  */
	      if (__builtin_expect (s == buffer->rlimit, false))
		goto done;

	      /* DOS line ending? */
	      if (__builtin_expect (c == '\r', false)
		  && s[1] == '\n')
		{
		  s++;
		  if (s == buffer->rlimit)
		    goto done;
		}

	      /* No backslash on this line, so it cannot be escaped.  */
	      if (__builtin_expect (pbackslash == NULL, true))
		goto done;

	      /* Check for escaped newline.  Skip back over horizontal
		 space; the newline is escaped only if the last
		 backslash seen immediately precedes that space.  */
	      p = d;
	      while (is_nvspace (p[-1]))
		p--;
	      if (p - 1 != pbackslash)
		goto done;

	      /* Have an escaped newline; process it and proceed to
		 the slow path.  The note type is ' ' when space
		 separated the backslash from the newline, '\\'
		 otherwise.  */
	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
	      /* The slow loop pre-increments D, so its first write
		 lands on the backslash at P - 1.  */
	      d = p - 2;
	      /* Temporarily marks where the continuation starts in the
		 cleaned line; the slow loop uses it as the lower bound
		 of its backslash search.  */
	      buffer->next_line = p - 1;
	      break;
	    }
	  if (__builtin_expect (c == '\\', false))
	    pbackslash = s;
	  else if (__builtin_expect (c == '?', false)
		   && __builtin_expect (s[1] == '?', false)
		   && _cpp_trigraph_map[s[2]])
	    {
	      /* Have a trigraph.  We may or may not have to convert
		 it.  Add a line note regardless, for -Wtrigraphs.  */
	      add_line_note (buffer, s, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  /* We do, and that means we have to switch to the
		     slow path.  The replacement character overwrites
		     the first '?', and the other two bytes are
		     skipped.  */
		  d = (uchar *) s;
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		  break;
		}
	    }
	}


      /* Slow path: copy the remainder of the logical line from S down
	 to D one character at a time, splicing out escaped newlines
	 and converted trigraphs.  */
      for (;;)
	{
	  c = *++s;
	  *++d = c;

	  if (c == '\n' || c == '\r')
	    {
	      /* Handle DOS line endings.  */
	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
		s++;
	      if (s == buffer->rlimit)
		break;

	      /* Escaped?  Look back over horizontal space, but never
		 before the start of the current continuation.  */
	      p = d;
	      while (p != buffer->next_line && is_nvspace (p[-1]))
		p--;
	      if (p == buffer->next_line || p[-1] != '\\')
		break;

	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
	      /* Resume writing over the backslash.  */
	      d = p - 2;
	      buffer->next_line = p - 1;
	    }
	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
	    {
	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
	      add_line_note (buffer, d, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		}
	    }
	}
    }
  else
    {
      /* Buffer is flagged from_stage3: no trigraph or escaped-newline
	 processing is done, only the end of the line is located.  */
      do
	s++;
      while (*s != '\n' && *s != '\r');
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
	s++;
    }

 done:
  /* Terminate the cleaned line; this also turns a '\r' terminator
     into '\n'.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
232
233/* Return true if the trigraph indicated by NOTE should be warned
234   about in a comment.  */
235static bool
236warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
237{
238  const uchar *p;
239
240  /* Within comments we don't warn about trigraphs, unless the
241     trigraph forms an escaped newline, as that may change
242     behavior.  */
243  if (note->type != '/')
244    return false;
245
246  /* If -trigraphs, then this was an escaped newline iff the next note
247     is coincident.  */
248  if (CPP_OPTION (pfile, trigraphs))
249    return note[1].pos == note->pos;
250
251  /* Otherwise, see if this forms an escaped newline.  */
252  p = note->pos + 3;
253  while (is_nvspace (*p))
254    p++;
255
256  /* There might have been escaped newlines between the trigraph and the
257     newline we found.  Hence the position test.  */
258  return (*p == '\n' && p < note[1].pos);
259}
260
261/* Process the notes created by add_line_note as far as the current
262   location.  */
263void
264_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
265{
266  cpp_buffer *buffer = pfile->buffer;
267
268  for (;;)
269    {
270      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271      unsigned int col;
272
273      if (note->pos > buffer->cur)
274	break;
275
276      buffer->cur_note++;
277      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
278
279      if (note->type == '\\' || note->type == ' ')
280	{
281	  if (note->type == ' ' && !in_comment)
282	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
283				 "backslash and newline separated by space");
284
285	  if (buffer->next_line > buffer->rlimit)
286	    {
287	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
288				   "backslash-newline at end of file");
289	      /* Prevent "no newline at end of file" warning.  */
290	      buffer->next_line = buffer->rlimit;
291	    }
292
293	  buffer->line_base = note->pos;
294	  CPP_INCREMENT_LINE (pfile, 0);
295	}
296      else if (_cpp_trigraph_map[note->type])
297	{
298	  if (CPP_OPTION (pfile, warn_trigraphs)
299	      && (!in_comment || warn_in_comment (pfile, note)))
300	    {
301	      if (CPP_OPTION (pfile, trigraphs))
302		cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
303				     "trigraph ??%c converted to %c",
304				     note->type,
305				     (int) _cpp_trigraph_map[note->type]);
306	      else
307		{
308		  cpp_error_with_line
309		    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
310		     "trigraph ??%c ignored, use -trigraphs to enable",
311		     note->type);
312		}
313	    }
314	}
315      else
316	abort ();
317    }
318}
319
320/* Skip a C-style block comment.  We find the end of the comment by
321   seeing if an asterisk is before every '/' we encounter.  Returns
322   nonzero if comment terminated by EOF, zero otherwise.
323
324   Buffer->cur points to the initial asterisk of the comment.  */
325bool
326_cpp_skip_block_comment (cpp_reader *pfile)
327{
328  cpp_buffer *buffer = pfile->buffer;
329  const uchar *cur = buffer->cur;
330  uchar c;
331
332  cur++;
333  if (*cur == '/')
334    cur++;
335
336  for (;;)
337    {
338      /* People like decorating comments with '*', so check for '/'
339	 instead for efficiency.  */
340      c = *cur++;
341
342      if (c == '/')
343	{
344	  if (cur[-2] == '*')
345	    break;
346
347	  /* Warn about potential nested comments, but not if the '/'
348	     comes immediately before the true comment delimiter.
349	     Don't bother to get it right across escaped newlines.  */
350	  if (CPP_OPTION (pfile, warn_comments)
351	      && cur[0] == '*' && cur[1] != '/')
352	    {
353	      buffer->cur = cur;
354	      cpp_error_with_line (pfile, CPP_DL_WARNING,
355				   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
356				   "\"/*\" within comment");
357	    }
358	}
359      else if (c == '\n')
360	{
361	  unsigned int cols;
362	  buffer->cur = cur - 1;
363	  _cpp_process_line_notes (pfile, true);
364	  if (buffer->next_line >= buffer->rlimit)
365	    return true;
366	  _cpp_clean_line (pfile);
367
368	  cols = buffer->next_line - buffer->line_base;
369	  CPP_INCREMENT_LINE (pfile, cols);
370
371	  cur = buffer->cur;
372	}
373    }
374
375  buffer->cur = cur;
376  _cpp_process_line_notes (pfile, true);
377  return false;
378}
379
380/* Skip a C++ line comment, leaving buffer->cur pointing to the
381   terminating newline.  Handles escaped newlines.  Returns nonzero
382   if a multiline comment.  */
383static int
384skip_line_comment (cpp_reader *pfile)
385{
386  cpp_buffer *buffer = pfile->buffer;
387  unsigned int orig_line = pfile->line_table->highest_line;
388
389  while (*buffer->cur != '\n')
390    buffer->cur++;
391
392  _cpp_process_line_notes (pfile, true);
393  return orig_line != pfile->line_table->highest_line;
394}
395
396/* Skips whitespace, saving the next non-whitespace character.  */
397static void
398skip_whitespace (cpp_reader *pfile, cppchar_t c)
399{
400  cpp_buffer *buffer = pfile->buffer;
401  bool saw_NUL = false;
402
403  do
404    {
405      /* Horizontal space always OK.  */
406      if (c == ' ' || c == '\t')
407	;
408      /* Just \f \v or \0 left.  */
409      else if (c == '\0')
410	saw_NUL = true;
411      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
413			     CPP_BUF_COL (buffer),
414			     "%s in preprocessing directive",
415			     c == '\f' ? "form feed" : "vertical tab");
416
417      c = *buffer->cur++;
418    }
419  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
420  while (is_nvspace (c));
421
422  if (saw_NUL)
423    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
424
425  buffer->cur--;
426}
427
428/* See if the characters of a number token are valid in a name (no
429   '.', '+' or '-').  */
430static int
431name_p (cpp_reader *pfile, const cpp_string *string)
432{
433  unsigned int i;
434
435  for (i = 0; i < string->len; i++)
436    if (!is_idchar (string->text[i]))
437      return 0;
438
439  return 1;
440}
441
442/* After parsing an identifier or other sequence, produce a warning about
443   sequences not in NFC/NFKC.  */
444static void
445warn_about_normalization (cpp_reader *pfile,
446			  const cpp_token *token,
447			  const struct normalize_state *s)
448{
449  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450      && !pfile->state.skipping)
451    {
452      /* Make sure that the token is printed using UCNs, even
453	 if we'd otherwise happily print UTF-8.  */
454      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
455      size_t sz;
456
457      sz = cpp_spell_token (pfile, token, buf, false) - buf;
458      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
460			     "`%.*s' is not in NFKC", (int) sz, buf);
461      else
462	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
463			     "`%.*s' is not in NFC", (int) sz, buf);
464    }
465}
466
467/* Returns TRUE if the sequence starting at buffer->cur is invalid in
468   an identifier.  FIRST is TRUE if this starts an identifier.  */
469static bool
470forms_identifier_p (cpp_reader *pfile, int first,
471		    struct normalize_state *state)
472{
473  cpp_buffer *buffer = pfile->buffer;
474
475  if (*buffer->cur == '$')
476    {
477      if (!CPP_OPTION (pfile, dollars_in_ident))
478	return false;
479
480      buffer->cur++;
481      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
482	{
483	  CPP_OPTION (pfile, warn_dollars) = 0;
484	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
485	}
486
487      return true;
488    }
489
490  /* Is this a syntactically valid UCN?  */
491  if (CPP_OPTION (pfile, extended_identifiers)
492      && *buffer->cur == '\\'
493      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
494    {
495      buffer->cur += 2;
496      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497			  state))
498	return true;
499      buffer->cur -= 2;
500    }
501
502  return false;
503}
504
505/* Lex an identifier starting at BUFFER->CUR - 1.  */
506static cpp_hashnode *
507lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508		struct normalize_state *nst)
509{
510  cpp_hashnode *result;
511  const uchar *cur;
512  unsigned int len;
513  unsigned int hash = HT_HASHSTEP (0, *base);
514
515  cur = pfile->buffer->cur;
516  if (! starts_ucn)
517    while (ISIDNUM (*cur))
518      {
519	hash = HT_HASHSTEP (hash, *cur);
520	cur++;
521      }
522  pfile->buffer->cur = cur;
523  if (starts_ucn || forms_identifier_p (pfile, false, nst))
524    {
525      /* Slower version for identifiers containing UCNs (or $).  */
526      do {
527	while (ISIDNUM (*pfile->buffer->cur))
528	  {
529	    pfile->buffer->cur++;
530	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
531	  }
532      } while (forms_identifier_p (pfile, false, nst));
533      result = _cpp_interpret_identifier (pfile, base,
534					  pfile->buffer->cur - base);
535    }
536  else
537    {
538      len = cur - base;
539      hash = HT_HASHFINISH (hash, len);
540
541      result = (cpp_hashnode *)
542	ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
543    }
544
545  /* Rarely, identifiers require diagnostics when lexed.  */
546  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547			&& !pfile->state.skipping, 0))
548    {
549      /* It is allowed to poison the same identifier twice.  */
550      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
551	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
552		   NODE_NAME (result));
553
554      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555	 replacement list of a variadic macro.  */
556      if (result == pfile->spec_nodes.n__VA_ARGS__
557	  && !pfile->state.va_args_ok)
558	cpp_error (pfile, CPP_DL_PEDWARN,
559		   "__VA_ARGS__ can only appear in the expansion"
560		   " of a C99 variadic macro");
561    }
562
563  return result;
564}
565
566/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
567static void
568lex_number (cpp_reader *pfile, cpp_string *number,
569	    struct normalize_state *nst)
570{
571  const uchar *cur;
572  const uchar *base;
573  uchar *dest;
574
575  base = pfile->buffer->cur - 1;
576  do
577    {
578      cur = pfile->buffer->cur;
579
580      /* N.B. ISIDNUM does not include $.  */
581      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
582	{
583	  cur++;
584	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
585	}
586
587      pfile->buffer->cur = cur;
588    }
589  while (forms_identifier_p (pfile, false, nst));
590
591  number->len = cur - base;
592  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593  memcpy (dest, base, number->len);
594  dest[number->len] = '\0';
595  number->text = dest;
596}
597
598/* Create a token of type TYPE with a literal spelling.  */
599static void
600create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601		unsigned int len, enum cpp_ttype type)
602{
603  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
604
605  memcpy (dest, base, len);
606  dest[len] = '\0';
607  token->type = type;
608  token->val.str.len = len;
609  token->val.str.text = dest;
610}
611
612/* Lexes a string, character constant, or angle-bracketed header file
613   name.  The stored string contains the spelling, including opening
614   quote and leading any leading 'L'.  It returns the type of the
615   literal, or CPP_OTHER if it was not properly terminated.
616
617   The spelling is NUL-terminated, but it is not guaranteed that this
618   is the first NUL since embedded NULs are preserved.  */
619static void
620lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
621{
622  bool saw_NUL = false;
623  const uchar *cur;
624  cppchar_t terminator;
625  enum cpp_ttype type;
626
627  cur = base;
628  terminator = *cur++;
629  if (terminator == 'L')
630    terminator = *cur++;
631  if (terminator == '\"')
632    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
633  else if (terminator == '\'')
634    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
635  else
636    terminator = '>', type = CPP_HEADER_NAME;
637
638  for (;;)
639    {
640      cppchar_t c = *cur++;
641
642      /* In #include-style directives, terminators are not escapable.  */
643      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
644	cur++;
645      else if (c == terminator)
646	break;
647      else if (c == '\n')
648	{
649	  cur--;
650	  type = CPP_OTHER;
651	  break;
652	}
653      else if (c == '\0')
654	saw_NUL = true;
655    }
656
657  if (saw_NUL && !pfile->state.skipping)
658    cpp_error (pfile, CPP_DL_WARNING,
659	       "null character(s) preserved in literal");
660
661  /* APPLE LOCAL begin #error with unmatched quotes 5607574 */
662  if (type == CPP_OTHER
663      && CPP_OPTION (pfile, lang) != CLK_ASM
664      && !pfile->state.in_diagnostic
665      && !pfile->state.skipping)
666  /* APPLE LOCAL end #error with unmatched quotes 5607574 */
667    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
668	       (int) terminator);
669
670  pfile->buffer->cur = cur;
671  create_literal (pfile, token, base, cur - base, type);
672}
673
674/* The stored comment includes the comment start and any terminator.  */
675static void
676save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
677	      cppchar_t type)
678{
679  unsigned char *buffer;
680  unsigned int len, clen;
681
682  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
683
684  /* C++ comments probably (not definitely) have moved past a new
685     line, which we don't want to save in the comment.  */
686  if (is_vspace (pfile->buffer->cur[-1]))
687    len--;
688
689  /* If we are currently in a directive, then we need to store all
690     C++ comments as C comments internally, and so we need to
691     allocate a little extra space in that case.
692
693     Note that the only time we encounter a directive here is
694     when we are saving comments in a "#define".  */
695  clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
696
697  buffer = _cpp_unaligned_alloc (pfile, clen);
698
699  token->type = CPP_COMMENT;
700  token->val.str.len = clen;
701  token->val.str.text = buffer;
702
703  buffer[0] = '/';
704  memcpy (buffer + 1, from, len - 1);
705
706  /* Finish conversion to a C comment, if necessary.  */
707  if (pfile->state.in_directive && type == '/')
708    {
709      buffer[1] = '*';
710      buffer[clen - 2] = '*';
711      buffer[clen - 1] = '/';
712    }
713}
714
715/* Allocate COUNT tokens for RUN.  */
716void
717_cpp_init_tokenrun (tokenrun *run, unsigned int count)
718{
719  run->base = XNEWVEC (cpp_token, count);
720  run->limit = run->base + count;
721  run->next = NULL;
722}
723
724/* Returns the next tokenrun, or creates one if there is none.  */
725static tokenrun *
726next_tokenrun (tokenrun *run)
727{
728  if (run->next == NULL)
729    {
730      run->next = XNEW (tokenrun);
731      run->next->prev = run;
732      _cpp_init_tokenrun (run->next, 250);
733    }
734
735  return run->next;
736}
737
738/* Allocate a single token that is invalidated at the same time as the
739   rest of the tokens on the line.  Has its line and col set to the
740   same as the last lexed token, so that diagnostics appear in the
741   right place.  */
742cpp_token *
743_cpp_temp_token (cpp_reader *pfile)
744{
745  cpp_token *old, *result;
746
747  old = pfile->cur_token - 1;
748  if (pfile->cur_token == pfile->cur_run->limit)
749    {
750      pfile->cur_run = next_tokenrun (pfile->cur_run);
751      pfile->cur_token = pfile->cur_run->base;
752    }
753
754  result = pfile->cur_token++;
755  result->src_loc = old->src_loc;
756  return result;
757}
758
/* Lex a token into RESULT (external interface).  Takes care of issues
   like directive handling, token lookahead, multiple include
   optimization and skipping.

   Returns the next token that the caller should see.  Tokens are
   discarded while pfile->state.skipping is set, except inside
   directives and deferred pragmas, and except for CPP_EOF.  */
const cpp_token *
_cpp_lex_token (cpp_reader *pfile)
{
  cpp_token *result;

  for (;;)
    {
      /* Move on to the next token run when the current one is full.  */
      if (pfile->cur_token == pfile->cur_run->limit)
	{
	  pfile->cur_run = next_tokenrun (pfile->cur_run);
	  pfile->cur_token = pfile->cur_run->base;
	}
      /* We assume that the current token is somewhere in the current
	 run.  */
      if (pfile->cur_token < pfile->cur_run->base
	  || pfile->cur_token >= pfile->cur_run->limit)
	abort ();

      if (pfile->lookaheads)
	{
	  /* A token lexed earlier as lookahead is still waiting at
	     cur_token; hand it out instead of lexing a new one.  */
	  pfile->lookaheads--;
	  result = pfile->cur_token++;
	}
      else
	result = _cpp_lex_direct (pfile);

      /* Tokens flagged BOL may introduce a directive.  */
      if (result->flags & BOL)
	{
	  /* Is this a directive.  If _cpp_handle_directive returns
	     false, it is an assembler #.  */
	  if (result->type == CPP_HASH
	      /* 6.10.3 p 11: Directives in a list of macro arguments
		 gives undefined behavior.  This implementation
		 handles the directive as normal.  */
	      && pfile->state.parsing_args != 1)
	    {
	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
		{
		  /* A CPP_PADDING result means the directive produced
		     no token to return; lex another one.  Otherwise
		     the directive's result token is returned.  */
		  if (pfile->directive_result.type == CPP_PADDING)
		    continue;
		  result = &pfile->directive_result;
		}
	    }
	  else if (pfile->state.in_deferred_pragma)
	    result = &pfile->directive_result;

	  /* Notify the client of the line change, unless skipping.  */
	  if (pfile->cb.line_change && !pfile->state.skipping)
	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
	}

      /* We don't skip tokens in directives.  */
      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
	break;

      /* Outside a directive, invalidate controlling macros.  At file
	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
	 get here and MI optimization works.  */
      pfile->mi_valid = false;

      /* While skipping, only CPP_EOF is passed on to the caller.  */
      if (!pfile->state.skipping || result->type == CPP_EOF)
	break;
    }

  return result;
}
827
828/* Returns true if a fresh line has been loaded.  */
829bool
830_cpp_get_fresh_line (cpp_reader *pfile)
831{
832  int return_at_eof;
833
834  /* We can't get a new line until we leave the current directive.  */
835  if (pfile->state.in_directive)
836    return false;
837
838  for (;;)
839    {
840      cpp_buffer *buffer = pfile->buffer;
841
842      if (!buffer->need_line)
843	return true;
844
845      if (buffer->next_line < buffer->rlimit)
846	{
847	  _cpp_clean_line (pfile);
848	  return true;
849	}
850
851      /* First, get out of parsing arguments state.  */
852      if (pfile->state.parsing_args)
853	return false;
854
855      /* End of buffer.  Non-empty files should end in a newline.  */
856      if (buffer->buf != buffer->rlimit
857	  && buffer->next_line > buffer->rlimit
858	  && !buffer->from_stage3)
859	{
860	  /* Clip to buffer size.  */
861	  buffer->next_line = buffer->rlimit;
862	  /* APPLE LOCAL begin suppress no newline warning.  */
863	  if ( CPP_OPTION (pfile, warn_newline_at_eof))
864	    {
865	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
866				   CPP_BUF_COLUMN (buffer, buffer->cur),
867				   "no newline at end of file");
868	    }
869	  /* APPLE LOCAL end suppress no newline warning.  */
870	}
871
872      return_at_eof = buffer->return_at_eof;
873      _cpp_pop_buffer (pfile);
874      if (pfile->buffer == NULL || return_at_eof)
875	return false;
876    }
877}
878
/* Helper for lexing two-character operators.  Sets result->type to
   THEN_TYPE and consumes one character if the next character in the
   buffer is CHAR; otherwise sets it to ELSE_TYPE and consumes nothing.
   Relies on variables named `result' and `buffer' being in scope at
   the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = ELSE_TYPE;				\
      if (*buffer->cur == CHAR)				\
	buffer->cur++, result->type = THEN_TYPE;	\
    }							\
  while (0)
887
888/* Lex a token into pfile->cur_token, which is also incremented, to
889   get diagnostics pointing to the correct location.
890
891   Does not handle issues such as token lookahead, multiple-include
892   optimization, directives, skipping etc.  This function is only
893   suitable for use by _cpp_lex_token, and in special cases like
894   lex_expansion_token which doesn't care for any of these issues.
895
896   When meeting a newline, returns CPP_EOF if parsing a directive,
897   otherwise returns to the start of the token buffer if permissible.
898   Returns the location of the lexed token.  */
899cpp_token *
900_cpp_lex_direct (cpp_reader *pfile)
901{
902  cppchar_t c;
903  cpp_buffer *buffer;
904  const unsigned char *comment_start;
905  cpp_token *result = pfile->cur_token++;
906
907 fresh_line:
908  result->flags = 0;
909  buffer = pfile->buffer;
910  if (buffer->need_line)
911    {
912      if (pfile->state.in_deferred_pragma)
913	{
914	  result->type = CPP_PRAGMA_EOL;
915	  pfile->state.in_deferred_pragma = false;
916	  if (!pfile->state.pragma_allow_expansion)
917	    pfile->state.prevent_expansion--;
918	  return result;
919	}
920      if (!_cpp_get_fresh_line (pfile))
921	{
922	  result->type = CPP_EOF;
923	  if (!pfile->state.in_directive)
924	    {
925	      /* Tell the compiler the line number of the EOF token.  */
926	      result->src_loc = pfile->line_table->highest_line;
927	      result->flags = BOL;
928	    }
929	  return result;
930	}
931      if (!pfile->keep_tokens)
932	{
933	  pfile->cur_run = &pfile->base_run;
934	  result = pfile->base_run.base;
935	  pfile->cur_token = result + 1;
936	}
937      result->flags = BOL;
938      if (pfile->state.parsing_args == 2)
939	result->flags |= PREV_WHITE;
940    }
941  buffer = pfile->buffer;
942 update_tokens_line:
943  result->src_loc = pfile->line_table->highest_line;
944
945 skipped_white:
946  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
947      && !pfile->overlaid_buffer)
948    {
949      _cpp_process_line_notes (pfile, false);
950      result->src_loc = pfile->line_table->highest_line;
951    }
952  c = *buffer->cur++;
953
954  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
955			       CPP_BUF_COLUMN (buffer, buffer->cur));
956
957  switch (c)
958    {
959    case ' ': case '\t': case '\f': case '\v': case '\0':
960      result->flags |= PREV_WHITE;
961      skip_whitespace (pfile, c);
962      goto skipped_white;
963
964    case '\n':
965      if (buffer->cur < buffer->rlimit)
966	CPP_INCREMENT_LINE (pfile, 0);
967      buffer->need_line = true;
968      goto fresh_line;
969
970    case '0': case '1': case '2': case '3': case '4':
971    case '5': case '6': case '7': case '8': case '9':
972      {
973	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
974	result->type = CPP_NUMBER;
975	lex_number (pfile, &result->val.str, &nst);
976	warn_about_normalization (pfile, result, &nst);
977	break;
978      }
979
980    case 'L':
981      /* 'L' may introduce wide characters or strings.  */
982      if (*buffer->cur == '\'' || *buffer->cur == '"')
983	{
984	  lex_string (pfile, result, buffer->cur - 1);
985	  break;
986	}
987      /* Fall through.  */
988
989    case '_':
990    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
991    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
992    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
993    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
994    case 'y': case 'z':
995    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
996    case 'G': case 'H': case 'I': case 'J': case 'K':
997    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
998    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
999    case 'Y': case 'Z':
1000      result->type = CPP_NAME;
1001      {
1002	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1003	result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1004					   &nst);
1005	warn_about_normalization (pfile, result, &nst);
1006      }
1007
1008      /* Convert named operators to their proper types.  */
1009      if (result->val.node->flags & NODE_OPERATOR)
1010	{
1011	  result->flags |= NAMED_OP;
1012	  result->type = (enum cpp_ttype) result->val.node->directive_index;
1013	}
1014      break;
1015
1016    case '\'':
1017    case '"':
1018      lex_string (pfile, result, buffer->cur - 1);
1019      break;
1020
1021    case '/':
1022      /* A potential block or line comment.  */
1023      comment_start = buffer->cur;
1024      c = *buffer->cur;
1025
1026      if (c == '*')
1027	{
1028	  if (_cpp_skip_block_comment (pfile))
1029	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1030	}
1031      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1032			    || cpp_in_system_header (pfile)))
1033	{
1034	  /* Warn about comments only if pedantically GNUC89, and not
1035	     in system headers.  */
1036	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1037	      && ! buffer->warned_cplusplus_comments)
1038	    {
1039	      cpp_error (pfile, CPP_DL_PEDWARN,
1040			 "C++ style comments are not allowed in ISO C90");
1041	      cpp_error (pfile, CPP_DL_PEDWARN,
1042			 "(this will be reported only once per input file)");
1043	      buffer->warned_cplusplus_comments = 1;
1044	    }
1045
1046	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1047	    cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1048	}
1049      else if (c == '=')
1050	{
1051	  buffer->cur++;
1052	  result->type = CPP_DIV_EQ;
1053	  break;
1054	}
1055      else
1056	{
1057	  result->type = CPP_DIV;
1058	  break;
1059	}
1060
1061      if (!pfile->state.save_comments)
1062	{
1063	  result->flags |= PREV_WHITE;
1064	  goto update_tokens_line;
1065	}
1066
1067      /* Save the comment as a token in its own right.  */
1068      save_comment (pfile, result, comment_start, c);
1069      break;
1070
1071    case '<':
1072      if (pfile->state.angled_headers)
1073	{
1074	  lex_string (pfile, result, buffer->cur - 1);
1075	  break;
1076	}
1077
1078      result->type = CPP_LESS;
1079      if (*buffer->cur == '=')
1080	buffer->cur++, result->type = CPP_LESS_EQ;
1081      else if (*buffer->cur == '<')
1082	{
1083	  buffer->cur++;
1084	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1085	}
1086      else if (CPP_OPTION (pfile, digraphs))
1087	{
1088	  if (*buffer->cur == ':')
1089	    {
1090	      buffer->cur++;
1091	      result->flags |= DIGRAPH;
1092	      result->type = CPP_OPEN_SQUARE;
1093	    }
1094	  else if (*buffer->cur == '%')
1095	    {
1096	      buffer->cur++;
1097	      result->flags |= DIGRAPH;
1098	      result->type = CPP_OPEN_BRACE;
1099	    }
1100	}
1101      break;
1102
1103    case '>':
1104      result->type = CPP_GREATER;
1105      if (*buffer->cur == '=')
1106	buffer->cur++, result->type = CPP_GREATER_EQ;
1107      else if (*buffer->cur == '>')
1108	{
1109	  buffer->cur++;
1110	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1111	}
1112      break;
1113
1114    case '%':
1115      result->type = CPP_MOD;
1116      if (*buffer->cur == '=')
1117	buffer->cur++, result->type = CPP_MOD_EQ;
1118      else if (CPP_OPTION (pfile, digraphs))
1119	{
1120	  if (*buffer->cur == ':')
1121	    {
1122	      buffer->cur++;
1123	      result->flags |= DIGRAPH;
1124	      result->type = CPP_HASH;
1125	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
1126		buffer->cur += 2, result->type = CPP_PASTE;
1127	    }
1128	  else if (*buffer->cur == '>')
1129	    {
1130	      buffer->cur++;
1131	      result->flags |= DIGRAPH;
1132	      result->type = CPP_CLOSE_BRACE;
1133	    }
1134	}
1135      break;
1136
1137    case '.':
1138      result->type = CPP_DOT;
1139      if (ISDIGIT (*buffer->cur))
1140	{
1141	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1142	  result->type = CPP_NUMBER;
1143	  lex_number (pfile, &result->val.str, &nst);
1144	  warn_about_normalization (pfile, result, &nst);
1145	}
1146      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1147	buffer->cur += 2, result->type = CPP_ELLIPSIS;
1148      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1149	buffer->cur++, result->type = CPP_DOT_STAR;
1150      break;
1151
1152    case '+':
1153      result->type = CPP_PLUS;
1154      if (*buffer->cur == '+')
1155	buffer->cur++, result->type = CPP_PLUS_PLUS;
1156      else if (*buffer->cur == '=')
1157	buffer->cur++, result->type = CPP_PLUS_EQ;
1158      break;
1159
1160    case '-':
1161      result->type = CPP_MINUS;
1162      if (*buffer->cur == '>')
1163	{
1164	  buffer->cur++;
1165	  result->type = CPP_DEREF;
1166	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1167	    buffer->cur++, result->type = CPP_DEREF_STAR;
1168	}
1169      else if (*buffer->cur == '-')
1170	buffer->cur++, result->type = CPP_MINUS_MINUS;
1171      else if (*buffer->cur == '=')
1172	buffer->cur++, result->type = CPP_MINUS_EQ;
1173      break;
1174
1175    case '&':
1176      result->type = CPP_AND;
1177      if (*buffer->cur == '&')
1178	buffer->cur++, result->type = CPP_AND_AND;
1179      else if (*buffer->cur == '=')
1180	buffer->cur++, result->type = CPP_AND_EQ;
1181      break;
1182
1183    case '|':
1184      result->type = CPP_OR;
1185      if (*buffer->cur == '|')
1186	buffer->cur++, result->type = CPP_OR_OR;
1187      else if (*buffer->cur == '=')
1188	buffer->cur++, result->type = CPP_OR_EQ;
1189      break;
1190
1191    case ':':
1192      result->type = CPP_COLON;
1193      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1194	buffer->cur++, result->type = CPP_SCOPE;
1195      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1196	{
1197	  buffer->cur++;
1198	  result->flags |= DIGRAPH;
1199	  result->type = CPP_CLOSE_SQUARE;
1200	}
1201      break;
1202
1203    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1204    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1205    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1206    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1207    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1208
1209    case '?': result->type = CPP_QUERY; break;
1210    case '~': result->type = CPP_COMPL; break;
1211    case ',': result->type = CPP_COMMA; break;
1212    case '(': result->type = CPP_OPEN_PAREN; break;
1213    case ')': result->type = CPP_CLOSE_PAREN; break;
1214    case '[': result->type = CPP_OPEN_SQUARE; break;
1215    case ']': result->type = CPP_CLOSE_SQUARE; break;
1216    case '{': result->type = CPP_OPEN_BRACE; break;
1217    case '}': result->type = CPP_CLOSE_BRACE; break;
1218    case ';': result->type = CPP_SEMICOLON; break;
1219
1220      /* @ is a punctuator in Objective-C.  */
1221    case '@': result->type = CPP_ATSIGN; break;
1222
1223    case '$':
1224    case '\\':
1225      {
1226	const uchar *base = --buffer->cur;
1227	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1228
1229	if (forms_identifier_p (pfile, true, &nst))
1230	  {
1231	    result->type = CPP_NAME;
1232	    result->val.node = lex_identifier (pfile, base, true, &nst);
1233	    warn_about_normalization (pfile, result, &nst);
1234	    break;
1235	  }
1236	buffer->cur++;
1237      }
1238
1239    default:
1240      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1241      break;
1242    }
1243
1244  return result;
1245}
1246
1247/* An upper bound on the number of bytes needed to spell TOKEN.
1248   Does not include preceding whitespace.  */
1249unsigned int
1250cpp_token_len (const cpp_token *token)
1251{
1252  unsigned int len;
1253
1254  switch (TOKEN_SPELL (token))
1255    {
1256    default:		len = 4;				break;
1257    case SPELL_LITERAL:	len = token->val.str.len;		break;
1258    case SPELL_IDENT:	len = NODE_LEN (token->val.node) * 10;	break;
1259    }
1260
1261  return len;
1262}
1263
/* Decode the single UTF-8 sequence starting at NAME and store the
   corresponding universal character name, spelled as a backslash,
   'U' and eight lowercase hexadecimal digits, at BUFFER.  Exactly ten
   bytes are written to BUFFER and no terminator is added.  Returns
   the number of bytes of NAME that made up the sequence.  Aborts if a
   continuation byte is malformed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int seen;
  int shift;

  /* The number of leading one bits in the first byte gives the total
     length of the sequence.  */
  lead = *src;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload bits of the lead byte are whatever follows the length
     marker.  */
  code_point = *src & (0x7F >> seq_len);

  /* Each continuation byte contributes six more bits.  */
  for (seen = 1; seen < seq_len; seen++)
    {
      unsigned char cont = *++src;

      /* Ill-formed UTF-8: a continuation byte must look like 10xxxxxx.  */
      if ((cont & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (cont & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;

  /* Emit the eight nibbles, most significant first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1297
1298
1299/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1300   already contain the enough space to hold the token's spelling.
1301   Returns a pointer to the character after the last character written.
1302   FORSTRING is true if this is to be the spelling after translation
1303   phase 1 (this is different for UCNs).
1304   FIXME: Would be nice if we didn't need the PFILE argument.  */
1305unsigned char *
1306cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1307		 unsigned char *buffer, bool forstring)
1308{
1309  switch (TOKEN_SPELL (token))
1310    {
1311    case SPELL_OPERATOR:
1312      {
1313	const unsigned char *spelling;
1314	unsigned char c;
1315
1316	if (token->flags & DIGRAPH)
1317	  spelling
1318	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1319	else if (token->flags & NAMED_OP)
1320	  goto spell_ident;
1321	else
1322	  spelling = TOKEN_NAME (token);
1323
1324	while ((c = *spelling++) != '\0')
1325	  *buffer++ = c;
1326      }
1327      break;
1328
1329    spell_ident:
1330    case SPELL_IDENT:
1331      if (forstring)
1332	{
1333	  memcpy (buffer, NODE_NAME (token->val.node),
1334		  NODE_LEN (token->val.node));
1335	  buffer += NODE_LEN (token->val.node);
1336	}
1337      else
1338	{
1339	  size_t i;
1340	  const unsigned char * name = NODE_NAME (token->val.node);
1341
1342	  for (i = 0; i < NODE_LEN (token->val.node); i++)
1343	    if (name[i] & ~0x7F)
1344	      {
1345		i += utf8_to_ucn (buffer, name + i) - 1;
1346		buffer += 10;
1347	      }
1348	    else
1349	      *buffer++ = NODE_NAME (token->val.node)[i];
1350	}
1351      break;
1352
1353    case SPELL_LITERAL:
1354      memcpy (buffer, token->val.str.text, token->val.str.len);
1355      buffer += token->val.str.len;
1356      break;
1357
1358    case SPELL_NONE:
1359      cpp_error (pfile, CPP_DL_ICE,
1360		 "unspellable token %s", TOKEN_NAME (token));
1361      break;
1362    }
1363
1364  return buffer;
1365}
1366
1367/* Returns TOKEN spelt as a null-terminated string.  The string is
1368   freed when the reader is destroyed.  Useful for diagnostics.  */
1369unsigned char *
1370cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1371{
1372  unsigned int len = cpp_token_len (token) + 1;
1373  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1374
1375  end = cpp_spell_token (pfile, token, start, false);
1376  end[0] = '\0';
1377
1378  return start;
1379}
1380
1381/* Used by C front ends, which really should move to using
1382   cpp_token_as_text.  */
1383const char *
1384cpp_type2name (enum cpp_ttype type)
1385{
1386  return (const char *) token_spellings[type].name;
1387}
1388
1389/* Writes the spelling of token to FP, without any preceding space.
1390   Separated from cpp_spell_token for efficiency - to avoid stdio
1391   double-buffering.  */
1392void
1393cpp_output_token (const cpp_token *token, FILE *fp)
1394{
1395  switch (TOKEN_SPELL (token))
1396    {
1397    case SPELL_OPERATOR:
1398      {
1399	const unsigned char *spelling;
1400	int c;
1401
1402	if (token->flags & DIGRAPH)
1403	  spelling
1404	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1405	else if (token->flags & NAMED_OP)
1406	  goto spell_ident;
1407	else
1408	  spelling = TOKEN_NAME (token);
1409
1410	c = *spelling;
1411	do
1412	  putc (c, fp);
1413	while ((c = *++spelling) != '\0');
1414      }
1415      break;
1416
1417    spell_ident:
1418    case SPELL_IDENT:
1419      {
1420	size_t i;
1421	const unsigned char * name = NODE_NAME (token->val.node);
1422
1423	for (i = 0; i < NODE_LEN (token->val.node); i++)
1424	  if (name[i] & ~0x7F)
1425	    {
1426	      unsigned char buffer[10];
1427	      i += utf8_to_ucn (buffer, name + i) - 1;
1428	      fwrite (buffer, 1, 10, fp);
1429	    }
1430	  else
1431	    fputc (NODE_NAME (token->val.node)[i], fp);
1432      }
1433      break;
1434
1435    case SPELL_LITERAL:
1436      fwrite (token->val.str.text, 1, token->val.str.len, fp);
1437      break;
1438
1439    case SPELL_NONE:
1440      /* An error, most probably.  */
1441      break;
1442    }
1443}
1444
1445/* Compare two tokens.  */
1446int
1447_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1448{
1449  if (a->type == b->type && a->flags == b->flags)
1450    switch (TOKEN_SPELL (a))
1451      {
1452      default:			/* Keep compiler happy.  */
1453      case SPELL_OPERATOR:
1454	return 1;
1455      case SPELL_NONE:
1456	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1457      case SPELL_IDENT:
1458	return a->val.node == b->val.node;
1459      case SPELL_LITERAL:
1460	return (a->val.str.len == b->val.str.len
1461		&& !memcmp (a->val.str.text, b->val.str.text,
1462			    a->val.str.len));
1463      }
1464
1465  return 0;
1466}
1467
1468/* Returns nonzero if a space should be inserted to avoid an
1469   accidental token paste for output.  For simplicity, it is
1470   conservative, and occasionally advises a space where one is not
1471   needed, e.g. "." and ".2".  */
1472int
1473cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1474		 const cpp_token *token2)
1475{
1476  enum cpp_ttype a = token1->type, b = token2->type;
1477  cppchar_t c;
1478
1479  if (token1->flags & NAMED_OP)
1480    a = CPP_NAME;
1481  if (token2->flags & NAMED_OP)
1482    b = CPP_NAME;
1483
1484  c = EOF;
1485  if (token2->flags & DIGRAPH)
1486    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1487  else if (token_spellings[b].category == SPELL_OPERATOR)
1488    c = token_spellings[b].name[0];
1489
1490  /* Quickly get everything that can paste with an '='.  */
1491  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1492    return 1;
1493
1494  switch (a)
1495    {
1496    case CPP_GREATER:	return c == '>';
1497    case CPP_LESS:	return c == '<' || c == '%' || c == ':';
1498    case CPP_PLUS:	return c == '+';
1499    case CPP_MINUS:	return c == '-' || c == '>';
1500    case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1501    case CPP_MOD:	return c == ':' || c == '>';
1502    case CPP_AND:	return c == '&';
1503    case CPP_OR:	return c == '|';
1504    case CPP_COLON:	return c == ':' || c == '>';
1505    case CPP_DEREF:	return c == '*';
1506    case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1507    case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1508    case CPP_NAME:	return ((b == CPP_NUMBER
1509				 && name_p (pfile, &token2->val.str))
1510				|| b == CPP_NAME
1511				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1512    case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1513				|| c == '.' || c == '+' || c == '-');
1514				      /* UCNs */
1515    case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
1516				 && b == CPP_NAME)
1517				|| (CPP_OPTION (pfile, objc)
1518				    && token1->val.str.text[0] == '@'
1519				    && (b == CPP_NAME || b == CPP_STRING)));
1520    default:		break;
1521    }
1522
1523  return 0;
1524}
1525
1526/* Output all the remaining tokens on the current line, and a newline
1527   character, to FP.  Leading whitespace is removed.  If there are
1528   macros, special token padding is not performed.  */
1529void
1530cpp_output_line (cpp_reader *pfile, FILE *fp)
1531{
1532  const cpp_token *token;
1533
1534  token = cpp_get_token (pfile);
1535  while (token->type != CPP_EOF)
1536    {
1537      cpp_output_token (token, fp);
1538      token = cpp_get_token (pfile);
1539      if (token->flags & PREV_WHITE)
1540	putc (' ', fp);
1541    }
1542
1543  putc ('\n', fp);
1544}
1545
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload a freshly created buffer
   gets.  BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer
   worth handing out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized so that an
   argument expression keeps its own grouping.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1560
1561/* Create a new allocation buffer.  Place the control block at the end
1562   of the buffer, so that buffer overflows will cause immediate chaos.  */
1563static _cpp_buff *
1564new_buff (size_t len)
1565{
1566  _cpp_buff *result;
1567  unsigned char *base;
1568
1569  if (len < MIN_BUFF_SIZE)
1570    len = MIN_BUFF_SIZE;
1571  len = CPP_ALIGN (len);
1572
1573  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1574  result = (_cpp_buff *) (base + len);
1575  result->base = base;
1576  result->cur = base;
1577  result->limit = base + len;
1578  result->next = NULL;
1579  return result;
1580}
1581
1582/* Place a chain of unwanted allocation buffers on the free list.  */
1583void
1584_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1585{
1586  _cpp_buff *end = buff;
1587
1588  while (end->next)
1589    end = end->next;
1590  end->next = pfile->free_buffs;
1591  pfile->free_buffs = buff;
1592}
1593
1594/* Return a free buffer of size at least MIN_SIZE.  */
1595_cpp_buff *
1596_cpp_get_buff (cpp_reader *pfile, size_t min_size)
1597{
1598  _cpp_buff *result, **p;
1599
1600  for (p = &pfile->free_buffs;; p = &(*p)->next)
1601    {
1602      size_t size;
1603
1604      if (*p == NULL)
1605	return new_buff (min_size);
1606      result = *p;
1607      size = result->limit - result->base;
1608      /* Return a buffer that's big enough, but don't waste one that's
1609         way too big.  */
1610      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1611	break;
1612    }
1613
1614  *p = result->next;
1615  result->next = NULL;
1616  result->cur = result->base;
1617  return result;
1618}
1619
1620/* Creates a new buffer with enough space to hold the uncommitted
1621   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1622   the excess bytes to the new buffer.  Chains the new buffer after
1623   BUFF, and returns the new buffer.  */
1624_cpp_buff *
1625_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1626{
1627  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1628  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1629
1630  buff->next = new_buff;
1631  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1632  return new_buff;
1633}
1634
1635/* Creates a new buffer with enough space to hold the uncommitted
1636   remaining bytes of the buffer pointed to by BUFF, and at least
1637   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1638   Chains the new buffer before the buffer pointed to by BUFF, and
1639   updates the pointer to point to the new buffer.  */
1640void
1641_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1642{
1643  _cpp_buff *new_buff, *old_buff = *pbuff;
1644  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1645
1646  new_buff = _cpp_get_buff (pfile, size);
1647  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1648  new_buff->next = old_buff;
1649  *pbuff = new_buff;
1650}
1651
1652/* Free a chain of buffers starting at BUFF.  */
1653void
1654_cpp_free_buff (_cpp_buff *buff)
1655{
1656  _cpp_buff *next;
1657
1658  for (; buff; buff = next)
1659    {
1660      next = buff->next;
1661      free (buff->base);
1662    }
1663}
1664
1665/* Allocate permanent, unaligned storage of length LEN.  */
1666unsigned char *
1667_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1668{
1669  _cpp_buff *buff = pfile->u_buff;
1670  unsigned char *result = buff->cur;
1671
1672  if (len > (size_t) (buff->limit - result))
1673    {
1674      buff = _cpp_get_buff (pfile, len);
1675      buff->next = pfile->u_buff;
1676      pfile->u_buff = buff;
1677      result = buff->cur;
1678    }
1679
1680  buff->cur = result + len;
1681  return result;
1682}
1683
1684/* Allocate permanent, unaligned storage of length LEN from a_buff.
1685   That buffer is used for growing allocations when saving macro
1686   replacement lists in a #define, and when parsing an answer to an
1687   assertion in #assert, #unassert or #if (and therefore possibly
1688   whilst expanding macros).  It therefore must not be used by any
1689   code that they might call: specifically the lexer and the guts of
1690   the macro expander.
1691
1692   All existing other uses clearly fit this restriction: storing
1693   registered pragmas during initialization.  */
1694unsigned char *
1695_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1696{
1697  _cpp_buff *buff = pfile->a_buff;
1698  unsigned char *result = buff->cur;
1699
1700  if (len > (size_t) (buff->limit - result))
1701    {
1702      buff = _cpp_get_buff (pfile, len);
1703      buff->next = pfile->a_buff;
1704      pfile->a_buff = buff;
1705      result = buff->cur;
1706    }
1707
1708  buff->cur = result + len;
1709  return result;
1710}
1711
1712/* Say which field of TOK is in use.  */
1713
1714enum cpp_token_fld_kind
1715cpp_token_val_index (cpp_token *tok)
1716{
1717  switch (TOKEN_SPELL (tok))
1718    {
1719    case SPELL_IDENT:
1720      return CPP_TOKEN_FLD_NODE;
1721    case SPELL_LITERAL:
1722      return CPP_TOKEN_FLD_STR;
1723    case SPELL_NONE:
1724      if (tok->type == CPP_MACRO_ARG)
1725	return CPP_TOKEN_FLD_ARG_NO;
1726      else if (tok->type == CPP_PADDING)
1727	return CPP_TOKEN_FLD_SOURCE;
1728      else if (tok->type == CPP_PRAGMA)
1729	return CPP_TOKEN_FLD_PRAGMA;
1730      /* else fall through */
1731    default:
1732      return CPP_TOKEN_FLD_NONE;
1733    }
1734}
1735