1/* CPP Library - lexical analysis.
2   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3   Free Software Foundation, Inc.
4   Contributed by Per Bothner, 1994-95.
5   Based on CCCP program by Paul Rubin, June 1986
6   Adapted to ANSI C, Richard Stallman, Jan 1987
7   Broken out to separate file, Zack Weinberg, Mar 2000
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 3, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "cpplib.h"
26#include "internal.h"
27
/* Spelling category recorded for each token type in the
   token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR,	/* Value 0; given to every OP() table entry.  */
  SPELL_IDENT,		/* Selected by name through the TK() macro.  */
  SPELL_LITERAL,	/* Selected by name through the TK() macro.  */
  SPELL_NONE		/* Selected by name through the TK() macro.  */
};
35
/* One entry of the token_spellings table: the spelling category of a
   token type together with its spelling text.  The field order matters
   because the OP() and TK() macros initialize entries positionally.  */
struct token_spelling
{
  /* How tokens of this type are spelled.  */
  enum spell_type category;
  /* Operator text for OP() entries, stringified enumerator name for
     TK() entries.  */
  const unsigned char *name;
};
41
/* Spellings of the digraph forms of punctuators.  The index used to
   select an entry is computed by code outside this view (presumably
   relative to the first digraph-capable token type -- confirm against
   the users of this table).  */
static const unsigned char *const digraph_spellings[] =
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
44
/* Build the per-token-type spelling table by expanding TTYPE_TABLE.
   OP(e, s) entries are operators whose spelling is the string S;
   TK(e, s) entries take category SPELL_<s> and use the stringified
   enumerator name E as their name.  Both helper macros are undefined
   again once the table has been built.  */
#define OP(e, s) { SPELL_OPERATOR, UC s  },
#define TK(e, s) { SPELL_ ## s,    UC #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK
50
/* Accessors into token_spellings for a token pointer TOKEN: its
   spelling category and its spelling text respectively.  TOKEN->type
   is used directly as the table index.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53
/* Forward declarations of file-local helpers.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void store_comment (cpp_reader *, cpp_token *);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
			    unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
67
68
69/* Utility routine:
70
71   Compares, the token TOKEN to the NUL-terminated string STRING.
72   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
73int
74cpp_ideq (const cpp_token *token, const char *string)
75{
76  if (token->type != CPP_NAME)
77    return 0;
78
79  return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
80}
81
82/* Record a note TYPE at byte POS into the current cleaned logical
83   line.  */
84static void
85add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86{
87  if (buffer->notes_used == buffer->notes_cap)
88    {
89      buffer->notes_cap = buffer->notes_cap * 2 + 200;
90      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91                                  buffer->notes_cap);
92    }
93
94  buffer->notes[buffer->notes_used].pos = pos;
95  buffer->notes[buffer->notes_used].type = type;
96  buffer->notes_used++;
97}
98
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   Starting at buffer->next_line, this resets the line-note list, sets
   buffer->cur and buffer->line_base to the start of the line and
   clears buffer->need_line.  On return the cleaned line is terminated
   by a '\n' written at its end, a sentinel note of type '\n' has been
   appended, and buffer->next_line points just past the input that was
   consumed.  When buffer->from_stage3 is set, only the end of the line
   is located; no trigraph or backslash-newline processing is done.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  const uchar *s;
  uchar c, *d, *p;

  buffer = pfile->buffer;
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  /* S is pre-incremented in the loops below, so start one byte early.  */
  s = buffer->next_line - 1;

  if (!buffer->from_stage3)
    {
      /* Position of the most recent backslash seen on the fast path.  */
      const uchar *pbackslash = NULL;

      /* Short circuit for the common case of an un-escaped line with
	 no trigraphs.  The primary win here is by not writing any
	 data back to memory until we have to.  */
      for (;;)
	{
	  c = *++s;
	  if (__builtin_expect (c == '\n', false)
	      || __builtin_expect (c == '\r', false))
	    {
	      d = (uchar *) s;

	      /* The terminator at rlimit always ends the line.  */
	      if (__builtin_expect (s == buffer->rlimit, false))
		goto done;

	      /* DOS line ending? */
	      if (__builtin_expect (c == '\r', false)
		  && s[1] == '\n')
		{
		  s++;
		  if (s == buffer->rlimit)
		    goto done;
		}

	      /* No backslash on this line, so it cannot be escaped.  */
	      if (__builtin_expect (pbackslash == NULL, true))
		goto done;

	      /* Check for escaped newline.  */
	      p = d;
	      while (is_nvspace (p[-1]))
		p--;
	      if (p - 1 != pbackslash)
		goto done;

	      /* Have an escaped newline; process it and proceed to
		 the slow path.  The note type is ' ' when whitespace
		 separated the backslash from the newline.  */
	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
	      d = p - 2;
	      buffer->next_line = p - 1;
	      break;
	    }
	  if (__builtin_expect (c == '\\', false))
	    pbackslash = s;
	  else if (__builtin_expect (c == '?', false)
		   && __builtin_expect (s[1] == '?', false)
		   && _cpp_trigraph_map[s[2]])
	    {
	      /* Have a trigraph.  We may or may not have to convert
		 it.  Add a line note regardless, for -Wtrigraphs.  */
	      add_line_note (buffer, s, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  /* We do, and that means we have to switch to the
		     slow path.  */
		  d = (uchar *) s;
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		  break;
		}
	    }
	}


      /* Slow path: copy from S (read cursor) to D (write cursor),
	 dropping escaped newlines and converting trigraphs in place.  */
      for (;;)
	{
	  c = *++s;
	  *++d = c;

	  if (c == '\n' || c == '\r')
	    {
		  /* Handle DOS line endings.  */
	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
		s++;
	      if (s == buffer->rlimit)
		break;

	      /* Escaped?  */
	      p = d;
	      while (p != buffer->next_line && is_nvspace (p[-1]))
		p--;
	      if (p == buffer->next_line || p[-1] != '\\')
		break;

	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
	      d = p - 2;
	      buffer->next_line = p - 1;
	    }
	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
	    {
	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
	      add_line_note (buffer, d, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		}
	    }
	}
    }
  else
    {
      /* from_stage3 input: just find the end of the line.  */
      do
	s++;
      while (*s != '\n' && *s != '\r');
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
	s++;
    }

 done:
  /* Normalize the line terminator to '\n'.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
234
235/* Return true if the trigraph indicated by NOTE should be warned
236   about in a comment.  */
237static bool
238warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
239{
240  const uchar *p;
241
242  /* Within comments we don't warn about trigraphs, unless the
243     trigraph forms an escaped newline, as that may change
244     behavior.  */
245  if (note->type != '/')
246    return false;
247
248  /* If -trigraphs, then this was an escaped newline iff the next note
249     is coincident.  */
250  if (CPP_OPTION (pfile, trigraphs))
251    return note[1].pos == note->pos;
252
253  /* Otherwise, see if this forms an escaped newline.  */
254  p = note->pos + 3;
255  while (is_nvspace (*p))
256    p++;
257
258  /* There might have been escaped newlines between the trigraph and the
259     newline we found.  Hence the position test.  */
260  return (*p == '\n' && p < note[1].pos);
261}
262
263/* Process the notes created by add_line_note as far as the current
264   location.  */
265void
266_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
267{
268  cpp_buffer *buffer = pfile->buffer;
269
270  for (;;)
271    {
272      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
273      unsigned int col;
274
275      if (note->pos > buffer->cur)
276	break;
277
278      buffer->cur_note++;
279      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
280
281      if (note->type == '\\' || note->type == ' ')
282	{
283	  if (note->type == ' ' && !in_comment)
284	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
285				 "backslash and newline separated by space");
286
287	  if (buffer->next_line > buffer->rlimit)
288	    {
289	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
290				   "backslash-newline at end of file");
291	      /* Prevent "no newline at end of file" warning.  */
292	      buffer->next_line = buffer->rlimit;
293	    }
294
295	  buffer->line_base = note->pos;
296	  CPP_INCREMENT_LINE (pfile, 0);
297	}
298      else if (_cpp_trigraph_map[note->type])
299	{
300	  if (CPP_OPTION (pfile, warn_trigraphs)
301	      && (!in_comment || warn_in_comment (pfile, note)))
302	    {
303	      if (CPP_OPTION (pfile, trigraphs))
304		cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
305				     "trigraph ??%c converted to %c",
306				     note->type,
307				     (int) _cpp_trigraph_map[note->type]);
308	      else
309		{
310		  cpp_error_with_line
311		    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
312		     "trigraph ??%c ignored, use -trigraphs to enable",
313		     note->type);
314		}
315	    }
316	}
317      else if (note->type == 0)
318	/* Already processed in lex_raw_string.  */;
319      else
320	abort ();
321    }
322}
323
324/* Skip a C-style block comment.  We find the end of the comment by
325   seeing if an asterisk is before every '/' we encounter.  Returns
326   nonzero if comment terminated by EOF, zero otherwise.
327
328   Buffer->cur points to the initial asterisk of the comment.  */
329bool
330_cpp_skip_block_comment (cpp_reader *pfile)
331{
332  cpp_buffer *buffer = pfile->buffer;
333  const uchar *cur = buffer->cur;
334  uchar c;
335
336  cur++;
337  if (*cur == '/')
338    cur++;
339
340  for (;;)
341    {
342      /* People like decorating comments with '*', so check for '/'
343	 instead for efficiency.  */
344      c = *cur++;
345
346      if (c == '/')
347	{
348	  if (cur[-2] == '*')
349	    break;
350
351	  /* Warn about potential nested comments, but not if the '/'
352	     comes immediately before the true comment delimiter.
353	     Don't bother to get it right across escaped newlines.  */
354	  if (CPP_OPTION (pfile, warn_comments)
355	      && cur[0] == '*' && cur[1] != '/')
356	    {
357	      buffer->cur = cur;
358	      cpp_error_with_line (pfile, CPP_DL_WARNING,
359				   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
360				   "\"/*\" within comment");
361	    }
362	}
363      else if (c == '\n')
364	{
365	  unsigned int cols;
366	  buffer->cur = cur - 1;
367	  _cpp_process_line_notes (pfile, true);
368	  if (buffer->next_line >= buffer->rlimit)
369	    return true;
370	  _cpp_clean_line (pfile);
371
372	  cols = buffer->next_line - buffer->line_base;
373	  CPP_INCREMENT_LINE (pfile, cols);
374
375	  cur = buffer->cur;
376	}
377    }
378
379  buffer->cur = cur;
380  _cpp_process_line_notes (pfile, true);
381  return false;
382}
383
384/* Skip a C++ line comment, leaving buffer->cur pointing to the
385   terminating newline.  Handles escaped newlines.  Returns nonzero
386   if a multiline comment.  */
387static int
388skip_line_comment (cpp_reader *pfile)
389{
390  cpp_buffer *buffer = pfile->buffer;
391  source_location orig_line = pfile->line_table->highest_line;
392
393  while (*buffer->cur != '\n')
394    buffer->cur++;
395
396  _cpp_process_line_notes (pfile, true);
397  return orig_line != pfile->line_table->highest_line;
398}
399
400/* Skips whitespace, saving the next non-whitespace character.  */
401static void
402skip_whitespace (cpp_reader *pfile, cppchar_t c)
403{
404  cpp_buffer *buffer = pfile->buffer;
405  bool saw_NUL = false;
406
407  do
408    {
409      /* Horizontal space always OK.  */
410      if (c == ' ' || c == '\t')
411	;
412      /* Just \f \v or \0 left.  */
413      else if (c == '\0')
414	saw_NUL = true;
415      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
416	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
417			     CPP_BUF_COL (buffer),
418			     "%s in preprocessing directive",
419			     c == '\f' ? "form feed" : "vertical tab");
420
421      c = *buffer->cur++;
422    }
423  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
424  while (is_nvspace (c));
425
426  if (saw_NUL)
427    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
428
429  buffer->cur--;
430}
431
432/* See if the characters of a number token are valid in a name (no
433   '.', '+' or '-').  */
434static int
435name_p (cpp_reader *pfile, const cpp_string *string)
436{
437  unsigned int i;
438
439  for (i = 0; i < string->len; i++)
440    if (!is_idchar (string->text[i]))
441      return 0;
442
443  return 1;
444}
445
446/* After parsing an identifier or other sequence, produce a warning about
447   sequences not in NFC/NFKC.  */
448static void
449warn_about_normalization (cpp_reader *pfile,
450			  const cpp_token *token,
451			  const struct normalize_state *s)
452{
453  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
454      && !pfile->state.skipping)
455    {
456      /* Make sure that the token is printed using UCNs, even
457	 if we'd otherwise happily print UTF-8.  */
458      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
459      size_t sz;
460
461      sz = cpp_spell_token (pfile, token, buf, false) - buf;
462      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
463	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
464			     "`%.*s' is not in NFKC", (int) sz, buf);
465      else
466	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
467			     "`%.*s' is not in NFC", (int) sz, buf);
468    }
469}
470
471/* Returns TRUE if the sequence starting at buffer->cur is invalid in
472   an identifier.  FIRST is TRUE if this starts an identifier.  */
473static bool
474forms_identifier_p (cpp_reader *pfile, int first,
475		    struct normalize_state *state)
476{
477  cpp_buffer *buffer = pfile->buffer;
478
479  if (*buffer->cur == '$')
480    {
481      if (!CPP_OPTION (pfile, dollars_in_ident))
482	return false;
483
484      buffer->cur++;
485      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
486	{
487	  CPP_OPTION (pfile, warn_dollars) = 0;
488	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
489	}
490
491      return true;
492    }
493
494  /* Is this a syntactically valid UCN?  */
495  if (CPP_OPTION (pfile, extended_identifiers)
496      && *buffer->cur == '\\'
497      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
498    {
499      buffer->cur += 2;
500      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
501			  state))
502	return true;
503      buffer->cur -= 2;
504    }
505
506  return false;
507}
508
509/* Helper function to get the cpp_hashnode of the identifier BASE.  */
510static cpp_hashnode *
511lex_identifier_intern (cpp_reader *pfile, const uchar *base)
512{
513  cpp_hashnode *result;
514  const uchar *cur;
515  unsigned int len;
516  unsigned int hash = HT_HASHSTEP (0, *base);
517
518  cur = base + 1;
519  while (ISIDNUM (*cur))
520    {
521      hash = HT_HASHSTEP (hash, *cur);
522      cur++;
523    }
524  len = cur - base;
525  hash = HT_HASHFINISH (hash, len);
526  result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
527					      base, len, hash, HT_ALLOC));
528
529  /* Rarely, identifiers require diagnostics when lexed.  */
530  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
531			&& !pfile->state.skipping, 0))
532    {
533      /* It is allowed to poison the same identifier twice.  */
534      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
535	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
536		   NODE_NAME (result));
537
538      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
539	 replacement list of a variadic macro.  */
540      if (result == pfile->spec_nodes.n__VA_ARGS__
541	  && !pfile->state.va_args_ok)
542	cpp_error (pfile, CPP_DL_PEDWARN,
543		   "__VA_ARGS__ can only appear in the expansion"
544		   " of a C99 variadic macro");
545
546      /* For -Wc++-compat, warn about use of C++ named operators.  */
547      if (result->flags & NODE_WARN_OPERATOR)
548	cpp_error (pfile, CPP_DL_WARNING,
549		   "identifier \"%s\" is a special operator name in C++",
550		   NODE_NAME (result));
551    }
552
553  return result;
554}
555
556/* Get the cpp_hashnode of an identifier specified by NAME in
557   the current cpp_reader object.  If none is found, NULL is returned.  */
558cpp_hashnode *
559_cpp_lex_identifier (cpp_reader *pfile, const char *name)
560{
561  cpp_hashnode *result;
562  result = lex_identifier_intern (pfile, (uchar *) name);
563  return result;
564}
565
566/* Lex an identifier starting at BUFFER->CUR - 1.  */
567static cpp_hashnode *
568lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
569		struct normalize_state *nst)
570{
571  cpp_hashnode *result;
572  const uchar *cur;
573  unsigned int len;
574  unsigned int hash = HT_HASHSTEP (0, *base);
575
576  cur = pfile->buffer->cur;
577  if (! starts_ucn)
578    while (ISIDNUM (*cur))
579      {
580	hash = HT_HASHSTEP (hash, *cur);
581	cur++;
582      }
583  pfile->buffer->cur = cur;
584  if (starts_ucn || forms_identifier_p (pfile, false, nst))
585    {
586      /* Slower version for identifiers containing UCNs (or $).  */
587      do {
588	while (ISIDNUM (*pfile->buffer->cur))
589	  {
590	    pfile->buffer->cur++;
591	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
592	  }
593      } while (forms_identifier_p (pfile, false, nst));
594      result = _cpp_interpret_identifier (pfile, base,
595					  pfile->buffer->cur - base);
596    }
597  else
598    {
599      len = cur - base;
600      hash = HT_HASHFINISH (hash, len);
601
602      result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
603						  base, len, hash, HT_ALLOC));
604    }
605
606  /* Rarely, identifiers require diagnostics when lexed.  */
607  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
608			&& !pfile->state.skipping, 0))
609    {
610      /* It is allowed to poison the same identifier twice.  */
611      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
612	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
613		   NODE_NAME (result));
614
615      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
616	 replacement list of a variadic macro.  */
617      if (result == pfile->spec_nodes.n__VA_ARGS__
618	  && !pfile->state.va_args_ok)
619	cpp_error (pfile, CPP_DL_PEDWARN,
620		   "__VA_ARGS__ can only appear in the expansion"
621		   " of a C99 variadic macro");
622
623      /* For -Wc++-compat, warn about use of C++ named operators.  */
624      if (result->flags & NODE_WARN_OPERATOR)
625	cpp_error (pfile, CPP_DL_WARNING,
626		   "identifier \"%s\" is a special operator name in C++",
627		   NODE_NAME (result));
628    }
629
630  return result;
631}
632
633/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
634static void
635lex_number (cpp_reader *pfile, cpp_string *number,
636	    struct normalize_state *nst)
637{
638  const uchar *cur;
639  const uchar *base;
640  uchar *dest;
641
642  base = pfile->buffer->cur - 1;
643  do
644    {
645      cur = pfile->buffer->cur;
646
647      /* N.B. ISIDNUM does not include $.  */
648      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
649	{
650	  cur++;
651	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
652	}
653
654      pfile->buffer->cur = cur;
655    }
656  while (forms_identifier_p (pfile, false, nst));
657
658  number->len = cur - base;
659  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
660  memcpy (dest, base, number->len);
661  dest[number->len] = '\0';
662  number->text = dest;
663}
664
665/* Create a token of type TYPE with a literal spelling.  */
666static void
667create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
668		unsigned int len, enum cpp_ttype type)
669{
670  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
671
672  memcpy (dest, base, len);
673  dest[len] = '\0';
674  token->type = type;
675  token->val.str.len = len;
676  token->val.str.text = dest;
677}
678
679/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
680   sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
681
682static void
683bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
684		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
685{
686  _cpp_buff *first_buff = *first_buff_p;
687  _cpp_buff *last_buff = *last_buff_p;
688
689  if (first_buff == NULL)
690    first_buff = last_buff = _cpp_get_buff (pfile, len);
691  else if (len > BUFF_ROOM (last_buff))
692    {
693      size_t room = BUFF_ROOM (last_buff);
694      memcpy (BUFF_FRONT (last_buff), base, room);
695      BUFF_FRONT (last_buff) += room;
696      base += room;
697      len -= room;
698      last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
699    }
700
701  memcpy (BUFF_FRONT (last_buff), base, len);
702  BUFF_FRONT (last_buff) += len;
703
704  *first_buff_p = first_buff;
705  *last_buff_p = last_buff;
706}
707
/* Lexes a raw string.  The stored string contains the spelling, including
   double quotes, delimiter string, '(' and ')', any leading
   'L', 'u', 'U' or 'u8' and 'R' modifier.  The result is stored in
   TOKEN: its type is the type of the literal, CPP_OTHER if the literal
   was not properly terminated or had a bad delimiter, or CPP_EOF if
   the input ran out inside the string.

   BASE points at the start of the literal (including any encoding
   prefix) and CUR at the character following the 'R'.  Trigraph
   conversions and line splices recorded as line notes are undone
   inside the body of the raw string.

   The spelling is NUL-terminated, but it is not guaranteed that this
   is the first NUL since embedded NULs are preserved.  */

static void
lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
		const uchar *cur)
{
  /* Location of the first NUL seen in the body, or 0 if none.  */
  source_location saw_NUL = 0;
  const uchar *raw_prefix;
  unsigned int raw_prefix_len = 0;
  enum cpp_ttype type;
  /* Number of bytes accumulated so far in the buffer chain.  */
  size_t total_len = 0;
  _cpp_buff *first_buff = NULL, *last_buff = NULL;
  _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];

  type = (*base == 'L' ? CPP_WSTRING :
	  *base == 'U' ? CPP_STRING32 :
	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
	  : CPP_STRING);

  /* Collect the delimiter: at most 16 characters from the permitted
     set, ending at the first character outside that set.  */
  raw_prefix = cur + 1;
  while (raw_prefix_len < 16)
    {
      switch (raw_prefix[raw_prefix_len])
	{
	case ' ': case '(': case ')': case '\\': case '\t':
	case '\v': case '\f': case '\n': default:
	  break;
	/* Basic source charset except the above chars.  */
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	case '0': case '1': case '2': case '3': case '4': case '5':
	case '6': case '7': case '8': case '9':
	case '_': case '{': case '}': case '#': case '[': case ']':
	case '<': case '>': case '%': case ':': case ';': case '.':
	case '?': case '*': case '+': case '-': case '/': case '^':
	case '&': case '|': case '~': case '!': case '=': case ',':
	case '"': case '\'':
	  raw_prefix_len++;
	  continue;
	}
      break;
    }

  /* The delimiter must be followed by '('; otherwise diagnose and
     return everything up to and including the 'R' as CPP_OTHER.  */
  if (raw_prefix[raw_prefix_len] != '(')
    {
      int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
		+ 1;
      if (raw_prefix_len == 16)
	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
			     "raw string delimiter longer than 16 characters");
      else
	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
			     "invalid character '%c' in raw string delimiter",
			     (int) raw_prefix[raw_prefix_len]);
      pfile->buffer->cur = raw_prefix - 1;
      create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
      return;
    }

  cur = raw_prefix + raw_prefix_len + 1;
  for (;;)
    {
/* Append LEN bytes at STR to the buffer chain and account for them in
   total_len.  */
#define BUF_APPEND(STR,LEN)					\
      do {							\
	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
			&first_buff, &last_buff);		\
	total_len += (LEN);					\
      } while (0);

      cppchar_t c;

      /* If we previously performed any trigraph or line splicing
	 transformations, undo them within the body of the raw string.  */
      while (note->pos < cur)
	++note;
      for (; note->pos == cur; ++note)
	{
	  switch (note->type)
	    {
	    case '\\':
	    case ' ':
	      /* Restore backslash followed by newline.  */
	      BUF_APPEND (base, cur - base);
	      base = cur;
	      BUF_APPEND ("\\", 1);
	    after_backslash:
	      if (note->type == ' ')
		{
		  /* GNU backslash whitespace newline extension.  FIXME
		     could be any sequence of non-vertical space.  When we
		     can properly restore any such sequence, we should mark
		     this note as handled so _cpp_process_line_notes
		     doesn't warn.  */
		  BUF_APPEND (" ", 1);
		}

	      BUF_APPEND ("\n", 1);
	      break;

	    case 0:
	      /* Already handled.  */
	      break;

	    default:
	      if (_cpp_trigraph_map[note->type])
		{
		  /* Don't warn about this trigraph in
		     _cpp_process_line_notes, since trigraphs show up as
		     trigraphs in raw strings.  */
		  uchar type = note->type;
		  note->type = 0;

		  if (!CPP_OPTION (pfile, trigraphs))
		    /* If we didn't convert the trigraph in the first
		       place, don't do anything now either.  */
		    break;

		  BUF_APPEND (base, cur - base);
		  base = cur;
		  BUF_APPEND ("??", 2);

		  /* ??/ followed by newline gets two line notes, one for
		     the trigraph and one for the backslash/newline.  */
		  if (type == '/' && note[1].pos == cur)
		    {
		      if (note[1].type != '\\'
			  && note[1].type != ' ')
			abort ();
		      BUF_APPEND ("/", 1);
		      ++note;
		      goto after_backslash;
		    }
		  /* The ) from ??) could be part of the suffix.  */
		  else if (type == ')'
			   && strncmp ((const char *) cur+1,
				       (const char *) raw_prefix,
				       raw_prefix_len) == 0
			   && cur[raw_prefix_len+1] == '"')
		    {
		      BUF_APPEND (")", 1);
		      base++;
		      cur += raw_prefix_len + 2;
		      goto break_outer_loop;
		    }
		  else
		    {
		      /* Skip the replacement character.  */
		      base = ++cur;
		      BUF_APPEND (&type, 1);
		    }
		}
	      else
		abort ();
	      break;
	    }
	}
      c = *cur++;

      /* A ')' followed by the delimiter and '"' ends the literal.  */
      if (c == ')'
	  && strncmp ((const char *) cur, (const char *) raw_prefix,
		      raw_prefix_len) == 0
	  && cur[raw_prefix_len] == '"')
	{
	  cur += raw_prefix_len + 1;
	  break;
	}
      else if (c == '\n')
	{
	  /* In these states the string is not continued onto the next
	     line; it is reported as unterminated instead.  */
	  if (pfile->state.in_directive
	      || pfile->state.parsing_args
	      || pfile->state.in_deferred_pragma)
	    {
	      cur--;
	      type = CPP_OTHER;
	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
				   "unterminated raw string");
	      break;
	    }

	  /* Save what has been lexed on this line, then fetch the next
	     line and continue from its start.  */
	  BUF_APPEND (base, cur - base);

	  if (pfile->buffer->cur < pfile->buffer->rlimit)
	    CPP_INCREMENT_LINE (pfile, 0);
	  pfile->buffer->need_line = true;

	  pfile->buffer->cur = cur-1;
	  _cpp_process_line_notes (pfile, false);
	  if (!_cpp_get_fresh_line (pfile))
	    {
	      source_location src_loc = token->src_loc;
	      token->type = CPP_EOF;
	      /* Tell the compiler the line number of the EOF token.  */
	      token->src_loc = pfile->line_table->highest_line;
	      token->flags = BOL;
	      if (first_buff != NULL)
		_cpp_release_buff (pfile, first_buff);
	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
				   "unterminated raw string");
	      return;
	    }

	  cur = base = pfile->buffer->cur;
	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
	}
      else if (c == '\0' && !saw_NUL)
	LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
				     CPP_BUF_COLUMN (pfile->buffer, cur));
    }
 break_outer_loop:

  if (saw_NUL && !pfile->state.skipping)
    cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
	       "null character(s) preserved in literal");

  pfile->buffer->cur = cur;
  if (first_buff == NULL)
    /* Nothing was buffered: the spelling is contiguous in the input.  */
    create_literal (pfile, token, base, cur - base, type);
  else
    {
      /* Concatenate the buffered pieces followed by the unbuffered
	 tail BASE..CUR into one NUL-terminated spelling.  */
      uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);

      token->type = type;
      token->val.str.len = total_len + (cur - base);
      token->val.str.text = dest;
      last_buff = first_buff;
      while (last_buff != NULL)
	{
	  memcpy (dest, last_buff->base,
		  BUFF_FRONT (last_buff) - last_buff->base);
	  dest += BUFF_FRONT (last_buff) - last_buff->base;
	  last_buff = last_buff->next;
	}
      _cpp_release_buff (pfile, first_buff);
      memcpy (dest, base, cur - base);
      dest[cur - base] = '\0';
    }
}
959
960/* Lexes a string, character constant, or angle-bracketed header file
961   name.  The stored string contains the spelling, including opening
962   quote and any leading 'L', 'u', 'U' or 'u8' and optional
963   'R' modifier.  It returns the type of the literal, or CPP_OTHER
964   if it was not properly terminated, or CPP_LESS for an unterminated
965   header name which must be relexed as normal tokens.
966
967   The spelling is NUL-terminated, but it is not guaranteed that this
968   is the first NUL since embedded NULs are preserved.  */
969static void
970lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
971{
972  bool saw_NUL = false;
973  const uchar *cur;
974  cppchar_t terminator;
975  enum cpp_ttype type;
976
977  cur = base;
978  terminator = *cur++;
979  if (terminator == 'L' || terminator == 'U')
980    terminator = *cur++;
981  else if (terminator == 'u')
982    {
983      terminator = *cur++;
984      if (terminator == '8')
985	terminator = *cur++;
986    }
987  if (terminator == 'R')
988    {
989      lex_raw_string (pfile, token, base, cur);
990      return;
991    }
992  if (terminator == '"')
993    type = (*base == 'L' ? CPP_WSTRING :
994	    *base == 'U' ? CPP_STRING32 :
995	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
996			 : CPP_STRING);
997  else if (terminator == '\'')
998    type = (*base == 'L' ? CPP_WCHAR :
999	    *base == 'U' ? CPP_CHAR32 :
1000	    *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1001  else
1002    terminator = '>', type = CPP_HEADER_NAME;
1003
1004  for (;;)
1005    {
1006      cppchar_t c = *cur++;
1007
1008      /* In #include-style directives, terminators are not escapable.  */
1009      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1010	cur++;
1011      else if (c == terminator)
1012	break;
1013      else if (c == '\n')
1014	{
1015	  cur--;
1016	  /* Unmatched quotes always yield undefined behavior, but
1017	     greedy lexing means that what appears to be an unterminated
1018	     header name may actually be a legitimate sequence of tokens.  */
1019	  if (terminator == '>')
1020	    {
1021	      token->type = CPP_LESS;
1022	      return;
1023	    }
1024	  type = CPP_OTHER;
1025	  break;
1026	}
1027      else if (c == '\0')
1028	saw_NUL = true;
1029    }
1030
1031  if (saw_NUL && !pfile->state.skipping)
1032    cpp_error (pfile, CPP_DL_WARNING,
1033	       "null character(s) preserved in literal");
1034
1035  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1036    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1037	       (int) terminator);
1038
1039  pfile->buffer->cur = cur;
1040  create_literal (pfile, token, base, cur - base, type);
1041}
1042
1043/* Return the comment table. The client may not make any assumption
1044   about the ordering of the table.  */
1045cpp_comment_table *
1046cpp_get_comments (cpp_reader *pfile)
1047{
1048  return &pfile->comments;
1049}
1050
1051/* Append a comment to the end of the comment table. */
1052static void
1053store_comment (cpp_reader *pfile, cpp_token *token)
1054{
1055  int len;
1056
1057  if (pfile->comments.allocated == 0)
1058    {
1059      pfile->comments.allocated = 256;
1060      pfile->comments.entries = (cpp_comment *) xmalloc
1061	(pfile->comments.allocated * sizeof (cpp_comment));
1062    }
1063
1064  if (pfile->comments.count == pfile->comments.allocated)
1065    {
1066      pfile->comments.allocated *= 2;
1067      pfile->comments.entries = (cpp_comment *) xrealloc
1068	(pfile->comments.entries,
1069	 pfile->comments.allocated * sizeof (cpp_comment));
1070    }
1071
1072  len = token->val.str.len;
1073
1074  /* Copy comment. Note, token may not be NULL terminated. */
1075  pfile->comments.entries[pfile->comments.count].comment =
1076    (char *) xmalloc (sizeof (char) * (len + 1));
1077  memcpy (pfile->comments.entries[pfile->comments.count].comment,
1078	  token->val.str.text, len);
1079  pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1080
1081  /* Set source location. */
1082  pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1083
1084  /* Increment the count of entries in the comment table. */
1085  pfile->comments.count++;
1086}
1087
1088/* The stored comment includes the comment start and any terminator.  */
1089static void
1090save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1091	      cppchar_t type)
1092{
1093  unsigned char *buffer;
1094  unsigned int len, clen;
1095  int convert_to_c = (pfile->state.in_directive || pfile->state.collecting_args)
1096    && type == '/';
1097
1098  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1099
1100  /* C++ comments probably (not definitely) have moved past a new
1101     line, which we don't want to save in the comment.  */
1102  if (is_vspace (pfile->buffer->cur[-1]))
1103    len--;
1104
1105  /* If we are currently in a directive, then we need to store all
1106     C++ comments as C comments internally, and so we need to
1107     allocate a little extra space in that case.
1108
1109     Note that the only time we encounter a directive here is
1110     when we are saving comments in a "#define".  */
1111  clen = convert_to_c ? len + 2 : len;
1112
1113  buffer = _cpp_unaligned_alloc (pfile, clen);
1114
1115  token->type = CPP_COMMENT;
1116  token->val.str.len = clen;
1117  token->val.str.text = buffer;
1118
1119  buffer[0] = '/';
1120  memcpy (buffer + 1, from, len - 1);
1121
1122  /* Finish conversion to a C comment, if necessary.  */
1123  if (convert_to_c)
1124    {
1125      buffer[1] = '*';
1126      buffer[clen - 2] = '*';
1127      buffer[clen - 1] = '/';
1128    }
1129
1130  /* Finally store this comment for use by clients of libcpp. */
1131  store_comment (pfile, token);
1132}
1133
1134/* Allocate COUNT tokens for RUN.  */
1135void
1136_cpp_init_tokenrun (tokenrun *run, unsigned int count)
1137{
1138  run->base = XNEWVEC (cpp_token, count);
1139  run->limit = run->base + count;
1140  run->next = NULL;
1141}
1142
1143/* Returns the next tokenrun, or creates one if there is none.  */
1144static tokenrun *
1145next_tokenrun (tokenrun *run)
1146{
1147  if (run->next == NULL)
1148    {
1149      run->next = XNEW (tokenrun);
1150      run->next->prev = run;
1151      _cpp_init_tokenrun (run->next, 250);
1152    }
1153
1154  return run->next;
1155}
1156
1157/* Look ahead in the input stream.  */
1158const cpp_token *
1159cpp_peek_token (cpp_reader *pfile, int index)
1160{
1161  cpp_context *context = pfile->context;
1162  const cpp_token *peektok;
1163  int count;
1164
1165  /* First, scan through any pending cpp_context objects.  */
1166  while (context->prev)
1167    {
1168      ptrdiff_t sz = (context->direct_p
1169                      ? LAST (context).token - FIRST (context).token
1170                      : LAST (context).ptoken - FIRST (context).ptoken);
1171
1172      if (index < (int) sz)
1173        return (context->direct_p
1174                ? FIRST (context).token + index
1175                : *(FIRST (context).ptoken + index));
1176
1177      index -= (int) sz;
1178      context = context->prev;
1179    }
1180
1181  /* We will have to read some new tokens after all (and do so
1182     without invalidating preceding tokens).  */
1183  count = index;
1184  pfile->keep_tokens++;
1185
1186  do
1187    {
1188      peektok = _cpp_lex_token (pfile);
1189      if (peektok->type == CPP_EOF)
1190	return peektok;
1191    }
1192  while (index--);
1193
1194  _cpp_backup_tokens_direct (pfile, count + 1);
1195  pfile->keep_tokens--;
1196
1197  return peektok;
1198}
1199
1200/* Allocate a single token that is invalidated at the same time as the
1201   rest of the tokens on the line.  Has its line and col set to the
1202   same as the last lexed token, so that diagnostics appear in the
1203   right place.  */
1204cpp_token *
1205_cpp_temp_token (cpp_reader *pfile)
1206{
1207  cpp_token *old, *result;
1208  ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1209  ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1210
1211  old = pfile->cur_token - 1;
1212  /* Any pre-existing lookaheads must not be clobbered.  */
1213  if (la)
1214    {
1215      if (sz <= la)
1216        {
1217          tokenrun *next = next_tokenrun (pfile->cur_run);
1218
1219          if (sz < la)
1220            memmove (next->base + 1, next->base,
1221                     (la - sz) * sizeof (cpp_token));
1222
1223          next->base[0] = pfile->cur_run->limit[-1];
1224        }
1225
1226      if (sz > 1)
1227        memmove (pfile->cur_token + 1, pfile->cur_token,
1228                 MIN (la, sz - 1) * sizeof (cpp_token));
1229    }
1230
1231  if (!sz && pfile->cur_token == pfile->cur_run->limit)
1232    {
1233      pfile->cur_run = next_tokenrun (pfile->cur_run);
1234      pfile->cur_token = pfile->cur_run->base;
1235    }
1236
1237  result = pfile->cur_token++;
1238  result->src_loc = old->src_loc;
1239  return result;
1240}
1241
1242/* Lex a token into RESULT (external interface).  Takes care of issues
1243   like directive handling, token lookahead, multiple include
1244   optimization and skipping.  */
1245const cpp_token *
1246_cpp_lex_token (cpp_reader *pfile)
1247{
1248  cpp_token *result;
1249
1250  for (;;)
1251    {
1252      if (pfile->cur_token == pfile->cur_run->limit)
1253	{
1254	  pfile->cur_run = next_tokenrun (pfile->cur_run);
1255	  pfile->cur_token = pfile->cur_run->base;
1256	}
1257      /* We assume that the current token is somewhere in the current
1258	 run.  */
1259      if (pfile->cur_token < pfile->cur_run->base
1260	  || pfile->cur_token >= pfile->cur_run->limit)
1261	abort ();
1262
1263      if (pfile->lookaheads)
1264	{
1265	  pfile->lookaheads--;
1266	  result = pfile->cur_token++;
1267	}
1268      else
1269	result = _cpp_lex_direct (pfile);
1270
1271      if (result->flags & BOL)
1272	{
1273	  /* Is this a directive.  If _cpp_handle_directive returns
1274	     false, it is an assembler #.  */
1275	  if (result->type == CPP_HASH
1276	      /* 6.10.3 p 11: Directives in a list of macro arguments
1277		 gives undefined behavior.  This implementation
1278		 handles the directive as normal.  */
1279	      && pfile->state.parsing_args != 1)
1280	    {
1281	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1282		{
1283		  if (pfile->directive_result.type == CPP_PADDING)
1284		    continue;
1285		  result = &pfile->directive_result;
1286		}
1287	    }
1288	  else if (pfile->state.in_deferred_pragma)
1289	    result = &pfile->directive_result;
1290
1291	  if (pfile->cb.line_change && !pfile->state.skipping)
1292	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1293	}
1294
1295      /* We don't skip tokens in directives.  */
1296      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1297	break;
1298
1299      /* Outside a directive, invalidate controlling macros.  At file
1300	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1301	 get here and MI optimization works.  */
1302      pfile->mi_valid = false;
1303
1304      if (!pfile->state.skipping || result->type == CPP_EOF)
1305	break;
1306    }
1307
1308  return result;
1309}
1310
1311/* Returns true if a fresh line has been loaded.  */
1312bool
1313_cpp_get_fresh_line (cpp_reader *pfile)
1314{
1315  int return_at_eof;
1316
1317  /* We can't get a new line until we leave the current directive.  */
1318  if (pfile->state.in_directive)
1319    return false;
1320
1321  for (;;)
1322    {
1323      cpp_buffer *buffer = pfile->buffer;
1324
1325      if (!buffer->need_line)
1326	return true;
1327
1328      if (buffer->next_line < buffer->rlimit)
1329	{
1330	  _cpp_clean_line (pfile);
1331	  return true;
1332	}
1333
1334      /* First, get out of parsing arguments state.  */
1335      if (pfile->state.parsing_args)
1336	return false;
1337
1338      /* End of buffer.  Non-empty files should end in a newline.  */
1339      if (buffer->buf != buffer->rlimit
1340	  && buffer->next_line > buffer->rlimit
1341	  && !buffer->from_stage3)
1342	{
1343	  /* Clip to buffer size.  */
1344	  buffer->next_line = buffer->rlimit;
1345	}
1346
1347      return_at_eof = buffer->return_at_eof;
1348      _cpp_pop_buffer (pfile);
1349      if (pfile->buffer == NULL || return_at_eof)
1350	return false;
1351    }
1352}
1353
/* Helper for lexing one- or two-character operators.  It expects
   `buffer' and `result' to be in scope where it is used: if the next
   unread character is CHAR, that character is consumed and the token
   gets THEN_TYPE; otherwise nothing is consumed and the token gets
   ELSE_TYPE.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      if (*buffer->cur == CHAR)				\
	{						\
	  buffer->cur++;				\
	  result->type = THEN_TYPE;			\
	}						\
      else						\
	result->type = ELSE_TYPE;			\
    }							\
  while (0)
1362
1363/* Lex a token into pfile->cur_token, which is also incremented, to
1364   get diagnostics pointing to the correct location.
1365
1366   Does not handle issues such as token lookahead, multiple-include
1367   optimization, directives, skipping etc.  This function is only
1368   suitable for use by _cpp_lex_token, and in special cases like
1369   lex_expansion_token which doesn't care for any of these issues.
1370
1371   When meeting a newline, returns CPP_EOF if parsing a directive,
1372   otherwise returns to the start of the token buffer if permissible.
1373   Returns the location of the lexed token.  */
1374cpp_token *
1375_cpp_lex_direct (cpp_reader *pfile)
1376{
1377  cppchar_t c;
1378  cpp_buffer *buffer;
1379  const unsigned char *comment_start;
1380  cpp_token *result = pfile->cur_token++;
1381
1382 fresh_line:
1383  result->flags = 0;
1384  buffer = pfile->buffer;
1385  if (buffer->need_line)
1386    {
1387      if (pfile->state.in_deferred_pragma)
1388	{
1389	  result->type = CPP_PRAGMA_EOL;
1390	  pfile->state.in_deferred_pragma = false;
1391	  if (!pfile->state.pragma_allow_expansion)
1392	    pfile->state.prevent_expansion--;
1393	  return result;
1394	}
1395      if (!_cpp_get_fresh_line (pfile))
1396	{
1397	  result->type = CPP_EOF;
1398	  if (!pfile->state.in_directive)
1399	    {
1400	      /* Tell the compiler the line number of the EOF token.  */
1401	      result->src_loc = pfile->line_table->highest_line;
1402	      result->flags = BOL;
1403	    }
1404	  return result;
1405	}
1406      if (!pfile->keep_tokens)
1407	{
1408	  pfile->cur_run = &pfile->base_run;
1409	  result = pfile->base_run.base;
1410	  pfile->cur_token = result + 1;
1411	}
1412      result->flags = BOL;
1413      if (pfile->state.parsing_args == 2)
1414	result->flags |= PREV_WHITE;
1415    }
1416  buffer = pfile->buffer;
1417 update_tokens_line:
1418  result->src_loc = pfile->line_table->highest_line;
1419
1420 skipped_white:
1421  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1422      && !pfile->overlaid_buffer)
1423    {
1424      _cpp_process_line_notes (pfile, false);
1425      result->src_loc = pfile->line_table->highest_line;
1426    }
1427  c = *buffer->cur++;
1428
1429  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1430			       CPP_BUF_COLUMN (buffer, buffer->cur));
1431
1432  switch (c)
1433    {
1434    case ' ': case '\t': case '\f': case '\v': case '\0':
1435      result->flags |= PREV_WHITE;
1436      skip_whitespace (pfile, c);
1437      goto skipped_white;
1438
1439    case '\n':
1440      if (buffer->cur < buffer->rlimit)
1441	CPP_INCREMENT_LINE (pfile, 0);
1442      buffer->need_line = true;
1443      goto fresh_line;
1444
1445    case '0': case '1': case '2': case '3': case '4':
1446    case '5': case '6': case '7': case '8': case '9':
1447      {
1448	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1449	result->type = CPP_NUMBER;
1450	lex_number (pfile, &result->val.str, &nst);
1451	warn_about_normalization (pfile, result, &nst);
1452	break;
1453      }
1454
1455    case 'L':
1456    case 'u':
1457    case 'U':
1458    case 'R':
1459      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
1460	 wide strings or raw strings.  */
1461      if (c == 'L' || CPP_OPTION (pfile, uliterals))
1462	{
1463	  if ((*buffer->cur == '\'' && c != 'R')
1464	      || *buffer->cur == '"'
1465	      || (*buffer->cur == 'R'
1466		  && c != 'R'
1467		  && buffer->cur[1] == '"'
1468		  && CPP_OPTION (pfile, uliterals))
1469	      || (*buffer->cur == '8'
1470		  && c == 'u'
1471		  && (buffer->cur[1] == '"'
1472		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
1473	    {
1474	      lex_string (pfile, result, buffer->cur - 1);
1475	      break;
1476	    }
1477	}
1478      /* Fall through.  */
1479
1480    case '_':
1481    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1482    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1483    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1484    case 's': case 't':           case 'v': case 'w': case 'x':
1485    case 'y': case 'z':
1486    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1487    case 'G': case 'H': case 'I': case 'J': case 'K':
1488    case 'M': case 'N': case 'O': case 'P': case 'Q':
1489    case 'S': case 'T':           case 'V': case 'W': case 'X':
1490    case 'Y': case 'Z':
1491      result->type = CPP_NAME;
1492      {
1493	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1494	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
1495						&nst);
1496	warn_about_normalization (pfile, result, &nst);
1497      }
1498
1499      /* Convert named operators to their proper types.  */
1500      if (result->val.node.node->flags & NODE_OPERATOR)
1501	{
1502	  result->flags |= NAMED_OP;
1503	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
1504	}
1505      break;
1506
1507    case '\'':
1508    case '"':
1509      lex_string (pfile, result, buffer->cur - 1);
1510      break;
1511
1512    case '/':
1513      /* A potential block or line comment.  */
1514      comment_start = buffer->cur;
1515      c = *buffer->cur;
1516
1517      if (c == '*')
1518	{
1519	  if (_cpp_skip_block_comment (pfile))
1520	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1521	}
1522      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1523			    || cpp_in_system_header (pfile)))
1524	{
1525	  /* Warn about comments only if pedantically GNUC89, and not
1526	     in system headers.  */
1527	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1528	      && ! buffer->warned_cplusplus_comments)
1529	    {
1530	      cpp_error (pfile, CPP_DL_PEDWARN,
1531			 "C++ style comments are not allowed in ISO C90");
1532	      cpp_error (pfile, CPP_DL_PEDWARN,
1533			 "(this will be reported only once per input file)");
1534	      buffer->warned_cplusplus_comments = 1;
1535	    }
1536
1537	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1538	    cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1539	}
1540      else if (c == '=')
1541	{
1542	  buffer->cur++;
1543	  result->type = CPP_DIV_EQ;
1544	  break;
1545	}
1546      else
1547	{
1548	  result->type = CPP_DIV;
1549	  break;
1550	}
1551
1552      if (!pfile->state.save_comments)
1553	{
1554	  result->flags |= PREV_WHITE;
1555	  goto update_tokens_line;
1556	}
1557
1558      /* Save the comment as a token in its own right.  */
1559      save_comment (pfile, result, comment_start, c);
1560      break;
1561
1562    case '<':
1563      if (pfile->state.angled_headers)
1564	{
1565	  lex_string (pfile, result, buffer->cur - 1);
1566	  if (result->type != CPP_LESS)
1567	    break;
1568	}
1569
1570      result->type = CPP_LESS;
1571      if (*buffer->cur == '=')
1572	buffer->cur++, result->type = CPP_LESS_EQ;
1573      else if (*buffer->cur == '<')
1574	{
1575	  buffer->cur++;
1576	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1577	}
1578      else if (CPP_OPTION (pfile, digraphs))
1579	{
1580	  if (*buffer->cur == ':')
1581	    {
1582	      buffer->cur++;
1583	      result->flags |= DIGRAPH;
1584	      result->type = CPP_OPEN_SQUARE;
1585	    }
1586	  else if (*buffer->cur == '%')
1587	    {
1588	      buffer->cur++;
1589	      result->flags |= DIGRAPH;
1590	      result->type = CPP_OPEN_BRACE;
1591	    }
1592	}
1593      break;
1594
1595    case '>':
1596      result->type = CPP_GREATER;
1597      if (*buffer->cur == '=')
1598	buffer->cur++, result->type = CPP_GREATER_EQ;
1599      else if (*buffer->cur == '>')
1600	{
1601	  buffer->cur++;
1602	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1603	}
1604      break;
1605
1606    case '%':
1607      result->type = CPP_MOD;
1608      if (*buffer->cur == '=')
1609	buffer->cur++, result->type = CPP_MOD_EQ;
1610      else if (CPP_OPTION (pfile, digraphs))
1611	{
1612	  if (*buffer->cur == ':')
1613	    {
1614	      buffer->cur++;
1615	      result->flags |= DIGRAPH;
1616	      result->type = CPP_HASH;
1617	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
1618		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
1619	    }
1620	  else if (*buffer->cur == '>')
1621	    {
1622	      buffer->cur++;
1623	      result->flags |= DIGRAPH;
1624	      result->type = CPP_CLOSE_BRACE;
1625	    }
1626	}
1627      break;
1628
1629    case '.':
1630      result->type = CPP_DOT;
1631      if (ISDIGIT (*buffer->cur))
1632	{
1633	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1634	  result->type = CPP_NUMBER;
1635	  lex_number (pfile, &result->val.str, &nst);
1636	  warn_about_normalization (pfile, result, &nst);
1637	}
1638      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1639	buffer->cur += 2, result->type = CPP_ELLIPSIS;
1640      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1641	buffer->cur++, result->type = CPP_DOT_STAR;
1642      break;
1643
1644    case '+':
1645      result->type = CPP_PLUS;
1646      if (*buffer->cur == '+')
1647	buffer->cur++, result->type = CPP_PLUS_PLUS;
1648      else if (*buffer->cur == '=')
1649	buffer->cur++, result->type = CPP_PLUS_EQ;
1650      break;
1651
1652    case '-':
1653      result->type = CPP_MINUS;
1654      if (*buffer->cur == '>')
1655	{
1656	  buffer->cur++;
1657	  result->type = CPP_DEREF;
1658	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1659	    buffer->cur++, result->type = CPP_DEREF_STAR;
1660	}
1661      else if (*buffer->cur == '-')
1662	buffer->cur++, result->type = CPP_MINUS_MINUS;
1663      else if (*buffer->cur == '=')
1664	buffer->cur++, result->type = CPP_MINUS_EQ;
1665      break;
1666
1667    case '&':
1668      result->type = CPP_AND;
1669      if (*buffer->cur == '&')
1670	buffer->cur++, result->type = CPP_AND_AND;
1671      else if (*buffer->cur == '=')
1672	buffer->cur++, result->type = CPP_AND_EQ;
1673      break;
1674
1675    case '|':
1676      result->type = CPP_OR;
1677      if (*buffer->cur == '|')
1678	buffer->cur++, result->type = CPP_OR_OR;
1679      else if (*buffer->cur == '=')
1680	buffer->cur++, result->type = CPP_OR_EQ;
1681      break;
1682
1683    case ':':
1684      result->type = CPP_COLON;
1685      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1686	buffer->cur++, result->type = CPP_SCOPE;
1687      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1688	{
1689	  buffer->cur++;
1690	  result->flags |= DIGRAPH;
1691	  result->type = CPP_CLOSE_SQUARE;
1692	}
1693      break;
1694
1695    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1696    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1697    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1698    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1699    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
1700
1701    case '?': result->type = CPP_QUERY; break;
1702    case '~': result->type = CPP_COMPL; break;
1703    case ',': result->type = CPP_COMMA; break;
1704    case '(': result->type = CPP_OPEN_PAREN; break;
1705    case ')': result->type = CPP_CLOSE_PAREN; break;
1706    case '[': result->type = CPP_OPEN_SQUARE; break;
1707    case ']': result->type = CPP_CLOSE_SQUARE; break;
1708    case '{': result->type = CPP_OPEN_BRACE; break;
1709    case '}': result->type = CPP_CLOSE_BRACE; break;
1710    case ';': result->type = CPP_SEMICOLON; break;
1711
1712      /* @ is a punctuator in Objective-C.  */
1713    case '@': result->type = CPP_ATSIGN; break;
1714
1715    case '$':
1716    case '\\':
1717      {
1718	const uchar *base = --buffer->cur;
1719	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1720
1721	if (forms_identifier_p (pfile, true, &nst))
1722	  {
1723	    result->type = CPP_NAME;
1724	    result->val.node.node = lex_identifier (pfile, base, true, &nst);
1725	    warn_about_normalization (pfile, result, &nst);
1726	    break;
1727	  }
1728	buffer->cur++;
1729      }
1730
1731    default:
1732      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1733      break;
1734    }
1735
1736  return result;
1737}
1738
1739/* An upper bound on the number of bytes needed to spell TOKEN.
1740   Does not include preceding whitespace.  */
1741unsigned int
1742cpp_token_len (const cpp_token *token)
1743{
1744  unsigned int len;
1745
1746  switch (TOKEN_SPELL (token))
1747    {
1748    default:		len = 6;				break;
1749    case SPELL_LITERAL:	len = token->val.str.len;		break;
1750    case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
1751    }
1752
1753  return len;
1754}
1755
/* Decode the UTF-8 sequence that starts at NAME and write the
   corresponding universal character name, in the form \UXXXXXXXX
   with lower-case hex digits, to BUFFER.  Exactly 10 bytes are
   written and no NUL is appended.  The number of bytes consumed from
   NAME is returned.  Aborts if a continuation byte is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int k;

  /* The number of leading one bits in the first byte gives the
     length of the sequence.  */
  for (lead = *name; lead & 0x80; lead <<= 1)
    seq_len++;

  /* Payload bits of the first byte, then six more per continuation
     byte.  */
  code_point = *name & (0x7F >> seq_len);
  for (k = 1; k < seq_len; k++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* A continuation byte must look like 10xxxxxx.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (4 * (7 - k))) & 0xF];

  return seq_len;
}
1789
1790/* Given a token TYPE corresponding to a digraph, return a pointer to
1791   the spelling of the digraph.  */
1792static const unsigned char *
1793cpp_digraph2name (enum cpp_ttype type)
1794{
1795  return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
1796}
1797
1798/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1799   already contain the enough space to hold the token's spelling.
1800   Returns a pointer to the character after the last character written.
1801   FORSTRING is true if this is to be the spelling after translation
1802   phase 1 (this is different for UCNs).
1803   FIXME: Would be nice if we didn't need the PFILE argument.  */
1804unsigned char *
1805cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1806		 unsigned char *buffer, bool forstring)
1807{
1808  switch (TOKEN_SPELL (token))
1809    {
1810    case SPELL_OPERATOR:
1811      {
1812	const unsigned char *spelling;
1813	unsigned char c;
1814
1815	if (token->flags & DIGRAPH)
1816	  spelling = cpp_digraph2name (token->type);
1817	else if (token->flags & NAMED_OP)
1818	  goto spell_ident;
1819	else
1820	  spelling = TOKEN_NAME (token);
1821
1822	while ((c = *spelling++) != '\0')
1823	  *buffer++ = c;
1824      }
1825      break;
1826
1827    spell_ident:
1828    case SPELL_IDENT:
1829      if (forstring)
1830	{
1831	  memcpy (buffer, NODE_NAME (token->val.node.node),
1832		  NODE_LEN (token->val.node.node));
1833	  buffer += NODE_LEN (token->val.node.node);
1834	}
1835      else
1836	{
1837	  size_t i;
1838	  const unsigned char * name = NODE_NAME (token->val.node.node);
1839
1840	  for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1841	    if (name[i] & ~0x7F)
1842	      {
1843		i += utf8_to_ucn (buffer, name + i) - 1;
1844		buffer += 10;
1845	      }
1846	    else
1847	      *buffer++ = NODE_NAME (token->val.node.node)[i];
1848	}
1849      break;
1850
1851    case SPELL_LITERAL:
1852      memcpy (buffer, token->val.str.text, token->val.str.len);
1853      buffer += token->val.str.len;
1854      break;
1855
1856    case SPELL_NONE:
1857      cpp_error (pfile, CPP_DL_ICE,
1858		 "unspellable token %s", TOKEN_NAME (token));
1859      break;
1860    }
1861
1862  return buffer;
1863}
1864
1865/* Returns TOKEN spelt as a null-terminated string.  The string is
1866   freed when the reader is destroyed.  Useful for diagnostics.  */
1867unsigned char *
1868cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1869{
1870  unsigned int len = cpp_token_len (token) + 1;
1871  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1872
1873  end = cpp_spell_token (pfile, token, start, false);
1874  end[0] = '\0';
1875
1876  return start;
1877}
1878
1879/* Returns a pointer to a string which spells the token defined by
1880   TYPE and FLAGS.  Used by C front ends, which really should move to
1881   using cpp_token_as_text.  */
1882const char *
1883cpp_type2name (enum cpp_ttype type, unsigned char flags)
1884{
1885  if (flags & DIGRAPH)
1886    return (const char *) cpp_digraph2name (type);
1887  else if (flags & NAMED_OP)
1888    return cpp_named_operator2name (type);
1889
1890  return (const char *) token_spellings[type].name;
1891}
1892
1893/* Writes the spelling of token to FP, without any preceding space.
1894   Separated from cpp_spell_token for efficiency - to avoid stdio
1895   double-buffering.  */
1896void
1897cpp_output_token (const cpp_token *token, FILE *fp)
1898{
1899  switch (TOKEN_SPELL (token))
1900    {
1901    case SPELL_OPERATOR:
1902      {
1903	const unsigned char *spelling;
1904	int c;
1905
1906	if (token->flags & DIGRAPH)
1907	  spelling = cpp_digraph2name (token->type);
1908	else if (token->flags & NAMED_OP)
1909	  goto spell_ident;
1910	else
1911	  spelling = TOKEN_NAME (token);
1912
1913	c = *spelling;
1914	do
1915	  putc (c, fp);
1916	while ((c = *++spelling) != '\0');
1917      }
1918      break;
1919
1920    spell_ident:
1921    case SPELL_IDENT:
1922      {
1923	size_t i;
1924	const unsigned char * name = NODE_NAME (token->val.node.node);
1925
1926	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1927	  if (name[i] & ~0x7F)
1928	    {
1929	      unsigned char buffer[10];
1930	      i += utf8_to_ucn (buffer, name + i) - 1;
1931	      fwrite (buffer, 1, 10, fp);
1932	    }
1933	  else
1934	    fputc (NODE_NAME (token->val.node.node)[i], fp);
1935      }
1936      break;
1937
1938    case SPELL_LITERAL:
1939      fwrite (token->val.str.text, 1, token->val.str.len, fp);
1940      break;
1941
1942    case SPELL_NONE:
1943      /* An error, most probably.  */
1944      break;
1945    }
1946}
1947
1948/* Compare two tokens.  */
1949int
1950_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1951{
1952  if (a->type == b->type && a->flags == b->flags)
1953    switch (TOKEN_SPELL (a))
1954      {
1955      default:			/* Keep compiler happy.  */
1956      case SPELL_OPERATOR:
1957	/* token_no is used to track where multiple consecutive ##
1958	   tokens were originally located.  */
1959	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
1960      case SPELL_NONE:
1961	return (a->type != CPP_MACRO_ARG
1962		|| a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
1963      case SPELL_IDENT:
1964	return a->val.node.node == b->val.node.node;
1965      case SPELL_LITERAL:
1966	return (a->val.str.len == b->val.str.len
1967		&& !memcmp (a->val.str.text, b->val.str.text,
1968			    a->val.str.len));
1969      }
1970
1971  return 0;
1972}
1973
1974/* Returns nonzero if a space should be inserted to avoid an
1975   accidental token paste for output.  For simplicity, it is
1976   conservative, and occasionally advises a space where one is not
1977   needed, e.g. "." and ".2".  */
1978int
1979cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1980		 const cpp_token *token2)
1981{
1982  enum cpp_ttype a = token1->type, b = token2->type;
1983  cppchar_t c;
1984
1985  if (token1->flags & NAMED_OP)
1986    a = CPP_NAME;
1987  if (token2->flags & NAMED_OP)
1988    b = CPP_NAME;
1989
1990  c = EOF;
1991  if (token2->flags & DIGRAPH)
1992    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1993  else if (token_spellings[b].category == SPELL_OPERATOR)
1994    c = token_spellings[b].name[0];
1995
1996  /* Quickly get everything that can paste with an '='.  */
1997  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1998    return 1;
1999
2000  switch (a)
2001    {
2002    case CPP_GREATER:	return c == '>';
2003    case CPP_LESS:	return c == '<' || c == '%' || c == ':';
2004    case CPP_PLUS:	return c == '+';
2005    case CPP_MINUS:	return c == '-' || c == '>';
2006    case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
2007    case CPP_MOD:	return c == ':' || c == '>';
2008    case CPP_AND:	return c == '&';
2009    case CPP_OR:	return c == '|';
2010    case CPP_COLON:	return c == ':' || c == '>';
2011    case CPP_DEREF:	return c == '*';
2012    case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
2013    case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
2014    case CPP_NAME:	return ((b == CPP_NUMBER
2015				 && name_p (pfile, &token2->val.str))
2016				|| b == CPP_NAME
2017				|| b == CPP_CHAR || b == CPP_STRING); /* L */
2018    case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
2019				|| c == '.' || c == '+' || c == '-');
2020				      /* UCNs */
2021    case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
2022				 && b == CPP_NAME)
2023				|| (CPP_OPTION (pfile, objc)
2024				    && token1->val.str.text[0] == '@'
2025				    && (b == CPP_NAME || b == CPP_STRING)));
2026    default:		break;
2027    }
2028
2029  return 0;
2030}
2031
2032/* Output all the remaining tokens on the current line, and a newline
2033   character, to FP.  Leading whitespace is removed.  If there are
2034   macros, special token padding is not performed.  */
2035void
2036cpp_output_line (cpp_reader *pfile, FILE *fp)
2037{
2038  const cpp_token *token;
2039
2040  token = cpp_get_token (pfile);
2041  while (token->type != CPP_EOF)
2042    {
2043      cpp_output_token (token, fp);
2044      token = cpp_get_token (pfile);
2045      if (token->flags & PREV_WHITE)
2046	putc (' ', fp);
2047    }
2048
2049  putc ('\n', fp);
2050}
2051
2052/* Return a string representation of all the remaining tokens on the
2053   current line.  The result is allocated using xmalloc and must be
2054   freed by the caller.  */
2055unsigned char *
2056cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2057{
2058  const cpp_token *token;
2059  unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2060  unsigned int alloced = 120 + out;
2061  unsigned char *result = (unsigned char *) xmalloc (alloced);
2062
2063  /* If DIR_NAME is empty, there are no initial contents.  */
2064  if (dir_name)
2065    {
2066      sprintf ((char *) result, "#%s ", dir_name);
2067      out += 2;
2068    }
2069
2070  token = cpp_get_token (pfile);
2071  while (token->type != CPP_EOF)
2072    {
2073      unsigned char *last;
2074      /* Include room for a possible space and the terminating nul.  */
2075      unsigned int len = cpp_token_len (token) + 2;
2076
2077      if (out + len > alloced)
2078	{
2079	  alloced *= 2;
2080	  if (out + len > alloced)
2081	    alloced = out + len;
2082	  result = (unsigned char *) xrealloc (result, alloced);
2083	}
2084
2085      last = cpp_spell_token (pfile, token, &result[out], 0);
2086      out = last - result;
2087
2088      token = cpp_get_token (pfile);
2089      if (token->flags & PREV_WHITE)
2090	result[out++] = ' ';
2091    }
2092
2093  result[out] = '\0';
2094  return result;
2095}
2096
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest length new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will reuse for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.

   Every macro argument is parenthesized so that passing an arbitrary
   expression cannot change the meaning of the expansion.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2111
2112/* Create a new allocation buffer.  Place the control block at the end
2113   of the buffer, so that buffer overflows will cause immediate chaos.  */
2114static _cpp_buff *
2115new_buff (size_t len)
2116{
2117  _cpp_buff *result;
2118  unsigned char *base;
2119
2120  if (len < MIN_BUFF_SIZE)
2121    len = MIN_BUFF_SIZE;
2122  len = CPP_ALIGN (len);
2123
2124  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2125  result = (_cpp_buff *) (base + len);
2126  result->base = base;
2127  result->cur = base;
2128  result->limit = base + len;
2129  result->next = NULL;
2130  return result;
2131}
2132
2133/* Place a chain of unwanted allocation buffers on the free list.  */
2134void
2135_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2136{
2137  _cpp_buff *end = buff;
2138
2139  while (end->next)
2140    end = end->next;
2141  end->next = pfile->free_buffs;
2142  pfile->free_buffs = buff;
2143}
2144
2145/* Return a free buffer of size at least MIN_SIZE.  */
2146_cpp_buff *
2147_cpp_get_buff (cpp_reader *pfile, size_t min_size)
2148{
2149  _cpp_buff *result, **p;
2150
2151  for (p = &pfile->free_buffs;; p = &(*p)->next)
2152    {
2153      size_t size;
2154
2155      if (*p == NULL)
2156	return new_buff (min_size);
2157      result = *p;
2158      size = result->limit - result->base;
2159      /* Return a buffer that's big enough, but don't waste one that's
2160         way too big.  */
2161      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2162	break;
2163    }
2164
2165  *p = result->next;
2166  result->next = NULL;
2167  result->cur = result->base;
2168  return result;
2169}
2170
2171/* Creates a new buffer with enough space to hold the uncommitted
2172   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2173   the excess bytes to the new buffer.  Chains the new buffer after
2174   BUFF, and returns the new buffer.  */
2175_cpp_buff *
2176_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2177{
2178  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2179  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2180
2181  buff->next = new_buff;
2182  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2183  return new_buff;
2184}
2185
2186/* Creates a new buffer with enough space to hold the uncommitted
2187   remaining bytes of the buffer pointed to by BUFF, and at least
2188   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2189   Chains the new buffer before the buffer pointed to by BUFF, and
2190   updates the pointer to point to the new buffer.  */
2191void
2192_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2193{
2194  _cpp_buff *new_buff, *old_buff = *pbuff;
2195  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2196
2197  new_buff = _cpp_get_buff (pfile, size);
2198  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2199  new_buff->next = old_buff;
2200  *pbuff = new_buff;
2201}
2202
2203/* Free a chain of buffers starting at BUFF.  */
2204void
2205_cpp_free_buff (_cpp_buff *buff)
2206{
2207  _cpp_buff *next;
2208
2209  for (; buff; buff = next)
2210    {
2211      next = buff->next;
2212      free (buff->base);
2213    }
2214}
2215
2216/* Allocate permanent, unaligned storage of length LEN.  */
2217unsigned char *
2218_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2219{
2220  _cpp_buff *buff = pfile->u_buff;
2221  unsigned char *result = buff->cur;
2222
2223  if (len > (size_t) (buff->limit - result))
2224    {
2225      buff = _cpp_get_buff (pfile, len);
2226      buff->next = pfile->u_buff;
2227      pfile->u_buff = buff;
2228      result = buff->cur;
2229    }
2230
2231  buff->cur = result + len;
2232  return result;
2233}
2234
2235/* Allocate permanent, unaligned storage of length LEN from a_buff.
2236   That buffer is used for growing allocations when saving macro
2237   replacement lists in a #define, and when parsing an answer to an
2238   assertion in #assert, #unassert or #if (and therefore possibly
2239   whilst expanding macros).  It therefore must not be used by any
2240   code that they might call: specifically the lexer and the guts of
2241   the macro expander.
2242
2243   All existing other uses clearly fit this restriction: storing
2244   registered pragmas during initialization.  */
2245unsigned char *
2246_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2247{
2248  _cpp_buff *buff = pfile->a_buff;
2249  unsigned char *result = buff->cur;
2250
2251  if (len > (size_t) (buff->limit - result))
2252    {
2253      buff = _cpp_get_buff (pfile, len);
2254      buff->next = pfile->a_buff;
2255      pfile->a_buff = buff;
2256      result = buff->cur;
2257    }
2258
2259  buff->cur = result + len;
2260  return result;
2261}
2262
2263/* Say which field of TOK is in use.  */
2264
2265enum cpp_token_fld_kind
2266cpp_token_val_index (cpp_token *tok)
2267{
2268  switch (TOKEN_SPELL (tok))
2269    {
2270    case SPELL_IDENT:
2271      return CPP_TOKEN_FLD_NODE;
2272    case SPELL_LITERAL:
2273      return CPP_TOKEN_FLD_STR;
2274    case SPELL_OPERATOR:
2275      if (tok->type == CPP_PASTE)
2276	return CPP_TOKEN_FLD_TOKEN_NO;
2277      else
2278	return CPP_TOKEN_FLD_NONE;
2279    case SPELL_NONE:
2280      if (tok->type == CPP_MACRO_ARG)
2281	return CPP_TOKEN_FLD_ARG_NO;
2282      else if (tok->type == CPP_PADDING)
2283	return CPP_TOKEN_FLD_SOURCE;
2284      else if (tok->type == CPP_PRAGMA)
2285	return CPP_TOKEN_FLD_PRAGMA;
2286      /* else fall through */
2287    default:
2288      return CPP_TOKEN_FLD_NONE;
2289    }
2290}
2291