/* CPP Library - lexical analysis.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21169695Skan
22169695Skan#include "config.h"
23169695Skan#include "system.h"
24169695Skan#include "cpplib.h"
25169695Skan#include "internal.h"
26169695Skan
/* How the spelling of a token type is recorded in the
   token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One entry of the spelling table: the spelling category of a token
   type together with the text stored for it.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
40169695Skan
/* Spellings of the six digraphs.  The casts are written out in full
   rather than through a one-letter cast macro glued to the literal:
   a token such as U"%:" is a char32_t string literal in C11 and
   C++11, so a macro named U would never be expanded there and the
   initializers would have the wrong pointer type.  */
static const unsigned char *const digraph_spellings[] =
{
  (const unsigned char *) "%:",
  (const unsigned char *) "%:%:",
  (const unsigned char *) "<:",
  (const unsigned char *) ":>",
  (const unsigned char *) "<%",
  (const unsigned char *) "%>"
};
43169695Skan
44169695Skan#define OP(e, s) { SPELL_OPERATOR, U s  },
45169695Skan#define TK(e, s) { SPELL_ ## s,    U #e },
46169695Skanstatic const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47169695Skan#undef OP
48169695Skan#undef TK
49169695Skan
50169695Skan#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51169695Skan#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52169695Skan
53169695Skanstatic void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54169695Skanstatic int skip_line_comment (cpp_reader *);
55169695Skanstatic void skip_whitespace (cpp_reader *, cppchar_t);
56169695Skanstatic void lex_string (cpp_reader *, cpp_token *, const uchar *);
57169695Skanstatic void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58169695Skanstatic void create_literal (cpp_reader *, cpp_token *, const uchar *,
59169695Skan			    unsigned int, enum cpp_ttype);
60169695Skanstatic bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61169695Skanstatic int name_p (cpp_reader *, const cpp_string *);
62169695Skanstatic tokenrun *next_tokenrun (tokenrun *);
63169695Skan
64169695Skanstatic _cpp_buff *new_buff (size_t);
65169695Skan
66169695Skan
67169695Skan/* Utility routine:
68169695Skan
69169695Skan   Compares, the token TOKEN to the NUL-terminated string STRING.
70169695Skan   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
71169695Skanint
72169695Skancpp_ideq (const cpp_token *token, const char *string)
73169695Skan{
74169695Skan  if (token->type != CPP_NAME)
75169695Skan    return 0;
76169695Skan
77169695Skan  return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78169695Skan}
79169695Skan
80169695Skan/* Record a note TYPE at byte POS into the current cleaned logical
81169695Skan   line.  */
82169695Skanstatic void
83169695Skanadd_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84169695Skan{
85169695Skan  if (buffer->notes_used == buffer->notes_cap)
86169695Skan    {
87169695Skan      buffer->notes_cap = buffer->notes_cap * 2 + 200;
88169695Skan      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89169695Skan                                  buffer->notes_cap);
90169695Skan    }
91169695Skan
92169695Skan  buffer->notes[buffer->notes_used].pos = pos;
93169695Skan  buffer->notes[buffer->notes_used].type = type;
94169695Skan  buffer->notes_used++;
95169695Skan}
96169695Skan
97169695Skan/* Returns with a logical line that contains no escaped newlines or
98169695Skan   trigraphs.  This is a time-critical inner loop.  */
99169695Skanvoid
100169695Skan_cpp_clean_line (cpp_reader *pfile)
101169695Skan{
102169695Skan  cpp_buffer *buffer;
103169695Skan  const uchar *s;
104169695Skan  uchar c, *d, *p;
105169695Skan
106169695Skan  buffer = pfile->buffer;
107169695Skan  buffer->cur_note = buffer->notes_used = 0;
108169695Skan  buffer->cur = buffer->line_base = buffer->next_line;
109169695Skan  buffer->need_line = false;
110169695Skan  s = buffer->next_line - 1;
111169695Skan
112169695Skan  if (!buffer->from_stage3)
113169695Skan    {
114259268Spfg      const uchar *pbackslash = NULL;
115259268Spfg
116169695Skan      /* Short circuit for the common case of an un-escaped line with
117169695Skan	 no trigraphs.  The primary win here is by not writing any
118169695Skan	 data back to memory until we have to.  */
119169695Skan      for (;;)
120169695Skan	{
121169695Skan	  c = *++s;
122259268Spfg	  if (__builtin_expect (c == '\n', false)
123259268Spfg	      || __builtin_expect (c == '\r', false))
124169695Skan	    {
125169695Skan	      d = (uchar *) s;
126169695Skan
127259268Spfg	      if (__builtin_expect (s == buffer->rlimit, false))
128169695Skan		goto done;
129169695Skan
130169695Skan	      /* DOS line ending? */
131259268Spfg	      if (__builtin_expect (c == '\r', false)
132259268Spfg		  && s[1] == '\n')
133259268Spfg		{
134259268Spfg		  s++;
135259268Spfg		  if (s == buffer->rlimit)
136259268Spfg		    goto done;
137259268Spfg		}
138169695Skan
139259268Spfg	      if (__builtin_expect (pbackslash == NULL, true))
140169695Skan		goto done;
141169695Skan
142259268Spfg	      /* Check for escaped newline.  */
143169695Skan	      p = d;
144259268Spfg	      while (is_nvspace (p[-1]))
145169695Skan		p--;
146259268Spfg	      if (p - 1 != pbackslash)
147169695Skan		goto done;
148169695Skan
149169695Skan	      /* Have an escaped newline; process it and proceed to
150169695Skan		 the slow path.  */
151169695Skan	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
152169695Skan	      d = p - 2;
153169695Skan	      buffer->next_line = p - 1;
154169695Skan	      break;
155169695Skan	    }
156259268Spfg	  if (__builtin_expect (c == '\\', false))
157259268Spfg	    pbackslash = s;
158259268Spfg	  else if (__builtin_expect (c == '?', false)
159259268Spfg		   && __builtin_expect (s[1] == '?', false)
160259268Spfg		   && _cpp_trigraph_map[s[2]])
161169695Skan	    {
162169695Skan	      /* Have a trigraph.  We may or may not have to convert
163169695Skan		 it.  Add a line note regardless, for -Wtrigraphs.  */
164169695Skan	      add_line_note (buffer, s, s[2]);
165169695Skan	      if (CPP_OPTION (pfile, trigraphs))
166169695Skan		{
167169695Skan		  /* We do, and that means we have to switch to the
168169695Skan		     slow path.  */
169169695Skan		  d = (uchar *) s;
170169695Skan		  *d = _cpp_trigraph_map[s[2]];
171169695Skan		  s += 2;
172169695Skan		  break;
173169695Skan		}
174169695Skan	    }
175169695Skan	}
176169695Skan
177169695Skan
178169695Skan      for (;;)
179169695Skan	{
180169695Skan	  c = *++s;
181169695Skan	  *++d = c;
182169695Skan
183169695Skan	  if (c == '\n' || c == '\r')
184169695Skan	    {
185169695Skan		  /* Handle DOS line endings.  */
186169695Skan	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
187169695Skan		s++;
188169695Skan	      if (s == buffer->rlimit)
189169695Skan		break;
190169695Skan
191169695Skan	      /* Escaped?  */
192169695Skan	      p = d;
193169695Skan	      while (p != buffer->next_line && is_nvspace (p[-1]))
194169695Skan		p--;
195169695Skan	      if (p == buffer->next_line || p[-1] != '\\')
196169695Skan		break;
197169695Skan
198169695Skan	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
199169695Skan	      d = p - 2;
200169695Skan	      buffer->next_line = p - 1;
201169695Skan	    }
202169695Skan	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
203169695Skan	    {
204169695Skan	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
205169695Skan	      add_line_note (buffer, d, s[2]);
206169695Skan	      if (CPP_OPTION (pfile, trigraphs))
207169695Skan		{
208169695Skan		  *d = _cpp_trigraph_map[s[2]];
209169695Skan		  s += 2;
210169695Skan		}
211169695Skan	    }
212169695Skan	}
213169695Skan    }
214169695Skan  else
215169695Skan    {
216169695Skan      do
217169695Skan	s++;
218169695Skan      while (*s != '\n' && *s != '\r');
219169695Skan      d = (uchar *) s;
220169695Skan
221169695Skan      /* Handle DOS line endings.  */
222169695Skan      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
223169695Skan	s++;
224169695Skan    }
225169695Skan
226169695Skan done:
227169695Skan  *d = '\n';
228169695Skan  /* A sentinel note that should never be processed.  */
229169695Skan  add_line_note (buffer, d + 1, '\n');
230169695Skan  buffer->next_line = s + 1;
231169695Skan}
232169695Skan
233169695Skan/* Return true if the trigraph indicated by NOTE should be warned
234169695Skan   about in a comment.  */
235169695Skanstatic bool
236169695Skanwarn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
237169695Skan{
238169695Skan  const uchar *p;
239169695Skan
240169695Skan  /* Within comments we don't warn about trigraphs, unless the
241169695Skan     trigraph forms an escaped newline, as that may change
242169695Skan     behavior.  */
243169695Skan  if (note->type != '/')
244169695Skan    return false;
245169695Skan
246169695Skan  /* If -trigraphs, then this was an escaped newline iff the next note
247169695Skan     is coincident.  */
248169695Skan  if (CPP_OPTION (pfile, trigraphs))
249169695Skan    return note[1].pos == note->pos;
250169695Skan
251169695Skan  /* Otherwise, see if this forms an escaped newline.  */
252169695Skan  p = note->pos + 3;
253169695Skan  while (is_nvspace (*p))
254169695Skan    p++;
255169695Skan
256169695Skan  /* There might have been escaped newlines between the trigraph and the
257169695Skan     newline we found.  Hence the position test.  */
258169695Skan  return (*p == '\n' && p < note[1].pos);
259169695Skan}
260169695Skan
261169695Skan/* Process the notes created by add_line_note as far as the current
262169695Skan   location.  */
263169695Skanvoid
264169695Skan_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
265169695Skan{
266169695Skan  cpp_buffer *buffer = pfile->buffer;
267169695Skan
268169695Skan  for (;;)
269169695Skan    {
270169695Skan      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271169695Skan      unsigned int col;
272169695Skan
273169695Skan      if (note->pos > buffer->cur)
274169695Skan	break;
275169695Skan
276169695Skan      buffer->cur_note++;
277169695Skan      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
278169695Skan
279169695Skan      if (note->type == '\\' || note->type == ' ')
280169695Skan	{
281169695Skan	  if (note->type == ' ' && !in_comment)
282169695Skan	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
283169695Skan				 "backslash and newline separated by space");
284169695Skan
285169695Skan	  if (buffer->next_line > buffer->rlimit)
286169695Skan	    {
287169695Skan	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
288169695Skan				   "backslash-newline at end of file");
289169695Skan	      /* Prevent "no newline at end of file" warning.  */
290169695Skan	      buffer->next_line = buffer->rlimit;
291169695Skan	    }
292169695Skan
293169695Skan	  buffer->line_base = note->pos;
294169695Skan	  CPP_INCREMENT_LINE (pfile, 0);
295169695Skan	}
296169695Skan      else if (_cpp_trigraph_map[note->type])
297169695Skan	{
298169695Skan	  if (CPP_OPTION (pfile, warn_trigraphs)
299169695Skan	      && (!in_comment || warn_in_comment (pfile, note)))
300169695Skan	    {
301169695Skan	      if (CPP_OPTION (pfile, trigraphs))
302169695Skan		cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
303169695Skan				     "trigraph ??%c converted to %c",
304169695Skan				     note->type,
305169695Skan				     (int) _cpp_trigraph_map[note->type]);
306169695Skan	      else
307169695Skan		{
308169695Skan		  cpp_error_with_line
309169695Skan		    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
310169695Skan		     "trigraph ??%c ignored, use -trigraphs to enable",
311169695Skan		     note->type);
312169695Skan		}
313169695Skan	    }
314169695Skan	}
315169695Skan      else
316169695Skan	abort ();
317169695Skan    }
318169695Skan}
319169695Skan
320169695Skan/* Skip a C-style block comment.  We find the end of the comment by
321169695Skan   seeing if an asterisk is before every '/' we encounter.  Returns
322169695Skan   nonzero if comment terminated by EOF, zero otherwise.
323169695Skan
324169695Skan   Buffer->cur points to the initial asterisk of the comment.  */
325169695Skanbool
326169695Skan_cpp_skip_block_comment (cpp_reader *pfile)
327169695Skan{
328169695Skan  cpp_buffer *buffer = pfile->buffer;
329169695Skan  const uchar *cur = buffer->cur;
330169695Skan  uchar c;
331169695Skan
332169695Skan  cur++;
333169695Skan  if (*cur == '/')
334169695Skan    cur++;
335169695Skan
336169695Skan  for (;;)
337169695Skan    {
338169695Skan      /* People like decorating comments with '*', so check for '/'
339169695Skan	 instead for efficiency.  */
340169695Skan      c = *cur++;
341169695Skan
342169695Skan      if (c == '/')
343169695Skan	{
344169695Skan	  if (cur[-2] == '*')
345169695Skan	    break;
346169695Skan
347169695Skan	  /* Warn about potential nested comments, but not if the '/'
348169695Skan	     comes immediately before the true comment delimiter.
349169695Skan	     Don't bother to get it right across escaped newlines.  */
350169695Skan	  if (CPP_OPTION (pfile, warn_comments)
351169695Skan	      && cur[0] == '*' && cur[1] != '/')
352169695Skan	    {
353169695Skan	      buffer->cur = cur;
354169695Skan	      cpp_error_with_line (pfile, CPP_DL_WARNING,
355169695Skan				   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
356169695Skan				   "\"/*\" within comment");
357169695Skan	    }
358169695Skan	}
359169695Skan      else if (c == '\n')
360169695Skan	{
361169695Skan	  unsigned int cols;
362169695Skan	  buffer->cur = cur - 1;
363169695Skan	  _cpp_process_line_notes (pfile, true);
364169695Skan	  if (buffer->next_line >= buffer->rlimit)
365169695Skan	    return true;
366169695Skan	  _cpp_clean_line (pfile);
367169695Skan
368169695Skan	  cols = buffer->next_line - buffer->line_base;
369169695Skan	  CPP_INCREMENT_LINE (pfile, cols);
370169695Skan
371169695Skan	  cur = buffer->cur;
372169695Skan	}
373169695Skan    }
374169695Skan
375169695Skan  buffer->cur = cur;
376169695Skan  _cpp_process_line_notes (pfile, true);
377169695Skan  return false;
378169695Skan}
379169695Skan
380169695Skan/* Skip a C++ line comment, leaving buffer->cur pointing to the
381169695Skan   terminating newline.  Handles escaped newlines.  Returns nonzero
382169695Skan   if a multiline comment.  */
383169695Skanstatic int
384169695Skanskip_line_comment (cpp_reader *pfile)
385169695Skan{
386169695Skan  cpp_buffer *buffer = pfile->buffer;
387169695Skan  unsigned int orig_line = pfile->line_table->highest_line;
388169695Skan
389169695Skan  while (*buffer->cur != '\n')
390169695Skan    buffer->cur++;
391169695Skan
392169695Skan  _cpp_process_line_notes (pfile, true);
393169695Skan  return orig_line != pfile->line_table->highest_line;
394169695Skan}
395169695Skan
396169695Skan/* Skips whitespace, saving the next non-whitespace character.  */
397169695Skanstatic void
398169695Skanskip_whitespace (cpp_reader *pfile, cppchar_t c)
399169695Skan{
400169695Skan  cpp_buffer *buffer = pfile->buffer;
401169695Skan  bool saw_NUL = false;
402169695Skan
403169695Skan  do
404169695Skan    {
405169695Skan      /* Horizontal space always OK.  */
406169695Skan      if (c == ' ' || c == '\t')
407169695Skan	;
408169695Skan      /* Just \f \v or \0 left.  */
409169695Skan      else if (c == '\0')
410169695Skan	saw_NUL = true;
411169695Skan      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412169695Skan	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
413169695Skan			     CPP_BUF_COL (buffer),
414169695Skan			     "%s in preprocessing directive",
415169695Skan			     c == '\f' ? "form feed" : "vertical tab");
416169695Skan
417169695Skan      c = *buffer->cur++;
418169695Skan    }
419169695Skan  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
420169695Skan  while (is_nvspace (c));
421169695Skan
422169695Skan  if (saw_NUL)
423169695Skan    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
424169695Skan
425169695Skan  buffer->cur--;
426169695Skan}
427169695Skan
428169695Skan/* See if the characters of a number token are valid in a name (no
429169695Skan   '.', '+' or '-').  */
430169695Skanstatic int
431169695Skanname_p (cpp_reader *pfile, const cpp_string *string)
432169695Skan{
433169695Skan  unsigned int i;
434169695Skan
435169695Skan  for (i = 0; i < string->len; i++)
436169695Skan    if (!is_idchar (string->text[i]))
437169695Skan      return 0;
438169695Skan
439169695Skan  return 1;
440169695Skan}
441169695Skan
442169695Skan/* After parsing an identifier or other sequence, produce a warning about
443169695Skan   sequences not in NFC/NFKC.  */
444169695Skanstatic void
445169695Skanwarn_about_normalization (cpp_reader *pfile,
446169695Skan			  const cpp_token *token,
447169695Skan			  const struct normalize_state *s)
448169695Skan{
449169695Skan  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450169695Skan      && !pfile->state.skipping)
451169695Skan    {
452169695Skan      /* Make sure that the token is printed using UCNs, even
453169695Skan	 if we'd otherwise happily print UTF-8.  */
454169695Skan      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
455169695Skan      size_t sz;
456169695Skan
457169695Skan      sz = cpp_spell_token (pfile, token, buf, false) - buf;
458169695Skan      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459169695Skan	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
460169695Skan			     "`%.*s' is not in NFKC", (int) sz, buf);
461169695Skan      else
462169695Skan	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
463169695Skan			     "`%.*s' is not in NFC", (int) sz, buf);
464169695Skan    }
465169695Skan}
466169695Skan
467169695Skan/* Returns TRUE if the sequence starting at buffer->cur is invalid in
468169695Skan   an identifier.  FIRST is TRUE if this starts an identifier.  */
469169695Skanstatic bool
470169695Skanforms_identifier_p (cpp_reader *pfile, int first,
471169695Skan		    struct normalize_state *state)
472169695Skan{
473169695Skan  cpp_buffer *buffer = pfile->buffer;
474169695Skan
475169695Skan  if (*buffer->cur == '$')
476169695Skan    {
477169695Skan      if (!CPP_OPTION (pfile, dollars_in_ident))
478169695Skan	return false;
479169695Skan
480169695Skan      buffer->cur++;
481169695Skan      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
482169695Skan	{
483169695Skan	  CPP_OPTION (pfile, warn_dollars) = 0;
484169695Skan	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
485169695Skan	}
486169695Skan
487169695Skan      return true;
488169695Skan    }
489169695Skan
490169695Skan  /* Is this a syntactically valid UCN?  */
491169695Skan  if (CPP_OPTION (pfile, extended_identifiers)
492169695Skan      && *buffer->cur == '\\'
493169695Skan      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
494169695Skan    {
495169695Skan      buffer->cur += 2;
496169695Skan      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497169695Skan			  state))
498169695Skan	return true;
499169695Skan      buffer->cur -= 2;
500169695Skan    }
501169695Skan
502169695Skan  return false;
503169695Skan}
504169695Skan
505169695Skan/* Lex an identifier starting at BUFFER->CUR - 1.  */
506169695Skanstatic cpp_hashnode *
507169695Skanlex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508169695Skan		struct normalize_state *nst)
509169695Skan{
510169695Skan  cpp_hashnode *result;
511169695Skan  const uchar *cur;
512169695Skan  unsigned int len;
513169695Skan  unsigned int hash = HT_HASHSTEP (0, *base);
514169695Skan
515169695Skan  cur = pfile->buffer->cur;
516169695Skan  if (! starts_ucn)
517169695Skan    while (ISIDNUM (*cur))
518169695Skan      {
519169695Skan	hash = HT_HASHSTEP (hash, *cur);
520169695Skan	cur++;
521169695Skan      }
522169695Skan  pfile->buffer->cur = cur;
523169695Skan  if (starts_ucn || forms_identifier_p (pfile, false, nst))
524169695Skan    {
525169695Skan      /* Slower version for identifiers containing UCNs (or $).  */
526169695Skan      do {
527169695Skan	while (ISIDNUM (*pfile->buffer->cur))
528169695Skan	  {
529169695Skan	    pfile->buffer->cur++;
530169695Skan	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
531169695Skan	  }
532169695Skan      } while (forms_identifier_p (pfile, false, nst));
533169695Skan      result = _cpp_interpret_identifier (pfile, base,
534169695Skan					  pfile->buffer->cur - base);
535169695Skan    }
536169695Skan  else
537169695Skan    {
538169695Skan      len = cur - base;
539169695Skan      hash = HT_HASHFINISH (hash, len);
540169695Skan
541169695Skan      result = (cpp_hashnode *)
542169695Skan	ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
543169695Skan    }
544169695Skan
545169695Skan  /* Rarely, identifiers require diagnostics when lexed.  */
546169695Skan  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547169695Skan			&& !pfile->state.skipping, 0))
548169695Skan    {
549169695Skan      /* It is allowed to poison the same identifier twice.  */
550169695Skan      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
551169695Skan	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
552169695Skan		   NODE_NAME (result));
553169695Skan
554169695Skan      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555169695Skan	 replacement list of a variadic macro.  */
556169695Skan      if (result == pfile->spec_nodes.n__VA_ARGS__
557169695Skan	  && !pfile->state.va_args_ok)
558169695Skan	cpp_error (pfile, CPP_DL_PEDWARN,
559169695Skan		   "__VA_ARGS__ can only appear in the expansion"
560169695Skan		   " of a C99 variadic macro");
561169695Skan    }
562169695Skan
563169695Skan  return result;
564169695Skan}
565169695Skan
566169695Skan/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
567169695Skanstatic void
568169695Skanlex_number (cpp_reader *pfile, cpp_string *number,
569169695Skan	    struct normalize_state *nst)
570169695Skan{
571169695Skan  const uchar *cur;
572169695Skan  const uchar *base;
573169695Skan  uchar *dest;
574169695Skan
575169695Skan  base = pfile->buffer->cur - 1;
576169695Skan  do
577169695Skan    {
578169695Skan      cur = pfile->buffer->cur;
579169695Skan
580169695Skan      /* N.B. ISIDNUM does not include $.  */
581169695Skan      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
582169695Skan	{
583169695Skan	  cur++;
584169695Skan	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
585169695Skan	}
586169695Skan
587169695Skan      pfile->buffer->cur = cur;
588169695Skan    }
589169695Skan  while (forms_identifier_p (pfile, false, nst));
590169695Skan
591169695Skan  number->len = cur - base;
592169695Skan  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593169695Skan  memcpy (dest, base, number->len);
594169695Skan  dest[number->len] = '\0';
595169695Skan  number->text = dest;
596169695Skan}
597169695Skan
598169695Skan/* Create a token of type TYPE with a literal spelling.  */
599169695Skanstatic void
600169695Skancreate_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601169695Skan		unsigned int len, enum cpp_ttype type)
602169695Skan{
603169695Skan  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
604169695Skan
605169695Skan  memcpy (dest, base, len);
606169695Skan  dest[len] = '\0';
607169695Skan  token->type = type;
608169695Skan  token->val.str.len = len;
609169695Skan  token->val.str.text = dest;
610169695Skan}
611169695Skan
612169695Skan/* Lexes a string, character constant, or angle-bracketed header file
613169695Skan   name.  The stored string contains the spelling, including opening
614169695Skan   quote and leading any leading 'L'.  It returns the type of the
615169695Skan   literal, or CPP_OTHER if it was not properly terminated.
616169695Skan
617169695Skan   The spelling is NUL-terminated, but it is not guaranteed that this
618169695Skan   is the first NUL since embedded NULs are preserved.  */
619169695Skanstatic void
620169695Skanlex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
621169695Skan{
622169695Skan  bool saw_NUL = false;
623169695Skan  const uchar *cur;
624169695Skan  cppchar_t terminator;
625169695Skan  enum cpp_ttype type;
626169695Skan
627169695Skan  cur = base;
628169695Skan  terminator = *cur++;
629169695Skan  if (terminator == 'L')
630169695Skan    terminator = *cur++;
631169695Skan  if (terminator == '\"')
632169695Skan    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
633169695Skan  else if (terminator == '\'')
634169695Skan    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
635169695Skan  else
636169695Skan    terminator = '>', type = CPP_HEADER_NAME;
637169695Skan
638169695Skan  for (;;)
639169695Skan    {
640169695Skan      cppchar_t c = *cur++;
641169695Skan
642169695Skan      /* In #include-style directives, terminators are not escapable.  */
643169695Skan      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
644169695Skan	cur++;
645169695Skan      else if (c == terminator)
646169695Skan	break;
647169695Skan      else if (c == '\n')
648169695Skan	{
649169695Skan	  cur--;
650169695Skan	  type = CPP_OTHER;
651169695Skan	  break;
652169695Skan	}
653169695Skan      else if (c == '\0')
654169695Skan	saw_NUL = true;
655169695Skan    }
656169695Skan
657169695Skan  if (saw_NUL && !pfile->state.skipping)
658169695Skan    cpp_error (pfile, CPP_DL_WARNING,
659169695Skan	       "null character(s) preserved in literal");
660169695Skan
661260573Spfg  /* APPLE LOCAL begin #error with unmatched quotes 5607574 */
662260573Spfg  if (type == CPP_OTHER
663260573Spfg      && CPP_OPTION (pfile, lang) != CLK_ASM
664260573Spfg      && !pfile->state.in_diagnostic
665260573Spfg      && !pfile->state.skipping)
666260573Spfg  /* APPLE LOCAL end #error with unmatched quotes 5607574 */
667169695Skan    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
668169695Skan	       (int) terminator);
669169695Skan
670169695Skan  pfile->buffer->cur = cur;
671169695Skan  create_literal (pfile, token, base, cur - base, type);
672169695Skan}
673169695Skan
674169695Skan/* The stored comment includes the comment start and any terminator.  */
675169695Skanstatic void
676169695Skansave_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
677169695Skan	      cppchar_t type)
678169695Skan{
679169695Skan  unsigned char *buffer;
680169695Skan  unsigned int len, clen;
681169695Skan
682169695Skan  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
683169695Skan
684169695Skan  /* C++ comments probably (not definitely) have moved past a new
685169695Skan     line, which we don't want to save in the comment.  */
686169695Skan  if (is_vspace (pfile->buffer->cur[-1]))
687169695Skan    len--;
688169695Skan
689169695Skan  /* If we are currently in a directive, then we need to store all
690169695Skan     C++ comments as C comments internally, and so we need to
691169695Skan     allocate a little extra space in that case.
692169695Skan
693169695Skan     Note that the only time we encounter a directive here is
694169695Skan     when we are saving comments in a "#define".  */
695169695Skan  clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
696169695Skan
697169695Skan  buffer = _cpp_unaligned_alloc (pfile, clen);
698169695Skan
699169695Skan  token->type = CPP_COMMENT;
700169695Skan  token->val.str.len = clen;
701169695Skan  token->val.str.text = buffer;
702169695Skan
703169695Skan  buffer[0] = '/';
704169695Skan  memcpy (buffer + 1, from, len - 1);
705169695Skan
706169695Skan  /* Finish conversion to a C comment, if necessary.  */
707169695Skan  if (pfile->state.in_directive && type == '/')
708169695Skan    {
709169695Skan      buffer[1] = '*';
710169695Skan      buffer[clen - 2] = '*';
711169695Skan      buffer[clen - 1] = '/';
712169695Skan    }
713169695Skan}
714169695Skan
715169695Skan/* Allocate COUNT tokens for RUN.  */
716169695Skanvoid
717169695Skan_cpp_init_tokenrun (tokenrun *run, unsigned int count)
718169695Skan{
719169695Skan  run->base = XNEWVEC (cpp_token, count);
720169695Skan  run->limit = run->base + count;
721169695Skan  run->next = NULL;
722169695Skan}
723169695Skan
724169695Skan/* Returns the next tokenrun, or creates one if there is none.  */
725169695Skanstatic tokenrun *
726169695Skannext_tokenrun (tokenrun *run)
727169695Skan{
728169695Skan  if (run->next == NULL)
729169695Skan    {
730169695Skan      run->next = XNEW (tokenrun);
731169695Skan      run->next->prev = run;
732169695Skan      _cpp_init_tokenrun (run->next, 250);
733169695Skan    }
734169695Skan
735169695Skan  return run->next;
736169695Skan}
737169695Skan
738169695Skan/* Allocate a single token that is invalidated at the same time as the
739169695Skan   rest of the tokens on the line.  Has its line and col set to the
740169695Skan   same as the last lexed token, so that diagnostics appear in the
741169695Skan   right place.  */
742169695Skancpp_token *
743169695Skan_cpp_temp_token (cpp_reader *pfile)
744169695Skan{
745169695Skan  cpp_token *old, *result;
746169695Skan
747169695Skan  old = pfile->cur_token - 1;
748169695Skan  if (pfile->cur_token == pfile->cur_run->limit)
749169695Skan    {
750169695Skan      pfile->cur_run = next_tokenrun (pfile->cur_run);
751169695Skan      pfile->cur_token = pfile->cur_run->base;
752169695Skan    }
753169695Skan
754169695Skan  result = pfile->cur_token++;
755169695Skan  result->src_loc = old->src_loc;
756169695Skan  return result;
757169695Skan}
758169695Skan
759169695Skan/* Lex a token into RESULT (external interface).  Takes care of issues
760169695Skan   like directive handling, token lookahead, multiple include
761169695Skan   optimization and skipping.  */
762169695Skanconst cpp_token *
763169695Skan_cpp_lex_token (cpp_reader *pfile)
764169695Skan{
765169695Skan  cpp_token *result;
766169695Skan
767169695Skan  for (;;)
768169695Skan    {
769169695Skan      if (pfile->cur_token == pfile->cur_run->limit)
770169695Skan	{
771169695Skan	  pfile->cur_run = next_tokenrun (pfile->cur_run);
772169695Skan	  pfile->cur_token = pfile->cur_run->base;
773169695Skan	}
774259890Spfg      /* We assume that the current token is somewhere in the current
775259890Spfg	 run.  */
776259890Spfg      if (pfile->cur_token < pfile->cur_run->base
777259890Spfg	  || pfile->cur_token >= pfile->cur_run->limit)
778259890Spfg	abort ();
779169695Skan
780169695Skan      if (pfile->lookaheads)
781169695Skan	{
782169695Skan	  pfile->lookaheads--;
783169695Skan	  result = pfile->cur_token++;
784169695Skan	}
785169695Skan      else
786169695Skan	result = _cpp_lex_direct (pfile);
787169695Skan
788169695Skan      if (result->flags & BOL)
789169695Skan	{
790169695Skan	  /* Is this a directive.  If _cpp_handle_directive returns
791169695Skan	     false, it is an assembler #.  */
792169695Skan	  if (result->type == CPP_HASH
793169695Skan	      /* 6.10.3 p 11: Directives in a list of macro arguments
794169695Skan		 gives undefined behavior.  This implementation
795169695Skan		 handles the directive as normal.  */
796169695Skan	      && pfile->state.parsing_args != 1)
797169695Skan	    {
798169695Skan	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
799169695Skan		{
800169695Skan		  if (pfile->directive_result.type == CPP_PADDING)
801169695Skan		    continue;
802169695Skan		  result = &pfile->directive_result;
803169695Skan		}
804169695Skan	    }
805169695Skan	  else if (pfile->state.in_deferred_pragma)
806169695Skan	    result = &pfile->directive_result;
807169695Skan
808169695Skan	  if (pfile->cb.line_change && !pfile->state.skipping)
809169695Skan	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
810169695Skan	}
811169695Skan
812169695Skan      /* We don't skip tokens in directives.  */
813169695Skan      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
814169695Skan	break;
815169695Skan
816169695Skan      /* Outside a directive, invalidate controlling macros.  At file
817169695Skan	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
818169695Skan	 get here and MI optimization works.  */
819169695Skan      pfile->mi_valid = false;
820169695Skan
821169695Skan      if (!pfile->state.skipping || result->type == CPP_EOF)
822169695Skan	break;
823169695Skan    }
824169695Skan
825169695Skan  return result;
826169695Skan}
827169695Skan
828169695Skan/* Returns true if a fresh line has been loaded.  */
829169695Skanbool
830169695Skan_cpp_get_fresh_line (cpp_reader *pfile)
831169695Skan{
832169695Skan  int return_at_eof;
833169695Skan
834169695Skan  /* We can't get a new line until we leave the current directive.  */
835169695Skan  if (pfile->state.in_directive)
836169695Skan    return false;
837169695Skan
838169695Skan  for (;;)
839169695Skan    {
840169695Skan      cpp_buffer *buffer = pfile->buffer;
841169695Skan
842169695Skan      if (!buffer->need_line)
843169695Skan	return true;
844169695Skan
845169695Skan      if (buffer->next_line < buffer->rlimit)
846169695Skan	{
847169695Skan	  _cpp_clean_line (pfile);
848169695Skan	  return true;
849169695Skan	}
850169695Skan
851169695Skan      /* First, get out of parsing arguments state.  */
852169695Skan      if (pfile->state.parsing_args)
853169695Skan	return false;
854169695Skan
855169695Skan      /* End of buffer.  Non-empty files should end in a newline.  */
856169695Skan      if (buffer->buf != buffer->rlimit
857169695Skan	  && buffer->next_line > buffer->rlimit
858169695Skan	  && !buffer->from_stage3)
859169695Skan	{
860259890Spfg	  /* Clip to buffer size.  */
861169695Skan	  buffer->next_line = buffer->rlimit;
862259890Spfg	  /* APPLE LOCAL begin suppress no newline warning.  */
863259890Spfg	  if ( CPP_OPTION (pfile, warn_newline_at_eof))
864259890Spfg	    {
865259890Spfg	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
866259890Spfg				   CPP_BUF_COLUMN (buffer, buffer->cur),
867259890Spfg				   "no newline at end of file");
868259890Spfg	    }
869259890Spfg	  /* APPLE LOCAL end suppress no newline warning.  */
870169695Skan	}
871169695Skan
872169695Skan      return_at_eof = buffer->return_at_eof;
873169695Skan      _cpp_pop_buffer (pfile);
874169695Skan      if (pfile->buffer == NULL || return_at_eof)
875169695Skan	return false;
876169695Skan    }
877169695Skan}
878169695Skan
/* Finish lexing an operator that may be one character longer: if the
   next unread character is CHAR, consume it and set the token type to
   THEN_TYPE, otherwise leave the input alone and set the type to
   ELSE_TYPE.  Expects `buffer' and `result' to be in scope at the
   point of use.  The arguments are parenthesized so that any
   expression may safely be passed.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      if (*buffer->cur == (CHAR))			\
	{						\
	  buffer->cur++;				\
	  result->type = (THEN_TYPE);			\
	}						\
      else						\
	result->type = (ELSE_TYPE);			\
    }							\
  while (0)
887169695Skan
888169695Skan/* Lex a token into pfile->cur_token, which is also incremented, to
889169695Skan   get diagnostics pointing to the correct location.
890169695Skan
891169695Skan   Does not handle issues such as token lookahead, multiple-include
892169695Skan   optimization, directives, skipping etc.  This function is only
893169695Skan   suitable for use by _cpp_lex_token, and in special cases like
894169695Skan   lex_expansion_token which doesn't care for any of these issues.
895169695Skan
896169695Skan   When meeting a newline, returns CPP_EOF if parsing a directive,
897169695Skan   otherwise returns to the start of the token buffer if permissible.
898169695Skan   Returns the location of the lexed token.  */
899169695Skancpp_token *
900169695Skan_cpp_lex_direct (cpp_reader *pfile)
901169695Skan{
902169695Skan  cppchar_t c;
903169695Skan  cpp_buffer *buffer;
904169695Skan  const unsigned char *comment_start;
905169695Skan  cpp_token *result = pfile->cur_token++;
906169695Skan
907169695Skan fresh_line:
908169695Skan  result->flags = 0;
909169695Skan  buffer = pfile->buffer;
910169695Skan  if (buffer->need_line)
911169695Skan    {
912169695Skan      if (pfile->state.in_deferred_pragma)
913169695Skan	{
914169695Skan	  result->type = CPP_PRAGMA_EOL;
915169695Skan	  pfile->state.in_deferred_pragma = false;
916169695Skan	  if (!pfile->state.pragma_allow_expansion)
917169695Skan	    pfile->state.prevent_expansion--;
918169695Skan	  return result;
919169695Skan	}
920169695Skan      if (!_cpp_get_fresh_line (pfile))
921169695Skan	{
922169695Skan	  result->type = CPP_EOF;
923169695Skan	  if (!pfile->state.in_directive)
924169695Skan	    {
925169695Skan	      /* Tell the compiler the line number of the EOF token.  */
926169695Skan	      result->src_loc = pfile->line_table->highest_line;
927169695Skan	      result->flags = BOL;
928169695Skan	    }
929169695Skan	  return result;
930169695Skan	}
931169695Skan      if (!pfile->keep_tokens)
932169695Skan	{
933169695Skan	  pfile->cur_run = &pfile->base_run;
934169695Skan	  result = pfile->base_run.base;
935169695Skan	  pfile->cur_token = result + 1;
936169695Skan	}
937169695Skan      result->flags = BOL;
938169695Skan      if (pfile->state.parsing_args == 2)
939169695Skan	result->flags |= PREV_WHITE;
940169695Skan    }
941169695Skan  buffer = pfile->buffer;
942169695Skan update_tokens_line:
943169695Skan  result->src_loc = pfile->line_table->highest_line;
944169695Skan
945169695Skan skipped_white:
946169695Skan  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
947169695Skan      && !pfile->overlaid_buffer)
948169695Skan    {
949169695Skan      _cpp_process_line_notes (pfile, false);
950169695Skan      result->src_loc = pfile->line_table->highest_line;
951169695Skan    }
952169695Skan  c = *buffer->cur++;
953169695Skan
954169695Skan  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
955169695Skan			       CPP_BUF_COLUMN (buffer, buffer->cur));
956169695Skan
957169695Skan  switch (c)
958169695Skan    {
959169695Skan    case ' ': case '\t': case '\f': case '\v': case '\0':
960169695Skan      result->flags |= PREV_WHITE;
961169695Skan      skip_whitespace (pfile, c);
962169695Skan      goto skipped_white;
963169695Skan
964169695Skan    case '\n':
965169695Skan      if (buffer->cur < buffer->rlimit)
966169695Skan	CPP_INCREMENT_LINE (pfile, 0);
967169695Skan      buffer->need_line = true;
968169695Skan      goto fresh_line;
969169695Skan
970169695Skan    case '0': case '1': case '2': case '3': case '4':
971169695Skan    case '5': case '6': case '7': case '8': case '9':
972169695Skan      {
973169695Skan	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
974169695Skan	result->type = CPP_NUMBER;
975169695Skan	lex_number (pfile, &result->val.str, &nst);
976169695Skan	warn_about_normalization (pfile, result, &nst);
977169695Skan	break;
978169695Skan      }
979169695Skan
980169695Skan    case 'L':
981169695Skan      /* 'L' may introduce wide characters or strings.  */
982169695Skan      if (*buffer->cur == '\'' || *buffer->cur == '"')
983169695Skan	{
984169695Skan	  lex_string (pfile, result, buffer->cur - 1);
985169695Skan	  break;
986169695Skan	}
987169695Skan      /* Fall through.  */
988169695Skan
989169695Skan    case '_':
990169695Skan    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
991169695Skan    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
992169695Skan    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
993169695Skan    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
994169695Skan    case 'y': case 'z':
995169695Skan    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
996169695Skan    case 'G': case 'H': case 'I': case 'J': case 'K':
997169695Skan    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
998169695Skan    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
999169695Skan    case 'Y': case 'Z':
1000169695Skan      result->type = CPP_NAME;
1001169695Skan      {
1002169695Skan	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1003169695Skan	result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1004169695Skan					   &nst);
1005169695Skan	warn_about_normalization (pfile, result, &nst);
1006169695Skan      }
1007169695Skan
1008169695Skan      /* Convert named operators to their proper types.  */
1009169695Skan      if (result->val.node->flags & NODE_OPERATOR)
1010169695Skan	{
1011169695Skan	  result->flags |= NAMED_OP;
1012169695Skan	  result->type = (enum cpp_ttype) result->val.node->directive_index;
1013169695Skan	}
1014169695Skan      break;
1015169695Skan
1016169695Skan    case '\'':
1017169695Skan    case '"':
1018169695Skan      lex_string (pfile, result, buffer->cur - 1);
1019169695Skan      break;
1020169695Skan
1021169695Skan    case '/':
1022169695Skan      /* A potential block or line comment.  */
1023169695Skan      comment_start = buffer->cur;
1024169695Skan      c = *buffer->cur;
1025169695Skan
1026169695Skan      if (c == '*')
1027169695Skan	{
1028169695Skan	  if (_cpp_skip_block_comment (pfile))
1029169695Skan	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1030169695Skan	}
1031169695Skan      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1032169695Skan			    || cpp_in_system_header (pfile)))
1033169695Skan	{
1034169695Skan	  /* Warn about comments only if pedantically GNUC89, and not
1035169695Skan	     in system headers.  */
1036169695Skan	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1037169695Skan	      && ! buffer->warned_cplusplus_comments)
1038169695Skan	    {
1039169695Skan	      cpp_error (pfile, CPP_DL_PEDWARN,
1040169695Skan			 "C++ style comments are not allowed in ISO C90");
1041169695Skan	      cpp_error (pfile, CPP_DL_PEDWARN,
1042169695Skan			 "(this will be reported only once per input file)");
1043169695Skan	      buffer->warned_cplusplus_comments = 1;
1044169695Skan	    }
1045169695Skan
1046169695Skan	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1047169695Skan	    cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1048169695Skan	}
1049169695Skan      else if (c == '=')
1050169695Skan	{
1051169695Skan	  buffer->cur++;
1052169695Skan	  result->type = CPP_DIV_EQ;
1053169695Skan	  break;
1054169695Skan	}
1055169695Skan      else
1056169695Skan	{
1057169695Skan	  result->type = CPP_DIV;
1058169695Skan	  break;
1059169695Skan	}
1060169695Skan
1061169695Skan      if (!pfile->state.save_comments)
1062169695Skan	{
1063169695Skan	  result->flags |= PREV_WHITE;
1064169695Skan	  goto update_tokens_line;
1065169695Skan	}
1066169695Skan
1067169695Skan      /* Save the comment as a token in its own right.  */
1068169695Skan      save_comment (pfile, result, comment_start, c);
1069169695Skan      break;
1070169695Skan
1071169695Skan    case '<':
1072169695Skan      if (pfile->state.angled_headers)
1073169695Skan	{
1074169695Skan	  lex_string (pfile, result, buffer->cur - 1);
1075169695Skan	  break;
1076169695Skan	}
1077169695Skan
1078169695Skan      result->type = CPP_LESS;
1079169695Skan      if (*buffer->cur == '=')
1080169695Skan	buffer->cur++, result->type = CPP_LESS_EQ;
1081169695Skan      else if (*buffer->cur == '<')
1082169695Skan	{
1083169695Skan	  buffer->cur++;
1084169695Skan	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1085169695Skan	}
1086169695Skan      else if (CPP_OPTION (pfile, digraphs))
1087169695Skan	{
1088169695Skan	  if (*buffer->cur == ':')
1089169695Skan	    {
1090169695Skan	      buffer->cur++;
1091169695Skan	      result->flags |= DIGRAPH;
1092169695Skan	      result->type = CPP_OPEN_SQUARE;
1093169695Skan	    }
1094169695Skan	  else if (*buffer->cur == '%')
1095169695Skan	    {
1096169695Skan	      buffer->cur++;
1097169695Skan	      result->flags |= DIGRAPH;
1098169695Skan	      result->type = CPP_OPEN_BRACE;
1099169695Skan	    }
1100169695Skan	}
1101169695Skan      break;
1102169695Skan
1103169695Skan    case '>':
1104169695Skan      result->type = CPP_GREATER;
1105169695Skan      if (*buffer->cur == '=')
1106169695Skan	buffer->cur++, result->type = CPP_GREATER_EQ;
1107169695Skan      else if (*buffer->cur == '>')
1108169695Skan	{
1109169695Skan	  buffer->cur++;
1110169695Skan	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1111169695Skan	}
1112169695Skan      break;
1113169695Skan
1114169695Skan    case '%':
1115169695Skan      result->type = CPP_MOD;
1116169695Skan      if (*buffer->cur == '=')
1117169695Skan	buffer->cur++, result->type = CPP_MOD_EQ;
1118169695Skan      else if (CPP_OPTION (pfile, digraphs))
1119169695Skan	{
1120169695Skan	  if (*buffer->cur == ':')
1121169695Skan	    {
1122169695Skan	      buffer->cur++;
1123169695Skan	      result->flags |= DIGRAPH;
1124169695Skan	      result->type = CPP_HASH;
1125169695Skan	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
1126169695Skan		buffer->cur += 2, result->type = CPP_PASTE;
1127169695Skan	    }
1128169695Skan	  else if (*buffer->cur == '>')
1129169695Skan	    {
1130169695Skan	      buffer->cur++;
1131169695Skan	      result->flags |= DIGRAPH;
1132169695Skan	      result->type = CPP_CLOSE_BRACE;
1133169695Skan	    }
1134169695Skan	}
1135169695Skan      break;
1136169695Skan
1137169695Skan    case '.':
1138169695Skan      result->type = CPP_DOT;
1139169695Skan      if (ISDIGIT (*buffer->cur))
1140169695Skan	{
1141169695Skan	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1142169695Skan	  result->type = CPP_NUMBER;
1143169695Skan	  lex_number (pfile, &result->val.str, &nst);
1144169695Skan	  warn_about_normalization (pfile, result, &nst);
1145169695Skan	}
1146169695Skan      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1147169695Skan	buffer->cur += 2, result->type = CPP_ELLIPSIS;
1148169695Skan      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1149169695Skan	buffer->cur++, result->type = CPP_DOT_STAR;
1150169695Skan      break;
1151169695Skan
1152169695Skan    case '+':
1153169695Skan      result->type = CPP_PLUS;
1154169695Skan      if (*buffer->cur == '+')
1155169695Skan	buffer->cur++, result->type = CPP_PLUS_PLUS;
1156169695Skan      else if (*buffer->cur == '=')
1157169695Skan	buffer->cur++, result->type = CPP_PLUS_EQ;
1158169695Skan      break;
1159169695Skan
1160169695Skan    case '-':
1161169695Skan      result->type = CPP_MINUS;
1162169695Skan      if (*buffer->cur == '>')
1163169695Skan	{
1164169695Skan	  buffer->cur++;
1165169695Skan	  result->type = CPP_DEREF;
1166169695Skan	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1167169695Skan	    buffer->cur++, result->type = CPP_DEREF_STAR;
1168169695Skan	}
1169169695Skan      else if (*buffer->cur == '-')
1170169695Skan	buffer->cur++, result->type = CPP_MINUS_MINUS;
1171169695Skan      else if (*buffer->cur == '=')
1172169695Skan	buffer->cur++, result->type = CPP_MINUS_EQ;
1173169695Skan      break;
1174169695Skan
1175169695Skan    case '&':
1176169695Skan      result->type = CPP_AND;
1177169695Skan      if (*buffer->cur == '&')
1178169695Skan	buffer->cur++, result->type = CPP_AND_AND;
1179169695Skan      else if (*buffer->cur == '=')
1180169695Skan	buffer->cur++, result->type = CPP_AND_EQ;
1181169695Skan      break;
1182169695Skan
1183169695Skan    case '|':
1184169695Skan      result->type = CPP_OR;
1185169695Skan      if (*buffer->cur == '|')
1186169695Skan	buffer->cur++, result->type = CPP_OR_OR;
1187169695Skan      else if (*buffer->cur == '=')
1188169695Skan	buffer->cur++, result->type = CPP_OR_EQ;
1189169695Skan      break;
1190169695Skan
1191169695Skan    case ':':
1192169695Skan      result->type = CPP_COLON;
1193169695Skan      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1194169695Skan	buffer->cur++, result->type = CPP_SCOPE;
1195169695Skan      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1196169695Skan	{
1197169695Skan	  buffer->cur++;
1198169695Skan	  result->flags |= DIGRAPH;
1199169695Skan	  result->type = CPP_CLOSE_SQUARE;
1200169695Skan	}
1201169695Skan      break;
1202169695Skan
1203169695Skan    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1204169695Skan    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1205169695Skan    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1206169695Skan    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1207169695Skan    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1208169695Skan
1209169695Skan    case '?': result->type = CPP_QUERY; break;
1210169695Skan    case '~': result->type = CPP_COMPL; break;
1211169695Skan    case ',': result->type = CPP_COMMA; break;
1212169695Skan    case '(': result->type = CPP_OPEN_PAREN; break;
1213169695Skan    case ')': result->type = CPP_CLOSE_PAREN; break;
1214169695Skan    case '[': result->type = CPP_OPEN_SQUARE; break;
1215169695Skan    case ']': result->type = CPP_CLOSE_SQUARE; break;
1216169695Skan    case '{': result->type = CPP_OPEN_BRACE; break;
1217169695Skan    case '}': result->type = CPP_CLOSE_BRACE; break;
1218169695Skan    case ';': result->type = CPP_SEMICOLON; break;
1219169695Skan
1220169695Skan      /* @ is a punctuator in Objective-C.  */
1221169695Skan    case '@': result->type = CPP_ATSIGN; break;
1222169695Skan
1223169695Skan    case '$':
1224169695Skan    case '\\':
1225169695Skan      {
1226169695Skan	const uchar *base = --buffer->cur;
1227169695Skan	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1228169695Skan
1229169695Skan	if (forms_identifier_p (pfile, true, &nst))
1230169695Skan	  {
1231169695Skan	    result->type = CPP_NAME;
1232169695Skan	    result->val.node = lex_identifier (pfile, base, true, &nst);
1233169695Skan	    warn_about_normalization (pfile, result, &nst);
1234169695Skan	    break;
1235169695Skan	  }
1236169695Skan	buffer->cur++;
1237169695Skan      }
1238169695Skan
1239169695Skan    default:
1240169695Skan      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1241169695Skan      break;
1242169695Skan    }
1243169695Skan
1244169695Skan  return result;
1245169695Skan}
1246169695Skan
1247169695Skan/* An upper bound on the number of bytes needed to spell TOKEN.
1248169695Skan   Does not include preceding whitespace.  */
1249169695Skanunsigned int
1250169695Skancpp_token_len (const cpp_token *token)
1251169695Skan{
1252169695Skan  unsigned int len;
1253169695Skan
1254169695Skan  switch (TOKEN_SPELL (token))
1255169695Skan    {
1256169695Skan    default:		len = 4;				break;
1257169695Skan    case SPELL_LITERAL:	len = token->val.str.len;		break;
1258169695Skan    case SPELL_IDENT:	len = NODE_LEN (token->val.node) * 10;	break;
1259169695Skan    }
1260169695Skan
1261169695Skan  return len;
1262169695Skan}
1263169695Skan
/* Decode the UTF-8 sequence starting at NAME and store the matching
   \U escape (backslash, 'U', eight lowercase hex digits) in BUFFER.
   Exactly 10 bytes are always written to BUFFER, with no terminating
   NUL.  Returns the number of bytes of NAME that made up the
   sequence.  Aborts when a continuation byte is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned lead = *name;
  unsigned long code_point;
  int seq_len = 0;
  int idx;
  int shift;

  /* The number of leading one bits in the first byte is the length
     of the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* What is left of the first byte below the length marker holds the
     most significant payload bits.  */
  code_point = *name & (0x7F >> seq_len);
  for (idx = 1; idx < seq_len; idx++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8: a continuation byte must look like
	 10xxxxxx.  */
      if ((*name & ~0x3F) != 0x80)
	abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (idx = 0, shift = 28; idx < 8; idx++, shift -= 4)
    buffer[2 + idx] = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1297169695Skan
1298169695Skan
1299169695Skan/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1300169695Skan   already contain the enough space to hold the token's spelling.
1301169695Skan   Returns a pointer to the character after the last character written.
1302169695Skan   FORSTRING is true if this is to be the spelling after translation
1303169695Skan   phase 1 (this is different for UCNs).
1304169695Skan   FIXME: Would be nice if we didn't need the PFILE argument.  */
1305169695Skanunsigned char *
1306169695Skancpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1307169695Skan		 unsigned char *buffer, bool forstring)
1308169695Skan{
1309169695Skan  switch (TOKEN_SPELL (token))
1310169695Skan    {
1311169695Skan    case SPELL_OPERATOR:
1312169695Skan      {
1313169695Skan	const unsigned char *spelling;
1314169695Skan	unsigned char c;
1315169695Skan
1316169695Skan	if (token->flags & DIGRAPH)
1317169695Skan	  spelling
1318169695Skan	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1319169695Skan	else if (token->flags & NAMED_OP)
1320169695Skan	  goto spell_ident;
1321169695Skan	else
1322169695Skan	  spelling = TOKEN_NAME (token);
1323169695Skan
1324169695Skan	while ((c = *spelling++) != '\0')
1325169695Skan	  *buffer++ = c;
1326169695Skan      }
1327169695Skan      break;
1328169695Skan
1329169695Skan    spell_ident:
1330169695Skan    case SPELL_IDENT:
1331169695Skan      if (forstring)
1332169695Skan	{
1333169695Skan	  memcpy (buffer, NODE_NAME (token->val.node),
1334169695Skan		  NODE_LEN (token->val.node));
1335169695Skan	  buffer += NODE_LEN (token->val.node);
1336169695Skan	}
1337169695Skan      else
1338169695Skan	{
1339169695Skan	  size_t i;
1340169695Skan	  const unsigned char * name = NODE_NAME (token->val.node);
1341169695Skan
1342169695Skan	  for (i = 0; i < NODE_LEN (token->val.node); i++)
1343169695Skan	    if (name[i] & ~0x7F)
1344169695Skan	      {
1345169695Skan		i += utf8_to_ucn (buffer, name + i) - 1;
1346169695Skan		buffer += 10;
1347169695Skan	      }
1348169695Skan	    else
1349169695Skan	      *buffer++ = NODE_NAME (token->val.node)[i];
1350169695Skan	}
1351169695Skan      break;
1352169695Skan
1353169695Skan    case SPELL_LITERAL:
1354169695Skan      memcpy (buffer, token->val.str.text, token->val.str.len);
1355169695Skan      buffer += token->val.str.len;
1356169695Skan      break;
1357169695Skan
1358169695Skan    case SPELL_NONE:
1359169695Skan      cpp_error (pfile, CPP_DL_ICE,
1360169695Skan		 "unspellable token %s", TOKEN_NAME (token));
1361169695Skan      break;
1362169695Skan    }
1363169695Skan
1364169695Skan  return buffer;
1365169695Skan}
1366169695Skan
1367169695Skan/* Returns TOKEN spelt as a null-terminated string.  The string is
1368169695Skan   freed when the reader is destroyed.  Useful for diagnostics.  */
1369169695Skanunsigned char *
1370169695Skancpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1371169695Skan{
1372169695Skan  unsigned int len = cpp_token_len (token) + 1;
1373169695Skan  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1374169695Skan
1375169695Skan  end = cpp_spell_token (pfile, token, start, false);
1376169695Skan  end[0] = '\0';
1377169695Skan
1378169695Skan  return start;
1379169695Skan}
1380169695Skan
1381169695Skan/* Used by C front ends, which really should move to using
1382169695Skan   cpp_token_as_text.  */
1383169695Skanconst char *
1384169695Skancpp_type2name (enum cpp_ttype type)
1385169695Skan{
1386169695Skan  return (const char *) token_spellings[type].name;
1387169695Skan}
1388169695Skan
1389169695Skan/* Writes the spelling of token to FP, without any preceding space.
1390169695Skan   Separated from cpp_spell_token for efficiency - to avoid stdio
1391169695Skan   double-buffering.  */
1392169695Skanvoid
1393169695Skancpp_output_token (const cpp_token *token, FILE *fp)
1394169695Skan{
1395169695Skan  switch (TOKEN_SPELL (token))
1396169695Skan    {
1397169695Skan    case SPELL_OPERATOR:
1398169695Skan      {
1399169695Skan	const unsigned char *spelling;
1400169695Skan	int c;
1401169695Skan
1402169695Skan	if (token->flags & DIGRAPH)
1403169695Skan	  spelling
1404169695Skan	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1405169695Skan	else if (token->flags & NAMED_OP)
1406169695Skan	  goto spell_ident;
1407169695Skan	else
1408169695Skan	  spelling = TOKEN_NAME (token);
1409169695Skan
1410169695Skan	c = *spelling;
1411169695Skan	do
1412169695Skan	  putc (c, fp);
1413169695Skan	while ((c = *++spelling) != '\0');
1414169695Skan      }
1415169695Skan      break;
1416169695Skan
1417169695Skan    spell_ident:
1418169695Skan    case SPELL_IDENT:
1419169695Skan      {
1420169695Skan	size_t i;
1421169695Skan	const unsigned char * name = NODE_NAME (token->val.node);
1422169695Skan
1423169695Skan	for (i = 0; i < NODE_LEN (token->val.node); i++)
1424169695Skan	  if (name[i] & ~0x7F)
1425169695Skan	    {
1426169695Skan	      unsigned char buffer[10];
1427169695Skan	      i += utf8_to_ucn (buffer, name + i) - 1;
1428169695Skan	      fwrite (buffer, 1, 10, fp);
1429169695Skan	    }
1430169695Skan	  else
1431169695Skan	    fputc (NODE_NAME (token->val.node)[i], fp);
1432169695Skan      }
1433169695Skan      break;
1434169695Skan
1435169695Skan    case SPELL_LITERAL:
1436169695Skan      fwrite (token->val.str.text, 1, token->val.str.len, fp);
1437169695Skan      break;
1438169695Skan
1439169695Skan    case SPELL_NONE:
1440169695Skan      /* An error, most probably.  */
1441169695Skan      break;
1442169695Skan    }
1443169695Skan}
1444169695Skan
1445169695Skan/* Compare two tokens.  */
1446169695Skanint
1447169695Skan_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1448169695Skan{
1449169695Skan  if (a->type == b->type && a->flags == b->flags)
1450169695Skan    switch (TOKEN_SPELL (a))
1451169695Skan      {
1452169695Skan      default:			/* Keep compiler happy.  */
1453169695Skan      case SPELL_OPERATOR:
1454169695Skan	return 1;
1455169695Skan      case SPELL_NONE:
1456169695Skan	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1457169695Skan      case SPELL_IDENT:
1458169695Skan	return a->val.node == b->val.node;
1459169695Skan      case SPELL_LITERAL:
1460169695Skan	return (a->val.str.len == b->val.str.len
1461169695Skan		&& !memcmp (a->val.str.text, b->val.str.text,
1462169695Skan			    a->val.str.len));
1463169695Skan      }
1464169695Skan
1465169695Skan  return 0;
1466169695Skan}
1467169695Skan
1468169695Skan/* Returns nonzero if a space should be inserted to avoid an
1469169695Skan   accidental token paste for output.  For simplicity, it is
1470169695Skan   conservative, and occasionally advises a space where one is not
1471169695Skan   needed, e.g. "." and ".2".  */
1472169695Skanint
1473169695Skancpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1474169695Skan		 const cpp_token *token2)
1475169695Skan{
1476169695Skan  enum cpp_ttype a = token1->type, b = token2->type;
1477169695Skan  cppchar_t c;
1478169695Skan
1479169695Skan  if (token1->flags & NAMED_OP)
1480169695Skan    a = CPP_NAME;
1481169695Skan  if (token2->flags & NAMED_OP)
1482169695Skan    b = CPP_NAME;
1483169695Skan
1484169695Skan  c = EOF;
1485169695Skan  if (token2->flags & DIGRAPH)
1486169695Skan    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1487169695Skan  else if (token_spellings[b].category == SPELL_OPERATOR)
1488169695Skan    c = token_spellings[b].name[0];
1489169695Skan
1490169695Skan  /* Quickly get everything that can paste with an '='.  */
1491169695Skan  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1492169695Skan    return 1;
1493169695Skan
1494169695Skan  switch (a)
1495169695Skan    {
1496169695Skan    case CPP_GREATER:	return c == '>';
1497169695Skan    case CPP_LESS:	return c == '<' || c == '%' || c == ':';
1498169695Skan    case CPP_PLUS:	return c == '+';
1499169695Skan    case CPP_MINUS:	return c == '-' || c == '>';
1500169695Skan    case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1501169695Skan    case CPP_MOD:	return c == ':' || c == '>';
1502169695Skan    case CPP_AND:	return c == '&';
1503169695Skan    case CPP_OR:	return c == '|';
1504169695Skan    case CPP_COLON:	return c == ':' || c == '>';
1505169695Skan    case CPP_DEREF:	return c == '*';
1506169695Skan    case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1507169695Skan    case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1508169695Skan    case CPP_NAME:	return ((b == CPP_NUMBER
1509169695Skan				 && name_p (pfile, &token2->val.str))
1510169695Skan				|| b == CPP_NAME
1511169695Skan				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1512169695Skan    case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1513169695Skan				|| c == '.' || c == '+' || c == '-');
1514169695Skan				      /* UCNs */
1515169695Skan    case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
1516169695Skan				 && b == CPP_NAME)
1517169695Skan				|| (CPP_OPTION (pfile, objc)
1518169695Skan				    && token1->val.str.text[0] == '@'
1519169695Skan				    && (b == CPP_NAME || b == CPP_STRING)));
1520169695Skan    default:		break;
1521169695Skan    }
1522169695Skan
1523169695Skan  return 0;
1524169695Skan}
1525169695Skan
1526169695Skan/* Output all the remaining tokens on the current line, and a newline
1527169695Skan   character, to FP.  Leading whitespace is removed.  If there are
1528169695Skan   macros, special token padding is not performed.  */
1529169695Skanvoid
1530169695Skancpp_output_line (cpp_reader *pfile, FILE *fp)
1531169695Skan{
1532169695Skan  const cpp_token *token;
1533169695Skan
1534169695Skan  token = cpp_get_token (pfile);
1535169695Skan  while (token->type != CPP_EOF)
1536169695Skan    {
1537169695Skan      cpp_output_token (token, fp);
1538169695Skan      token = cpp_get_token (pfile);
1539169695Skan      if (token->flags & PREV_WHITE)
1540169695Skan	putc (' ', fp);
1541169695Skan    }
1542169695Skan
1543169695Skan  putc ('\n', fp);
1544169695Skan}
1545169695Skan
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still accepted for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is MIN_EXTRA plus twice the
   number of bytes between BUFF's cur and limit pointers.  Every macro
   argument is parenthesized so that any expression may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1560169695Skan
1561169695Skan/* Create a new allocation buffer.  Place the control block at the end
1562169695Skan   of the buffer, so that buffer overflows will cause immediate chaos.  */
1563169695Skanstatic _cpp_buff *
1564169695Skannew_buff (size_t len)
1565169695Skan{
1566169695Skan  _cpp_buff *result;
1567169695Skan  unsigned char *base;
1568169695Skan
1569169695Skan  if (len < MIN_BUFF_SIZE)
1570169695Skan    len = MIN_BUFF_SIZE;
1571169695Skan  len = CPP_ALIGN (len);
1572169695Skan
1573169695Skan  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1574169695Skan  result = (_cpp_buff *) (base + len);
1575169695Skan  result->base = base;
1576169695Skan  result->cur = base;
1577169695Skan  result->limit = base + len;
1578169695Skan  result->next = NULL;
1579169695Skan  return result;
1580169695Skan}
1581169695Skan
1582169695Skan/* Place a chain of unwanted allocation buffers on the free list.  */
1583169695Skanvoid
1584169695Skan_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1585169695Skan{
1586169695Skan  _cpp_buff *end = buff;
1587169695Skan
1588169695Skan  while (end->next)
1589169695Skan    end = end->next;
1590169695Skan  end->next = pfile->free_buffs;
1591169695Skan  pfile->free_buffs = buff;
1592169695Skan}
1593169695Skan
1594169695Skan/* Return a free buffer of size at least MIN_SIZE.  */
1595169695Skan_cpp_buff *
1596169695Skan_cpp_get_buff (cpp_reader *pfile, size_t min_size)
1597169695Skan{
1598169695Skan  _cpp_buff *result, **p;
1599169695Skan
1600169695Skan  for (p = &pfile->free_buffs;; p = &(*p)->next)
1601169695Skan    {
1602169695Skan      size_t size;
1603169695Skan
1604169695Skan      if (*p == NULL)
1605169695Skan	return new_buff (min_size);
1606169695Skan      result = *p;
1607169695Skan      size = result->limit - result->base;
1608169695Skan      /* Return a buffer that's big enough, but don't waste one that's
1609169695Skan         way too big.  */
1610169695Skan      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1611169695Skan	break;
1612169695Skan    }
1613169695Skan
1614169695Skan  *p = result->next;
1615169695Skan  result->next = NULL;
1616169695Skan  result->cur = result->base;
1617169695Skan  return result;
1618169695Skan}
1619169695Skan
1620169695Skan/* Creates a new buffer with enough space to hold the uncommitted
1621169695Skan   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1622169695Skan   the excess bytes to the new buffer.  Chains the new buffer after
1623169695Skan   BUFF, and returns the new buffer.  */
1624169695Skan_cpp_buff *
1625169695Skan_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1626169695Skan{
1627169695Skan  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1628169695Skan  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1629169695Skan
1630169695Skan  buff->next = new_buff;
1631169695Skan  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1632169695Skan  return new_buff;
1633169695Skan}
1634169695Skan
1635169695Skan/* Creates a new buffer with enough space to hold the uncommitted
1636169695Skan   remaining bytes of the buffer pointed to by BUFF, and at least
1637169695Skan   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1638169695Skan   Chains the new buffer before the buffer pointed to by BUFF, and
1639169695Skan   updates the pointer to point to the new buffer.  */
1640169695Skanvoid
1641169695Skan_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1642169695Skan{
1643169695Skan  _cpp_buff *new_buff, *old_buff = *pbuff;
1644169695Skan  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1645169695Skan
1646169695Skan  new_buff = _cpp_get_buff (pfile, size);
1647169695Skan  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1648169695Skan  new_buff->next = old_buff;
1649169695Skan  *pbuff = new_buff;
1650169695Skan}
1651169695Skan
1652169695Skan/* Free a chain of buffers starting at BUFF.  */
1653169695Skanvoid
1654169695Skan_cpp_free_buff (_cpp_buff *buff)
1655169695Skan{
1656169695Skan  _cpp_buff *next;
1657169695Skan
1658169695Skan  for (; buff; buff = next)
1659169695Skan    {
1660169695Skan      next = buff->next;
1661169695Skan      free (buff->base);
1662169695Skan    }
1663169695Skan}
1664169695Skan
1665169695Skan/* Allocate permanent, unaligned storage of length LEN.  */
1666169695Skanunsigned char *
1667169695Skan_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1668169695Skan{
1669169695Skan  _cpp_buff *buff = pfile->u_buff;
1670169695Skan  unsigned char *result = buff->cur;
1671169695Skan
1672169695Skan  if (len > (size_t) (buff->limit - result))
1673169695Skan    {
1674169695Skan      buff = _cpp_get_buff (pfile, len);
1675169695Skan      buff->next = pfile->u_buff;
1676169695Skan      pfile->u_buff = buff;
1677169695Skan      result = buff->cur;
1678169695Skan    }
1679169695Skan
1680169695Skan  buff->cur = result + len;
1681169695Skan  return result;
1682169695Skan}
1683169695Skan
1684169695Skan/* Allocate permanent, unaligned storage of length LEN from a_buff.
1685169695Skan   That buffer is used for growing allocations when saving macro
1686169695Skan   replacement lists in a #define, and when parsing an answer to an
1687169695Skan   assertion in #assert, #unassert or #if (and therefore possibly
1688169695Skan   whilst expanding macros).  It therefore must not be used by any
1689169695Skan   code that they might call: specifically the lexer and the guts of
1690169695Skan   the macro expander.
1691169695Skan
1692169695Skan   All existing other uses clearly fit this restriction: storing
1693169695Skan   registered pragmas during initialization.  */
1694169695Skanunsigned char *
1695169695Skan_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1696169695Skan{
1697169695Skan  _cpp_buff *buff = pfile->a_buff;
1698169695Skan  unsigned char *result = buff->cur;
1699169695Skan
1700169695Skan  if (len > (size_t) (buff->limit - result))
1701169695Skan    {
1702169695Skan      buff = _cpp_get_buff (pfile, len);
1703169695Skan      buff->next = pfile->a_buff;
1704169695Skan      pfile->a_buff = buff;
1705169695Skan      result = buff->cur;
1706169695Skan    }
1707169695Skan
1708169695Skan  buff->cur = result + len;
1709169695Skan  return result;
1710169695Skan}
1711169695Skan
1712169695Skan/* Say which field of TOK is in use.  */
1713169695Skan
1714169695Skanenum cpp_token_fld_kind
1715169695Skancpp_token_val_index (cpp_token *tok)
1716169695Skan{
1717169695Skan  switch (TOKEN_SPELL (tok))
1718169695Skan    {
1719169695Skan    case SPELL_IDENT:
1720169695Skan      return CPP_TOKEN_FLD_NODE;
1721169695Skan    case SPELL_LITERAL:
1722169695Skan      return CPP_TOKEN_FLD_STR;
1723169695Skan    case SPELL_NONE:
1724169695Skan      if (tok->type == CPP_MACRO_ARG)
1725169695Skan	return CPP_TOKEN_FLD_ARG_NO;
1726169695Skan      else if (tok->type == CPP_PADDING)
1727169695Skan	return CPP_TOKEN_FLD_SOURCE;
1728169695Skan      else if (tok->type == CPP_PRAGMA)
1729169695Skan	return CPP_TOKEN_FLD_PRAGMA;
1730169695Skan      /* else fall through */
1731169695Skan    default:
1732169695Skan      return CPP_TOKEN_FLD_NONE;
1733169695Skan    }
1734169695Skan}
1735