1169695Skan/* CPP Library - lexical analysis.
2169695Skan   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3169695Skan   Contributed by Per Bothner, 1994-95.
4169695Skan   Based on CCCP program by Paul Rubin, June 1986
5169695Skan   Adapted to ANSI C, Richard Stallman, Jan 1987
6169695Skan   Broken out to separate file, Zack Weinberg, Mar 2000
7169695Skan
8169695SkanThis program is free software; you can redistribute it and/or modify it
9169695Skanunder the terms of the GNU General Public License as published by the
10169695SkanFree Software Foundation; either version 2, or (at your option) any
11169695Skanlater version.
12169695Skan
13169695SkanThis program is distributed in the hope that it will be useful,
14169695Skanbut WITHOUT ANY WARRANTY; without even the implied warranty of
15169695SkanMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16169695SkanGNU General Public License for more details.
17169695Skan
18169695SkanYou should have received a copy of the GNU General Public License
19169695Skanalong with this program; if not, write to the Free Software
20169695SkanFoundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21169695Skan
22169695Skan#include "config.h"
23169695Skan#include "system.h"
24169695Skan#include "cpplib.h"
25169695Skan#include "internal.h"
26169695Skan
/* How a token's spelling is obtained; stored per token type in the
   token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR,   /* First enumerator, value 0.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One row of the spelling table: the spelling category of a token
   type and the text recorded for it.  */
struct token_spelling
{
  enum spell_type category;   /* Which spell_type applies.  */
  const unsigned char *name;  /* Text associated with the token type.  */
};
40169695Skan
41169695Skanstatic const unsigned char *const digraph_spellings[] =
42169695Skan{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
43169695Skan
44169695Skan#define OP(e, s) { SPELL_OPERATOR, U s  },
45169695Skan#define TK(e, s) { SPELL_ ## s,    U #e },
46169695Skanstatic const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47169695Skan#undef OP
48169695Skan#undef TK
49169695Skan
50169695Skan#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51169695Skan#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52169695Skan
53169695Skanstatic void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54169695Skanstatic int skip_line_comment (cpp_reader *);
55169695Skanstatic void skip_whitespace (cpp_reader *, cppchar_t);
56169695Skanstatic void lex_string (cpp_reader *, cpp_token *, const uchar *);
57169695Skanstatic void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58169695Skanstatic void create_literal (cpp_reader *, cpp_token *, const uchar *,
59169695Skan			    unsigned int, enum cpp_ttype);
60169695Skanstatic bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61169695Skanstatic int name_p (cpp_reader *, const cpp_string *);
62169695Skanstatic tokenrun *next_tokenrun (tokenrun *);
63169695Skan
64169695Skanstatic _cpp_buff *new_buff (size_t);
65169695Skan
66169695Skan
67169695Skan/* Utility routine:
68169695Skan
69169695Skan   Compares, the token TOKEN to the NUL-terminated string STRING.
70169695Skan   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
71169695Skanint
72169695Skancpp_ideq (const cpp_token *token, const char *string)
73169695Skan{
74169695Skan  if (token->type != CPP_NAME)
75169695Skan    return 0;
76169695Skan
77169695Skan  return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78169695Skan}
79169695Skan
80169695Skan/* Record a note TYPE at byte POS into the current cleaned logical
81169695Skan   line.  */
82169695Skanstatic void
83169695Skanadd_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84169695Skan{
85169695Skan  if (buffer->notes_used == buffer->notes_cap)
86169695Skan    {
87169695Skan      buffer->notes_cap = buffer->notes_cap * 2 + 200;
88169695Skan      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89169695Skan                                  buffer->notes_cap);
90169695Skan    }
91169695Skan
92169695Skan  buffer->notes[buffer->notes_used].pos = pos;
93169695Skan  buffer->notes[buffer->notes_used].type = type;
94169695Skan  buffer->notes_used++;
95169695Skan}
96169695Skan
97169695Skan/* Returns with a logical line that contains no escaped newlines or
98169695Skan   trigraphs.  This is a time-critical inner loop.  */
99169695Skanvoid
100169695Skan_cpp_clean_line (cpp_reader *pfile)
101169695Skan{
102169695Skan  cpp_buffer *buffer;
103169695Skan  const uchar *s;
104169695Skan  uchar c, *d, *p;
105169695Skan
106169695Skan  buffer = pfile->buffer;
107169695Skan  buffer->cur_note = buffer->notes_used = 0;
108169695Skan  buffer->cur = buffer->line_base = buffer->next_line;
109169695Skan  buffer->need_line = false;
110169695Skan  s = buffer->next_line - 1;
111169695Skan
112169695Skan  if (!buffer->from_stage3)
113169695Skan    {
114169695Skan      /* Short circuit for the common case of an un-escaped line with
115169695Skan	 no trigraphs.  The primary win here is by not writing any
116169695Skan	 data back to memory until we have to.  */
117169695Skan      for (;;)
118169695Skan	{
119169695Skan	  c = *++s;
120169695Skan	  if (c == '\n' || c == '\r')
121169695Skan	    {
122169695Skan	      d = (uchar *) s;
123169695Skan
124169695Skan	      if (s == buffer->rlimit)
125169695Skan		goto done;
126169695Skan
127169695Skan	      /* DOS line ending? */
128169695Skan	      if (c == '\r' && s[1] == '\n')
129169695Skan		s++;
130169695Skan
131169695Skan	      if (s == buffer->rlimit)
132169695Skan		goto done;
133169695Skan
134169695Skan	      /* check for escaped newline */
135169695Skan	      p = d;
136169695Skan	      while (p != buffer->next_line && is_nvspace (p[-1]))
137169695Skan		p--;
138169695Skan	      if (p == buffer->next_line || p[-1] != '\\')
139169695Skan		goto done;
140169695Skan
141169695Skan	      /* Have an escaped newline; process it and proceed to
142169695Skan		 the slow path.  */
143169695Skan	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
144169695Skan	      d = p - 2;
145169695Skan	      buffer->next_line = p - 1;
146169695Skan	      break;
147169695Skan	    }
148169695Skan	  if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
149169695Skan	    {
150169695Skan	      /* Have a trigraph.  We may or may not have to convert
151169695Skan		 it.  Add a line note regardless, for -Wtrigraphs.  */
152169695Skan	      add_line_note (buffer, s, s[2]);
153169695Skan	      if (CPP_OPTION (pfile, trigraphs))
154169695Skan		{
155169695Skan		  /* We do, and that means we have to switch to the
156169695Skan		     slow path.  */
157169695Skan		  d = (uchar *) s;
158169695Skan		  *d = _cpp_trigraph_map[s[2]];
159169695Skan		  s += 2;
160169695Skan		  break;
161169695Skan		}
162169695Skan	    }
163169695Skan	}
164169695Skan
165169695Skan
166169695Skan      for (;;)
167169695Skan	{
168169695Skan	  c = *++s;
169169695Skan	  *++d = c;
170169695Skan
171169695Skan	  if (c == '\n' || c == '\r')
172169695Skan	    {
173169695Skan		  /* Handle DOS line endings.  */
174169695Skan	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
175169695Skan		s++;
176169695Skan	      if (s == buffer->rlimit)
177169695Skan		break;
178169695Skan
179169695Skan	      /* Escaped?  */
180169695Skan	      p = d;
181169695Skan	      while (p != buffer->next_line && is_nvspace (p[-1]))
182169695Skan		p--;
183169695Skan	      if (p == buffer->next_line || p[-1] != '\\')
184169695Skan		break;
185169695Skan
186169695Skan	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
187169695Skan	      d = p - 2;
188169695Skan	      buffer->next_line = p - 1;
189169695Skan	    }
190169695Skan	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
191169695Skan	    {
192169695Skan	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
193169695Skan	      add_line_note (buffer, d, s[2]);
194169695Skan	      if (CPP_OPTION (pfile, trigraphs))
195169695Skan		{
196169695Skan		  *d = _cpp_trigraph_map[s[2]];
197169695Skan		  s += 2;
198169695Skan		}
199169695Skan	    }
200169695Skan	}
201169695Skan    }
202169695Skan  else
203169695Skan    {
204169695Skan      do
205169695Skan	s++;
206169695Skan      while (*s != '\n' && *s != '\r');
207169695Skan      d = (uchar *) s;
208169695Skan
209169695Skan      /* Handle DOS line endings.  */
210169695Skan      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
211169695Skan	s++;
212169695Skan    }
213169695Skan
214169695Skan done:
215169695Skan  *d = '\n';
216169695Skan  /* A sentinel note that should never be processed.  */
217169695Skan  add_line_note (buffer, d + 1, '\n');
218169695Skan  buffer->next_line = s + 1;
219169695Skan}
220169695Skan
221169695Skan/* Return true if the trigraph indicated by NOTE should be warned
222169695Skan   about in a comment.  */
223169695Skanstatic bool
224169695Skanwarn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
225169695Skan{
226169695Skan  const uchar *p;
227169695Skan
228169695Skan  /* Within comments we don't warn about trigraphs, unless the
229169695Skan     trigraph forms an escaped newline, as that may change
230169695Skan     behavior.  */
231169695Skan  if (note->type != '/')
232169695Skan    return false;
233169695Skan
234169695Skan  /* If -trigraphs, then this was an escaped newline iff the next note
235169695Skan     is coincident.  */
236169695Skan  if (CPP_OPTION (pfile, trigraphs))
237169695Skan    return note[1].pos == note->pos;
238169695Skan
239169695Skan  /* Otherwise, see if this forms an escaped newline.  */
240169695Skan  p = note->pos + 3;
241169695Skan  while (is_nvspace (*p))
242169695Skan    p++;
243169695Skan
244169695Skan  /* There might have been escaped newlines between the trigraph and the
245169695Skan     newline we found.  Hence the position test.  */
246169695Skan  return (*p == '\n' && p < note[1].pos);
247169695Skan}
248169695Skan
249169695Skan/* Process the notes created by add_line_note as far as the current
250169695Skan   location.  */
251169695Skanvoid
252169695Skan_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
253169695Skan{
254169695Skan  cpp_buffer *buffer = pfile->buffer;
255169695Skan
256169695Skan  for (;;)
257169695Skan    {
258169695Skan      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
259169695Skan      unsigned int col;
260169695Skan
261169695Skan      if (note->pos > buffer->cur)
262169695Skan	break;
263169695Skan
264169695Skan      buffer->cur_note++;
265169695Skan      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
266169695Skan
267169695Skan      if (note->type == '\\' || note->type == ' ')
268169695Skan	{
269169695Skan	  if (note->type == ' ' && !in_comment)
270169695Skan	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
271169695Skan				 "backslash and newline separated by space");
272169695Skan
273169695Skan	  if (buffer->next_line > buffer->rlimit)
274169695Skan	    {
275169695Skan	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
276169695Skan				   "backslash-newline at end of file");
277169695Skan	      /* Prevent "no newline at end of file" warning.  */
278169695Skan	      buffer->next_line = buffer->rlimit;
279169695Skan	    }
280169695Skan
281169695Skan	  buffer->line_base = note->pos;
282169695Skan	  CPP_INCREMENT_LINE (pfile, 0);
283169695Skan	}
284169695Skan      else if (_cpp_trigraph_map[note->type])
285169695Skan	{
286169695Skan	  if (CPP_OPTION (pfile, warn_trigraphs)
287169695Skan	      && (!in_comment || warn_in_comment (pfile, note)))
288169695Skan	    {
289169695Skan	      if (CPP_OPTION (pfile, trigraphs))
290169695Skan		cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
291169695Skan				     "trigraph ??%c converted to %c",
292169695Skan				     note->type,
293169695Skan				     (int) _cpp_trigraph_map[note->type]);
294169695Skan	      else
295169695Skan		{
296169695Skan		  cpp_error_with_line
297169695Skan		    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
298169695Skan		     "trigraph ??%c ignored, use -trigraphs to enable",
299169695Skan		     note->type);
300169695Skan		}
301169695Skan	    }
302169695Skan	}
303169695Skan      else
304169695Skan	abort ();
305169695Skan    }
306169695Skan}
307169695Skan
308169695Skan/* Skip a C-style block comment.  We find the end of the comment by
309169695Skan   seeing if an asterisk is before every '/' we encounter.  Returns
310169695Skan   nonzero if comment terminated by EOF, zero otherwise.
311169695Skan
312169695Skan   Buffer->cur points to the initial asterisk of the comment.  */
313169695Skanbool
314169695Skan_cpp_skip_block_comment (cpp_reader *pfile)
315169695Skan{
316169695Skan  cpp_buffer *buffer = pfile->buffer;
317169695Skan  const uchar *cur = buffer->cur;
318169695Skan  uchar c;
319169695Skan
320169695Skan  cur++;
321169695Skan  if (*cur == '/')
322169695Skan    cur++;
323169695Skan
324169695Skan  for (;;)
325169695Skan    {
326169695Skan      /* People like decorating comments with '*', so check for '/'
327169695Skan	 instead for efficiency.  */
328169695Skan      c = *cur++;
329169695Skan
330169695Skan      if (c == '/')
331169695Skan	{
332169695Skan	  if (cur[-2] == '*')
333169695Skan	    break;
334169695Skan
335169695Skan	  /* Warn about potential nested comments, but not if the '/'
336169695Skan	     comes immediately before the true comment delimiter.
337169695Skan	     Don't bother to get it right across escaped newlines.  */
338169695Skan	  if (CPP_OPTION (pfile, warn_comments)
339169695Skan	      && cur[0] == '*' && cur[1] != '/')
340169695Skan	    {
341169695Skan	      buffer->cur = cur;
342169695Skan	      cpp_error_with_line (pfile, CPP_DL_WARNING,
343169695Skan				   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
344169695Skan				   "\"/*\" within comment");
345169695Skan	    }
346169695Skan	}
347169695Skan      else if (c == '\n')
348169695Skan	{
349169695Skan	  unsigned int cols;
350169695Skan	  buffer->cur = cur - 1;
351169695Skan	  _cpp_process_line_notes (pfile, true);
352169695Skan	  if (buffer->next_line >= buffer->rlimit)
353169695Skan	    return true;
354169695Skan	  _cpp_clean_line (pfile);
355169695Skan
356169695Skan	  cols = buffer->next_line - buffer->line_base;
357169695Skan	  CPP_INCREMENT_LINE (pfile, cols);
358169695Skan
359169695Skan	  cur = buffer->cur;
360169695Skan	}
361169695Skan    }
362169695Skan
363169695Skan  buffer->cur = cur;
364169695Skan  _cpp_process_line_notes (pfile, true);
365169695Skan  return false;
366169695Skan}
367169695Skan
368169695Skan/* Skip a C++ line comment, leaving buffer->cur pointing to the
369169695Skan   terminating newline.  Handles escaped newlines.  Returns nonzero
370169695Skan   if a multiline comment.  */
371169695Skanstatic int
372169695Skanskip_line_comment (cpp_reader *pfile)
373169695Skan{
374169695Skan  cpp_buffer *buffer = pfile->buffer;
375169695Skan  unsigned int orig_line = pfile->line_table->highest_line;
376169695Skan
377169695Skan  while (*buffer->cur != '\n')
378169695Skan    buffer->cur++;
379169695Skan
380169695Skan  _cpp_process_line_notes (pfile, true);
381169695Skan  return orig_line != pfile->line_table->highest_line;
382169695Skan}
383169695Skan
384169695Skan/* Skips whitespace, saving the next non-whitespace character.  */
385169695Skanstatic void
386169695Skanskip_whitespace (cpp_reader *pfile, cppchar_t c)
387169695Skan{
388169695Skan  cpp_buffer *buffer = pfile->buffer;
389169695Skan  bool saw_NUL = false;
390169695Skan
391169695Skan  do
392169695Skan    {
393169695Skan      /* Horizontal space always OK.  */
394169695Skan      if (c == ' ' || c == '\t')
395169695Skan	;
396169695Skan      /* Just \f \v or \0 left.  */
397169695Skan      else if (c == '\0')
398169695Skan	saw_NUL = true;
399169695Skan      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400169695Skan	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
401169695Skan			     CPP_BUF_COL (buffer),
402169695Skan			     "%s in preprocessing directive",
403169695Skan			     c == '\f' ? "form feed" : "vertical tab");
404169695Skan
405169695Skan      c = *buffer->cur++;
406169695Skan    }
407169695Skan  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
408169695Skan  while (is_nvspace (c));
409169695Skan
410169695Skan  if (saw_NUL)
411169695Skan    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
412169695Skan
413169695Skan  buffer->cur--;
414169695Skan}
415169695Skan
416169695Skan/* See if the characters of a number token are valid in a name (no
417169695Skan   '.', '+' or '-').  */
418169695Skanstatic int
419169695Skanname_p (cpp_reader *pfile, const cpp_string *string)
420169695Skan{
421169695Skan  unsigned int i;
422169695Skan
423169695Skan  for (i = 0; i < string->len; i++)
424169695Skan    if (!is_idchar (string->text[i]))
425169695Skan      return 0;
426169695Skan
427169695Skan  return 1;
428169695Skan}
429169695Skan
430169695Skan/* After parsing an identifier or other sequence, produce a warning about
431169695Skan   sequences not in NFC/NFKC.  */
432169695Skanstatic void
433169695Skanwarn_about_normalization (cpp_reader *pfile,
434169695Skan			  const cpp_token *token,
435169695Skan			  const struct normalize_state *s)
436169695Skan{
437169695Skan  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
438169695Skan      && !pfile->state.skipping)
439169695Skan    {
440169695Skan      /* Make sure that the token is printed using UCNs, even
441169695Skan	 if we'd otherwise happily print UTF-8.  */
442169695Skan      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
443169695Skan      size_t sz;
444169695Skan
445169695Skan      sz = cpp_spell_token (pfile, token, buf, false) - buf;
446169695Skan      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
447169695Skan	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
448169695Skan			     "`%.*s' is not in NFKC", (int) sz, buf);
449169695Skan      else
450169695Skan	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
451169695Skan			     "`%.*s' is not in NFC", (int) sz, buf);
452169695Skan    }
453169695Skan}
454169695Skan
455169695Skan/* Returns TRUE if the sequence starting at buffer->cur is invalid in
456169695Skan   an identifier.  FIRST is TRUE if this starts an identifier.  */
457169695Skanstatic bool
458169695Skanforms_identifier_p (cpp_reader *pfile, int first,
459169695Skan		    struct normalize_state *state)
460169695Skan{
461169695Skan  cpp_buffer *buffer = pfile->buffer;
462169695Skan
463169695Skan  if (*buffer->cur == '$')
464169695Skan    {
465169695Skan      if (!CPP_OPTION (pfile, dollars_in_ident))
466169695Skan	return false;
467169695Skan
468169695Skan      buffer->cur++;
469169695Skan      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
470169695Skan	{
471169695Skan	  CPP_OPTION (pfile, warn_dollars) = 0;
472169695Skan	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
473169695Skan	}
474169695Skan
475169695Skan      return true;
476169695Skan    }
477169695Skan
478169695Skan  /* Is this a syntactically valid UCN?  */
479169695Skan  if (CPP_OPTION (pfile, extended_identifiers)
480169695Skan      && *buffer->cur == '\\'
481169695Skan      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
482169695Skan    {
483169695Skan      buffer->cur += 2;
484169695Skan      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
485169695Skan			  state))
486169695Skan	return true;
487169695Skan      buffer->cur -= 2;
488169695Skan    }
489169695Skan
490169695Skan  return false;
491169695Skan}
492169695Skan
493169695Skan/* Lex an identifier starting at BUFFER->CUR - 1.  */
494169695Skanstatic cpp_hashnode *
495169695Skanlex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
496169695Skan		struct normalize_state *nst)
497169695Skan{
498169695Skan  cpp_hashnode *result;
499169695Skan  const uchar *cur;
500169695Skan  unsigned int len;
501169695Skan  unsigned int hash = HT_HASHSTEP (0, *base);
502169695Skan
503169695Skan  cur = pfile->buffer->cur;
504169695Skan  if (! starts_ucn)
505169695Skan    while (ISIDNUM (*cur))
506169695Skan      {
507169695Skan	hash = HT_HASHSTEP (hash, *cur);
508169695Skan	cur++;
509169695Skan      }
510169695Skan  pfile->buffer->cur = cur;
511169695Skan  if (starts_ucn || forms_identifier_p (pfile, false, nst))
512169695Skan    {
513169695Skan      /* Slower version for identifiers containing UCNs (or $).  */
514169695Skan      do {
515169695Skan	while (ISIDNUM (*pfile->buffer->cur))
516169695Skan	  {
517169695Skan	    pfile->buffer->cur++;
518169695Skan	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
519169695Skan	  }
520169695Skan      } while (forms_identifier_p (pfile, false, nst));
521169695Skan      result = _cpp_interpret_identifier (pfile, base,
522169695Skan					  pfile->buffer->cur - base);
523169695Skan    }
524169695Skan  else
525169695Skan    {
526169695Skan      len = cur - base;
527169695Skan      hash = HT_HASHFINISH (hash, len);
528169695Skan
529169695Skan      result = (cpp_hashnode *)
530169695Skan	ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
531169695Skan    }
532169695Skan
533169695Skan  /* Rarely, identifiers require diagnostics when lexed.  */
534169695Skan  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
535169695Skan			&& !pfile->state.skipping, 0))
536169695Skan    {
537169695Skan      /* It is allowed to poison the same identifier twice.  */
538169695Skan      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
539169695Skan	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
540169695Skan		   NODE_NAME (result));
541169695Skan
542169695Skan      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
543169695Skan	 replacement list of a variadic macro.  */
544169695Skan      if (result == pfile->spec_nodes.n__VA_ARGS__
545169695Skan	  && !pfile->state.va_args_ok)
546169695Skan	cpp_error (pfile, CPP_DL_PEDWARN,
547169695Skan		   "__VA_ARGS__ can only appear in the expansion"
548169695Skan		   " of a C99 variadic macro");
549169695Skan    }
550169695Skan
551169695Skan  return result;
552169695Skan}
553169695Skan
554169695Skan/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
555169695Skanstatic void
556169695Skanlex_number (cpp_reader *pfile, cpp_string *number,
557169695Skan	    struct normalize_state *nst)
558169695Skan{
559169695Skan  const uchar *cur;
560169695Skan  const uchar *base;
561169695Skan  uchar *dest;
562169695Skan
563169695Skan  base = pfile->buffer->cur - 1;
564169695Skan  do
565169695Skan    {
566169695Skan      cur = pfile->buffer->cur;
567169695Skan
568169695Skan      /* N.B. ISIDNUM does not include $.  */
569169695Skan      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
570169695Skan	{
571169695Skan	  cur++;
572169695Skan	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
573169695Skan	}
574169695Skan
575169695Skan      pfile->buffer->cur = cur;
576169695Skan    }
577169695Skan  while (forms_identifier_p (pfile, false, nst));
578169695Skan
579169695Skan  number->len = cur - base;
580169695Skan  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
581169695Skan  memcpy (dest, base, number->len);
582169695Skan  dest[number->len] = '\0';
583169695Skan  number->text = dest;
584169695Skan}
585169695Skan
586169695Skan/* Create a token of type TYPE with a literal spelling.  */
587169695Skanstatic void
588169695Skancreate_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
589169695Skan		unsigned int len, enum cpp_ttype type)
590169695Skan{
591169695Skan  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
592169695Skan
593169695Skan  memcpy (dest, base, len);
594169695Skan  dest[len] = '\0';
595169695Skan  token->type = type;
596169695Skan  token->val.str.len = len;
597169695Skan  token->val.str.text = dest;
598169695Skan}
599169695Skan
600169695Skan/* Lexes a string, character constant, or angle-bracketed header file
601169695Skan   name.  The stored string contains the spelling, including opening
602169695Skan   quote and leading any leading 'L'.  It returns the type of the
603169695Skan   literal, or CPP_OTHER if it was not properly terminated.
604169695Skan
605169695Skan   The spelling is NUL-terminated, but it is not guaranteed that this
606169695Skan   is the first NUL since embedded NULs are preserved.  */
607169695Skanstatic void
608169695Skanlex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
609169695Skan{
610169695Skan  bool saw_NUL = false;
611169695Skan  const uchar *cur;
612169695Skan  cppchar_t terminator;
613169695Skan  enum cpp_ttype type;
614169695Skan
615169695Skan  cur = base;
616169695Skan  terminator = *cur++;
617169695Skan  if (terminator == 'L')
618169695Skan    terminator = *cur++;
619169695Skan  if (terminator == '\"')
620169695Skan    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
621169695Skan  else if (terminator == '\'')
622169695Skan    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
623169695Skan  else
624169695Skan    terminator = '>', type = CPP_HEADER_NAME;
625169695Skan
626169695Skan  for (;;)
627169695Skan    {
628169695Skan      cppchar_t c = *cur++;
629169695Skan
630169695Skan      /* In #include-style directives, terminators are not escapable.  */
631169695Skan      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
632169695Skan	cur++;
633169695Skan      else if (c == terminator)
634169695Skan	break;
635169695Skan      else if (c == '\n')
636169695Skan	{
637169695Skan	  cur--;
638169695Skan	  type = CPP_OTHER;
639169695Skan	  break;
640169695Skan	}
641169695Skan      else if (c == '\0')
642169695Skan	saw_NUL = true;
643169695Skan    }
644169695Skan
645169695Skan  if (saw_NUL && !pfile->state.skipping)
646169695Skan    cpp_error (pfile, CPP_DL_WARNING,
647169695Skan	       "null character(s) preserved in literal");
648169695Skan
649169695Skan  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
650169695Skan    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
651169695Skan	       (int) terminator);
652169695Skan
653169695Skan  pfile->buffer->cur = cur;
654169695Skan  create_literal (pfile, token, base, cur - base, type);
655169695Skan}
656169695Skan
657169695Skan/* The stored comment includes the comment start and any terminator.  */
658169695Skanstatic void
659169695Skansave_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
660169695Skan	      cppchar_t type)
661169695Skan{
662169695Skan  unsigned char *buffer;
663169695Skan  unsigned int len, clen;
664169695Skan
665169695Skan  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
666169695Skan
667169695Skan  /* C++ comments probably (not definitely) have moved past a new
668169695Skan     line, which we don't want to save in the comment.  */
669169695Skan  if (is_vspace (pfile->buffer->cur[-1]))
670169695Skan    len--;
671169695Skan
672169695Skan  /* If we are currently in a directive, then we need to store all
673169695Skan     C++ comments as C comments internally, and so we need to
674169695Skan     allocate a little extra space in that case.
675169695Skan
676169695Skan     Note that the only time we encounter a directive here is
677169695Skan     when we are saving comments in a "#define".  */
678169695Skan  clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
679169695Skan
680169695Skan  buffer = _cpp_unaligned_alloc (pfile, clen);
681169695Skan
682169695Skan  token->type = CPP_COMMENT;
683169695Skan  token->val.str.len = clen;
684169695Skan  token->val.str.text = buffer;
685169695Skan
686169695Skan  buffer[0] = '/';
687169695Skan  memcpy (buffer + 1, from, len - 1);
688169695Skan
689169695Skan  /* Finish conversion to a C comment, if necessary.  */
690169695Skan  if (pfile->state.in_directive && type == '/')
691169695Skan    {
692169695Skan      buffer[1] = '*';
693169695Skan      buffer[clen - 2] = '*';
694169695Skan      buffer[clen - 1] = '/';
695169695Skan    }
696169695Skan}
697169695Skan
698169695Skan/* Allocate COUNT tokens for RUN.  */
699169695Skanvoid
700169695Skan_cpp_init_tokenrun (tokenrun *run, unsigned int count)
701169695Skan{
702169695Skan  run->base = XNEWVEC (cpp_token, count);
703169695Skan  run->limit = run->base + count;
704169695Skan  run->next = NULL;
705169695Skan}
706169695Skan
707169695Skan/* Returns the next tokenrun, or creates one if there is none.  */
708169695Skanstatic tokenrun *
709169695Skannext_tokenrun (tokenrun *run)
710169695Skan{
711169695Skan  if (run->next == NULL)
712169695Skan    {
713169695Skan      run->next = XNEW (tokenrun);
714169695Skan      run->next->prev = run;
715169695Skan      _cpp_init_tokenrun (run->next, 250);
716169695Skan    }
717169695Skan
718169695Skan  return run->next;
719169695Skan}
720169695Skan
721169695Skan/* Allocate a single token that is invalidated at the same time as the
722169695Skan   rest of the tokens on the line.  Has its line and col set to the
723169695Skan   same as the last lexed token, so that diagnostics appear in the
724169695Skan   right place.  */
725169695Skancpp_token *
726169695Skan_cpp_temp_token (cpp_reader *pfile)
727169695Skan{
728169695Skan  cpp_token *old, *result;
729169695Skan
730169695Skan  old = pfile->cur_token - 1;
731169695Skan  if (pfile->cur_token == pfile->cur_run->limit)
732169695Skan    {
733169695Skan      pfile->cur_run = next_tokenrun (pfile->cur_run);
734169695Skan      pfile->cur_token = pfile->cur_run->base;
735169695Skan    }
736169695Skan
737169695Skan  result = pfile->cur_token++;
738169695Skan  result->src_loc = old->src_loc;
739169695Skan  return result;
740169695Skan}
741169695Skan
742169695Skan/* Lex a token into RESULT (external interface).  Takes care of issues
743169695Skan   like directive handling, token lookahead, multiple include
744169695Skan   optimization and skipping.  */
745169695Skanconst cpp_token *
746169695Skan_cpp_lex_token (cpp_reader *pfile)
747169695Skan{
748169695Skan  cpp_token *result;
749169695Skan
750169695Skan  for (;;)
751169695Skan    {
752169695Skan      if (pfile->cur_token == pfile->cur_run->limit)
753169695Skan	{
754169695Skan	  pfile->cur_run = next_tokenrun (pfile->cur_run);
755169695Skan	  pfile->cur_token = pfile->cur_run->base;
756169695Skan	}
757169695Skan
758169695Skan      if (pfile->lookaheads)
759169695Skan	{
760169695Skan	  pfile->lookaheads--;
761169695Skan	  result = pfile->cur_token++;
762169695Skan	}
763169695Skan      else
764169695Skan	result = _cpp_lex_direct (pfile);
765169695Skan
766169695Skan      if (result->flags & BOL)
767169695Skan	{
768169695Skan	  /* Is this a directive.  If _cpp_handle_directive returns
769169695Skan	     false, it is an assembler #.  */
770169695Skan	  if (result->type == CPP_HASH
771169695Skan	      /* 6.10.3 p 11: Directives in a list of macro arguments
772169695Skan		 gives undefined behavior.  This implementation
773169695Skan		 handles the directive as normal.  */
774169695Skan	      && pfile->state.parsing_args != 1)
775169695Skan	    {
776169695Skan	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
777169695Skan		{
778169695Skan		  if (pfile->directive_result.type == CPP_PADDING)
779169695Skan		    continue;
780169695Skan		  result = &pfile->directive_result;
781169695Skan		}
782169695Skan	    }
783169695Skan	  else if (pfile->state.in_deferred_pragma)
784169695Skan	    result = &pfile->directive_result;
785169695Skan
786169695Skan	  if (pfile->cb.line_change && !pfile->state.skipping)
787169695Skan	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
788169695Skan	}
789169695Skan
790169695Skan      /* We don't skip tokens in directives.  */
791169695Skan      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
792169695Skan	break;
793169695Skan
794169695Skan      /* Outside a directive, invalidate controlling macros.  At file
795169695Skan	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
796169695Skan	 get here and MI optimization works.  */
797169695Skan      pfile->mi_valid = false;
798169695Skan
799169695Skan      if (!pfile->state.skipping || result->type == CPP_EOF)
800169695Skan	break;
801169695Skan    }
802169695Skan
803169695Skan  return result;
804169695Skan}
805169695Skan
806169695Skan/* Returns true if a fresh line has been loaded.  */
807169695Skanbool
808169695Skan_cpp_get_fresh_line (cpp_reader *pfile)
809169695Skan{
810169695Skan  int return_at_eof;
811169695Skan
812169695Skan  /* We can't get a new line until we leave the current directive.  */
813169695Skan  if (pfile->state.in_directive)
814169695Skan    return false;
815169695Skan
816169695Skan  for (;;)
817169695Skan    {
818169695Skan      cpp_buffer *buffer = pfile->buffer;
819169695Skan
820169695Skan      if (!buffer->need_line)
821169695Skan	return true;
822169695Skan
823169695Skan      if (buffer->next_line < buffer->rlimit)
824169695Skan	{
825169695Skan	  _cpp_clean_line (pfile);
826169695Skan	  return true;
827169695Skan	}
828169695Skan
829169695Skan      /* First, get out of parsing arguments state.  */
830169695Skan      if (pfile->state.parsing_args)
831169695Skan	return false;
832169695Skan
833169695Skan      /* End of buffer.  Non-empty files should end in a newline.  */
834169695Skan      if (buffer->buf != buffer->rlimit
835169695Skan	  && buffer->next_line > buffer->rlimit
836169695Skan	  && !buffer->from_stage3)
837169695Skan	{
838169695Skan	  /* Only warn once.  */
839169695Skan	  buffer->next_line = buffer->rlimit;
840169695Skan	  cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
841169695Skan			       CPP_BUF_COLUMN (buffer, buffer->cur),
842169695Skan			       "no newline at end of file");
843169695Skan	}
844169695Skan
845169695Skan      return_at_eof = buffer->return_at_eof;
846169695Skan      _cpp_pop_buffer (pfile);
847169695Skan      if (pfile->buffer == NULL || return_at_eof)
848169695Skan	return false;
849169695Skan    }
850169695Skan}
851169695Skan
/* Helper for two-character operators.  Relies on `buffer' and
   `result' being in scope at the point of use.  If the next input
   character is CHAR, consume it and give the token type THEN_TYPE;
   otherwise leave the input alone and give it type ELSE_TYPE.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      if (*buffer->cur == CHAR)				\
	{						\
	  buffer->cur++;				\
	  result->type = THEN_TYPE;			\
	}						\
      else						\
	result->type = ELSE_TYPE;			\
    }							\
  while (0)
860169695Skan
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns a pointer to the lexed token; its src_loc, flags and type
   (plus val where applicable) are filled in here or by the helpers
   called from here.  */
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  cpp_token *result = pfile->cur_token++;

  /* Entered initially and again after every newline: reset the token
     flags and, if the buffer has no current line, obtain one.  */
 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      /* The line holding a deferred pragma has ended: report that
	 with a CPP_PRAGMA_EOL token and leave deferred-pragma mode.  */
      if (pfile->state.in_deferred_pragma)
	{
	  result->type = CPP_PRAGMA_EOL;
	  pfile->state.in_deferred_pragma = false;
	  if (!pfile->state.pragma_allow_expansion)
	    pfile->state.prevent_expansion--;
	  return result;
	}
      /* No further line is available (see _cpp_get_fresh_line for the
	 reasons): deliver CPP_EOF.  */
      if (!_cpp_get_fresh_line (pfile))
	{
	  result->type = CPP_EOF;
	  if (!pfile->state.in_directive)
	    {
	      /* Tell the compiler the line number of the EOF token.  */
	      result->src_loc = pfile->line_table->highest_line;
	      result->flags = BOL;
	    }
	  return result;
	}
      /* Unless tokens have to be kept, restart at the beginning of
	 the base token run for the new line.  */
      if (!pfile->keep_tokens)
	{
	  pfile->cur_run = &pfile->base_run;
	  result = pfile->base_run.base;
	  pfile->cur_token = result + 1;
	}
      /* This is the first token of a line.  */
      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
	result->flags |= PREV_WHITE;
    }
  /* _cpp_get_fresh_line may have popped buffers, so reload.  */
  buffer = pfile->buffer;
  /* Re-entered after a skipped comment to refresh the token's line.  */
 update_tokens_line:
  result->src_loc = pfile->line_table->highest_line;

  /* Re-entered after horizontal whitespace has been skipped.  */
 skipped_white:
  /* Process pending line notes once the read position has reached the
     next note, unless a buffer is overlaid.  */
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      _cpp_process_line_notes (pfile, false);
      result->src_loc = pfile->line_table->highest_line;
    }
  c = *buffer->cur++;

  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
			       CPP_BUF_COLUMN (buffer, buffer->cur));

  /* Dispatch on the first character of the token.  buffer->cur now
     points just past it.  */
  switch (c)
    {
      /* Horizontal whitespace; a NUL character is treated the same
	 way.  Note it on the token and try again.  */
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

      /* End of line: count the line unless this newline is at the
	 very end of the buffer, then start over on a fresh line.  */
    case '\n':
      if (buffer->cur < buffer->rlimit)
	CPP_INCREMENT_LINE (pfile, 0);
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->type = CPP_NUMBER;
	lex_number (pfile, &result->val.str, &nst);
	warn_about_normalization (pfile, result, &nst);
	break;
      }

    case 'L':
      /* 'L' may introduce wide characters or strings.  */
      if (*buffer->cur == '\'' || *buffer->cur == '"')
	{
	  lex_string (pfile, result, buffer->cur - 1);
	  break;
	}
      /* Fall through.  */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
					   &nst);
	warn_about_normalization (pfile, result, &nst);
      }

      /* Convert named operators to their proper types.  The token
	 keeps its identifier node in val.node and is marked NAMED_OP.  */
      if (result->val.node->flags & NODE_OPERATOR)
	{
	  result->flags |= NAMED_OP;
	  result->type = (enum cpp_ttype) result->val.node->directive_index;
	}
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
	{
	  if (_cpp_skip_block_comment (pfile))
	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
	}
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
			    || cpp_in_system_header (pfile)))
	{
	  /* Warn about comments only if pedantically GNUC89, and not
	     in system headers.  */
	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
	      && ! buffer->warned_cplusplus_comments)
	    {
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "C++ style comments are not allowed in ISO C90");
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "(this will be reported only once per input file)");
	      buffer->warned_cplusplus_comments = 1;
	    }

	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
	    cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
	}
      else if (c == '=')
	{
	  buffer->cur++;
	  result->type = CPP_DIV_EQ;
	  break;
	}
      else
	{
	  result->type = CPP_DIV;
	  break;
	}

      /* Only the two comment branches above reach this point.  When
	 comments are not saved, a comment counts as whitespace before
	 the next token.  */
      if (!pfile->state.save_comments)
	{
	  result->flags |= PREV_WHITE;
	  goto update_tokens_line;
	}

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      /* With angled_headers set, '<' is handed to lex_string instead
	 of being lexed as an operator.  */
      if (pfile->state.angled_headers)
	{
	  lex_string (pfile, result, buffer->cur - 1);
	  break;
	}

      result->type = CPP_LESS;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_LESS_EQ;
      else if (*buffer->cur == '<')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
	}
      else if (CPP_OPTION (pfile, digraphs))
	{
	  /* Digraphs "<:" and "<%".  */
	  if (*buffer->cur == ':')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_SQUARE;
	    }
	  else if (*buffer->cur == '%')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_BRACE;
	    }
	}
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_GREATER_EQ;
      else if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
	}
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs))
	{
	  /* Digraphs "%:", "%:%:" and "%>".  */
	  if (*buffer->cur == ':')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_HASH;
	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
		buffer->cur += 2, result->type = CPP_PASTE;
	    }
	  else if (*buffer->cur == '>')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_CLOSE_BRACE;
	    }
	}
      break;

    case '.':
      result->type = CPP_DOT;
      /* A dot followed by a digit starts a number.  */
      if (ISDIGIT (*buffer->cur))
	{
	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	  result->type = CPP_NUMBER;
	  lex_number (pfile, &result->val.str, &nst);
	  warn_about_normalization (pfile, result, &nst);
	}
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
	buffer->cur += 2, result->type = CPP_ELLIPSIS;
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_DOT_STAR;
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
	buffer->cur++, result->type = CPP_PLUS_PLUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_PLUS_EQ;
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  result->type = CPP_DEREF;
	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	    buffer->cur++, result->type = CPP_DEREF_STAR;
	}
      else if (*buffer->cur == '-')
	buffer->cur++, result->type = CPP_MINUS_MINUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MINUS_EQ;
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
	buffer->cur++, result->type = CPP_AND_AND;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_AND_EQ;
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
	buffer->cur++, result->type = CPP_OR_OR;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_OR_EQ;
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_SCOPE;
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
	{
	  /* Digraph ":>".  */
	  buffer->cur++;
	  result->flags |= DIGRAPH;
	  result->type = CPP_CLOSE_SQUARE;
	}
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;

    case '?': result->type = CPP_QUERY; break;
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
    case '\\':
      {
	/* Step back onto the character so forms_identifier_p can
	   decide whether it starts an identifier.  */
	const uchar *base = --buffer->cur;
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;

	if (forms_identifier_p (pfile, true, &nst))
	  {
	    result->type = CPP_NAME;
	    result->val.node = lex_identifier (pfile, base, true, &nst);
	    warn_about_normalization (pfile, result, &nst);
	    break;
	  }
	/* Not an identifier: step past the character again.  */
	buffer->cur++;
      }
      /* Fall through: the character becomes a CPP_OTHER token.  */

    default:
      /* Any other character is a one-character CPP_OTHER literal.  */
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  return result;
}
1219169695Skan
1220169695Skan/* An upper bound on the number of bytes needed to spell TOKEN.
1221169695Skan   Does not include preceding whitespace.  */
1222169695Skanunsigned int
1223169695Skancpp_token_len (const cpp_token *token)
1224169695Skan{
1225169695Skan  unsigned int len;
1226169695Skan
1227169695Skan  switch (TOKEN_SPELL (token))
1228169695Skan    {
1229169695Skan    default:		len = 4;				break;
1230169695Skan    case SPELL_LITERAL:	len = token->val.str.len;		break;
1231169695Skan    case SPELL_IDENT:	len = NODE_LEN (token->val.node) * 10;	break;
1232169695Skan    }
1233169695Skan
1234169695Skan  return len;
1235169695Skan}
1236169695Skan
/* Decode the UTF-8 sequence starting at NAME and store the matching
   universal character name, in the form \UXXXXXXXX with lower-case
   hexadecimal digits, into BUFFER.  Exactly 10 bytes are written and
   no terminating NUL is added.  Returns the number of bytes of NAME
   that were consumed, as given by the number of leading 1 bits in
   the first byte.  Calls abort () if a continuation byte is not of
   the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int k;

  /* The number of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  for (lead = *name; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* The remaining low bits of the first byte are payload.  */
  code_point = *name & (0x7F >> seq_len);

  /* Every continuation byte contributes six more bits.  */
  for (k = 1; k < seq_len; k++)
    {
      name++;

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
	abort ();

      code_point = (code_point << 6) | (*name & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  /* Eight hex digits, most significant nibble first.  */
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (4 * (7 - k))) & 0xF];

  return seq_len;
}
1270169695Skan
1271169695Skan
1272169695Skan/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1273169695Skan   already contain the enough space to hold the token's spelling.
1274169695Skan   Returns a pointer to the character after the last character written.
1275169695Skan   FORSTRING is true if this is to be the spelling after translation
1276169695Skan   phase 1 (this is different for UCNs).
1277169695Skan   FIXME: Would be nice if we didn't need the PFILE argument.  */
1278169695Skanunsigned char *
1279169695Skancpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1280169695Skan		 unsigned char *buffer, bool forstring)
1281169695Skan{
1282169695Skan  switch (TOKEN_SPELL (token))
1283169695Skan    {
1284169695Skan    case SPELL_OPERATOR:
1285169695Skan      {
1286169695Skan	const unsigned char *spelling;
1287169695Skan	unsigned char c;
1288169695Skan
1289169695Skan	if (token->flags & DIGRAPH)
1290169695Skan	  spelling
1291169695Skan	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1292169695Skan	else if (token->flags & NAMED_OP)
1293169695Skan	  goto spell_ident;
1294169695Skan	else
1295169695Skan	  spelling = TOKEN_NAME (token);
1296169695Skan
1297169695Skan	while ((c = *spelling++) != '\0')
1298169695Skan	  *buffer++ = c;
1299169695Skan      }
1300169695Skan      break;
1301169695Skan
1302169695Skan    spell_ident:
1303169695Skan    case SPELL_IDENT:
1304169695Skan      if (forstring)
1305169695Skan	{
1306169695Skan	  memcpy (buffer, NODE_NAME (token->val.node),
1307169695Skan		  NODE_LEN (token->val.node));
1308169695Skan	  buffer += NODE_LEN (token->val.node);
1309169695Skan	}
1310169695Skan      else
1311169695Skan	{
1312169695Skan	  size_t i;
1313169695Skan	  const unsigned char * name = NODE_NAME (token->val.node);
1314169695Skan
1315169695Skan	  for (i = 0; i < NODE_LEN (token->val.node); i++)
1316169695Skan	    if (name[i] & ~0x7F)
1317169695Skan	      {
1318169695Skan		i += utf8_to_ucn (buffer, name + i) - 1;
1319169695Skan		buffer += 10;
1320169695Skan	      }
1321169695Skan	    else
1322169695Skan	      *buffer++ = NODE_NAME (token->val.node)[i];
1323169695Skan	}
1324169695Skan      break;
1325169695Skan
1326169695Skan    case SPELL_LITERAL:
1327169695Skan      memcpy (buffer, token->val.str.text, token->val.str.len);
1328169695Skan      buffer += token->val.str.len;
1329169695Skan      break;
1330169695Skan
1331169695Skan    case SPELL_NONE:
1332169695Skan      cpp_error (pfile, CPP_DL_ICE,
1333169695Skan		 "unspellable token %s", TOKEN_NAME (token));
1334169695Skan      break;
1335169695Skan    }
1336169695Skan
1337169695Skan  return buffer;
1338169695Skan}
1339169695Skan
1340169695Skan/* Returns TOKEN spelt as a null-terminated string.  The string is
1341169695Skan   freed when the reader is destroyed.  Useful for diagnostics.  */
1342169695Skanunsigned char *
1343169695Skancpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1344169695Skan{
1345169695Skan  unsigned int len = cpp_token_len (token) + 1;
1346169695Skan  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1347169695Skan
1348169695Skan  end = cpp_spell_token (pfile, token, start, false);
1349169695Skan  end[0] = '\0';
1350169695Skan
1351169695Skan  return start;
1352169695Skan}
1353169695Skan
1354169695Skan/* Used by C front ends, which really should move to using
1355169695Skan   cpp_token_as_text.  */
1356169695Skanconst char *
1357169695Skancpp_type2name (enum cpp_ttype type)
1358169695Skan{
1359169695Skan  return (const char *) token_spellings[type].name;
1360169695Skan}
1361169695Skan
1362169695Skan/* Writes the spelling of token to FP, without any preceding space.
1363169695Skan   Separated from cpp_spell_token for efficiency - to avoid stdio
1364169695Skan   double-buffering.  */
1365169695Skanvoid
1366169695Skancpp_output_token (const cpp_token *token, FILE *fp)
1367169695Skan{
1368169695Skan  switch (TOKEN_SPELL (token))
1369169695Skan    {
1370169695Skan    case SPELL_OPERATOR:
1371169695Skan      {
1372169695Skan	const unsigned char *spelling;
1373169695Skan	int c;
1374169695Skan
1375169695Skan	if (token->flags & DIGRAPH)
1376169695Skan	  spelling
1377169695Skan	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1378169695Skan	else if (token->flags & NAMED_OP)
1379169695Skan	  goto spell_ident;
1380169695Skan	else
1381169695Skan	  spelling = TOKEN_NAME (token);
1382169695Skan
1383169695Skan	c = *spelling;
1384169695Skan	do
1385169695Skan	  putc (c, fp);
1386169695Skan	while ((c = *++spelling) != '\0');
1387169695Skan      }
1388169695Skan      break;
1389169695Skan
1390169695Skan    spell_ident:
1391169695Skan    case SPELL_IDENT:
1392169695Skan      {
1393169695Skan	size_t i;
1394169695Skan	const unsigned char * name = NODE_NAME (token->val.node);
1395169695Skan
1396169695Skan	for (i = 0; i < NODE_LEN (token->val.node); i++)
1397169695Skan	  if (name[i] & ~0x7F)
1398169695Skan	    {
1399169695Skan	      unsigned char buffer[10];
1400169695Skan	      i += utf8_to_ucn (buffer, name + i) - 1;
1401169695Skan	      fwrite (buffer, 1, 10, fp);
1402169695Skan	    }
1403169695Skan	  else
1404169695Skan	    fputc (NODE_NAME (token->val.node)[i], fp);
1405169695Skan      }
1406169695Skan      break;
1407169695Skan
1408169695Skan    case SPELL_LITERAL:
1409169695Skan      fwrite (token->val.str.text, 1, token->val.str.len, fp);
1410169695Skan      break;
1411169695Skan
1412169695Skan    case SPELL_NONE:
1413169695Skan      /* An error, most probably.  */
1414169695Skan      break;
1415169695Skan    }
1416169695Skan}
1417169695Skan
1418169695Skan/* Compare two tokens.  */
1419169695Skanint
1420169695Skan_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1421169695Skan{
1422169695Skan  if (a->type == b->type && a->flags == b->flags)
1423169695Skan    switch (TOKEN_SPELL (a))
1424169695Skan      {
1425169695Skan      default:			/* Keep compiler happy.  */
1426169695Skan      case SPELL_OPERATOR:
1427169695Skan	return 1;
1428169695Skan      case SPELL_NONE:
1429169695Skan	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1430169695Skan      case SPELL_IDENT:
1431169695Skan	return a->val.node == b->val.node;
1432169695Skan      case SPELL_LITERAL:
1433169695Skan	return (a->val.str.len == b->val.str.len
1434169695Skan		&& !memcmp (a->val.str.text, b->val.str.text,
1435169695Skan			    a->val.str.len));
1436169695Skan      }
1437169695Skan
1438169695Skan  return 0;
1439169695Skan}
1440169695Skan
1441169695Skan/* Returns nonzero if a space should be inserted to avoid an
1442169695Skan   accidental token paste for output.  For simplicity, it is
1443169695Skan   conservative, and occasionally advises a space where one is not
1444169695Skan   needed, e.g. "." and ".2".  */
1445169695Skanint
1446169695Skancpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1447169695Skan		 const cpp_token *token2)
1448169695Skan{
1449169695Skan  enum cpp_ttype a = token1->type, b = token2->type;
1450169695Skan  cppchar_t c;
1451169695Skan
1452169695Skan  if (token1->flags & NAMED_OP)
1453169695Skan    a = CPP_NAME;
1454169695Skan  if (token2->flags & NAMED_OP)
1455169695Skan    b = CPP_NAME;
1456169695Skan
1457169695Skan  c = EOF;
1458169695Skan  if (token2->flags & DIGRAPH)
1459169695Skan    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1460169695Skan  else if (token_spellings[b].category == SPELL_OPERATOR)
1461169695Skan    c = token_spellings[b].name[0];
1462169695Skan
1463169695Skan  /* Quickly get everything that can paste with an '='.  */
1464169695Skan  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1465169695Skan    return 1;
1466169695Skan
1467169695Skan  switch (a)
1468169695Skan    {
1469169695Skan    case CPP_GREATER:	return c == '>';
1470169695Skan    case CPP_LESS:	return c == '<' || c == '%' || c == ':';
1471169695Skan    case CPP_PLUS:	return c == '+';
1472169695Skan    case CPP_MINUS:	return c == '-' || c == '>';
1473169695Skan    case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1474169695Skan    case CPP_MOD:	return c == ':' || c == '>';
1475169695Skan    case CPP_AND:	return c == '&';
1476169695Skan    case CPP_OR:	return c == '|';
1477169695Skan    case CPP_COLON:	return c == ':' || c == '>';
1478169695Skan    case CPP_DEREF:	return c == '*';
1479169695Skan    case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1480169695Skan    case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1481169695Skan    case CPP_NAME:	return ((b == CPP_NUMBER
1482169695Skan				 && name_p (pfile, &token2->val.str))
1483169695Skan				|| b == CPP_NAME
1484169695Skan				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1485169695Skan    case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1486169695Skan				|| c == '.' || c == '+' || c == '-');
1487169695Skan				      /* UCNs */
1488169695Skan    case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
1489169695Skan				 && b == CPP_NAME)
1490169695Skan				|| (CPP_OPTION (pfile, objc)
1491169695Skan				    && token1->val.str.text[0] == '@'
1492169695Skan				    && (b == CPP_NAME || b == CPP_STRING)));
1493169695Skan    default:		break;
1494169695Skan    }
1495169695Skan
1496169695Skan  return 0;
1497169695Skan}
1498169695Skan
1499169695Skan/* Output all the remaining tokens on the current line, and a newline
1500169695Skan   character, to FP.  Leading whitespace is removed.  If there are
1501169695Skan   macros, special token padding is not performed.  */
1502169695Skanvoid
1503169695Skancpp_output_line (cpp_reader *pfile, FILE *fp)
1504169695Skan{
1505169695Skan  const cpp_token *token;
1506169695Skan
1507169695Skan  token = cpp_get_token (pfile);
1508169695Skan  while (token->type != CPP_EOF)
1509169695Skan    {
1510169695Skan      cpp_output_token (token, fp);
1511169695Skan      token = cpp_get_token (pfile);
1512169695Skan      if (token->flags & PREV_WHITE)
1513169695Skan	putc (' ', fp);
1514169695Skan    }
1515169695Skan
1516169695Skan  putc ('\n', fp);
1517169695Skan}
1518169695Skan
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff allocates.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff reuses for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.
   Every macro argument is parenthesized so that any expression can
   be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1533169695Skan
1534169695Skan/* Create a new allocation buffer.  Place the control block at the end
1535169695Skan   of the buffer, so that buffer overflows will cause immediate chaos.  */
1536169695Skanstatic _cpp_buff *
1537169695Skannew_buff (size_t len)
1538169695Skan{
1539169695Skan  _cpp_buff *result;
1540169695Skan  unsigned char *base;
1541169695Skan
1542169695Skan  if (len < MIN_BUFF_SIZE)
1543169695Skan    len = MIN_BUFF_SIZE;
1544169695Skan  len = CPP_ALIGN (len);
1545169695Skan
1546169695Skan  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1547169695Skan  result = (_cpp_buff *) (base + len);
1548169695Skan  result->base = base;
1549169695Skan  result->cur = base;
1550169695Skan  result->limit = base + len;
1551169695Skan  result->next = NULL;
1552169695Skan  return result;
1553169695Skan}
1554169695Skan
1555169695Skan/* Place a chain of unwanted allocation buffers on the free list.  */
1556169695Skanvoid
1557169695Skan_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1558169695Skan{
1559169695Skan  _cpp_buff *end = buff;
1560169695Skan
1561169695Skan  while (end->next)
1562169695Skan    end = end->next;
1563169695Skan  end->next = pfile->free_buffs;
1564169695Skan  pfile->free_buffs = buff;
1565169695Skan}
1566169695Skan
1567169695Skan/* Return a free buffer of size at least MIN_SIZE.  */
1568169695Skan_cpp_buff *
1569169695Skan_cpp_get_buff (cpp_reader *pfile, size_t min_size)
1570169695Skan{
1571169695Skan  _cpp_buff *result, **p;
1572169695Skan
1573169695Skan  for (p = &pfile->free_buffs;; p = &(*p)->next)
1574169695Skan    {
1575169695Skan      size_t size;
1576169695Skan
1577169695Skan      if (*p == NULL)
1578169695Skan	return new_buff (min_size);
1579169695Skan      result = *p;
1580169695Skan      size = result->limit - result->base;
1581169695Skan      /* Return a buffer that's big enough, but don't waste one that's
1582169695Skan         way too big.  */
1583169695Skan      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1584169695Skan	break;
1585169695Skan    }
1586169695Skan
1587169695Skan  *p = result->next;
1588169695Skan  result->next = NULL;
1589169695Skan  result->cur = result->base;
1590169695Skan  return result;
1591169695Skan}
1592169695Skan
1593169695Skan/* Creates a new buffer with enough space to hold the uncommitted
1594169695Skan   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1595169695Skan   the excess bytes to the new buffer.  Chains the new buffer after
1596169695Skan   BUFF, and returns the new buffer.  */
1597169695Skan_cpp_buff *
1598169695Skan_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1599169695Skan{
1600169695Skan  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1601169695Skan  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1602169695Skan
1603169695Skan  buff->next = new_buff;
1604169695Skan  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1605169695Skan  return new_buff;
1606169695Skan}
1607169695Skan
1608169695Skan/* Creates a new buffer with enough space to hold the uncommitted
1609169695Skan   remaining bytes of the buffer pointed to by BUFF, and at least
1610169695Skan   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1611169695Skan   Chains the new buffer before the buffer pointed to by BUFF, and
1612169695Skan   updates the pointer to point to the new buffer.  */
1613169695Skanvoid
1614169695Skan_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1615169695Skan{
1616169695Skan  _cpp_buff *new_buff, *old_buff = *pbuff;
1617169695Skan  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1618169695Skan
1619169695Skan  new_buff = _cpp_get_buff (pfile, size);
1620169695Skan  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1621169695Skan  new_buff->next = old_buff;
1622169695Skan  *pbuff = new_buff;
1623169695Skan}
1624169695Skan
1625169695Skan/* Free a chain of buffers starting at BUFF.  */
1626169695Skanvoid
1627169695Skan_cpp_free_buff (_cpp_buff *buff)
1628169695Skan{
1629169695Skan  _cpp_buff *next;
1630169695Skan
1631169695Skan  for (; buff; buff = next)
1632169695Skan    {
1633169695Skan      next = buff->next;
1634169695Skan      free (buff->base);
1635169695Skan    }
1636169695Skan}
1637169695Skan
1638169695Skan/* Allocate permanent, unaligned storage of length LEN.  */
1639169695Skanunsigned char *
1640169695Skan_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1641169695Skan{
1642169695Skan  _cpp_buff *buff = pfile->u_buff;
1643169695Skan  unsigned char *result = buff->cur;
1644169695Skan
1645169695Skan  if (len > (size_t) (buff->limit - result))
1646169695Skan    {
1647169695Skan      buff = _cpp_get_buff (pfile, len);
1648169695Skan      buff->next = pfile->u_buff;
1649169695Skan      pfile->u_buff = buff;
1650169695Skan      result = buff->cur;
1651169695Skan    }
1652169695Skan
1653169695Skan  buff->cur = result + len;
1654169695Skan  return result;
1655169695Skan}
1656169695Skan
1657169695Skan/* Allocate permanent, unaligned storage of length LEN from a_buff.
1658169695Skan   That buffer is used for growing allocations when saving macro
1659169695Skan   replacement lists in a #define, and when parsing an answer to an
1660169695Skan   assertion in #assert, #unassert or #if (and therefore possibly
1661169695Skan   whilst expanding macros).  It therefore must not be used by any
1662169695Skan   code that they might call: specifically the lexer and the guts of
1663169695Skan   the macro expander.
1664169695Skan
1665169695Skan   All existing other uses clearly fit this restriction: storing
1666169695Skan   registered pragmas during initialization.  */
1667169695Skanunsigned char *
1668169695Skan_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1669169695Skan{
1670169695Skan  _cpp_buff *buff = pfile->a_buff;
1671169695Skan  unsigned char *result = buff->cur;
1672169695Skan
1673169695Skan  if (len > (size_t) (buff->limit - result))
1674169695Skan    {
1675169695Skan      buff = _cpp_get_buff (pfile, len);
1676169695Skan      buff->next = pfile->a_buff;
1677169695Skan      pfile->a_buff = buff;
1678169695Skan      result = buff->cur;
1679169695Skan    }
1680169695Skan
1681169695Skan  buff->cur = result + len;
1682169695Skan  return result;
1683169695Skan}
1684169695Skan
1685169695Skan/* Say which field of TOK is in use.  */
1686169695Skan
1687169695Skanenum cpp_token_fld_kind
1688169695Skancpp_token_val_index (cpp_token *tok)
1689169695Skan{
1690169695Skan  switch (TOKEN_SPELL (tok))
1691169695Skan    {
1692169695Skan    case SPELL_IDENT:
1693169695Skan      return CPP_TOKEN_FLD_NODE;
1694169695Skan    case SPELL_LITERAL:
1695169695Skan      return CPP_TOKEN_FLD_STR;
1696169695Skan    case SPELL_NONE:
1697169695Skan      if (tok->type == CPP_MACRO_ARG)
1698169695Skan	return CPP_TOKEN_FLD_ARG_NO;
1699169695Skan      else if (tok->type == CPP_PADDING)
1700169695Skan	return CPP_TOKEN_FLD_SOURCE;
1701169695Skan      else if (tok->type == CPP_PRAGMA)
1702169695Skan	return CPP_TOKEN_FLD_PRAGMA;
1703169695Skan      /* else fall through */
1704169695Skan    default:
1705169695Skan      return CPP_TOKEN_FLD_NONE;
1706169695Skan    }
1707169695Skan}
1708