1/* CPP Library - lexical analysis.
2   Copyright (C) 2000-2022 Free Software Foundation, Inc.
3   Contributed by Per Bothner, 1994-95.
4   Based on CCCP program by Paul Rubin, June 1986
5   Adapted to ANSI C, Richard Stallman, Jan 1987
6   Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 3, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "cpplib.h"
25#include "internal.h"
26
/* Spelling categories for token types.  Each entry of the token
   spelling table records one of these in its `category' field.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Category given to table entries built by OP().  */
  SPELL_IDENT    = 1,
  SPELL_LITERAL  = 2,
  SPELL_NONE     = 3
};
34
/* One entry of the token spelling table: the spelling category of a
   token type together with an associated string.  */
struct token_spelling
{
  /* Spelling category of this token type.  */
  enum spell_type category;
  /* For entries built by OP() this is the operator's spelling; for
     entries built by TK() it is the stringified enumerator name.  */
  const unsigned char *name;
};
40
/* Spellings of the six digraph forms.
   NOTE(review): how this table is indexed is not visible in this part
   of the file -- check its users before reordering the entries.  */
static const unsigned char *const digraph_spellings[] =
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
43
/* Build the token spelling table by expanding TTYPE_TABLE (defined
   outside this part of the file) with local definitions of its OP and
   TK entry macros:

     OP(e, s)  yields a SPELL_OPERATOR entry whose name is the string S;
     TK(e, s)  yields an entry of category SPELL_<S> whose name is the
	       stringified enumerator E.

   Both macros are undefined again once the table has been built.  */
#define OP(e, s) { SPELL_OPERATOR, UC s  },
#define TK(e, s) { SPELL_ ## s,    UC #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Fetch the spelling category / the name recorded for TOKEN, using
   TOKEN->type as the index into token_spellings.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
/* Forward declarations of functions that are local to this file.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void store_comment (cpp_reader *, cpp_token *);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
			    unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
66
67
68/* Utility routine:
69
70   Compares, the token TOKEN to the NUL-terminated string STRING.
71   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
72int
73cpp_ideq (const cpp_token *token, const char *string)
74{
75  if (token->type != CPP_NAME)
76    return 0;
77
78  return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
79}
80
81/* Record a note TYPE at byte POS into the current cleaned logical
82   line.  */
83static void
84add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85{
86  if (buffer->notes_used == buffer->notes_cap)
87    {
88      buffer->notes_cap = buffer->notes_cap * 2 + 200;
89      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90                                  buffer->notes_cap);
91    }
92
93  buffer->notes[buffer->notes_used].pos = pos;
94  buffer->notes[buffer->notes_used].type = type;
95  buffer->notes_used++;
96}
97
98
99/* Fast path to find line special characters using optimized character
100   scanning algorithms.  Anything complicated falls back to the slow
101   path below.  Since this loop is very hot it's worth doing these kinds
102   of optimizations.
103
104   One of the paths through the ifdefs should provide
105
106     const uchar *search_line_fast (const uchar *s, const uchar *end);
107
108   Between S and END, search for \n, \r, \\, ?.  Return a pointer to
109   the found character.
110
111   Note that the last character of the buffer is *always* a newline,
112   as forced by _cpp_convert_input.  This fact can be used to avoid
113   explicitly looking for the end of the buffer.  */
114
/* Configure gives us an ifdef test, i.e. the macro may be left
   undefined.  Make sure it always has a value so that it can be used
   in ordinary conditions such as `if (WORDS_BIGENDIAN)' below.  */
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  There's nothing
   in <stdint.h> that gives us that.  For most hosts this is unsigned long,
   but MS decided on an LLP64 model.  Thankfully when building with GCC we
   can get the "real" word size.  */
#ifdef __GNUC__
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below is only expecting sizes 4 or 8.
   Die at compile-time if this expectation is violated: the array
   bound evaluates to 1 when the size is acceptable and to -1 (an
   invalid array size) otherwise.  */
typedef char check_word_type_size
  [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
134
135/* Return X with the first N bytes forced to values that won't match one
136   of the interesting characters.  Note that NUL is not interesting.  */
137
138static inline word_type
139acc_char_mask_misalign (word_type val, unsigned int n)
140{
141  word_type mask = -1;
142  if (WORDS_BIGENDIAN)
143    mask >>= n * 8;
144  else
145    mask <<= n * 8;
146  return val & mask;
147}
148
149/* Return X replicated to all byte positions within WORD_TYPE.  */
150
151static inline word_type
152acc_char_replicate (uchar x)
153{
154  word_type ret;
155
156  ret = (x << 24) | (x << 16) | (x << 8) | x;
157  if (sizeof(word_type) == 8)
158    ret = (ret << 16 << 16) | ret;
159  return ret;
160}
161
162/* Return non-zero if some byte of VAL is (probably) C.  */
163
164static inline word_type
165acc_char_cmp (word_type val, word_type c)
166{
167#if defined(__GNUC__) && defined(__alpha__)
168  /* We can get exact results using a compare-bytes instruction.
169     Get (val == c) via (0 >= (val ^ c)).  */
170  return __builtin_alpha_cmpbge (0, val ^ c);
171#else
172  word_type magic = 0x7efefefeU;
173  if (sizeof(word_type) == 8)
174    magic = (magic << 16 << 16) | 0xfefefefeU;
175  magic |= 1;
176
177  val ^= c;
178  return ((val + magic) ^ ~val) & ~magic;
179#endif
180}
181
182/* Given the result of acc_char_cmp is non-zero, return the index of
183   the found character.  If this was a false positive, return -1.  */
184
185static inline int
186acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187		word_type val ATTRIBUTE_UNUSED)
188{
189#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190  /* The cmpbge instruction sets *bits* of the result corresponding to
191     matches in the bytes with no false positives.  */
192  return __builtin_ctzl (cmp);
193#else
194  unsigned int i;
195
196  /* ??? It would be nice to force unrolling here,
197     and have all of these constants folded.  */
198  for (i = 0; i < sizeof(word_type); ++i)
199    {
200      uchar c;
201      if (WORDS_BIGENDIAN)
202	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203      else
204	c = (val >> i * 8) & 0xff;
205
206      if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207	return i;
208    }
209
210  return -1;
211#endif
212}
213
214/* A version of the fast scanner using bit fiddling techniques.
215
216   For 32-bit words, one would normally perform 16 comparisons and
217   16 branches.  With this algorithm one performs 24 arithmetic
218   operations and one branch.  Whether this is faster with a 32-bit
219   word size is going to be somewhat system dependent.
220
221   For 64-bit words, we eliminate twice the number of comparisons
222   and branches without increasing the number of arithmetic operations.
223   It's almost certainly going to be a win with 64-bit word size.  */
224
225static const uchar * search_line_acc_char (const uchar *, const uchar *)
226  ATTRIBUTE_UNUSED;
227
228static const uchar *
229search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230{
231  const word_type repl_nl = acc_char_replicate ('\n');
232  const word_type repl_cr = acc_char_replicate ('\r');
233  const word_type repl_bs = acc_char_replicate ('\\');
234  const word_type repl_qm = acc_char_replicate ('?');
235
236  unsigned int misalign;
237  const word_type *p;
238  word_type val, t;
239
240  /* Align the buffer.  Mask out any bytes from before the beginning.  */
241  p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242  val = *p;
243  misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244  if (misalign)
245    val = acc_char_mask_misalign (val, misalign);
246
247  /* Main loop.  */
248  while (1)
249    {
250      t  = acc_char_cmp (val, repl_nl);
251      t |= acc_char_cmp (val, repl_cr);
252      t |= acc_char_cmp (val, repl_bs);
253      t |= acc_char_cmp (val, repl_qm);
254
255      if (__builtin_expect (t != 0, 0))
256	{
257	  int i = acc_char_index (t, val);
258	  if (i >= 0)
259	    return (const uchar *)p + i;
260	}
261
262      val = *++p;
263    }
264}
265
266/* Disable on Solaris 2/x86 until the following problem can be properly
267   autoconfed:
268
269   The Solaris 10+ assembler tags objects with the instruction set
270   extensions used, so SSE4.2 executables cannot run on machines that
271   don't support that extension.  */
272
273#if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
274
/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.

   Row 0 holds '\n', row 1 '\r', row 2 '\\' and row 3 '?', each
   repeated 16 times.  The MMX scanner reads the first 8 bytes of a
   row and the SSE2 scanner all 16, hence the 16-byte alignment.  */
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
  { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
  { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
  { '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?' },
};
289
/* A version of the fast scanner using MMX vectorized byte compare insns.

   This uses the PMOVMSKB instruction which was introduced with "MMX2",
   which was packaged into SSE1; it is also present in the AMD MMX
   extension.  Mark the function as using "sse" so that we emit a real
   "emms" instruction, rather than the 3dNOW "femms" instruction.

   Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
   is unused; the loop has no bound of its own.  */

static const uchar *
#ifndef __SSE__
__attribute__((__target__("sse")))
#endif
search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef char v8qi __attribute__ ((__vector_size__ (8)));
  typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));

  /* Only the first 8 bytes of each 16-byte repl_chars row are used.  */
  const v8qi repl_nl = *(const v8qi *)repl_chars[0];
  const v8qi repl_cr = *(const v8qi *)repl_chars[1];
  const v8qi repl_bs = *(const v8qi *)repl_chars[2];
  const v8qi repl_qm = *(const v8qi *)repl_chars[3];

  unsigned int misalign, found, mask;
  const v8qi *p;
  v8qi data, t, c;

  /* Align the source pointer.  While MMX doesn't generate unaligned data
     faults, this allows us to safely scan to the end of the buffer without
     reading beyond the end of the last page.  */
  misalign = (uintptr_t)s & 7;
  p = (const v8qi *)((uintptr_t)s & -8);
  data = *p;

  /* Create a mask for the bytes that are valid within the first
     8-byte block, i.e. clear the bits for the bytes that precede S.
     The Idea here is that the AND with the mask
     within the loop is "free", since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = -1u << misalign;

  /* Main loop processing 8 bytes at a time.  Enter it in the middle so
     that the block loaded above is tested before P is advanced.  */
  goto start;
  do
    {
      data = *++p;
      /* Every byte of the second and later blocks is valid.  */
      mask = -1;

    start:
      /* OR together the byte-wise equality results for all four
	 characters, then collect one bit per byte into FOUND.  */
      t = __builtin_ia32_pcmpeqb(data, repl_nl);
      c = __builtin_ia32_pcmpeqb(data, repl_cr);
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
      c = __builtin_ia32_pcmpeqb(data, repl_bs);
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
      c = __builtin_ia32_pcmpeqb(data, repl_qm);
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
      found = __builtin_ia32_pmovmskb (t);
      found &= mask;
    }
  while (!found);

  /* Emit the emms instruction (see the function comment) now that the
     MMX work is finished.  */
  __builtin_ia32_emms ();

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz(found);
  return (const uchar *)p + found;
}
355
/* A version of the fast scanner using SSE2 vectorized byte compare insns.

   Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
   is unused; the loop has no bound of its own.  */

static const uchar *
#ifndef __SSE2__
__attribute__((__target__("sse2")))
#endif
search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));

  const v16qi repl_nl = *(const v16qi *)repl_chars[0];
  const v16qi repl_cr = *(const v16qi *)repl_chars[1];
  const v16qi repl_bs = *(const v16qi *)repl_chars[2];
  const v16qi repl_qm = *(const v16qi *)repl_chars[3];

  unsigned int misalign, found, mask;
  const v16qi *p;
  v16qi data, t;

  /* Align the source pointer: P is S rounded down to a 16-byte
     boundary and MISALIGN is the number of bytes in between.  */
  misalign = (uintptr_t)s & 15;
  p = (const v16qi *)((uintptr_t)s & -16);
  data = *p;

  /* Create a mask for the bytes that are valid within the first
     16-byte block, i.e. clear the bits for the bytes that precede S.
     The Idea here is that the AND with the mask
     within the loop is "free", since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = -1u << misalign;

  /* Main loop processing 16 bytes at a time.  Enter it in the middle so
     that the block loaded above is tested before P is advanced.  */
  goto start;
  do
    {
      data = *++p;
      /* Every byte of the second and later blocks is valid.  */
      mask = -1;

    start:
      /* Vector comparisons; the per-byte results for the four
	 characters are OR-ed together and then reduced to one bit per
	 byte in FOUND.  */
      t  = data == repl_nl;
      t |= data == repl_cr;
      t |= data == repl_bs;
      t |= data == repl_qm;
      found = __builtin_ia32_pmovmskb128 (t);
      found &= mask;
    }
  while (!found);

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz(found);
  return (const uchar *)p + found;
}
408
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.

   Returns a pointer to the first \n, \r, ? or \\ at or after S.  END
   is only used to decide whether an unaligned 16-byte read at S is
   safe; the main loop has no bound of its own.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  /* The set of characters to look for.  Only the first 4 elements are
     meaningful; every PCMPESTRI use below passes 4 as the length of
     this operand.  */
  static const v16qi search = { '\n', '\r', '?', '\\' };

  uintptr_t si = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if (si & 15)
    {
      v16qi sv;

      /* The 0xfff/0xff0 test below works in terms of 4096-byte
	 pages.  */
      if (__builtin_expect (end - s < 16, 0)
	  && __builtin_expect ((si & 0xfff) > 0xff0, 0))
	{
	  /* There are less than 16 bytes left in the buffer, and less
	     than 16 bytes left on the page.  Reading 16 bytes at this
	     point might generate a spurious page fault.  Defer to the
	     SSE2 implementation, which already handles alignment.  */
	  return search_line_sse2 (s, end);
	}

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
	 memory need not be aligned.  */
      sv = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);

      /* An index below 16 means a match within these 16 bytes.  */
      if (__builtin_expect (index < 16, 0))
	goto found;

      /* Advance the pointer to an aligned address.  We will re-scan a
	 few bytes, but we no longer need care for reading past the
	 end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((si + 15) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
  while (1)
    {
      char f;

      /* By using inline assembly instead of the builtin,
	 we can use the result, as well as the flags set.
	 F receives the carry flag; the loop stops once it is set.  */
      __asm ("%vpcmpestri\t$0, %2, %3"
	     : "=c"(index), "=@ccc"(f)
	     : "m"(*s), "x"(search), "a"(4), "d"(16));
      if (f)
	break;

      s += 16;
    }
#else
  /* The assembly loop adds 16 to S before each comparison, so start
     one block early.  */
  s -= 16;
  /* By doing the whole loop in inline assembly,
     we can make proper use of the flags set.  */
  __asm (      ".balign 16\n"
	"0:	add $16, %1\n"
	"	%vpcmpestri\t$0, (%1), %2\n"
	"	jnc 0b"
	: "=&c"(index), "+r"(s)
	: "x"(search), "a"(4), "d"(16));
#endif

 found:
  /* INDEX is the offset of the match within the 16 bytes at S.  */
  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
489
490/* Check the CPU capabilities.  */
491
492#include "../gcc/config/i386/cpuid.h"
493
494typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
495static search_line_fast_type search_line_fast;
496
497#define HAVE_init_vectorized_lexer 1
498static inline void
499init_vectorized_lexer (void)
500{
501  unsigned dummy, ecx = 0, edx = 0;
502  search_line_fast_type impl = search_line_acc_char;
503  int minimum = 0;
504
505#if defined(__SSE4_2__)
506  minimum = 3;
507#elif defined(__SSE2__)
508  minimum = 2;
509#elif defined(__SSE__)
510  minimum = 1;
511#endif
512
513  if (minimum == 3)
514    impl = search_line_sse42;
515  else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
516    {
517      if (minimum == 3 || (ecx & bit_SSE4_2))
518        impl = search_line_sse42;
519      else if (minimum == 2 || (edx & bit_SSE2))
520	impl = search_line_sse2;
521      else if (minimum == 1 || (edx & bit_SSE))
522	impl = search_line_mmx;
523    }
524  else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
525    {
526      if (minimum == 1
527	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
528	impl = search_line_mmx;
529    }
530
531  search_line_fast = impl;
532}
533
534#elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
535
/* A version of the fast scanner using AltiVec vectorized byte compares
   and VSX unaligned loads (when VSX is available).  This is otherwise
   the same as the AltiVec version.

   Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
   is unused; the loop has no bound of its own.  */

ATTRIBUTE_NO_SANITIZE_UNDEFINED
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef __attribute__((altivec(vector))) unsigned char vc;

  const vc repl_nl = {
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
  };
  const vc repl_cr = {
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
  };
  const vc repl_bs = {
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
  };
  const vc repl_qm = {
    '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?',
  };
  const vc zero = { 0 };

  vc data, t;

  /* Main loop processing 16 bytes at a time.  The load is done at S
     itself, so there is no alignment step and no masking of leading
     bytes in this version.  */
  do
    {
      vc m_nl, m_cr, m_bs, m_qm;

      data = __builtin_vec_vsx_ld (0, s);
      s += 16;

      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
      t = (m_nl | m_cr) | (m_bs | m_qm);

      /* T now contains 0xff in bytes for which we matched one of the relevant
	 characters.  We want to exit the loop if any byte in T is non-zero.
	 Below is the expansion of vec_any_ne(t, zero).  */
    }
  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));

  /* Restore s to point to the 16 bytes we just processed.  */
  s -= 16;

  {
    /* Number of unsigned longs that make up one vector.  */
#define N  (sizeof(vc) / sizeof(long))

    union {
      vc v;
      /* Statically assert that N is 2 or 4.  */
      unsigned long l[(N == 2 || N == 4) ? N : -1];
    } u;
    unsigned long l, i = 0;

    u.v = t;

    /* Find the first word of T that is non-zero, advancing S past
       each all-zero word.  For N == 4 the first two words are checked
       here and the remaining two by falling through to the N == 2
       code.  The last word is not tested: the loop above only exits
       when some byte of T is non-zero.  */
    switch (N)
      {
      case 4:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	/* FALLTHRU */
      case 2:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i];
      }

    /* L now contains 0xff in bytes for which we matched one of the
       relevant characters.  We can find the byte index by finding
       its bit index and dividing by 8.  The first byte in memory is
       the most significant one on big-endian and the least significant
       one otherwise, hence the leading/trailing zero counts.  */
#ifdef __BIG_ENDIAN__
    l = __builtin_clzl(l) >> 3;
#else
    l = __builtin_ctzl(l) >> 3;
#endif
    return s + l;

#undef N
  }
}
635
636#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
637
/* A version of the fast scanner using AltiVec vectorized byte compares.
   This cannot be used for little endian because vec_lvsl/lvsr are
   deprecated for little endian and the code won't work properly.

   Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
   is unused; the loop has no bound of its own.  */
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
   so we can't compile this function without -maltivec on the command line
   (or implied by some other switch).  */

static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef __attribute__((altivec(vector))) unsigned char vc;

  const vc repl_nl = {
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
  };
  const vc repl_cr = {
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
  };
  const vc repl_bs = {
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
  };
  const vc repl_qm = {
    '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?',
  };
  const vc ones = {
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
  };
  const vc zero = { 0 };

  vc data, mask, t;

  /* Altivec loads automatically mask addresses with -16.  This lets us
     issue the first load as early as possible.  */
  data = __builtin_vec_ld(0, (const vc *)s);

  /* Discard bytes before the beginning of the buffer.  Do this by
     beginning with all ones and shifting in zeros according to the
     mis-alignment.  The LVSR instruction pulls the exact shift we
     want from the address.  */
  mask = __builtin_vec_lvsr(0, s);
  mask = __builtin_vec_perm(zero, ones, mask);
  data &= mask;

  /* While altivec loads mask addresses, we still need to align S so
     that the offset we compute at the end is correct.  */
  s = (const uchar *)((uintptr_t)s & -16);

  /* Main loop processing 16 bytes at a time.  Enter it in the middle so
     that the block loaded and masked above is tested before S is
     advanced.  */
  goto start;
  do
    {
      vc m_nl, m_cr, m_bs, m_qm;

      s += 16;
      data = __builtin_vec_ld(0, (const vc *)s);

    start:
      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
      t = (m_nl | m_cr) | (m_bs | m_qm);

      /* T now contains 0xff in bytes for which we matched one of the relevant
	 characters.  We want to exit the loop if any byte in T is non-zero.
	 Below is the expansion of vec_any_ne(t, zero).  */
    }
  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));

  {
    /* Number of unsigned longs that make up one vector.  */
#define N  (sizeof(vc) / sizeof(long))

    union {
      vc v;
      /* Statically assert that N is 2 or 4.  */
      unsigned long l[(N == 2 || N == 4) ? N : -1];
    } u;
    unsigned long l, i = 0;

    u.v = t;

    /* Find the first word of T that is non-zero, advancing S past
       each all-zero word.  For N == 4 the first two words are checked
       here and the remaining two by falling through to the N == 2
       code.  The last word is not tested: the loop above only exits
       when some byte of T is non-zero.  */
    switch (N)
      {
      case 4:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	/* FALLTHROUGH */
      case 2:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i];
      }

    /* L now contains 0xff in bytes for which we matched one of the
       relevant characters.  We can find the byte index by finding
       its bit index and dividing by 8.  This version is big-endian
       only, so the first byte in memory is the most significant.  */
    l = __builtin_clzl(l) >> 3;
    return s + l;

#undef N
  }
}
754
755#elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
756#include "arm_neon.h"
757
/* This doesn't have to be the exact page size, but no system may use
   a size smaller than this.  ARMv8 requires a minimum page size of
   4k.  The impact of being conservative here is a small number of
   cases will take the slightly slower entry path into the main
   loop.  */

#define AARCH64_MIN_PAGE_SIZE 4096

/* A version of the fast scanner using AArch64 NEON byte compares.
   Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
   is unused; the loop has no bound of its own.  */

static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
  const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
  const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
  const uint8x16_t repl_qm = vdupq_n_u8 ('?');
  /* XMASK keeps a single, distinct bit in each byte of a 64-bit half.
     Together with the pairwise add, the per-lane SHIFT and the
     horizontal add below, it folds the 16 per-byte compare results
     into FOUND, which is then treated as one bit per byte.  */
  const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);

#ifdef __ARM_BIG_ENDIAN
  const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
#else
  const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
#endif

  unsigned int found;
  const uint8_t *p;
  uint8x16_t data;
  uint8x16_t t;
  uint16x8_t m;
  uint8x16_t u, v, w;

  /* Align the source pointer.  */
  p = (const uint8_t *)((uintptr_t)s & -16);

  /* Assuming random string start positions, with a 4k page size we'll take
     the slow path about 0.37% of the time.  */
  if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
			 - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
			< 16, 0))
    {
      /* Slow path: the string starts near a possible page boundary.
	 Fewer than 16 bytes remain before it, so load the aligned
	 block containing S instead of 16 bytes starting at S, and use
	 MASK to ignore matches in the bytes that precede S.  */
      uint32_t misalign, mask;

      misalign = (uintptr_t)s & 15;
      mask = (-1u << misalign) & 0xffff;
      data = vld1q_u8 (p);
      t = vceqq_u8 (data, repl_nl);
      u = vceqq_u8 (data, repl_cr);
      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
      t = vorrq_u8 (v, w);
      t = vandq_u8 (t, xmask);
      m = vpaddlq_u8 (t);
      m = vshlq_u16 (m, shift);
      found = vaddvq_u16 (m);
      found &= mask;
      if (found)
	return (const uchar*)p + __builtin_ctz (found);
    }
  else
    {
      /* Common case: test the 16 bytes starting at S itself.  P stays
	 at the aligned address at or below S.  */
      data = vld1q_u8 ((const uint8_t *) s);
      t = vceqq_u8 (data, repl_nl);
      u = vceqq_u8 (data, repl_cr);
      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
      t = vorrq_u8 (v, w);
      if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
	goto done;
    }

  /* Main loop: continue with aligned 16-byte blocks, starting with the
     one that follows P.  */
  do
    {
      p += 16;
      data = vld1q_u8 (p);
      t = vceqq_u8 (data, repl_nl);
      u = vceqq_u8 (data, repl_cr);
      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
      t = vorrq_u8 (v, w);
    } while (!vpaddd_u64 ((uint64x2_t)t));

done:
  /* Now that we've found the terminating substring, work out precisely where
     we need to stop.  */
  t = vandq_u8 (t, xmask);
  m = vpaddlq_u8 (t);
  m = vshlq_u16 (m, shift);
  found = vaddvq_u16 (m);
  /* P is still below S only when the match came from the load at S
     above; in that case the offset is relative to S, otherwise it is
     relative to P.  */
  return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
	  + __builtin_ctz (found));
}
849
850#elif defined (__ARM_NEON)
851#include "arm_neon.h"
852
/* A version of the fast scanner using (32-bit) ARM NEON byte compares.
   Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
   is unused; the loop has no bound of its own.  */
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
  const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
  const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
  const uint8x16_t repl_qm = vdupq_n_u8 ('?');
  /* XMASK keeps a single, distinct bit in each byte of a 64-bit half;
     the pairwise adds in the loop then fold the per-byte compare
     results into FOUND, which is treated as one bit per byte.  */
  const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);

  unsigned int misalign, found, mask;
  const uint8_t *p;
  uint8x16_t data;

  /* Align the source pointer: P is S rounded down to a 16-byte
     boundary and MISALIGN is the number of bytes in between.  */
  misalign = (uintptr_t)s & 15;
  p = (const uint8_t *)((uintptr_t)s & -16);
  data = vld1q_u8 (p);

  /* Create a mask for the bytes that are valid within the first
     16-byte block, i.e. clear the bits for the bytes that precede S.
     The Idea here is that the AND with the mask
     within the loop is "free", since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = (-1u << misalign) & 0xffff;

  /* Main loop, processing 16 bytes at a time.  Enter it in the middle
     so that the block loaded above is tested before P is advanced.  */
  goto start;

  do
    {
      uint8x8_t l;
      uint16x4_t m;
      uint32x2_t n;
      uint8x16_t t, u, v, w;

      p += 16;
      data = vld1q_u8 (p);
      /* Every byte of the second and later blocks is valid.  */
      mask = 0xffff;

    start:
      t = vceqq_u8 (data, repl_nl);
      u = vceqq_u8 (data, repl_cr);
      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
      t = vandq_u8 (vorrq_u8 (v, w), xmask);
      /* Reduce the 16 masked bytes with successive pairwise adds.  */
      l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
      m = vpaddl_u8 (l);
      n = vpaddl_u16 (m);

      /* Merge the two 32-bit partial results into the low lane.  */
      found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
      found &= mask;
    }
  while (!found);

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz (found);
  return (const uchar *)p + found;
}
912
913#else
914
/* None of the vectorized scanners above was selected, so we only have
   one accelerated alternative: the word-at-a-time scanner.  Use a
   direct call (a macro alias rather than a function pointer) so that
   we encourage inlining.  */

#define search_line_fast  search_line_acc_char
919
920#endif
921
/* One-time lexer set-up.  When this build provides
   init_vectorized_lexer (signalled by HAVE_init_vectorized_lexer), call
   it to choose the line scanner; otherwise there is nothing to do.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
931
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   The line that starts at PFILE->buffer->next_line is cleaned in
   place.  S is the read cursor and D the write cursor; they only
   diverge once something has to be removed from the line.  On return:
     - buffer->cur and buffer->line_base point at the start of the line;
     - a '\n' has been stored at *D, the end of the cleaned text;
     - buffer->next_line is S + 1, one past the last input position
       examined;
     - a line note has been added for every trigraph and every escaped
       newline seen, followed by one sentinel note.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  const uchar *s;
  uchar c, *d, *p;

  /* Discard the notes of the previous line and position the buffer at
     the start of the line about to be cleaned.  */
  buffer = pfile->buffer;
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  s = buffer->next_line;

  if (!buffer->from_stage3)
    {
      /* Most recent backslash seen on the fast path, if any.  */
      const uchar *pbackslash = NULL;

      /* Fast path.  This is the common case of an un-escaped line with
	 no trigraphs.  The primary win here is by not writing any
	 data back to memory until we have to.  */
      while (1)
	{
	  /* Perform an optimized search for \n, \r, \\, ?.  */
	  s = search_line_fast (s, buffer->rlimit);

	  c = *s;
	  if (c == '\\')
	    {
	      /* Record the location of the backslash and continue.  */
	      pbackslash = s++;
	    }
	  else if (__builtin_expect (c == '?', 0))
	    {
	      if (__builtin_expect (s[1] == '?', false)
		   && _cpp_trigraph_map[s[2]])
		{
		  /* Have a trigraph.  We may or may not have to convert
		     it.  Add a line note regardless, for -Wtrigraphs.  */
		  add_line_note (buffer, s, s[2]);
		  if (CPP_OPTION (pfile, trigraphs))
		    {
		      /* We do, and that means we have to switch to the
		         slow path.  The replacement character overwrites
			 the first '?', and S moves to the last byte of
			 the trigraph; the slow path pre-increments both
			 cursors before it copies.  */
		      d = (uchar *) s;
		      *d = _cpp_trigraph_map[s[2]];
		      s += 2;
		      goto slow_path;
		    }
		}
	      /* Not a trigraph.  Continue on fast-path.  */
	      s++;
	    }
	  else
	    break;
	}

      /* This must be \r or \n.  We're either done, or we'll be forced
	 to write back to the buffer and continue on the slow path.  */
      d = (uchar *) s;

      if (__builtin_expect (s == buffer->rlimit, false))
	goto done;

      /* DOS line ending? */
      if (__builtin_expect (c == '\r', false) && s[1] == '\n')
	{
	  s++;
	  if (s == buffer->rlimit)
	    goto done;
	}

      /* Without a backslash on the line there cannot be an escaped
	 newline.  */
      if (__builtin_expect (pbackslash == NULL, true))
	goto done;

      /* Check for escaped newline.  Step back over non-vertical
	 whitespace before the newline; the newline is escaped only if
	 the character before that whitespace is the last backslash.  */
      p = d;
      while (is_nvspace (p[-1]))
	p--;
      if (p - 1 != pbackslash)
	goto done;

      /* Have an escaped newline; process it and proceed to
	 the slow path.  The note type is ' ' when whitespace separated
	 the backslash from the newline, and '\\' otherwise.

	 The slow path pre-increments D before storing, so D = P - 2
	 makes the next byte land on the backslash at P - 1.
	 buffer->next_line is borrowed to remember that position: the
	 slow path's backward scan for an escaping backslash stops
	 there.  It is given its final value at `done'.  */
      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
      d = p - 2;
      buffer->next_line = p - 1;

    slow_path:
      /* Slow path: copy the rest of the line byte by byte from S to D,
	 dropping escaped newlines and, if enabled, replacing
	 trigraphs.  */
      while (1)
	{
	  c = *++s;
	  *++d = c;

	  if (c == '\n' || c == '\r')
	    {
	      /* Handle DOS line endings.  */
	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
		s++;
	      if (s == buffer->rlimit)
		break;

	      /* Escaped?  Scan back over non-vertical whitespace, but
		 never past the position recorded in
		 buffer->next_line.  */
	      p = d;
	      while (p != buffer->next_line && is_nvspace (p[-1]))
		p--;
	      if (p == buffer->next_line || p[-1] != '\\')
		break;

	      /* Same bookkeeping as for the first escaped newline
		 above.  */
	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
	      d = p - 2;
	      buffer->next_line = p - 1;
	    }
	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
	    {
	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
	      add_line_note (buffer, d, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  /* Replace the '?' just copied and skip the remaining
		     two bytes of the trigraph in the input.  */
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		}
	    }
	}
    }
  else
    {
      /* A from_stage3 buffer gets no trigraph or escaped-newline
	 processing here; just locate the end of the line.  */
      while (*s != '\n' && *s != '\r')
	s++;
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s + 1 != buffer->rlimit && s[1] == '\n')
	s++;
    }

 done:
  /* Terminate the cleaned line where the write cursor stopped.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
1075
1076/* Return true if the trigraph indicated by NOTE should be warned
1077   about in a comment.  */
1078static bool
1079warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
1080{
1081  const uchar *p;
1082
1083  /* Within comments we don't warn about trigraphs, unless the
1084     trigraph forms an escaped newline, as that may change
1085     behavior.  */
1086  if (note->type != '/')
1087    return false;
1088
1089  /* If -trigraphs, then this was an escaped newline iff the next note
1090     is coincident.  */
1091  if (CPP_OPTION (pfile, trigraphs))
1092    return note[1].pos == note->pos;
1093
1094  /* Otherwise, see if this forms an escaped newline.  */
1095  p = note->pos + 3;
1096  while (is_nvspace (*p))
1097    p++;
1098
1099  /* There might have been escaped newlines between the trigraph and the
1100     newline we found.  Hence the position test.  */
1101  return (*p == '\n' && p < note[1].pos);
1102}
1103
1104/* Process the notes created by add_line_note as far as the current
1105   location.  */
1106void
1107_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
1108{
1109  cpp_buffer *buffer = pfile->buffer;
1110
1111  for (;;)
1112    {
1113      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1114      unsigned int col;
1115
1116      if (note->pos > buffer->cur)
1117	break;
1118
1119      buffer->cur_note++;
1120      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1121
1122      if (note->type == '\\' || note->type == ' ')
1123	{
1124	  if (note->type == ' ' && !in_comment)
1125	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1126				 "backslash and newline separated by space");
1127
1128	  if (buffer->next_line > buffer->rlimit)
1129	    {
1130	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1131				   "backslash-newline at end of file");
1132	      /* Prevent "no newline at end of file" warning.  */
1133	      buffer->next_line = buffer->rlimit;
1134	    }
1135
1136	  buffer->line_base = note->pos;
1137	  CPP_INCREMENT_LINE (pfile, 0);
1138	}
1139      else if (_cpp_trigraph_map[note->type])
1140	{
1141	  if (CPP_OPTION (pfile, warn_trigraphs)
1142	      && (!in_comment || warn_in_comment (pfile, note)))
1143	    {
1144	      if (CPP_OPTION (pfile, trigraphs))
1145		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1146                                       pfile->line_table->highest_line, col,
1147				       "trigraph ??%c converted to %c",
1148				       note->type,
1149				       (int) _cpp_trigraph_map[note->type]);
1150	      else
1151		{
1152		  cpp_warning_with_line
1153		    (pfile, CPP_W_TRIGRAPHS,
1154                     pfile->line_table->highest_line, col,
1155		     "trigraph ??%c ignored, use -trigraphs to enable",
1156		     note->type);
1157		}
1158	    }
1159	}
1160      else if (note->type == 0)
1161	/* Already processed in lex_raw_string.  */;
1162      else
1163	abort ();
1164    }
1165}
1166
1167namespace bidi {
1168  enum class kind {
1169    NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
1170  };
1171
1172  /* All the UTF-8 encodings of bidi characters start with E2.  */
1173  constexpr uchar utf8_start = 0xe2;
1174
1175  struct context
1176  {
1177    context () {}
1178    context (location_t loc, kind k, bool pdf, bool ucn)
1179    : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
1180    {
1181    }
1182
1183    kind get_pop_kind () const
1184    {
1185      return m_pdf ? kind::PDF : kind::PDI;
1186    }
1187    bool ucn_p () const
1188    {
1189      return m_ucn;
1190    }
1191
1192    location_t m_loc;
1193    kind m_kind;
1194    unsigned m_pdf : 1;
1195    unsigned m_ucn : 1;
1196  };
1197
1198  /* A vector holding currently open bidi contexts.  We use a char for
1199     each context, its LSB is 1 if it represents a PDF context, 0 if it
1200     represents a PDI context.  The next bit is 1 if this context was open
1201     by a bidi character written as a UCN, and 0 when it was UTF-8.  */
1202  semi_embedded_vec <context, 16> vec;
1203
1204  /* Close the whole comment/identifier/string literal/character constant
1205     context.  */
1206  void on_close ()
1207  {
1208    vec.truncate (0);
1209  }
1210
1211  /* Pop the last element in the vector.  */
1212  void pop ()
1213  {
1214    unsigned int len = vec.count ();
1215    gcc_checking_assert (len > 0);
1216    vec.truncate (len - 1);
1217  }
1218
1219  /* Return the pop kind of the context of the Ith element.  */
1220  kind pop_kind_at (unsigned int i)
1221  {
1222    return vec[i].get_pop_kind ();
1223  }
1224
1225  /* Return the pop kind of the context that is currently opened.  */
1226  kind current_ctx ()
1227  {
1228    unsigned int len = vec.count ();
1229    if (len == 0)
1230      return kind::NONE;
1231    return vec[len - 1].get_pop_kind ();
1232  }
1233
1234  /* Return true if the current context comes from a UCN origin, that is,
1235     the bidi char which started this bidi context was written as a UCN.  */
1236  bool current_ctx_ucn_p ()
1237  {
1238    unsigned int len = vec.count ();
1239    gcc_checking_assert (len > 0);
1240    return vec[len - 1].m_ucn;
1241  }
1242
1243  location_t current_ctx_loc ()
1244  {
1245    unsigned int len = vec.count ();
1246    gcc_checking_assert (len > 0);
1247    return vec[len - 1].m_loc;
1248  }
1249
1250  /* We've read a bidi char, update the current vector as necessary.
1251     LOC is only valid when K is not kind::NONE.  */
1252  void on_char (kind k, bool ucn_p, location_t loc)
1253  {
1254    switch (k)
1255      {
1256      case kind::LRE:
1257      case kind::RLE:
1258      case kind::LRO:
1259      case kind::RLO:
1260	vec.push (context (loc, k, true, ucn_p));
1261	break;
1262      case kind::LRI:
1263      case kind::RLI:
1264      case kind::FSI:
1265	vec.push (context (loc, k, false, ucn_p));
1266	break;
1267      /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
1268	 whose scope has not yet been terminated.  */
1269      case kind::PDF:
1270	if (current_ctx () == kind::PDF)
1271	  pop ();
1272	break;
1273      /* PDI terminates the scope of the last LRI, RLI, or FSI whose
1274	 scope has not yet been terminated, as well as the scopes of
1275	 any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
1276	 yet been terminated.  */
1277      case kind::PDI:
1278	for (int i = vec.count () - 1; i >= 0; --i)
1279	  if (pop_kind_at (i) == kind::PDI)
1280	    {
1281	      vec.truncate (i);
1282	      break;
1283	    }
1284	break;
1285      case kind::LTR:
1286      case kind::RTL:
1287	/* These aren't popped by a PDF/PDI.  */
1288	break;
1289      ATTR_LIKELY case kind::NONE:
1290	break;
1291      default:
1292	abort ();
1293      }
1294  }
1295
1296  /* Return a descriptive string for K.  */
1297  const char *to_str (kind k)
1298  {
1299    switch (k)
1300      {
1301      case kind::LRE:
1302	return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
1303      case kind::RLE:
1304	return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
1305      case kind::LRO:
1306	return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
1307      case kind::RLO:
1308	return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
1309      case kind::LRI:
1310	return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
1311      case kind::RLI:
1312	return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
1313      case kind::FSI:
1314	return "U+2068 (FIRST STRONG ISOLATE)";
1315      case kind::PDF:
1316	return "U+202C (POP DIRECTIONAL FORMATTING)";
1317      case kind::PDI:
1318	return "U+2069 (POP DIRECTIONAL ISOLATE)";
1319      case kind::LTR:
1320	return "U+200E (LEFT-TO-RIGHT MARK)";
1321      case kind::RTL:
1322	return "U+200F (RIGHT-TO-LEFT MARK)";
1323      default:
1324	abort ();
1325      }
1326  }
1327}
1328
1329/* Get location_t for the range of bytes [START, START + NUM_BYTES)
1330   within the current line in FILE, with the caret at START.  */
1331
1332static location_t
1333get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
1334					 const unsigned char *const start,
1335					 size_t num_bytes)
1336{
1337  gcc_checking_assert (num_bytes > 0);
1338
1339  /* CPP_BUF_COLUMN and linemap_position_for_column both refer
1340     to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
1341     whereas linemap_position_for_column is 1-based.  */
1342
1343  /* Get 0-based offsets within the line.  */
1344  size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start);
1345  size_t end_offset = start_offset + num_bytes - 1;
1346
1347  /* Now convert to location_t, where "columns" are 1-based byte offsets.  */
1348  location_t start_loc = linemap_position_for_column (pfile->line_table,
1349						      start_offset + 1);
1350  location_t end_loc = linemap_position_for_column (pfile->line_table,
1351						     end_offset + 1);
1352
1353  if (start_loc == end_loc)
1354    return start_loc;
1355
1356  source_range src_range;
1357  src_range.m_start = start_loc;
1358  src_range.m_finish = end_loc;
1359  location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table,
1360						   start_loc,
1361						   src_range,
1362						   NULL);
1363  return combined_loc;
1364}
1365
1366/* Parse a sequence of 3 bytes starting with P and return its bidi code.  */
1367
1368static bidi::kind
1369get_bidi_utf8_1 (const unsigned char *const p)
1370{
1371  gcc_checking_assert (p[0] == bidi::utf8_start);
1372
1373  if (p[1] == 0x80)
1374    switch (p[2])
1375      {
1376      case 0xaa:
1377	return bidi::kind::LRE;
1378      case 0xab:
1379	return bidi::kind::RLE;
1380      case 0xac:
1381	return bidi::kind::PDF;
1382      case 0xad:
1383	return bidi::kind::LRO;
1384      case 0xae:
1385	return bidi::kind::RLO;
1386      case 0x8e:
1387	return bidi::kind::LTR;
1388      case 0x8f:
1389	return bidi::kind::RTL;
1390      default:
1391	break;
1392      }
1393  else if (p[1] == 0x81)
1394    switch (p[2])
1395      {
1396      case 0xa6:
1397	return bidi::kind::LRI;
1398      case 0xa7:
1399	return bidi::kind::RLI;
1400      case 0xa8:
1401	return bidi::kind::FSI;
1402      case 0xa9:
1403	return bidi::kind::PDI;
1404      default:
1405	break;
1406      }
1407
1408  return bidi::kind::NONE;
1409}
1410
1411/* Parse a sequence of 3 bytes starting with P and return its bidi code.
1412   If the kind is not NONE, write the location to *OUT.*/
1413
1414static bidi::kind
1415get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
1416{
1417  bidi::kind result = get_bidi_utf8_1 (p);
1418  if (result != bidi::kind::NONE)
1419    {
1420      /* We have a sequence of 3 bytes starting at P.  */
1421      *out = get_location_for_byte_range_in_cur_line (pfile, p, 3);
1422    }
1423  return result;
1424}
1425
1426/* Parse a UCN where P points just past \u or \U and return its bidi code.  */
1427
1428static bidi::kind
1429get_bidi_ucn_1 (const unsigned char *p, bool is_U)
1430{
1431  /* 6.4.3 Universal Character Names
1432      \u hex-quad
1433      \U hex-quad hex-quad
1434     where \unnnn means \U0000nnnn.  */
1435
1436  if (is_U)
1437    {
1438      if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
1439	return bidi::kind::NONE;
1440      /* Skip 4B so we can treat \u and \U the same below.  */
1441      p += 4;
1442    }
1443
1444  /* All code points we are looking for start with 20xx.  */
1445  if (p[0] != '2' || p[1] != '0')
1446    return bidi::kind::NONE;
1447  else if (p[2] == '2')
1448    switch (p[3])
1449      {
1450      case 'a':
1451      case 'A':
1452	return bidi::kind::LRE;
1453      case 'b':
1454      case 'B':
1455	return bidi::kind::RLE;
1456      case 'c':
1457      case 'C':
1458	return bidi::kind::PDF;
1459      case 'd':
1460      case 'D':
1461	return bidi::kind::LRO;
1462      case 'e':
1463      case 'E':
1464	return bidi::kind::RLO;
1465      default:
1466	break;
1467      }
1468  else if (p[2] == '6')
1469    switch (p[3])
1470      {
1471      case '6':
1472	return bidi::kind::LRI;
1473      case '7':
1474	return bidi::kind::RLI;
1475      case '8':
1476	return bidi::kind::FSI;
1477      case '9':
1478	return bidi::kind::PDI;
1479      default:
1480	break;
1481      }
1482  else if (p[2] == '0')
1483    switch (p[3])
1484      {
1485      case 'e':
1486      case 'E':
1487	return bidi::kind::LTR;
1488      case 'f':
1489      case 'F':
1490	return bidi::kind::RTL;
1491      default:
1492	break;
1493      }
1494
1495  return bidi::kind::NONE;
1496}
1497
1498/* Parse a UCN where P points just past \u or \U and return its bidi code.
1499   If the kind is not NONE, write the location to *OUT.*/
1500
1501static bidi::kind
1502get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
1503	      location_t *out)
1504{
1505  bidi::kind result = get_bidi_ucn_1 (p, is_U);
1506  if (result != bidi::kind::NONE)
1507    {
1508      const unsigned char *start = p - 2;
1509      size_t num_bytes = 2 + (is_U ? 8 : 4);
1510      *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
1511    }
1512  return result;
1513}
1514
1515/* Subclass of rich_location for reporting on unpaired UTF-8
1516   bidirectional control character(s).
1517   Escape the source lines on output, and show all unclosed
1518   bidi context, labelling everything.  */
1519
1520class unpaired_bidi_rich_location : public rich_location
1521{
1522 public:
1523  class custom_range_label : public range_label
1524  {
1525   public:
1526     label_text get_text (unsigned range_idx) const FINAL OVERRIDE
1527     {
1528       /* range 0 is the primary location; each subsequent range i + 1
1529	  is for bidi::vec[i].  */
1530       if (range_idx > 0)
1531	 {
1532	   const bidi::context &ctxt (bidi::vec[range_idx - 1]);
1533	   return label_text::borrow (bidi::to_str (ctxt.m_kind));
1534	 }
1535       else
1536	 return label_text::borrow (_("end of bidirectional context"));
1537     }
1538  };
1539
1540  unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc)
1541  : rich_location (pfile->line_table, loc, &m_custom_label)
1542  {
1543    set_escape_on_output (true);
1544    for (unsigned i = 0; i < bidi::vec.count (); i++)
1545      add_range (bidi::vec[i].m_loc,
1546		 SHOW_RANGE_WITHOUT_CARET,
1547		 &m_custom_label);
1548  }
1549
1550 private:
1551   custom_range_label m_custom_label;
1552};
1553
1554/* We're closing a bidi context, that is, we've encountered a newline,
1555   are closing a C-style comment, or are at the end of a string literal,
1556   character constant, or identifier.  Warn if this context was not
1557   properly terminated by a PDI or PDF.  P points to the last character
1558   in this context.  */
1559
1560static void
1561maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
1562{
1563  const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
1564  if (bidi::vec.count () > 0
1565      && (warn_bidi & bidirectional_unpaired
1566	  && (!bidi::current_ctx_ucn_p ()
1567	      || (warn_bidi & bidirectional_ucn))))
1568    {
1569      const location_t loc
1570	= linemap_position_for_column (pfile->line_table,
1571				       CPP_BUF_COLUMN (pfile->buffer, p));
1572      unpaired_bidi_rich_location rich_loc (pfile, loc);
1573      /* cpp_callbacks doesn't yet have a way to handle singular vs plural
1574	 forms of a diagnostic, so fake it for now.  */
1575      if (bidi::vec.count () > 1)
1576	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1577			"unpaired UTF-8 bidirectional control characters "
1578			"detected");
1579      else
1580	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1581			"unpaired UTF-8 bidirectional control character "
1582			"detected");
1583    }
1584  /* We're done with this context.  */
1585  bidi::on_close ();
1586}
1587
1588/* We're at the beginning or in the middle of an identifier/comment/string
1589   literal/character constant.  Warn if we've encountered a bidi character.
1590   KIND says which bidi control character it was; UCN_P is true iff this bidi
1591   control character was written as a UCN.  LOC is the location of the
1592   character, but is only valid if KIND != bidi::kind::NONE.  */
1593
1594static void
1595maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
1596			 bool ucn_p, location_t loc)
1597{
1598  if (__builtin_expect (kind == bidi::kind::NONE, 1))
1599    return;
1600
1601  const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
1602
1603  if (warn_bidi & (bidirectional_unpaired|bidirectional_any))
1604    {
1605      rich_location rich_loc (pfile->line_table, loc);
1606      rich_loc.set_escape_on_output (true);
1607
1608      /* It seems excessive to warn about a PDI/PDF that is closing
1609	 an opened context because we've already warned about the
1610	 opening character.  Except warn when we have a UCN x UTF-8
1611	 mismatch, if UCN checking is enabled.  */
1612      if (kind == bidi::current_ctx ())
1613	{
1614	  if (warn_bidi == (bidirectional_unpaired|bidirectional_ucn)
1615	      && bidi::current_ctx_ucn_p () != ucn_p)
1616	    {
1617	      rich_loc.add_range (bidi::current_ctx_loc ());
1618	      cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1619			      "UTF-8 vs UCN mismatch when closing "
1620			      "a context by \"%s\"", bidi::to_str (kind));
1621	    }
1622	}
1623      else if (warn_bidi & bidirectional_any
1624	       && (!ucn_p || (warn_bidi & bidirectional_ucn)))
1625	{
1626	  if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
1627	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1628			    "\"%s\" is closing an unopened context",
1629			    bidi::to_str (kind));
1630	  else
1631	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1632			    "found problematic Unicode character \"%s\"",
1633			    bidi::to_str (kind));
1634	}
1635    }
1636  /* We're done with this context.  */
1637  bidi::on_char (kind, ucn_p, loc);
1638}
1639
1640/* Skip a C-style block comment.  We find the end of the comment by
1641   seeing if an asterisk is before every '/' we encounter.  Returns
1642   nonzero if comment terminated by EOF, zero otherwise.
1643
1644   Buffer->cur points to the initial asterisk of the comment.  */
1645bool
1646_cpp_skip_block_comment (cpp_reader *pfile)
1647{
1648  cpp_buffer *buffer = pfile->buffer;
1649  const uchar *cur = buffer->cur;
1650  uchar c;
1651  const bool warn_bidi_p = pfile->warn_bidi_p ();
1652
1653  cur++;
1654  if (*cur == '/')
1655    cur++;
1656
1657  for (;;)
1658    {
1659      /* People like decorating comments with '*', so check for '/'
1660	 instead for efficiency.  */
1661      c = *cur++;
1662
1663      if (c == '/')
1664	{
1665	  if (cur[-2] == '*')
1666	    {
1667	      if (warn_bidi_p)
1668		maybe_warn_bidi_on_close (pfile, cur);
1669	      break;
1670	    }
1671
1672	  /* Warn about potential nested comments, but not if the '/'
1673	     comes immediately before the true comment delimiter.
1674	     Don't bother to get it right across escaped newlines.  */
1675	  if (CPP_OPTION (pfile, warn_comments)
1676	      && cur[0] == '*' && cur[1] != '/')
1677	    {
1678	      buffer->cur = cur;
1679	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1680				     pfile->line_table->highest_line,
1681				     CPP_BUF_COL (buffer),
1682				     "\"/*\" within comment");
1683	    }
1684	}
1685      else if (c == '\n')
1686	{
1687	  unsigned int cols;
1688	  buffer->cur = cur - 1;
1689	  if (warn_bidi_p)
1690	    maybe_warn_bidi_on_close (pfile, cur);
1691	  _cpp_process_line_notes (pfile, true);
1692	  if (buffer->next_line >= buffer->rlimit)
1693	    return true;
1694	  _cpp_clean_line (pfile);
1695
1696	  cols = buffer->next_line - buffer->line_base;
1697	  CPP_INCREMENT_LINE (pfile, cols);
1698
1699	  cur = buffer->cur;
1700	}
1701      /* If this is a beginning of a UTF-8 encoding, it might be
1702	 a bidirectional control character.  */
1703      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
1704	{
1705	  location_t loc;
1706	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
1707	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1708	}
1709    }
1710
1711  buffer->cur = cur;
1712  _cpp_process_line_notes (pfile, true);
1713  return false;
1714}
1715
1716/* Skip a C++ line comment, leaving buffer->cur pointing to the
1717   terminating newline.  Handles escaped newlines.  Returns nonzero
1718   if a multiline comment.  */
1719static int
1720skip_line_comment (cpp_reader *pfile)
1721{
1722  cpp_buffer *buffer = pfile->buffer;
1723  location_t orig_line = pfile->line_table->highest_line;
1724  const bool warn_bidi_p = pfile->warn_bidi_p ();
1725
1726  if (!warn_bidi_p)
1727    while (*buffer->cur != '\n')
1728      buffer->cur++;
1729  else
1730    {
1731      while (*buffer->cur != '\n'
1732	     && *buffer->cur != bidi::utf8_start)
1733	buffer->cur++;
1734      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
1735	{
1736	  while (*buffer->cur != '\n')
1737	    {
1738	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
1739		{
1740		  location_t loc;
1741		  bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
1742		  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1743		}
1744	      buffer->cur++;
1745	    }
1746	  maybe_warn_bidi_on_close (pfile, buffer->cur);
1747	}
1748    }
1749
1750  _cpp_process_line_notes (pfile, true);
1751  return orig_line != pfile->line_table->highest_line;
1752}
1753
1754/* Skips whitespace, saving the next non-whitespace character.  */
1755static void
1756skip_whitespace (cpp_reader *pfile, cppchar_t c)
1757{
1758  cpp_buffer *buffer = pfile->buffer;
1759  bool saw_NUL = false;
1760
1761  do
1762    {
1763      /* Horizontal space always OK.  */
1764      if (c == ' ' || c == '\t')
1765	;
1766      /* Just \f \v or \0 left.  */
1767      else if (c == '\0')
1768	saw_NUL = true;
1769      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1770	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1771			     CPP_BUF_COL (buffer),
1772			     "%s in preprocessing directive",
1773			     c == '\f' ? "form feed" : "vertical tab");
1774
1775      c = *buffer->cur++;
1776    }
1777  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1778  while (is_nvspace (c));
1779
1780  if (saw_NUL)
1781    {
1782      encoding_rich_location rich_loc (pfile);
1783      cpp_error_at (pfile, CPP_DL_WARNING, &rich_loc,
1784		    "null character(s) ignored");
1785    }
1786
1787  buffer->cur--;
1788}
1789
1790/* See if the characters of a number token are valid in a name (no
1791   '.', '+' or '-').  */
1792static int
1793name_p (cpp_reader *pfile, const cpp_string *string)
1794{
1795  unsigned int i;
1796
1797  for (i = 0; i < string->len; i++)
1798    if (!is_idchar (string->text[i]))
1799      return 0;
1800
1801  return 1;
1802}
1803
1804/* After parsing an identifier or other sequence, produce a warning about
1805   sequences not in NFC/NFKC.  */
1806static void
1807warn_about_normalization (cpp_reader *pfile,
1808			  const cpp_token *token,
1809			  const struct normalize_state *s)
1810{
1811  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1812      && !pfile->state.skipping)
1813    {
1814      location_t loc = token->src_loc;
1815
1816      /* If possible, create a location range for the token.  */
1817      if (loc >= RESERVED_LOCATION_COUNT
1818	  && token->type != CPP_EOF
1819	  /* There must be no line notes to process.  */
1820	  && (!(pfile->buffer->cur
1821		>= pfile->buffer->notes[pfile->buffer->cur_note].pos
1822		&& !pfile->overlaid_buffer)))
1823	{
1824	  source_range tok_range;
1825	  tok_range.m_start = loc;
1826	  tok_range.m_finish
1827	    = linemap_position_for_column (pfile->line_table,
1828					   CPP_BUF_COLUMN (pfile->buffer,
1829							   pfile->buffer->cur));
1830	  loc = COMBINE_LOCATION_DATA (pfile->line_table,
1831				       loc, tok_range, NULL);
1832	}
1833
1834      encoding_rich_location rich_loc (pfile, loc);
1835
1836      /* Make sure that the token is printed using UCNs, even
1837	 if we'd otherwise happily print UTF-8.  */
1838      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1839      size_t sz;
1840
1841      sz = cpp_spell_token (pfile, token, buf, false) - buf;
1842      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1843	cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
1844			"`%.*s' is not in NFKC", (int) sz, buf);
1845      else if (CPP_OPTION (pfile, cplusplus))
1846	cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
1847				  "`%.*s' is not in NFC", (int) sz, buf);
1848      else
1849	cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
1850			"`%.*s' is not in NFC", (int) sz, buf);
1851      free (buf);
1852    }
1853}
1854
/* Threshold used by forms_identifier_p: a byte at or above this value
   is treated as the possible start of a multi-byte UTF-8 character.
   0xC0 is the smallest byte value with its two top bits set.  */
static const cppchar_t utf8_signifier = 0xC0;
1856
1857/* Returns TRUE if the sequence starting at buffer->cur is valid in
1858   an identifier.  FIRST is TRUE if this starts an identifier.  */
1859
1860static bool
1861forms_identifier_p (cpp_reader *pfile, int first,
1862		    struct normalize_state *state)
1863{
1864  cpp_buffer *buffer = pfile->buffer;
1865  const bool warn_bidi_p = pfile->warn_bidi_p ();
1866
1867  if (*buffer->cur == '$')
1868    {
1869      if (!CPP_OPTION (pfile, dollars_in_ident))
1870	return false;
1871
1872      buffer->cur++;
1873      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1874	{
1875	  CPP_OPTION (pfile, warn_dollars) = 0;
1876	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1877	}
1878
1879      return true;
1880    }
1881
1882  /* Is this a syntactically valid UCN or a valid UTF-8 char?  */
1883  if (CPP_OPTION (pfile, extended_identifiers))
1884    {
1885      cppchar_t s;
1886      if (*buffer->cur >= utf8_signifier)
1887	{
1888	  if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
1889	      && warn_bidi_p)
1890	    {
1891	      location_t loc;
1892	      bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
1893	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1894	    }
1895	  if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1896			       state, &s))
1897	    return true;
1898	}
1899      else if (*buffer->cur == '\\'
1900	       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1901	{
1902	  buffer->cur += 2;
1903	  if (warn_bidi_p)
1904	    {
1905	      location_t loc;
1906	      bidi::kind kind = get_bidi_ucn (pfile,
1907					      buffer->cur,
1908					      buffer->cur[-1] == 'U',
1909					      &loc);
1910	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
1911	    }
1912	  if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1913			      state, &s, NULL, NULL))
1914	    return true;
1915	  buffer->cur -= 2;
1916	}
1917    }
1918
1919  return false;
1920}
1921
1922/* Helper function to issue error about improper __VA_OPT__ use.  */
1923static void
1924maybe_va_opt_error (cpp_reader *pfile)
1925{
1926  if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, va_opt))
1927    {
1928      /* __VA_OPT__ should not be accepted at all, but allow it in
1929	 system headers.  */
1930      if (!_cpp_in_system_header (pfile))
1931	cpp_error (pfile, CPP_DL_PEDWARN,
1932		   "__VA_OPT__ is not available until C++20");
1933    }
1934  else if (!pfile->state.va_args_ok)
1935    {
1936      /* __VA_OPT__ should only appear in the replacement list of a
1937	 variadic macro.  */
1938      cpp_error (pfile, CPP_DL_PEDWARN,
1939		 "__VA_OPT__ can only appear in the expansion"
1940		 " of a C++20 variadic macro");
1941    }
1942}
1943
1944/* Helper function to get the cpp_hashnode of the identifier BASE.  */
1945static cpp_hashnode *
1946lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1947{
1948  cpp_hashnode *result;
1949  const uchar *cur;
1950  unsigned int len;
1951  unsigned int hash = HT_HASHSTEP (0, *base);
1952
1953  cur = base + 1;
1954  while (ISIDNUM (*cur))
1955    {
1956      hash = HT_HASHSTEP (hash, *cur);
1957      cur++;
1958    }
1959  len = cur - base;
1960  hash = HT_HASHFINISH (hash, len);
1961  result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1962					      base, len, hash, HT_ALLOC));
1963
1964  /* Rarely, identifiers require diagnostics when lexed.  */
1965  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1966			&& !pfile->state.skipping, 0))
1967    {
1968      /* It is allowed to poison the same identifier twice.  */
1969      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1970	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1971		   NODE_NAME (result));
1972
1973      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1974	 replacement list of a variadic macro.  */
1975      if (result == pfile->spec_nodes.n__VA_ARGS__
1976	  && !pfile->state.va_args_ok)
1977	{
1978	  if (CPP_OPTION (pfile, cplusplus))
1979	    cpp_error (pfile, CPP_DL_PEDWARN,
1980		       "__VA_ARGS__ can only appear in the expansion"
1981		       " of a C++11 variadic macro");
1982	  else
1983	    cpp_error (pfile, CPP_DL_PEDWARN,
1984		       "__VA_ARGS__ can only appear in the expansion"
1985		       " of a C99 variadic macro");
1986	}
1987
1988      if (result == pfile->spec_nodes.n__VA_OPT__)
1989	maybe_va_opt_error (pfile);
1990
1991      /* For -Wc++-compat, warn about use of C++ named operators.  */
1992      if (result->flags & NODE_WARN_OPERATOR)
1993	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1994		     "identifier \"%s\" is a special operator name in C++",
1995		     NODE_NAME (result));
1996    }
1997
1998  return result;
1999}
2000
2001/* Get the cpp_hashnode of an identifier specified by NAME in
2002   the current cpp_reader object.  If none is found, NULL is returned.  */
2003cpp_hashnode *
2004_cpp_lex_identifier (cpp_reader *pfile, const char *name)
2005{
2006  cpp_hashnode *result;
2007  result = lex_identifier_intern (pfile, (uchar *) name);
2008  return result;
2009}
2010
2011/* Lex an identifier starting at BUFFER->CUR - 1.  */
2012static cpp_hashnode *
2013lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
2014		struct normalize_state *nst, cpp_hashnode **spelling)
2015{
2016  cpp_hashnode *result;
2017  const uchar *cur;
2018  unsigned int len;
2019  unsigned int hash = HT_HASHSTEP (0, *base);
2020  const bool warn_bidi_p = pfile->warn_bidi_p ();
2021
2022  cur = pfile->buffer->cur;
2023  if (! starts_ucn)
2024    {
2025      while (ISIDNUM (*cur))
2026	{
2027	  hash = HT_HASHSTEP (hash, *cur);
2028	  cur++;
2029	}
2030      NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
2031    }
2032  pfile->buffer->cur = cur;
2033  if (starts_ucn || forms_identifier_p (pfile, false, nst))
2034    {
2035      /* Slower version for identifiers containing UCNs
2036	 or extended chars (including $).  */
2037      do {
2038	while (ISIDNUM (*pfile->buffer->cur))
2039	  {
2040	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
2041	    pfile->buffer->cur++;
2042	  }
2043      } while (forms_identifier_p (pfile, false, nst));
2044      if (warn_bidi_p)
2045	maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
2046      result = _cpp_interpret_identifier (pfile, base,
2047					  pfile->buffer->cur - base);
2048      *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
2049    }
2050  else
2051    {
2052      len = cur - base;
2053      hash = HT_HASHFINISH (hash, len);
2054
2055      result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
2056						  base, len, hash, HT_ALLOC));
2057      *spelling = result;
2058    }
2059
2060  /* Rarely, identifiers require diagnostics when lexed.  */
2061  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
2062			&& !pfile->state.skipping, 0))
2063    {
2064      /* It is allowed to poison the same identifier twice.  */
2065      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
2066	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
2067		   NODE_NAME (result));
2068
2069      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
2070	 replacement list of a variadic macro.  */
2071      if (result == pfile->spec_nodes.n__VA_ARGS__
2072	  && !pfile->state.va_args_ok)
2073	{
2074	  if (CPP_OPTION (pfile, cplusplus))
2075	    cpp_error (pfile, CPP_DL_PEDWARN,
2076		       "__VA_ARGS__ can only appear in the expansion"
2077		       " of a C++11 variadic macro");
2078	  else
2079	    cpp_error (pfile, CPP_DL_PEDWARN,
2080		       "__VA_ARGS__ can only appear in the expansion"
2081		       " of a C99 variadic macro");
2082	}
2083
2084      /* __VA_OPT__ should only appear in the replacement list of a
2085	 variadic macro.  */
2086      if (result == pfile->spec_nodes.n__VA_OPT__)
2087	maybe_va_opt_error (pfile);
2088
2089      /* For -Wc++-compat, warn about use of C++ named operators.  */
2090      if (result->flags & NODE_WARN_OPERATOR)
2091	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
2092		     "identifier \"%s\" is a special operator name in C++",
2093		     NODE_NAME (result));
2094    }
2095
2096  return result;
2097}
2098
2099/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
2100static void
2101lex_number (cpp_reader *pfile, cpp_string *number,
2102	    struct normalize_state *nst)
2103{
2104  const uchar *cur;
2105  const uchar *base;
2106  uchar *dest;
2107
2108  base = pfile->buffer->cur - 1;
2109  do
2110    {
2111      const uchar *adj_digit_sep = NULL;
2112      cur = pfile->buffer->cur;
2113
2114      /* N.B. ISIDNUM does not include $.  */
2115      while (ISIDNUM (*cur)
2116	     || (*cur == '.' && !DIGIT_SEP (cur[-1]))
2117	     || DIGIT_SEP (*cur)
2118	     || (VALID_SIGN (*cur, cur[-1]) && !DIGIT_SEP (cur[-2])))
2119	{
2120	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
2121	  /* Adjacent digit separators do not form part of the pp-number syntax.
2122	     However, they can safely be diagnosed here as an error, since '' is
2123	     not a valid preprocessing token.  */
2124	  if (DIGIT_SEP (*cur) && DIGIT_SEP (cur[-1]) && !adj_digit_sep)
2125	    adj_digit_sep = cur;
2126	  cur++;
2127	}
2128      /* A number can't end with a digit separator.  */
2129      while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
2130	--cur;
2131      if (adj_digit_sep && adj_digit_sep < cur)
2132	cpp_error (pfile, CPP_DL_ERROR, "adjacent digit separators");
2133
2134      pfile->buffer->cur = cur;
2135    }
2136  while (forms_identifier_p (pfile, false, nst));
2137
2138  number->len = cur - base;
2139  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
2140  memcpy (dest, base, number->len);
2141  dest[number->len] = '\0';
2142  number->text = dest;
2143}
2144
2145/* Create a token of type TYPE with a literal spelling.  */
2146static void
2147create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
2148		unsigned int len, enum cpp_ttype type)
2149{
2150  token->type = type;
2151  token->val.str.len = len;
2152  token->val.str.text = cpp_alloc_token_string (pfile, base, len);
2153}
2154
2155const uchar *
2156cpp_alloc_token_string (cpp_reader *pfile,
2157			const unsigned char *ptr, unsigned len)
2158{
2159  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
2160
2161  dest[len] = 0;
2162  memcpy (dest, ptr, len);
2163  return dest;
2164}
2165
2166/* A pair of raw buffer pointers.  The currently open one is [1], the
2167   first one is [0].  Used for string literal lexing.  */
2168struct lit_accum {
2169  _cpp_buff *first;
2170  _cpp_buff *last;
2171  const uchar *rpos;
2172  size_t accum;
2173
2174  lit_accum ()
2175    : first (NULL), last (NULL), rpos (0), accum (0)
2176  {
2177  }
2178
2179  void append (cpp_reader *, const uchar *, size_t);
2180
2181  void read_begin (cpp_reader *);
2182  bool reading_p () const
2183  {
2184    return rpos != NULL;
2185  }
2186  char read_char ()
2187  {
2188    char c = *rpos++;
2189    if (rpos == BUFF_FRONT (last))
2190      rpos = NULL;
2191    return c;
2192  }
2193};
2194
2195/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
2196   sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
2197
2198void
2199lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len)
2200{
2201  if (!last)
2202    /* Starting.  */
2203    first = last = _cpp_get_buff (pfile, len);
2204  else if (len > BUFF_ROOM (last))
2205    {
2206      /* There is insufficient room in the buffer.  Copy what we can,
2207	 and then either extend or create a new one.  */
2208      size_t room = BUFF_ROOM (last);
2209      memcpy (BUFF_FRONT (last), base, room);
2210      BUFF_FRONT (last) += room;
2211      base += room;
2212      len -= room;
2213      accum += room;
2214
2215      gcc_checking_assert (!rpos);
2216
2217      last = _cpp_append_extend_buff (pfile, last, len);
2218    }
2219
2220  memcpy (BUFF_FRONT (last), base, len);
2221  BUFF_FRONT (last) += len;
2222  accum += len;
2223}
2224
2225void
2226lit_accum::read_begin (cpp_reader *pfile)
2227{
2228  /* We never accumulate more than 4 chars to read.  */
2229  if (BUFF_ROOM (last) < 4)
2230
2231    last = _cpp_append_extend_buff (pfile, last, 4);
2232  rpos = BUFF_FRONT (last);
2233}
2234
2235/* Returns true if a macro has been defined.
2236   This might not work if compile with -save-temps,
2237   or preprocess separately from compilation.  */
2238
2239static bool
2240is_macro(cpp_reader *pfile, const uchar *base)
2241{
2242  const uchar *cur = base;
2243  if (! ISIDST (*cur))
2244    return false;
2245  unsigned int hash = HT_HASHSTEP (0, *cur);
2246  ++cur;
2247  while (ISIDNUM (*cur))
2248    {
2249      hash = HT_HASHSTEP (hash, *cur);
2250      ++cur;
2251    }
2252  hash = HT_HASHFINISH (hash, cur - base);
2253
2254  cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
2255					base, cur - base, hash, HT_NO_INSERT));
2256
2257  return result && cpp_macro_p (result);
2258}
2259
2260/* Returns true if a literal suffix does not have the expected form
2261   and is defined as a macro.  */
2262
2263static bool
2264is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base)
2265{
2266  /* User-defined literals outside of namespace std must start with a single
2267     underscore, so assume anything of that form really is a UDL suffix.
2268     We don't need to worry about UDLs defined inside namespace std because
2269     their names are reserved, so cannot be used as macro names in valid
2270     programs.  */
2271  if (base[0] == '_' && base[1] != '_')
2272    return false;
2273  return is_macro (pfile, base);
2274}
2275
/* Lexes a raw string.  The stored string contains the spelling,
   including double quotes, delimiter string, '(' and ')', any leading
   'L', 'u', 'U' or 'u8' and 'R' modifier.  The created token contains
   the type of the literal, or CPP_OTHER if it was not properly
   terminated.

   BASE is the start of the token.  Updates pfile->buffer->cur to just
   after the lexed string.

   If the input runs out before the string is closed, TOKEN is turned
   into CPP_EOF, an error is reported and the buffer is popped.

   The spelling is NUL-terminated, but it is not guaranteed that this
   is the first NUL since embedded NULs are preserved.  */

static void
lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
{
  const uchar *pos = base;
  const bool warn_bidi_p = pfile->warn_bidi_p ();

  /* 'tis a pity this information isn't passed down from the lexer's
     initial categorization of the token.  */
  enum cpp_ttype type = CPP_STRING;

  if (*pos == 'L')
    {
      type = CPP_WSTRING;
      pos++;
    }
  else if (*pos == 'U')
    {
      type = CPP_STRING32;
      pos++;
    }
  else if (*pos == 'u')
    {
      if (pos[1] == '8')
	{
	  type = CPP_UTF8STRING;
	  pos++;
	}
      else
	type = CPP_STRING16;
      pos++;
    }

  gcc_checking_assert (pos[0] == 'R' && pos[1] == '"');
  pos += 2;

  _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];

  /* Skip notes before the ".  */
  while (note->pos < pos)
    ++note;

  lit_accum accum;

  /* The text that must follow a ')' to close the string: up to 16
     delimiter characters plus the closing '"', hence 17 slots.  */
  uchar prefix[17];
  unsigned prefix_len = 0;
  /* PHASE_PREFIX: still collecting the delimiter before the '('.
     PHASE_NONE: inside the string body.  Values from PHASE_SUFFIX
     upwards are the index into PREFIX of the next character expected
     while matching the closing delimiter.  */
  enum Phase
  {
   PHASE_PREFIX = -2,
   PHASE_NONE = -1,
   PHASE_SUFFIX = 0
  } phase = PHASE_PREFIX;

  for (;;)
    {
      gcc_checking_assert (note->pos >= pos);

      /* Undo any escaped newlines and trigraphs.  */
      if (!accum.reading_p () && note->pos == pos)
	switch (note->type)
	  {
	  case '\\':
	  case ' ':
	    /* Restore backslash followed by newline.  */
	    accum.append (pfile, base, pos - base);
	    base = pos;
	    accum.read_begin (pfile);
	    accum.append (pfile, UC"\\", 1);

	  after_backslash:
	    if (note->type == ' ')
	      /* GNU backslash whitespace newline extension.  FIXME
		 could be any sequence of non-vertical space.  When we
		 can properly restore any such sequence, we should
		 mark this note as handled so _cpp_process_line_notes
		 doesn't warn.  */
	      accum.append (pfile, UC" ", 1);

	    accum.append (pfile, UC"\n", 1);
	    note++;
	    break;

	  case '\n':
	    /* This can happen for ??/<NEWLINE> when trigraphs are not
	       being interpreted.  */
	    gcc_checking_assert (!CPP_OPTION (pfile, trigraphs));
	    note->type = 0;
	    note++;
	    break;

	  default:
	    gcc_checking_assert (_cpp_trigraph_map[note->type]);

	    /* Don't warn about this trigraph in
	       _cpp_process_line_notes, since trigraphs show up as
	       trigraphs in raw strings.  */
	    uchar type = note->type;
	    note->type = 0;

	    if (CPP_OPTION (pfile, trigraphs))
	      {
		/* Put the original three-character spelling back in
		   place of the replacement character.  */
		accum.append (pfile, base, pos - base);
		base = pos;
		accum.read_begin (pfile);
		accum.append (pfile, UC"??", 2);
		accum.append (pfile, &type, 1);

		/* ??/ followed by newline gets two line notes, one for
		   the trigraph and one for the backslash/newline.  */
		if (type == '/' && note[1].pos == pos)
		  {
		    note++;
		    gcc_assert (note->type == '\\' || note->type == ' ');
		    goto after_backslash;
		  }
		/* Skip the replacement character.  */
		base = ++pos;
	      }

	    note++;
	    break;
	  }

      /* Now get a char to process.  Either from an expanded note, or
	 from the line buffer.  */
      bool read_note = accum.reading_p ();
      char c = read_note ? accum.read_char () : *pos++;

      if (phase == PHASE_PREFIX)
	{
	  if (c == '(')
	    {
	      /* Done.  */
	      phase = PHASE_NONE;
	      prefix[prefix_len++] = '"';
	    }
	  else if (prefix_len < 16
		   /* Prefix chars are any of the basic character set,
		      [lex.charset] except for '
		      ()\\\t\v\f\n'. Optimized for a contiguous
		      alphabet.  */
		   /* Unlike a switch, this collapses down to one or
		      two shift and bitmask operations on an ASCII
		      system, with an outlier or two.   */
		   && (('Z' - 'A' == 25
			? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
			: ISIDST (c))
		       || (c >= '0' && c <= '9')
		       || c == '_' || c == '{' || c == '}'
		       || c == '[' || c == ']' || c == '#'
		       || c == '<' || c == '>' || c == '%'
		       || c == ':' || c == ';' || c == '.' || c == '?'
		       || c == '*' || c == '+' || c == '-' || c == '/'
		       || c == '^' || c == '&' || c == '|' || c == '~'
		       || c == '!' || c == '=' || c == ','
		       || c == '"' || c == '\''))
	    prefix[prefix_len++] = c;
	  else
	    {
	      /* Something is wrong.  */
	      int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note;
	      if (prefix_len == 16)
		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
				     col, "raw string delimiter longer "
				     "than 16 characters");
	      else if (c == '\n')
		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
				     col, "invalid new-line in raw "
				     "string delimiter");
	      else
		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
				     col, "invalid character '%c' in "
				     "raw string delimiter", c);
	      type = CPP_OTHER;
	      phase = PHASE_NONE;
	      /* Continue until we get a close quote, that's probably
		 the best failure mode.  */
	      prefix_len = 0;
	    }
	  /* Anything but a newline has been fully dealt with; a
	     newline still has to reach the line handling below.  */
	  if (c != '\n')
	    continue;
	}

      if (phase != PHASE_NONE)
	{
	  /* Matching the closing delimiter: C has to be the next
	     expected character of PREFIX, and the string ends once
	     all of PREFIX has been matched.  */
	  if (prefix[phase] != c)
	    phase = PHASE_NONE;
	  else if (unsigned (phase + 1) == prefix_len)
	    break;
	  else
	    {
	      phase = Phase (phase + 1);
	      continue;
	    }
	}

      if (!prefix_len && c == '"')
	/* Failure mode lexing.  */
	goto out;
      else if (prefix_len && c == ')')
	/* A ')' may begin the closing delimiter; start matching.  */
	phase = PHASE_SUFFIX;
      else if (!read_note && c == '\n')
	{
	  /* Reached the end of the current line while still inside
	     the string.  */
	  pos--;
	  pfile->buffer->cur = pos;
	  if (pfile->state.in_directive
	      || (pfile->state.parsing_args
		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
	    {
	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
				   "unterminated raw string");
	      type = CPP_OTHER;
	      goto out;
	    }

	  /* Save this line's text, newline included, before asking
	     for the next line.  */
	  accum.append (pfile, base, pos - base + 1);
	  _cpp_process_line_notes (pfile, false);

	  if (pfile->buffer->next_line < pfile->buffer->rlimit)
	    CPP_INCREMENT_LINE (pfile, 0);
	  pfile->buffer->need_line = true;

	  if (!_cpp_get_fresh_line (pfile))
	    {
	      /* We ran out of file and failed to get a line.  */
	      location_t src_loc = token->src_loc;
	      token->type = CPP_EOF;
	      /* Tell the compiler the line number of the EOF token.  */
	      token->src_loc = pfile->line_table->highest_line;
	      token->flags = BOL;
	      if (accum.first)
		_cpp_release_buff (pfile, accum.first);
	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
				   "unterminated raw string");
	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
	      _cpp_pop_buffer (pfile);
	      return;
	    }

	  /* Carry on from the start of the fresh line.  */
	  pos = base = pfile->buffer->cur;
	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
	}
      else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
	       && warn_bidi_p)
	{
	  location_t loc;
	  bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
	}
    }

  if (warn_bidi_p)
    maybe_warn_bidi_on_close (pfile, pos);

  if (CPP_OPTION (pfile, user_literals))
    {
      /* If a string format macro, say from inttypes.h, is placed touching
	 a string literal it could be parsed as a C++11 user-defined string
	 literal thus breaking the program.  */
      if (is_macro_not_literal_suffix (pfile, pos))
	{
	  /* Raise a warning, but do not consume subsequent tokens.  */
	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
				   token->src_loc, 0,
				   "invalid suffix on literal; C++11 requires "
				   "a space between literal and string macro");
	}
      /* Grab user defined literal suffix.  */
      else if (ISIDST (*pos))
	{
	  type = cpp_userdef_string_add_type (type);
	  ++pos;

	  while (ISIDNUM (*pos))
	    ++pos;
	}
    }

 out:
  pfile->buffer->cur = pos;
  /* With nothing accumulated the spelling lies between BASE and POS
     and is copied directly; otherwise the accumulated buffers are
     concatenated and the text from BASE to POS is added at the end.  */
  if (!accum.accum)
    create_literal (pfile, token, base, pos - base, type);
  else
    {
      size_t extra_len = pos - base;
      uchar *dest = _cpp_unaligned_alloc (pfile, accum.accum + extra_len + 1);

      token->type = type;
      token->val.str.len = accum.accum + extra_len;
      token->val.str.text = dest;
      for (_cpp_buff *buf = accum.first; buf; buf = buf->next)
	{
	  size_t len = BUFF_FRONT (buf) - buf->base;
	  memcpy (dest, buf->base, len);
	  dest += len;
	}
      _cpp_release_buff (pfile, accum.first);
      memcpy (dest, base, extra_len);
      dest[extra_len] = '\0';
    }
}
2589
2590/* Lexes a string, character constant, or angle-bracketed header file
2591   name.  The stored string contains the spelling, including opening
2592   quote and any leading 'L', 'u', 'U' or 'u8' and optional
2593   'R' modifier.  It returns the type of the literal, or CPP_OTHER
2594   if it was not properly terminated, or CPP_LESS for an unterminated
2595   header name which must be relexed as normal tokens.
2596
2597   The spelling is NUL-terminated, but it is not guaranteed that this
2598   is the first NUL since embedded NULs are preserved.  */
2599static void
2600lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
2601{
2602  bool saw_NUL = false;
2603  const uchar *cur;
2604  cppchar_t terminator;
2605  enum cpp_ttype type;
2606
2607  cur = base;
2608  terminator = *cur++;
2609  if (terminator == 'L' || terminator == 'U')
2610    terminator = *cur++;
2611  else if (terminator == 'u')
2612    {
2613      terminator = *cur++;
2614      if (terminator == '8')
2615	terminator = *cur++;
2616    }
2617  if (terminator == 'R')
2618    {
2619      lex_raw_string (pfile, token, base);
2620      return;
2621    }
2622  if (terminator == '"')
2623    type = (*base == 'L' ? CPP_WSTRING :
2624	    *base == 'U' ? CPP_STRING32 :
2625	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
2626			 : CPP_STRING);
2627  else if (terminator == '\'')
2628    type = (*base == 'L' ? CPP_WCHAR :
2629	    *base == 'U' ? CPP_CHAR32 :
2630	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
2631			 : CPP_CHAR);
2632  else
2633    terminator = '>', type = CPP_HEADER_NAME;
2634
2635  const bool warn_bidi_p = pfile->warn_bidi_p ();
2636  for (;;)
2637    {
2638      cppchar_t c = *cur++;
2639
2640      /* In #include-style directives, terminators are not escapable.  */
2641      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
2642	{
2643	  if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
2644	    {
2645	      location_t loc;
2646	      bidi::kind kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U',
2647					      &loc);
2648	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
2649	    }
2650	  cur++;
2651	}
2652      else if (c == terminator)
2653	{
2654	  if (warn_bidi_p)
2655	    maybe_warn_bidi_on_close (pfile, cur - 1);
2656	  break;
2657	}
2658      else if (c == '\n')
2659	{
2660	  cur--;
2661	  /* Unmatched quotes always yield undefined behavior, but
2662	     greedy lexing means that what appears to be an unterminated
2663	     header name may actually be a legitimate sequence of tokens.  */
2664	  if (terminator == '>')
2665	    {
2666	      token->type = CPP_LESS;
2667	      return;
2668	    }
2669	  type = CPP_OTHER;
2670	  break;
2671	}
2672      else if (c == '\0')
2673	saw_NUL = true;
2674      else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
2675	{
2676	  location_t loc;
2677	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
2678	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
2679	}
2680    }
2681
2682  if (saw_NUL && !pfile->state.skipping)
2683    cpp_error (pfile, CPP_DL_WARNING,
2684	       "null character(s) preserved in literal");
2685
2686  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
2687    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
2688	       (int) terminator);
2689
2690  if (CPP_OPTION (pfile, user_literals))
2691    {
2692      /* If a string format macro, say from inttypes.h, is placed touching
2693	 a string literal it could be parsed as a C++11 user-defined string
2694	 literal thus breaking the program.  */
2695      if (is_macro_not_literal_suffix (pfile, cur))
2696	{
2697	  /* Raise a warning, but do not consume subsequent tokens.  */
2698	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
2699	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
2700				   token->src_loc, 0,
2701				   "invalid suffix on literal; C++11 requires "
2702				   "a space between literal and string macro");
2703	}
2704      /* Grab user defined literal suffix.  */
2705      else if (ISIDST (*cur))
2706	{
2707	  type = cpp_userdef_char_add_type (type);
2708	  type = cpp_userdef_string_add_type (type);
2709          ++cur;
2710
2711	  while (ISIDNUM (*cur))
2712	    ++cur;
2713	}
2714    }
2715  else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
2716	   && is_macro (pfile, cur)
2717	   && !pfile->state.skipping)
2718    cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
2719			   token->src_loc, 0, "C++11 requires a space "
2720			   "between string literal and macro");
2721
2722  pfile->buffer->cur = cur;
2723  create_literal (pfile, token, base, cur - base, type);
2724}
2725
2726/* Return the comment table. The client may not make any assumption
2727   about the ordering of the table.  */
2728cpp_comment_table *
2729cpp_get_comments (cpp_reader *pfile)
2730{
2731  return &pfile->comments;
2732}
2733
2734/* Append a comment to the end of the comment table. */
2735static void
2736store_comment (cpp_reader *pfile, cpp_token *token)
2737{
2738  int len;
2739
2740  if (pfile->comments.allocated == 0)
2741    {
2742      pfile->comments.allocated = 256;
2743      pfile->comments.entries = (cpp_comment *) xmalloc
2744	(pfile->comments.allocated * sizeof (cpp_comment));
2745    }
2746
2747  if (pfile->comments.count == pfile->comments.allocated)
2748    {
2749      pfile->comments.allocated *= 2;
2750      pfile->comments.entries = (cpp_comment *) xrealloc
2751	(pfile->comments.entries,
2752	 pfile->comments.allocated * sizeof (cpp_comment));
2753    }
2754
2755  len = token->val.str.len;
2756
2757  /* Copy comment. Note, token may not be NULL terminated. */
2758  pfile->comments.entries[pfile->comments.count].comment =
2759    (char *) xmalloc (sizeof (char) * (len + 1));
2760  memcpy (pfile->comments.entries[pfile->comments.count].comment,
2761	  token->val.str.text, len);
2762  pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
2763
2764  /* Set source location. */
2765  pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
2766
2767  /* Increment the count of entries in the comment table. */
2768  pfile->comments.count++;
2769}
2770
2771/* The stored comment includes the comment start and any terminator.  */
2772static void
2773save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
2774	      cppchar_t type)
2775{
2776  unsigned char *buffer;
2777  unsigned int len, clen, i;
2778  int convert_to_c = (pfile->state.in_directive || pfile->state.parsing_args)
2779    && type == '/';
2780
2781  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
2782
2783  /* C++ comments probably (not definitely) have moved past a new
2784     line, which we don't want to save in the comment.  */
2785  if (is_vspace (pfile->buffer->cur[-1]))
2786    len--;
2787
2788  /* If we are currently in a directive or in argument parsing, then
2789     we need to store all C++ comments as C comments internally, and
2790     so we need to allocate a little extra space in that case.
2791
2792     Note that the only time we encounter a directive here is
2793     when we are saving comments in a "#define".  */
2794  clen = convert_to_c ? len + 2 : len;
2795
2796  buffer = _cpp_unaligned_alloc (pfile, clen);
2797
2798  token->type = CPP_COMMENT;
2799  token->val.str.len = clen;
2800  token->val.str.text = buffer;
2801
2802  buffer[0] = '/';
2803  memcpy (buffer + 1, from, len - 1);
2804
2805  /* Finish conversion to a C comment, if necessary.  */
2806  if (convert_to_c)
2807    {
2808      buffer[1] = '*';
2809      buffer[clen - 2] = '*';
2810      buffer[clen - 1] = '/';
2811      /* As there can be in a C++ comments illegal sequences for C comments
2812         we need to filter them out.  */
2813      for (i = 2; i < (clen - 2); i++)
2814        if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2815          buffer[i] = '|';
2816    }
2817
2818  /* Finally store this comment for use by clients of libcpp. */
2819  store_comment (pfile, token);
2820}
2821
2822/* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
2823   comment.  */
2824
2825static bool
2826fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
2827{
2828  const unsigned char *from = comment_start + 1;
2829
2830  switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
2831    {
2832      /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
2833	 don't recognize any comments.  The latter only checks attributes,
2834	 the former doesn't warn.  */
2835    case 0:
2836    default:
2837      return false;
2838      /* -Wimplicit-fallthrough=1 considers any comment, no matter what
2839	 content it has.  */
2840    case 1:
2841      return true;
2842    case 2:
2843      /* -Wimplicit-fallthrough=2 looks for (case insensitive)
2844	 .*falls?[ \t-]*thr(u|ough).* regex.  */
2845      for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
2846	   from++)
2847	{
2848	  /* Is there anything like strpbrk with upper boundary, or
2849	     memchr looking for 2 characters rather than just one?  */
2850	  if (from[0] != 'f' && from[0] != 'F')
2851	    continue;
2852	  if (from[1] != 'a' && from[1] != 'A')
2853	    continue;
2854	  if (from[2] != 'l' && from[2] != 'L')
2855	    continue;
2856	  if (from[3] != 'l' && from[3] != 'L')
2857	    continue;
2858	  from += sizeof "fall" - 1;
2859	  if (from[0] == 's' || from[0] == 'S')
2860	    from++;
2861	  while (*from == ' ' || *from == '\t' || *from == '-')
2862	    from++;
2863	  if (from[0] != 't' && from[0] != 'T')
2864	    continue;
2865	  if (from[1] != 'h' && from[1] != 'H')
2866	    continue;
2867	  if (from[2] != 'r' && from[2] != 'R')
2868	    continue;
2869	  if (from[3] == 'u' || from[3] == 'U')
2870	    return true;
2871	  if (from[3] != 'o' && from[3] != 'O')
2872	    continue;
2873	  if (from[4] != 'u' && from[4] != 'U')
2874	    continue;
2875	  if (from[5] != 'g' && from[5] != 'G')
2876	    continue;
2877	  if (from[6] != 'h' && from[6] != 'H')
2878	    continue;
2879	  return true;
2880	}
2881      return false;
2882    case 3:
2883    case 4:
2884      break;
2885    }
2886
2887  /* Whole comment contents:
2888     -fallthrough
2889     @fallthrough@
2890   */
2891  if (*from == '-' || *from == '@')
2892    {
2893      size_t len = sizeof "fallthrough" - 1;
2894      if ((size_t) (pfile->buffer->cur - from - 1) < len)
2895	return false;
2896      if (memcmp (from + 1, "fallthrough", len))
2897	return false;
2898      if (*from == '@')
2899	{
2900	  if (from[len + 1] != '@')
2901	    return false;
2902	  len++;
2903	}
2904      from += 1 + len;
2905    }
2906  /* Whole comment contents (regex):
2907     lint -fallthrough[ \t]*
2908   */
2909  else if (*from == 'l')
2910    {
2911      size_t len = sizeof "int -fallthrough" - 1;
2912      if ((size_t) (pfile->buffer->cur - from - 1) < len)
2913	return false;
2914      if (memcmp (from + 1, "int -fallthrough", len))
2915	return false;
2916      from += 1 + len;
2917      while (*from == ' ' || *from == '\t')
2918	from++;
2919    }
2920  /* Whole comment contents (regex):
2921     [ \t]*FALLTHR(U|OUGH)[ \t]*
2922   */
2923  else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
2924    {
2925      while (*from == ' ' || *from == '\t')
2926	from++;
2927      if ((size_t) (pfile->buffer->cur - from)  < sizeof "FALLTHRU" - 1)
2928	return false;
2929      if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
2930	return false;
2931      from += sizeof "FALLTHR" - 1;
2932      if (*from == 'U')
2933	from++;
2934      else if ((size_t) (pfile->buffer->cur - from)  < sizeof "OUGH" - 1)
2935	return false;
2936      else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
2937	return false;
2938      else
2939	from += sizeof "OUGH" - 1;
2940      while (*from == ' ' || *from == '\t')
2941	from++;
2942    }
2943  /* Whole comment contents (regex):
2944     [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
2945     [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
2946     [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
2947   */
2948  else
2949    {
2950      while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
2951	from++;
2952      unsigned char f = *from;
2953      bool all_upper = false;
2954      if (f == 'E' || f == 'e')
2955	{
2956	  if ((size_t) (pfile->buffer->cur - from)
2957	      < sizeof "else fallthru" - 1)
2958	    return false;
2959	  if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
2960	    all_upper = true;
2961	  else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
2962	    return false;
2963	  from += sizeof "else" - 1;
2964	  if (*from == ',')
2965	    from++;
2966	  if (*from != ' ')
2967	    return false;
2968	  from++;
2969	  if (all_upper && *from == 'f')
2970	    return false;
2971	  if (f == 'e' && *from == 'F')
2972	    return false;
2973	  f = *from;
2974	}
2975      else if (f == 'I' || f == 'i')
2976	{
2977	  if ((size_t) (pfile->buffer->cur - from)
2978	      < sizeof "intentional fallthru" - 1)
2979	    return false;
2980	  if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
2981				  sizeof "NTENTIONAL" - 1) == 0)
2982	    all_upper = true;
2983	  else if (memcmp (from + 1, "ntentional",
2984			   sizeof "ntentional" - 1))
2985	    return false;
2986	  from += sizeof "intentional" - 1;
2987	  if (*from == ' ')
2988	    {
2989	      from++;
2990	      if (all_upper && *from == 'f')
2991		return false;
2992	    }
2993	  else if (all_upper)
2994	    {
2995	      if (memcmp (from, "LY F", sizeof "LY F" - 1))
2996		return false;
2997	      from += sizeof "LY " - 1;
2998	    }
2999	  else
3000	    {
3001	      if (memcmp (from, "ly ", sizeof "ly " - 1))
3002		return false;
3003	      from += sizeof "ly " - 1;
3004	    }
3005	  if (f == 'i' && *from == 'F')
3006	    return false;
3007	  f = *from;
3008	}
3009      if (f != 'F' && f != 'f')
3010	return false;
3011      if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
3012	return false;
3013      if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
3014	all_upper = true;
3015      else if (all_upper)
3016	return false;
3017      else if (memcmp (from + 1, "all", sizeof "all" - 1))
3018	return false;
3019      from += sizeof "fall" - 1;
3020      if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
3021	from += 2;
3022      else if (*from == ' ' || *from == '-')
3023	from++;
3024      else if (*from != (all_upper ? 'T' : 't'))
3025	return false;
3026      if ((f == 'f' || *from != 'T') && (all_upper || *from != 't'))
3027	return false;
3028      if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
3029	return false;
3030      if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
3031	{
3032	  if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
3033	    return false;
3034	  if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
3035		      sizeof "hrough" - 1))
3036	    return false;
3037	  from += sizeof "through" - 1;
3038	}
3039      else
3040	from += sizeof "thru" - 1;
3041      while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
3042	from++;
3043      if (*from == '-')
3044	{
3045	  from++;
3046	  if (*comment_start == '*')
3047	    {
3048	      do
3049		{
3050		  while (*from && *from != '*'
3051			 && *from != '\n' && *from != '\r')
3052		    from++;
3053		  if (*from != '*' || from[1] == '/')
3054		    break;
3055		  from++;
3056		}
3057	      while (1);
3058	    }
3059	  else
3060	    while (*from && *from != '\n' && *from != '\r')
3061	      from++;
3062	}
3063    }
3064  /* C block comment.  */
3065  if (*comment_start == '*')
3066    {
3067      if (*from != '*' || from[1] != '/')
3068	return false;
3069    }
3070  /* C++ line comment.  */
3071  else if (*from != '\n')
3072    return false;
3073
3074  return true;
3075}
3076
3077/* Allocate COUNT tokens for RUN.  */
3078void
3079_cpp_init_tokenrun (tokenrun *run, unsigned int count)
3080{
3081  run->base = XNEWVEC (cpp_token, count);
3082  run->limit = run->base + count;
3083  run->next = NULL;
3084}
3085
3086/* Returns the next tokenrun, or creates one if there is none.  */
3087static tokenrun *
3088next_tokenrun (tokenrun *run)
3089{
3090  if (run->next == NULL)
3091    {
3092      run->next = XNEW (tokenrun);
3093      run->next->prev = run;
3094      _cpp_init_tokenrun (run->next, 250);
3095    }
3096
3097  return run->next;
3098}
3099
3100/* Return the number of not yet processed token in a given
3101   context.  */
3102int
3103_cpp_remaining_tokens_num_in_context (cpp_context *context)
3104{
3105  if (context->tokens_kind == TOKENS_KIND_DIRECT)
3106    return (LAST (context).token - FIRST (context).token);
3107  else if (context->tokens_kind == TOKENS_KIND_INDIRECT
3108	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
3109    return (LAST (context).ptoken - FIRST (context).ptoken);
3110  else
3111      abort ();
3112}
3113
3114/* Returns the token present at index INDEX in a given context.  If
3115   INDEX is zero, the next token to be processed is returned.  */
3116static const cpp_token*
3117_cpp_token_from_context_at (cpp_context *context, int index)
3118{
3119  if (context->tokens_kind == TOKENS_KIND_DIRECT)
3120    return &(FIRST (context).token[index]);
3121  else if (context->tokens_kind == TOKENS_KIND_INDIRECT
3122	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
3123    return FIRST (context).ptoken[index];
3124 else
3125   abort ();
3126}
3127
3128/* Look ahead in the input stream.  */
3129const cpp_token *
3130cpp_peek_token (cpp_reader *pfile, int index)
3131{
3132  cpp_context *context = pfile->context;
3133  const cpp_token *peektok;
3134  int count;
3135
3136  /* First, scan through any pending cpp_context objects.  */
3137  while (context->prev)
3138    {
3139      ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
3140
3141      if (index < (int) sz)
3142        return _cpp_token_from_context_at (context, index);
3143      index -= (int) sz;
3144      context = context->prev;
3145    }
3146
3147  /* We will have to read some new tokens after all (and do so
3148     without invalidating preceding tokens).  */
3149  count = index;
3150  pfile->keep_tokens++;
3151
3152  /* For peeked tokens temporarily disable line_change reporting,
3153     until the tokens are parsed for real.  */
3154  void (*line_change) (cpp_reader *, const cpp_token *, int)
3155    = pfile->cb.line_change;
3156  pfile->cb.line_change = NULL;
3157
3158  do
3159    {
3160      peektok = _cpp_lex_token (pfile);
3161      if (peektok->type == CPP_EOF)
3162	{
3163	  index--;
3164	  break;
3165	}
3166      else if (peektok->type == CPP_PRAGMA)
3167	{
3168	  /* Don't peek past a pragma.  */
3169	  if (peektok == &pfile->directive_result)
3170	    /* Save the pragma in the buffer.  */
3171	    *pfile->cur_token++ = *peektok;
3172	  index--;
3173	  break;
3174	}
3175    }
3176  while (index--);
3177
3178  _cpp_backup_tokens_direct (pfile, count - index);
3179  pfile->keep_tokens--;
3180  pfile->cb.line_change = line_change;
3181
3182  return peektok;
3183}
3184
3185/* Allocate a single token that is invalidated at the same time as the
3186   rest of the tokens on the line.  Has its line and col set to the
3187   same as the last lexed token, so that diagnostics appear in the
3188   right place.  */
3189cpp_token *
3190_cpp_temp_token (cpp_reader *pfile)
3191{
3192  cpp_token *old, *result;
3193  ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
3194  ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
3195
3196  old = pfile->cur_token - 1;
3197  /* Any pre-existing lookaheads must not be clobbered.  */
3198  if (la)
3199    {
3200      if (sz <= la)
3201        {
3202          tokenrun *next = next_tokenrun (pfile->cur_run);
3203
3204          if (sz < la)
3205            memmove (next->base + 1, next->base,
3206                     (la - sz) * sizeof (cpp_token));
3207
3208          next->base[0] = pfile->cur_run->limit[-1];
3209        }
3210
3211      if (sz > 1)
3212        memmove (pfile->cur_token + 1, pfile->cur_token,
3213                 MIN (la, sz - 1) * sizeof (cpp_token));
3214    }
3215
3216  if (!sz && pfile->cur_token == pfile->cur_run->limit)
3217    {
3218      pfile->cur_run = next_tokenrun (pfile->cur_run);
3219      pfile->cur_token = pfile->cur_run->base;
3220    }
3221
3222  result = pfile->cur_token++;
3223  result->src_loc = old->src_loc;
3224  return result;
3225}
3226
/* We're at the beginning of a logical line (so not in
  directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set.  See
  if we should enter deferred_pragma mode to tokenize the rest of the
  line as a module control-line.

  PFILE is the reader whose lexer state is examined and updated;
  RESULT is the already lexed first token of the line.

  One or two further tokens are lexed with _cpp_lex_direct to decide.
  If the line is accepted as a module control-line, in_deferred_pragma
  stays set, the leading keyword tokens are flagged NO_EXPAND and have
  their nodes replaced by the second entry of the matching n_modules
  pair, and state.directive_file_token is set.  Otherwise the
  save_comments, in_deferred_pragma and angled_headers state changed
  here is put back.  In both cases the peeked tokens are handed back
  with _cpp_backup_tokens_direct.  */

static void
cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result)
{
  unsigned backup = 0; /* Tokens we peeked.  */
  cpp_hashnode *node = result->val.node.node;
  cpp_token *peek = result;
  /* The token holding module/import/__import: RESULT itself, or the
     token after a leading 'export'.  */
  cpp_token *keyword = peek;
  cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules;
  /* Stays zero for 'module'; non-zero once an import keyword is seen.  */
  int header_count = 0;

  /* Make sure the incoming state is as we expect it.  This way we
     can restore it using constants.  */
  gcc_checking_assert (!pfile->state.in_deferred_pragma
		       && !pfile->state.skipping
		       && !pfile->state.parsing_args
		       && !pfile->state.angled_headers
		       && (pfile->state.save_comments
			   == !CPP_OPTION (pfile, discard_comments)));

  /* Enter directives mode sufficiently for peeking.  We don't have
     to actually set in_directive.  */
  pfile->state.in_deferred_pragma = true;

  /* These two fields are needed to process tokenization in deferred
     pragma mode.  They are not used outside deferred pragma mode or
     directives mode.  */
  pfile->state.pragma_allow_expansion = true;
  pfile->directive_line = result->src_loc;

  /* Saving comments is incompatible with directives mode.   */
  pfile->state.save_comments = 0;

  if (node == n_modules[spec_nodes::M_EXPORT][0])
    {
      /* 'export' must be followed by another NODE_MODULE name for the
	 line to remain a candidate.  */
      peek = _cpp_lex_direct (pfile);
      keyword = peek;
      backup++;
      if (keyword->type != CPP_NAME)
	goto not_module;
      node = keyword->val.node.node;
      if (!(node->flags & NODE_MODULE))
	goto not_module;
    }

  /* NOTE(review): header_count is later stored in
     state.directive_file_token; the meaning of the "+ 2" and "+ 16"
     components is defined by the code consuming that field, which is
     not visible here -- confirm there before changing them.  */
  if (node == n_modules[spec_nodes::M__IMPORT][0])
    /* __import  */
    header_count = backup + 2 + 16;
  else if (node == n_modules[spec_nodes::M_IMPORT][0])
    /* import  */
    header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed) ? 16 : 0);
  else if (node == n_modules[spec_nodes::M_MODULE][0])
    ; /* module  */
  else
    goto not_module;

  /* We've seen [export] {module|import|__import}.  Check the next token.  */
  if (header_count)
    /* After '{,__}import' a header name may appear.  */
    pfile->state.angled_headers = true;
  peek = _cpp_lex_direct (pfile);
  backup++;

  /* ... import followed by identifier, ':', '<' or
     header-name preprocessing tokens, or module
     followed by cpp-identifier, ':' or ';' preprocessing
     tokens.  C++ keywords are not yet relevant.  */
  if (peek->type == CPP_NAME
      || peek->type == CPP_COLON
      ||  (header_count
	   ? (peek->type == CPP_LESS
	      || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R')
	      || peek->type == CPP_HEADER_NAME)
	   : peek->type == CPP_SEMICOLON))
    {
      /* Expansion on the rest of the line is allowed unless the input
	 is already preprocessed; in that case bump prevent_expansion.  */
      pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed);
      if (!pfile->state.pragma_allow_expansion)
	pfile->state.prevent_expansion++;

      /* Only the 'module' form (header_count == 0) is rejected when
	 the current line map says we are in an included file.  */
      if (!header_count && linemap_included_from
	  (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table)))
	cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0,
			     "module control-line cannot be in included file");

      /* The first one or two tokens cannot be macro names.  */
      /* BACKUP is 1 or 2 here.  The loop runs IX from BACKUP - 1 down
	 to 0: a non-zero IX visits KEYWORD, IX == 0 visits RESULT
	 (the same token as KEYWORD when there was no 'export').  */
      for (int ix = backup; ix--;)
	{
	  cpp_token *tok = ix ? keyword : result;
	  cpp_hashnode *node = tok->val.node.node;

	  /* Don't attempt to expand the token.  */
	  tok->flags |= NO_EXPAND;
	  if (_cpp_defined_macro_p (node)
	      && _cpp_maybe_notify_macro_use (pfile, node, tok->src_loc)
	      && !cpp_fun_like_macro_p (node))
	    cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0,
				 "module control-line \"%s\" cannot be"
				 " an object-like macro",
				 NODE_NAME (node));
	}

      /* Map to underbar variants.  */
      keyword->val.node.node = n_modules[header_count
					 ? spec_nodes::M_IMPORT
					 : spec_nodes::M_MODULE][1];
      /* BACKUP is 2 only when the line started with 'export'; then
	 RESULT is that 'export' token and is remapped as well.  */
      if (backup != 1)
	result->val.node.node = n_modules[spec_nodes::M_EXPORT][1];

      /* Maybe tell the tokenizer we expect a header-name down the
	 road.  */
      pfile->state.directive_file_token = header_count;
    }
  else
    {
      /* The gotos above land here, inside the else arm, so they share
	 the state restoration with the "next token did not fit" case.  */
    not_module:
      /* Drop out of directive mode.  */
      /* We asserted save_comments had this value upon entry.  */
      pfile->state.save_comments
	= !CPP_OPTION (pfile, discard_comments);
      pfile->state.in_deferred_pragma = false;
      /* Do not let this remain on.  */
      pfile->state.angled_headers = false;
    }

  /* In either case we want to backup the peeked tokens.  */
  /* BACKUP is zero only when RESULT alone ruled the line out, in
     which case nothing was peeked.  */
  if (backup)
    {
      /* If we saw EOL, we should drop it, because this isn't a module
	 control-line after all.  */
      bool eol = peek->type == CPP_PRAGMA_EOL;
      /* When the EOL is the only peeked token there is nothing to put
	 back at all.  */
      if (!eol || backup > 1)
	{
	  /* Put the peeked tokens back.  */
	  _cpp_backup_tokens_direct (pfile, backup);
	  /* But if the last one was an EOL, forget it.  */
	  if (eol)
	    pfile->lookaheads--;
	}
    }
}
3371
3372/* Lex a token into RESULT (external interface).  Takes care of issues
3373   like directive handling, token lookahead, multiple include
3374   optimization and skipping.  */
3375const cpp_token *
3376_cpp_lex_token (cpp_reader *pfile)
3377{
3378  cpp_token *result;
3379
3380  for (;;)
3381    {
3382      if (pfile->cur_token == pfile->cur_run->limit)
3383	{
3384	  pfile->cur_run = next_tokenrun (pfile->cur_run);
3385	  pfile->cur_token = pfile->cur_run->base;
3386	}
3387      /* We assume that the current token is somewhere in the current
3388	 run.  */
3389      if (pfile->cur_token < pfile->cur_run->base
3390	  || pfile->cur_token >= pfile->cur_run->limit)
3391	abort ();
3392
3393      if (pfile->lookaheads)
3394	{
3395	  pfile->lookaheads--;
3396	  result = pfile->cur_token++;
3397	}
3398      else
3399	result = _cpp_lex_direct (pfile);
3400
3401      if (result->flags & BOL)
3402	{
3403	  /* Is this a directive.  If _cpp_handle_directive returns
3404	     false, it is an assembler #.  */
3405	  if (result->type == CPP_HASH
3406	      /* 6.10.3 p 11: Directives in a list of macro arguments
3407		 gives undefined behavior.  This implementation
3408		 handles the directive as normal.  */
3409	      && pfile->state.parsing_args != 1)
3410	    {
3411	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
3412		{
3413		  if (pfile->directive_result.type == CPP_PADDING)
3414		    continue;
3415		  result = &pfile->directive_result;
3416		}
3417	    }
3418	  else if (pfile->state.in_deferred_pragma)
3419	    result = &pfile->directive_result;
3420	  else if (result->type == CPP_NAME
3421		   && (result->val.node.node->flags & NODE_MODULE)
3422		   && !pfile->state.skipping
3423		   /* Unlike regular directives, we do not deal with
3424		      tokenizing module directives as macro arguments.
3425		      That's not permitted.  */
3426		   && !pfile->state.parsing_args)
3427	    {
3428	      /* P1857.  Before macro expansion, At start of logical
3429		 line ... */
3430	      /* We don't have to consider lookaheads at this point.  */
3431	      gcc_checking_assert (!pfile->lookaheads);
3432
3433	      cpp_maybe_module_directive (pfile, result);
3434	    }
3435
3436	  if (pfile->cb.line_change && !pfile->state.skipping)
3437	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
3438	}
3439
3440      /* We don't skip tokens in directives.  */
3441      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
3442	break;
3443
3444      /* Outside a directive, invalidate controlling macros.  At file
3445	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
3446	 get here and MI optimization works.  */
3447      pfile->mi_valid = false;
3448
3449      if (!pfile->state.skipping || result->type == CPP_EOF)
3450	break;
3451    }
3452
3453  return result;
3454}
3455
3456/* Returns true if a fresh line has been loaded.  */
3457bool
3458_cpp_get_fresh_line (cpp_reader *pfile)
3459{
3460  /* We can't get a new line until we leave the current directive.  */
3461  if (pfile->state.in_directive)
3462    return false;
3463
3464  for (;;)
3465    {
3466      cpp_buffer *buffer = pfile->buffer;
3467
3468      if (!buffer->need_line)
3469	return true;
3470
3471      if (buffer->next_line < buffer->rlimit)
3472	{
3473	  _cpp_clean_line (pfile);
3474	  return true;
3475	}
3476
3477      /* First, get out of parsing arguments state.  */
3478      if (pfile->state.parsing_args)
3479	return false;
3480
3481      /* End of buffer.  Non-empty files should end in a newline.  */
3482      if (buffer->buf != buffer->rlimit
3483	  && buffer->next_line > buffer->rlimit
3484	  && !buffer->from_stage3)
3485	{
3486	  /* Clip to buffer size.  */
3487	  buffer->next_line = buffer->rlimit;
3488	}
3489
3490      if (buffer->prev && !buffer->return_at_eof)
3491	_cpp_pop_buffer (pfile);
3492      else
3493	{
3494	  /* End of translation.  Do not pop the buffer yet. Increment
3495	     line number so that the EOF token is on a line of its own
3496	     (_cpp_lex_direct doesn't increment in that case, because
3497	     it's hard for it to distinguish this special case). */
3498	  CPP_INCREMENT_LINE (pfile, 0);
3499	  return false;
3500	}
3501    }
3502}
3503
/* Helper for two-character operators.  If the next input character
   is CHAR, consume it and set the token type to THEN_TYPE; otherwise
   set it to ELSE_TYPE.  Relies on `buffer' and `result' being in
   scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      if (*buffer->cur == CHAR)				\
	{						\
	  buffer->cur++;				\
	  result->type = THEN_TYPE;			\
	}						\
      else						\
	result->type = ELSE_TYPE;			\
    }							\
  while (0)
3512
3513/* Lex a token into pfile->cur_token, which is also incremented, to
3514   get diagnostics pointing to the correct location.
3515
3516   Does not handle issues such as token lookahead, multiple-include
3517   optimization, directives, skipping etc.  This function is only
3518   suitable for use by _cpp_lex_token, and in special cases like
3519   lex_expansion_token which doesn't care for any of these issues.
3520
3521   When meeting a newline, returns CPP_EOF if parsing a directive,
3522   otherwise returns to the start of the token buffer if permissible.
3523   Returns the location of the lexed token.  */
3524cpp_token *
3525_cpp_lex_direct (cpp_reader *pfile)
3526{
3527  cppchar_t c;
3528  cpp_buffer *buffer;
3529  const unsigned char *comment_start;
3530  bool fallthrough_comment = false;
3531  cpp_token *result = pfile->cur_token++;
3532
3533 fresh_line:
3534  result->flags = 0;
3535  buffer = pfile->buffer;
3536  if (buffer->need_line)
3537    {
3538      if (pfile->state.in_deferred_pragma)
3539	{
3540	  /* This can happen in cases like:
3541	     #define loop(x) whatever
3542	     #pragma omp loop
3543	     where when trying to expand loop we need to peek
3544	     next token after loop, but aren't still in_deferred_pragma
3545	     mode but are in in_directive mode, so buffer->need_line
3546	     is set, a CPP_EOF is peeked.  */
3547	  result->type = CPP_PRAGMA_EOL;
3548	  pfile->state.in_deferred_pragma = false;
3549	  if (!pfile->state.pragma_allow_expansion)
3550	    pfile->state.prevent_expansion--;
3551	  return result;
3552	}
3553      if (!_cpp_get_fresh_line (pfile))
3554	{
3555	  result->type = CPP_EOF;
3556	  /* Not a real EOF in a directive or arg parsing -- we refuse
3557  	     to advance to the next file now, and will once we're out
3558  	     of those modes.  */
3559	  if (!pfile->state.in_directive && !pfile->state.parsing_args)
3560	    {
3561	      /* Tell the compiler the line number of the EOF token.  */
3562	      result->src_loc = pfile->line_table->highest_line;
3563	      result->flags = BOL;
3564	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
3565	      _cpp_pop_buffer (pfile);
3566	    }
3567	  return result;
3568	}
3569      if (buffer != pfile->buffer)
3570	fallthrough_comment = false;
3571      if (!pfile->keep_tokens)
3572	{
3573	  pfile->cur_run = &pfile->base_run;
3574	  result = pfile->base_run.base;
3575	  pfile->cur_token = result + 1;
3576	}
3577      result->flags = BOL;
3578      if (pfile->state.parsing_args == 2)
3579	result->flags |= PREV_WHITE;
3580    }
3581  buffer = pfile->buffer;
3582 update_tokens_line:
3583  result->src_loc = pfile->line_table->highest_line;
3584
3585 skipped_white:
3586  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
3587      && !pfile->overlaid_buffer)
3588    {
3589      _cpp_process_line_notes (pfile, false);
3590      result->src_loc = pfile->line_table->highest_line;
3591    }
3592  c = *buffer->cur++;
3593
3594  if (pfile->forced_token_location)
3595    result->src_loc = pfile->forced_token_location;
3596  else
3597    result->src_loc = linemap_position_for_column (pfile->line_table,
3598					  CPP_BUF_COLUMN (buffer, buffer->cur));
3599
3600  switch (c)
3601    {
3602    case ' ': case '\t': case '\f': case '\v': case '\0':
3603      result->flags |= PREV_WHITE;
3604      skip_whitespace (pfile, c);
3605      goto skipped_white;
3606
3607    case '\n':
3608      /* Increment the line, unless this is the last line ...  */
3609      if (buffer->cur < buffer->rlimit
3610	  /* ... or this is a #include, (where _cpp_stack_file needs to
3611	     unwind by one line) ...  */
3612	  || (pfile->state.in_directive > 1
3613	      /* ... except traditional-cpp increments this elsewhere.  */
3614	      && !CPP_OPTION (pfile, traditional)))
3615	CPP_INCREMENT_LINE (pfile, 0);
3616      buffer->need_line = true;
3617      if (pfile->state.in_deferred_pragma)
3618	{
3619	  /* Produce the PRAGMA_EOL on this line.  File reading
3620	     ensures there is always a \n at end of the buffer, thus
3621	     in a deferred pragma we always see CPP_PRAGMA_EOL before
3622	     any CPP_EOF.  */
3623	  result->type = CPP_PRAGMA_EOL;
3624	  result->flags &= ~PREV_WHITE;
3625	  pfile->state.in_deferred_pragma = false;
3626	  if (!pfile->state.pragma_allow_expansion)
3627	    pfile->state.prevent_expansion--;
3628	  return result;
3629	}
3630      goto fresh_line;
3631
3632    case '0': case '1': case '2': case '3': case '4':
3633    case '5': case '6': case '7': case '8': case '9':
3634      {
3635	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
3636	result->type = CPP_NUMBER;
3637	lex_number (pfile, &result->val.str, &nst);
3638	warn_about_normalization (pfile, result, &nst);
3639	break;
3640      }
3641
3642    case 'L':
3643    case 'u':
3644    case 'U':
3645    case 'R':
3646      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
3647	 wide strings or raw strings.  */
3648      if (c == 'L' || CPP_OPTION (pfile, rliterals)
3649	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
3650	{
3651	  if ((*buffer->cur == '\'' && c != 'R')
3652	      || *buffer->cur == '"'
3653	      || (*buffer->cur == 'R'
3654		  && c != 'R'
3655		  && buffer->cur[1] == '"'
3656		  && CPP_OPTION (pfile, rliterals))
3657	      || (*buffer->cur == '8'
3658		  && c == 'u'
3659		  && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
3660				&& CPP_OPTION (pfile, utf8_char_literals)))
3661		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
3662			  && CPP_OPTION (pfile, rliterals)))))
3663	    {
3664	      lex_string (pfile, result, buffer->cur - 1);
3665	      break;
3666	    }
3667	}
3668      /* Fall through.  */
3669
3670    case '_':
3671    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
3672    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
3673    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
3674    case 's': case 't':           case 'v': case 'w': case 'x':
3675    case 'y': case 'z':
3676    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
3677    case 'G': case 'H': case 'I': case 'J': case 'K':
3678    case 'M': case 'N': case 'O': case 'P': case 'Q':
3679    case 'S': case 'T':           case 'V': case 'W': case 'X':
3680    case 'Y': case 'Z':
3681      result->type = CPP_NAME;
3682      {
3683	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
3684	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
3685						&nst,
3686						&result->val.node.spelling);
3687	warn_about_normalization (pfile, result, &nst);
3688      }
3689
3690      /* Convert named operators to their proper types.  */
3691      if (result->val.node.node->flags & NODE_OPERATOR)
3692	{
3693	  result->flags |= NAMED_OP;
3694	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
3695	}
3696
3697      /* Signal FALLTHROUGH comment followed by another token.  */
3698      if (fallthrough_comment)
3699	result->flags |= PREV_FALLTHROUGH;
3700      break;
3701
3702    case '\'':
3703    case '"':
3704      lex_string (pfile, result, buffer->cur - 1);
3705      break;
3706
3707    case '/':
3708      /* A potential block or line comment.  */
3709      comment_start = buffer->cur;
3710      c = *buffer->cur;
3711
3712      if (c == '*')
3713	{
3714	  if (_cpp_skip_block_comment (pfile))
3715	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
3716	}
3717      else if (c == '/' && ! CPP_OPTION (pfile, traditional))
3718	{
3719	  /* Don't warn for system headers.  */
3720	  if (_cpp_in_system_header (pfile))
3721	    ;
3722	  /* Warn about comments if pedantically GNUC89, and not
3723	     in system headers.  */
3724	  else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
3725		   && CPP_PEDANTIC (pfile)
3726		   && ! buffer->warned_cplusplus_comments)
3727	    {
3728	      if (cpp_error (pfile, CPP_DL_PEDWARN,
3729			     "C++ style comments are not allowed in ISO C90"))
3730		cpp_error (pfile, CPP_DL_NOTE,
3731			   "(this will be reported only once per input file)");
3732	      buffer->warned_cplusplus_comments = 1;
3733	    }
3734	  /* Or if specifically desired via -Wc90-c99-compat.  */
3735	  else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
3736		   && ! CPP_OPTION (pfile, cplusplus)
3737		   && ! buffer->warned_cplusplus_comments)
3738	    {
3739	      if (cpp_error (pfile, CPP_DL_WARNING,
3740			     "C++ style comments are incompatible with C90"))
3741		cpp_error (pfile, CPP_DL_NOTE,
3742			   "(this will be reported only once per input file)");
3743	      buffer->warned_cplusplus_comments = 1;
3744	    }
3745	  /* In C89/C94, C++ style comments are forbidden.  */
3746	  else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
3747		    || CPP_OPTION (pfile, lang) == CLK_STDC94))
3748	    {
3749	      /* But don't be confused about valid code such as
3750	         - // immediately followed by *,
3751		 - // in a preprocessing directive,
3752		 - // in an #if 0 block.  */
3753	      if (buffer->cur[1] == '*'
3754		  || pfile->state.in_directive
3755		  || pfile->state.skipping)
3756		{
3757		  result->type = CPP_DIV;
3758		  break;
3759		}
3760	      else if (! buffer->warned_cplusplus_comments)
3761		{
3762		  if (cpp_error (pfile, CPP_DL_ERROR,
3763				 "C++ style comments are not allowed in "
3764				 "ISO C90"))
3765		    cpp_error (pfile, CPP_DL_NOTE,
3766			       "(this will be reported only once per input "
3767			       "file)");
3768		  buffer->warned_cplusplus_comments = 1;
3769		}
3770	    }
3771	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
3772	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
3773	}
3774      else if (c == '=')
3775	{
3776	  buffer->cur++;
3777	  result->type = CPP_DIV_EQ;
3778	  break;
3779	}
3780      else
3781	{
3782	  result->type = CPP_DIV;
3783	  break;
3784	}
3785
3786      if (fallthrough_comment_p (pfile, comment_start))
3787	fallthrough_comment = true;
3788
3789      if (pfile->cb.comment)
3790	{
3791	  size_t len = pfile->buffer->cur - comment_start;
3792	  pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
3793			     len + 1);
3794	}
3795
3796      if (!pfile->state.save_comments)
3797	{
3798	  result->flags |= PREV_WHITE;
3799	  goto update_tokens_line;
3800	}
3801
3802      if (fallthrough_comment)
3803	result->flags |= PREV_FALLTHROUGH;
3804
3805      /* Save the comment as a token in its own right.  */
3806      save_comment (pfile, result, comment_start, c);
3807      break;
3808
3809    case '<':
3810      if (pfile->state.angled_headers)
3811	{
3812	  lex_string (pfile, result, buffer->cur - 1);
3813	  if (result->type != CPP_LESS)
3814	    break;
3815	}
3816
3817      result->type = CPP_LESS;
3818      if (*buffer->cur == '=')
3819	{
3820	  buffer->cur++, result->type = CPP_LESS_EQ;
3821	  if (*buffer->cur == '>'
3822	      && CPP_OPTION (pfile, cplusplus)
3823	      && CPP_OPTION (pfile, lang) >= CLK_GNUCXX20)
3824	    buffer->cur++, result->type = CPP_SPACESHIP;
3825	}
3826      else if (*buffer->cur == '<')
3827	{
3828	  buffer->cur++;
3829	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
3830	}
3831      else if (CPP_OPTION (pfile, digraphs))
3832	{
3833	  if (*buffer->cur == ':')
3834	    {
3835	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
3836		 three characters are <:: and the subsequent character
3837		 is neither : nor >, the < is treated as a preprocessor
3838		 token by itself".  */
3839	      if (CPP_OPTION (pfile, cplusplus)
3840		  && CPP_OPTION (pfile, lang) != CLK_CXX98
3841		  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
3842		  && buffer->cur[1] == ':'
3843		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
3844		break;
3845
3846	      buffer->cur++;
3847	      result->flags |= DIGRAPH;
3848	      result->type = CPP_OPEN_SQUARE;
3849	    }
3850	  else if (*buffer->cur == '%')
3851	    {
3852	      buffer->cur++;
3853	      result->flags |= DIGRAPH;
3854	      result->type = CPP_OPEN_BRACE;
3855	    }
3856	}
3857      break;
3858
3859    case '>':
3860      result->type = CPP_GREATER;
3861      if (*buffer->cur == '=')
3862	buffer->cur++, result->type = CPP_GREATER_EQ;
3863      else if (*buffer->cur == '>')
3864	{
3865	  buffer->cur++;
3866	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
3867	}
3868      break;
3869
3870    case '%':
3871      result->type = CPP_MOD;
3872      if (*buffer->cur == '=')
3873	buffer->cur++, result->type = CPP_MOD_EQ;
3874      else if (CPP_OPTION (pfile, digraphs))
3875	{
3876	  if (*buffer->cur == ':')
3877	    {
3878	      buffer->cur++;
3879	      result->flags |= DIGRAPH;
3880	      result->type = CPP_HASH;
3881	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
3882		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
3883	    }
3884	  else if (*buffer->cur == '>')
3885	    {
3886	      buffer->cur++;
3887	      result->flags |= DIGRAPH;
3888	      result->type = CPP_CLOSE_BRACE;
3889	    }
3890	}
3891      break;
3892
3893    case '.':
3894      result->type = CPP_DOT;
3895      if (ISDIGIT (*buffer->cur))
3896	{
3897	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
3898	  result->type = CPP_NUMBER;
3899	  lex_number (pfile, &result->val.str, &nst);
3900	  warn_about_normalization (pfile, result, &nst);
3901	}
3902      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
3903	buffer->cur += 2, result->type = CPP_ELLIPSIS;
3904      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
3905	buffer->cur++, result->type = CPP_DOT_STAR;
3906      break;
3907
3908    case '+':
3909      result->type = CPP_PLUS;
3910      if (*buffer->cur == '+')
3911	buffer->cur++, result->type = CPP_PLUS_PLUS;
3912      else if (*buffer->cur == '=')
3913	buffer->cur++, result->type = CPP_PLUS_EQ;
3914      break;
3915
3916    case '-':
3917      result->type = CPP_MINUS;
3918      if (*buffer->cur == '>')
3919	{
3920	  buffer->cur++;
3921	  result->type = CPP_DEREF;
3922	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
3923	    buffer->cur++, result->type = CPP_DEREF_STAR;
3924	}
3925      else if (*buffer->cur == '-')
3926	buffer->cur++, result->type = CPP_MINUS_MINUS;
3927      else if (*buffer->cur == '=')
3928	buffer->cur++, result->type = CPP_MINUS_EQ;
3929      break;
3930
3931    case '&':
3932      result->type = CPP_AND;
3933      if (*buffer->cur == '&')
3934	buffer->cur++, result->type = CPP_AND_AND;
3935      else if (*buffer->cur == '=')
3936	buffer->cur++, result->type = CPP_AND_EQ;
3937      break;
3938
3939    case '|':
3940      result->type = CPP_OR;
3941      if (*buffer->cur == '|')
3942	buffer->cur++, result->type = CPP_OR_OR;
3943      else if (*buffer->cur == '=')
3944	buffer->cur++, result->type = CPP_OR_EQ;
3945      break;
3946
3947    case ':':
3948      result->type = CPP_COLON;
3949      if (*buffer->cur == ':' && CPP_OPTION (pfile, scope))
3950	buffer->cur++, result->type = CPP_SCOPE;
3951      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
3952	{
3953	  buffer->cur++;
3954	  result->flags |= DIGRAPH;
3955	  result->type = CPP_CLOSE_SQUARE;
3956	}
3957      break;
3958
3959    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
3960    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
3961    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
3962    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
3963    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
3964
3965    case '?': result->type = CPP_QUERY; break;
3966    case '~': result->type = CPP_COMPL; break;
3967    case ',': result->type = CPP_COMMA; break;
3968    case '(': result->type = CPP_OPEN_PAREN; break;
3969    case ')': result->type = CPP_CLOSE_PAREN; break;
3970    case '[': result->type = CPP_OPEN_SQUARE; break;
3971    case ']': result->type = CPP_CLOSE_SQUARE; break;
3972    case '{': result->type = CPP_OPEN_BRACE; break;
3973    case '}': result->type = CPP_CLOSE_BRACE; break;
3974    case ';': result->type = CPP_SEMICOLON; break;
3975
3976      /* @ is a punctuator in Objective-C.  */
3977    case '@': result->type = CPP_ATSIGN; break;
3978
3979    default:
3980      {
3981	const uchar *base = --buffer->cur;
3982
3983	/* Check for an extended identifier ($ or UCN or UTF-8).  */
3984	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
3985	if (forms_identifier_p (pfile, true, &nst))
3986	  {
3987	    result->type = CPP_NAME;
3988	    result->val.node.node = lex_identifier (pfile, base, true, &nst,
3989						    &result->val.node.spelling);
3990	    warn_about_normalization (pfile, result, &nst);
3991	    break;
3992	  }
3993
3994	/* Otherwise this will form a CPP_OTHER token.  Parse valid UTF-8 as a
3995	   single token.  */
3996	buffer->cur++;
3997	if (c >= utf8_signifier)
3998	  {
3999	    const uchar *pstr = base;
4000	    cppchar_t s;
4001	    if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
4002	      buffer->cur = pstr;
4003	  }
4004	create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
4005	break;
4006      }
4007
4008    }
4009
4010  /* Potentially convert the location of the token to a range.  */
4011  if (result->src_loc >= RESERVED_LOCATION_COUNT
4012      && result->type != CPP_EOF)
4013    {
4014      /* Ensure that any line notes are processed, so that we have the
4015	 correct physical line/column for the end-point of the token even
4016	 when a logical line is split via one or more backslashes.  */
4017      if (buffer->cur >= buffer->notes[buffer->cur_note].pos
4018	  && !pfile->overlaid_buffer)
4019	_cpp_process_line_notes (pfile, false);
4020
4021      source_range tok_range;
4022      tok_range.m_start = result->src_loc;
4023      tok_range.m_finish
4024	= linemap_position_for_column (pfile->line_table,
4025				       CPP_BUF_COLUMN (buffer, buffer->cur));
4026
4027      result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
4028					       result->src_loc,
4029					       tok_range, NULL);
4030    }
4031
4032  return result;
4033}
4034
4035/* An upper bound on the number of bytes needed to spell TOKEN.
4036   Does not include preceding whitespace.  */
4037unsigned int
4038cpp_token_len (const cpp_token *token)
4039{
4040  unsigned int len;
4041
4042  switch (TOKEN_SPELL (token))
4043    {
4044    default:		len = 6;				break;
4045    case SPELL_LITERAL:	len = token->val.str.len;		break;
4046    case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
4047    }
4048
4049  return len;
4050}
4051
/* Decode the single UTF-8 sequence starting at NAME and store the
   equivalent \UXXXXXXXX escape (lower-case hex, no terminating NUL)
   in BUFFER.  Exactly 10 bytes are always written to BUFFER.  Returns
   the number of bytes of NAME that made up the sequence.  Aborts if a
   continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  int seq_len = 0;
  int k;
  unsigned lead;
  unsigned long code_point;

  /* The number of leading one bits in the first byte gives the length
     of the whole sequence.  */
  for (lead = *name; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* The remaining low bits of the first byte are payload.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each continuation byte contributes six more payload bits.  */
  for (k = 1; k < seq_len; k++)
    {
      name++;

      /* Ill-formed UTF-8.  */
      if ((*name & 0xC0) != 0x80)
        abort ();

      code_point = (code_point << 6) | (*name & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  /* Eight hex digits, most significant nibble first.  */
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (28 - 4 * k)) & 0xF];

  return seq_len;
}
4085
4086/* Given a token TYPE corresponding to a digraph, return a pointer to
4087   the spelling of the digraph.  */
4088static const unsigned char *
4089cpp_digraph2name (enum cpp_ttype type)
4090{
4091  return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
4092}
4093
4094/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
4095   The buffer must already contain the enough space to hold the
4096   token's spelling.  Returns a pointer to the character after the
4097   last character written.  */
4098unsigned char *
4099_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
4100{
4101  size_t i;
4102  const unsigned char *name = NODE_NAME (ident);
4103
4104  for (i = 0; i < NODE_LEN (ident); i++)
4105    if (name[i] & ~0x7F)
4106      {
4107	i += utf8_to_ucn (buffer, name + i) - 1;
4108	buffer += 10;
4109      }
4110    else
4111      *buffer++ = name[i];
4112
4113  return buffer;
4114}
4115
4116/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
4117   already contain the enough space to hold the token's spelling.
4118   Returns a pointer to the character after the last character written.
4119   FORSTRING is true if this is to be the spelling after translation
4120   phase 1 (with the original spelling of extended identifiers), false
4121   if extended identifiers should always be written using UCNs (there is
4122   no option for always writing them in the internal UTF-8 form).
4123   FIXME: Would be nice if we didn't need the PFILE argument.  */
4124unsigned char *
4125cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
4126		 unsigned char *buffer, bool forstring)
4127{
4128  switch (TOKEN_SPELL (token))
4129    {
4130    case SPELL_OPERATOR:
4131      {
4132	const unsigned char *spelling;
4133	unsigned char c;
4134
4135	if (token->flags & DIGRAPH)
4136	  spelling = cpp_digraph2name (token->type);
4137	else if (token->flags & NAMED_OP)
4138	  goto spell_ident;
4139	else
4140	  spelling = TOKEN_NAME (token);
4141
4142	while ((c = *spelling++) != '\0')
4143	  *buffer++ = c;
4144      }
4145      break;
4146
4147    spell_ident:
4148    case SPELL_IDENT:
4149      if (forstring)
4150	{
4151	  memcpy (buffer, NODE_NAME (token->val.node.spelling),
4152		  NODE_LEN (token->val.node.spelling));
4153	  buffer += NODE_LEN (token->val.node.spelling);
4154	}
4155      else
4156	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
4157      break;
4158
4159    case SPELL_LITERAL:
4160      memcpy (buffer, token->val.str.text, token->val.str.len);
4161      buffer += token->val.str.len;
4162      break;
4163
4164    case SPELL_NONE:
4165      cpp_error (pfile, CPP_DL_ICE,
4166		 "unspellable token %s", TOKEN_NAME (token));
4167      break;
4168    }
4169
4170  return buffer;
4171}
4172
4173/* Returns TOKEN spelt as a null-terminated string.  The string is
4174   freed when the reader is destroyed.  Useful for diagnostics.  */
4175unsigned char *
4176cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
4177{
4178  unsigned int len = cpp_token_len (token) + 1;
4179  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
4180
4181  end = cpp_spell_token (pfile, token, start, false);
4182  end[0] = '\0';
4183
4184  return start;
4185}
4186
4187/* Returns a pointer to a string which spells the token defined by
4188   TYPE and FLAGS.  Used by C front ends, which really should move to
4189   using cpp_token_as_text.  */
4190const char *
4191cpp_type2name (enum cpp_ttype type, unsigned char flags)
4192{
4193  if (flags & DIGRAPH)
4194    return (const char *) cpp_digraph2name (type);
4195  else if (flags & NAMED_OP)
4196    return cpp_named_operator2name (type);
4197
4198  return (const char *) token_spellings[type].name;
4199}
4200
4201/* Writes the spelling of token to FP, without any preceding space.
4202   Separated from cpp_spell_token for efficiency - to avoid stdio
4203   double-buffering.  */
4204void
4205cpp_output_token (const cpp_token *token, FILE *fp)
4206{
4207  switch (TOKEN_SPELL (token))
4208    {
4209    case SPELL_OPERATOR:
4210      {
4211	const unsigned char *spelling;
4212	int c;
4213
4214	if (token->flags & DIGRAPH)
4215	  spelling = cpp_digraph2name (token->type);
4216	else if (token->flags & NAMED_OP)
4217	  goto spell_ident;
4218	else
4219	  spelling = TOKEN_NAME (token);
4220
4221	c = *spelling;
4222	do
4223	  putc (c, fp);
4224	while ((c = *++spelling) != '\0');
4225      }
4226      break;
4227
4228    spell_ident:
4229    case SPELL_IDENT:
4230      {
4231	size_t i;
4232	const unsigned char * name = NODE_NAME (token->val.node.node);
4233
4234	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
4235	  if (name[i] & ~0x7F)
4236	    {
4237	      unsigned char buffer[10];
4238	      i += utf8_to_ucn (buffer, name + i) - 1;
4239	      fwrite (buffer, 1, 10, fp);
4240	    }
4241	  else
4242	    fputc (NODE_NAME (token->val.node.node)[i], fp);
4243      }
4244      break;
4245
4246    case SPELL_LITERAL:
4247      if (token->type == CPP_HEADER_NAME)
4248	fputc ('"', fp);
4249      fwrite (token->val.str.text, 1, token->val.str.len, fp);
4250      if (token->type == CPP_HEADER_NAME)
4251	fputc ('"', fp);
4252      break;
4253
4254    case SPELL_NONE:
4255      /* An error, most probably.  */
4256      break;
4257    }
4258}
4259
4260/* Compare two tokens.  */
4261int
4262_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
4263{
4264  if (a->type == b->type && a->flags == b->flags)
4265    switch (TOKEN_SPELL (a))
4266      {
4267      default:			/* Keep compiler happy.  */
4268      case SPELL_OPERATOR:
4269	/* token_no is used to track where multiple consecutive ##
4270	   tokens were originally located.  */
4271	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
4272      case SPELL_NONE:
4273	return (a->type != CPP_MACRO_ARG
4274		|| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
4275		    && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
4276      case SPELL_IDENT:
4277	return (a->val.node.node == b->val.node.node
4278		&& a->val.node.spelling == b->val.node.spelling);
4279      case SPELL_LITERAL:
4280	return (a->val.str.len == b->val.str.len
4281		&& !memcmp (a->val.str.text, b->val.str.text,
4282			    a->val.str.len));
4283      }
4284
4285  return 0;
4286}
4287
4288/* Returns nonzero if a space should be inserted to avoid an
4289   accidental token paste for output.  For simplicity, it is
4290   conservative, and occasionally advises a space where one is not
4291   needed, e.g. "." and ".2".  */
4292int
4293cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
4294		 const cpp_token *token2)
4295{
4296  enum cpp_ttype a = token1->type, b = token2->type;
4297  cppchar_t c;
4298
4299  if (token1->flags & NAMED_OP)
4300    a = CPP_NAME;
4301  if (token2->flags & NAMED_OP)
4302    b = CPP_NAME;
4303
4304  c = EOF;
4305  if (token2->flags & DIGRAPH)
4306    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
4307  else if (token_spellings[b].category == SPELL_OPERATOR)
4308    c = token_spellings[b].name[0];
4309
4310  /* Quickly get everything that can paste with an '='.  */
4311  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
4312    return 1;
4313
4314  switch (a)
4315    {
4316    case CPP_GREATER:	return c == '>';
4317    case CPP_LESS:	return c == '<' || c == '%' || c == ':';
4318    case CPP_PLUS:	return c == '+';
4319    case CPP_MINUS:	return c == '-' || c == '>';
4320    case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
4321    case CPP_MOD:	return c == ':' || c == '>';
4322    case CPP_AND:	return c == '&';
4323    case CPP_OR:	return c == '|';
4324    case CPP_COLON:	return c == ':' || c == '>';
4325    case CPP_DEREF:	return c == '*';
4326    case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
4327    case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
4328    case CPP_PRAGMA:
4329    case CPP_NAME:	return ((b == CPP_NUMBER
4330				 && name_p (pfile, &token2->val.str))
4331				|| b == CPP_NAME
4332				|| b == CPP_CHAR || b == CPP_STRING); /* L */
4333    case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
4334				|| b == CPP_CHAR
4335				|| c == '.' || c == '+' || c == '-');
4336				      /* UCNs */
4337    case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
4338				 && b == CPP_NAME)
4339				|| (CPP_OPTION (pfile, objc)
4340				    && token1->val.str.text[0] == '@'
4341				    && (b == CPP_NAME || b == CPP_STRING)));
4342    case CPP_LESS_EQ:	return c == '>';
4343    case CPP_STRING:
4344    case CPP_WSTRING:
4345    case CPP_UTF8STRING:
4346    case CPP_STRING16:
4347    case CPP_STRING32:	return (CPP_OPTION (pfile, user_literals)
4348				&& (b == CPP_NAME
4349				    || (TOKEN_SPELL (token2) == SPELL_LITERAL
4350					&& ISIDST (token2->val.str.text[0]))));
4351
4352    default:		break;
4353    }
4354
4355  return 0;
4356}
4357
4358/* Output all the remaining tokens on the current line, and a newline
4359   character, to FP.  Leading whitespace is removed.  If there are
4360   macros, special token padding is not performed.  */
4361void
4362cpp_output_line (cpp_reader *pfile, FILE *fp)
4363{
4364  const cpp_token *token;
4365
4366  token = cpp_get_token (pfile);
4367  while (token->type != CPP_EOF)
4368    {
4369      cpp_output_token (token, fp);
4370      token = cpp_get_token (pfile);
4371      if (token->flags & PREV_WHITE)
4372	putc (' ', fp);
4373    }
4374
4375  putc ('\n', fp);
4376}
4377
4378/* Return a string representation of all the remaining tokens on the
4379   current line.  The result is allocated using xmalloc and must be
4380   freed by the caller.  */
4381unsigned char *
4382cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
4383{
4384  const cpp_token *token;
4385  unsigned int out = dir_name ? ustrlen (dir_name) : 0;
4386  unsigned int alloced = 120 + out;
4387  unsigned char *result = (unsigned char *) xmalloc (alloced);
4388
4389  /* If DIR_NAME is empty, there are no initial contents.  */
4390  if (dir_name)
4391    {
4392      sprintf ((char *) result, "#%s ", dir_name);
4393      out += 2;
4394    }
4395
4396  token = cpp_get_token (pfile);
4397  while (token->type != CPP_EOF)
4398    {
4399      unsigned char *last;
4400      /* Include room for a possible space and the terminating nul.  */
4401      unsigned int len = cpp_token_len (token) + 2;
4402
4403      if (out + len > alloced)
4404	{
4405	  alloced *= 2;
4406	  if (out + len > alloced)
4407	    alloced = out + len;
4408	  result = (unsigned char *) xrealloc (result, alloced);
4409	}
4410
4411      last = cpp_spell_token (pfile, token, &result[out], 0);
4412      out = last - result;
4413
4414      token = cpp_get_token (pfile);
4415      if (token->flags & PREV_WHITE)
4416	result[out++] = ' ';
4417    }
4418
4419  result[out] = '\0';
4420  return result;
4421}
4422
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that any expression may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
4437
4438/* Create a new allocation buffer.  Place the control block at the end
4439   of the buffer, so that buffer overflows will cause immediate chaos.  */
4440static _cpp_buff *
4441new_buff (size_t len)
4442{
4443  _cpp_buff *result;
4444  unsigned char *base;
4445
4446  if (len < MIN_BUFF_SIZE)
4447    len = MIN_BUFF_SIZE;
4448  len = CPP_ALIGN (len);
4449
4450#ifdef ENABLE_VALGRIND_ANNOTATIONS
4451  /* Valgrind warns about uses of interior pointers, so put _cpp_buff
4452     struct first.  */
4453  size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
4454  base = XNEWVEC (unsigned char, len + slen);
4455  result = (_cpp_buff *) base;
4456  base += slen;
4457#else
4458  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
4459  result = (_cpp_buff *) (base + len);
4460#endif
4461  result->base = base;
4462  result->cur = base;
4463  result->limit = base + len;
4464  result->next = NULL;
4465  return result;
4466}
4467
4468/* Place a chain of unwanted allocation buffers on the free list.  */
4469void
4470_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
4471{
4472  _cpp_buff *end = buff;
4473
4474  while (end->next)
4475    end = end->next;
4476  end->next = pfile->free_buffs;
4477  pfile->free_buffs = buff;
4478}
4479
4480/* Return a free buffer of size at least MIN_SIZE.  */
4481_cpp_buff *
4482_cpp_get_buff (cpp_reader *pfile, size_t min_size)
4483{
4484  _cpp_buff *result, **p;
4485
4486  for (p = &pfile->free_buffs;; p = &(*p)->next)
4487    {
4488      size_t size;
4489
4490      if (*p == NULL)
4491	return new_buff (min_size);
4492      result = *p;
4493      size = result->limit - result->base;
4494      /* Return a buffer that's big enough, but don't waste one that's
4495         way too big.  */
4496      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
4497	break;
4498    }
4499
4500  *p = result->next;
4501  result->next = NULL;
4502  result->cur = result->base;
4503  return result;
4504}
4505
4506/* Creates a new buffer with enough space to hold the uncommitted
4507   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
4508   the excess bytes to the new buffer.  Chains the new buffer after
4509   BUFF, and returns the new buffer.  */
4510_cpp_buff *
4511_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
4512{
4513  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
4514  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
4515
4516  buff->next = new_buff;
4517  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
4518  return new_buff;
4519}
4520
4521/* Creates a new buffer with enough space to hold the uncommitted
4522   remaining bytes of the buffer pointed to by BUFF, and at least
4523   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
4524   Chains the new buffer before the buffer pointed to by BUFF, and
4525   updates the pointer to point to the new buffer.  */
4526void
4527_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
4528{
4529  _cpp_buff *new_buff, *old_buff = *pbuff;
4530  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
4531
4532  new_buff = _cpp_get_buff (pfile, size);
4533  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
4534  new_buff->next = old_buff;
4535  *pbuff = new_buff;
4536}
4537
4538/* Free a chain of buffers starting at BUFF.  */
4539void
4540_cpp_free_buff (_cpp_buff *buff)
4541{
4542  _cpp_buff *next;
4543
4544  for (; buff; buff = next)
4545    {
4546      next = buff->next;
4547#ifdef ENABLE_VALGRIND_ANNOTATIONS
4548      free (buff);
4549#else
4550      free (buff->base);
4551#endif
4552    }
4553}
4554
4555/* Allocate permanent, unaligned storage of length LEN.  */
4556unsigned char *
4557_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
4558{
4559  _cpp_buff *buff = pfile->u_buff;
4560  unsigned char *result = buff->cur;
4561
4562  if (len > (size_t) (buff->limit - result))
4563    {
4564      buff = _cpp_get_buff (pfile, len);
4565      buff->next = pfile->u_buff;
4566      pfile->u_buff = buff;
4567      result = buff->cur;
4568    }
4569
4570  buff->cur = result + len;
4571  return result;
4572}
4573
4574/* Allocate permanent, unaligned storage of length LEN from a_buff.
4575   That buffer is used for growing allocations when saving macro
4576   replacement lists in a #define, and when parsing an answer to an
4577   assertion in #assert, #unassert or #if (and therefore possibly
4578   whilst expanding macros).  It therefore must not be used by any
4579   code that they might call: specifically the lexer and the guts of
4580   the macro expander.
4581
4582   All existing other uses clearly fit this restriction: storing
4583   registered pragmas during initialization.  */
4584unsigned char *
4585_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
4586{
4587  _cpp_buff *buff = pfile->a_buff;
4588  unsigned char *result = buff->cur;
4589
4590  if (len > (size_t) (buff->limit - result))
4591    {
4592      buff = _cpp_get_buff (pfile, len);
4593      buff->next = pfile->a_buff;
4594      pfile->a_buff = buff;
4595      result = buff->cur;
4596    }
4597
4598  buff->cur = result + len;
4599  return result;
4600}
4601
4602/* Commit or allocate storage from a buffer.  */
4603
4604void *
4605_cpp_commit_buff (cpp_reader *pfile, size_t size)
4606{
4607  void *ptr = BUFF_FRONT (pfile->a_buff);
4608
4609  if (pfile->hash_table->alloc_subobject)
4610    {
4611      void *copy = pfile->hash_table->alloc_subobject (size);
4612      memcpy (copy, ptr, size);
4613      ptr = copy;
4614    }
4615  else
4616    BUFF_FRONT (pfile->a_buff) += size;
4617
4618  return ptr;
4619}
4620
4621/* Say which field of TOK is in use.  */
4622
4623enum cpp_token_fld_kind
4624cpp_token_val_index (const cpp_token *tok)
4625{
4626  switch (TOKEN_SPELL (tok))
4627    {
4628    case SPELL_IDENT:
4629      return CPP_TOKEN_FLD_NODE;
4630    case SPELL_LITERAL:
4631      return CPP_TOKEN_FLD_STR;
4632    case SPELL_OPERATOR:
4633      /* Operands which were originally spelled as ident keep around
4634         the node for the exact spelling.  */
4635      if (tok->flags & NAMED_OP)
4636	return CPP_TOKEN_FLD_NODE;
4637      else if (tok->type == CPP_PASTE)
4638	return CPP_TOKEN_FLD_TOKEN_NO;
4639      else
4640	return CPP_TOKEN_FLD_NONE;
4641    case SPELL_NONE:
4642      if (tok->type == CPP_MACRO_ARG)
4643	return CPP_TOKEN_FLD_ARG_NO;
4644      else if (tok->type == CPP_PADDING)
4645	return CPP_TOKEN_FLD_SOURCE;
4646      else if (tok->type == CPP_PRAGMA)
4647	return CPP_TOKEN_FLD_PRAGMA;
4648      /* fall through */
4649    default:
4650      return CPP_TOKEN_FLD_NONE;
4651    }
4652}
4653
4654/* All tokens lexed in R after calling this function will be forced to
4655   have their location_t to be P, until
4656   cpp_stop_forcing_token_locations is called for R.  */
4657
4658void
4659cpp_force_token_locations (cpp_reader *r, location_t loc)
4660{
4661  r->forced_token_location = loc;
4662}
4663
4664/* Go back to assigning locations naturally for lexed tokens.  */
4665
4666void
4667cpp_stop_forcing_token_locations (cpp_reader *r)
4668{
4669  r->forced_token_location = 0;
4670}
4671
/* PEEK points at a backslash.  If that backslash escapes an end of
   line ("\n", "\r" or "\r\n"), return a pointer to the first
   character after the line ending, and keep going while that
   character is itself a backslash escaping another end of line.  If
   the backslash does not escape an end of line, or skipping it would
   reach LIMIT, the position is not advanced.  */

static const unsigned char *
do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
{
  for (;;)
    {
      const unsigned char *after;

      if (__builtin_expect (peek[1] == '\n', true))
        after = peek + 2;
      else if (__builtin_expect (peek[1] == '\r', false))
        /* A '\r' may be followed by a '\n' that belongs to the same
           line ending.  */
        after = peek + (peek[2] == '\n' ? 3 : 2);
      else
        return peek;

      if (!__builtin_expect (after < limit, true))
        return peek;

      peek = after;
      /* The user might be perverse: go round again if another
         backslash follows.  */
      if (*peek != '\\')
        return peek;
    }
}
4701
/* Return the position of the next character to examine at PEEK: if
   PEEK points at a backslash, look past any escaped end of line via
   do_peek_backslash; otherwise PEEK itself.  */

static const unsigned char *
do_peek_next (const unsigned char *peek, const unsigned char *limit)
{
  return (__builtin_expect (*peek == '\\', false)
          ? do_peek_backslash (peek, limit)
          : peek);
}
4709
/* Step back one character from PEEK, never going before BOUND.
   Returns NULL if PEEK is already at BOUND.  Otherwise returns a
   pointer to the previous character, except that when that character
   is a line ending ("\n", "\r" or "\r\n") directly preceded by a
   backslash, the escaped line ending is skipped and the search
   continues backwards from the backslash (which again yields NULL if
   the backslash sits at BOUND).  */

static const unsigned char *
do_peek_prev (const unsigned char *peek, const unsigned char *bound)
{
  if (peek == bound)
    return NULL;

  unsigned char c = *--peek;
  /* Line endings are '\n' and '\r'.  The carriage return must be
     spelled '\r' here: comparing against the letter 'r' would treat
     an ordinary 'r' as a line ending and miss a real carriage
     return.  */
  if (__builtin_expect (c == '\n', false)
      || __builtin_expect (c == '\r', false))
    {
      if (peek == bound)
	return peek;

      /* IX is the offset from PEEK of the character before the line
	 ending; a "\r\n" pair counts as one line ending.  */
      int ix = -1;
      if (c == '\n' && peek[ix] == '\r')
	{
	  if (peek + ix == bound)
	    return peek;
	  ix--;
	}

      /* An escaped line ending is invisible: carry on from the
	 backslash.  */
      if (peek[ix] == '\\')
	return do_peek_prev (peek + ix, bound);

      return peek;
    }
  else
    return peek;
}
4738
/* PEEK[-1] is taken to be the first character of the identifier
   MATCH.  If the rest of MATCH follows at PEEK (looking through
   escaped line endings) and is not itself followed by an identifier
   character, return a pointer past it and any trailing spaces, tabs
   and escaped line endings.  Otherwise return NULL.  */

static const unsigned char *
do_peek_ident (const char *match, const unsigned char *peek,
               const unsigned char *limit)
{
  /* The first character of MATCH is skipped: it is assumed to have
     been matched already.  */
  while (*++match != '\0')
    {
      if (*peek != *match)
        {
          /* Maybe an escaped line ending is in the way.  */
          peek = do_peek_next (peek, limit);
          if (*peek != *match)
            return NULL;
        }
      peek++;
    }

  /* Must now not be looking at an identifier char.  */
  peek = do_peek_next (peek, limit);
  if (ISIDNUM (*peek))
    return NULL;

  /* Skip control-line whitespace.  */
  for (;;)
    {
      while (*peek == ' ' || *peek == '\t')
        peek++;

      if (!__builtin_expect (*peek == '\\', false))
        break;

      /* Keep skipping only if the backslash really did escape a line
         ending.  */
      peek = do_peek_backslash (peek, limit);
      if (*peek == '\\')
        break;
    }

  return peek;
}
4772
4773/* Are we looking at a module control line starting as PEEK - 1?  */
4774
4775static bool
4776do_peek_module (cpp_reader *pfile, unsigned char c,
4777		const unsigned char *peek, const unsigned char *limit)
4778{
4779  bool import = false;
4780
4781  if (__builtin_expect (c == 'e', false))
4782    {
4783      if (!((peek[0] == 'x' || peek[0] == '\\')
4784	    && (peek = do_peek_ident ("export", peek, limit))))
4785	return false;
4786
4787      /* export, peek for import or module.  No need to peek __import
4788	 here.  */
4789      if (peek[0] == 'i')
4790	{
4791	  if (!((peek[1] == 'm' || peek[1] == '\\')
4792		&& (peek = do_peek_ident ("import", peek + 1, limit))))
4793	    return false;
4794	  import = true;
4795	}
4796      else if (peek[0] == 'm')
4797	{
4798	  if (!((peek[1] == 'o' || peek[1] == '\\')
4799		&& (peek = do_peek_ident ("module", peek + 1, limit))))
4800	    return false;
4801	}
4802      else
4803	return false;
4804    }
4805  else if (__builtin_expect (c == 'i', false))
4806    {
4807      if (!((peek[0] == 'm' || peek[0] == '\\')
4808	    && (peek = do_peek_ident ("import", peek, limit))))
4809	return false;
4810      import = true;
4811    }
4812  else if (__builtin_expect (c == '_', false))
4813    {
4814      /* Needed for translated includes.   */
4815      if (!((peek[0] == '_' || peek[0] == '\\')
4816	    && (peek = do_peek_ident ("__import", peek, limit))))
4817	return false;
4818      import = true;
4819    }
4820  else if (__builtin_expect (c == 'm', false))
4821    {
4822      if (!((peek[0] == 'o' || peek[0] == '\\')
4823	    && (peek = do_peek_ident ("module", peek, limit))))
4824	return false;
4825    }
4826  else
4827    return false;
4828
4829  /* Peek the next character to see if it's good enough.  We'll be at
4830     the first non-whitespace char, including skipping an escaped
4831     newline.  */
4832  /* ... import followed by identifier, ':', '<' or header-name
4833     preprocessing tokens, or module followed by identifier, ':' or
4834     ';' preprocessing tokens.  */
4835  unsigned char p = *peek++;
4836
4837  /* A character literal is ... single quotes, ... optionally preceded
4838     by u8, u, U, or L */
4839  /* A string-literal is a ... double quotes, optionally prefixed by
4840     R, u8, u8R, u, uR, U, UR, L, or LR */
4841  if (p == 'u')
4842    {
4843      peek = do_peek_next (peek, limit);
4844      if (*peek == '8')
4845	{
4846	  peek++;
4847	  goto peek_u8;
4848	}
4849      goto peek_u;
4850    }
4851  else if (p == 'U' || p == 'L')
4852    {
4853    peek_u8:
4854      peek = do_peek_next (peek, limit);
4855    peek_u:
4856      if (*peek == '\"' || *peek == '\'')
4857	return false;
4858
4859      if (*peek == 'R')
4860	goto peek_R;
4861      /* Identifier. Ok.  */
4862    }
4863  else if (p == 'R')
4864    {
4865    peek_R:
4866      if (CPP_OPTION (pfile, rliterals))
4867	{
4868	  peek = do_peek_next (peek, limit);
4869	  if (*peek == '\"')
4870	    return false;
4871	}
4872      /* Identifier. Ok.  */
4873    }
4874  else if ('Z' - 'A' == 25
4875	   ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_')
4876	   : ISIDST (p))
4877    {
4878      /* Identifier.  Ok. */
4879    }
4880  else if (p == '<')
4881    {
4882      /* Maybe angle header, ok for import.  Reject
4883	 '<=', '<<' digraph:'<:'.  */
4884      if (!import)
4885	return false;
4886      peek = do_peek_next (peek, limit);
4887      if (*peek == '=' || *peek == '<'
4888	  || (*peek == ':' && CPP_OPTION (pfile, digraphs)))
4889	return false;
4890    }
4891  else if (p == ';')
4892    {
4893      /* SEMICOLON, ok for module.  */
4894      if (import)
4895	return false;
4896    }
4897  else if (p == '"')
4898    {
4899      /* STRING, ok for import.  */
4900      if (!import)
4901	return false;
4902    }
4903  else if (p == ':')
4904    {
4905      /* Maybe COLON, ok.  Reject '::', digraph:':>'.  */
4906      peek = do_peek_next (peek, limit);
4907      if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs)))
4908	return false;
4909    }
4910  else
4911    /* FIXME: Detect a unicode character, excluding those not
4912       permitted as the initial character. [lex.name]/1.  I presume
4913       we need to check the \[uU] spellings, and directly using
4914       Unicode in say UTF8 form?  Or perhaps we do the phase-1
4915       conversion of UTF8 to universal-character-names?  */
4916    return false;
4917
4918  return true;
4919}
4920
4921/* Directives-only scanning.  Somewhat more relaxed than correct
4922   parsing -- some ill-formed programs will not be rejected.

   Scan PFILE's current buffer, and each buffer that becomes current
   once it has been popped, until no buffer remains.  Ordinary text is
   not tokenized; the scan tracks only enough lexical state (line
   ends, comments, character and string literals including raw
   strings, numbers containing digit separators) to recognize a '#'
   that begins a line and, when the module_directives option is set,
   the start of a module control line.

   CB is called with PFILE, a task code and DATA, followed by
   task-specific arguments:

     CPP_DO_print     the number of line ends counted, the start of a
		      run of text that has not been reported yet, and
		      the length of that run.  Not issued while
		      skipping.
     CPP_DO_token     a token of a module control line and its
		      spelling location, for each token up to and
		      including the CPP_PRAGMA_EOL.
     CPP_DO_location  line_table->highest_line, after a directive or a
		      module control line when more of the buffer
		      remains.  */
4923
4924void
4925cpp_directive_only_process (cpp_reader *pfile,
4926			    void *data,
4927			    void (*cb) (cpp_reader *, CPP_DO_task, void *, ...))
4928{
4929  bool module_p = CPP_OPTION (pfile, module_directives);
4930
4931  do
4932    {
      /* We come back here after every directive or module control
	 line.  pfile->buffer is re-read each time because handling
	 the line may have left a different buffer current.  */
4933    restart:
4934      /* Buffer initialization, but no line cleaning. */
4935      cpp_buffer *buffer = pfile->buffer;
4936      buffer->cur_note = buffer->notes_used = 0;
4937      buffer->cur = buffer->line_base = buffer->next_line;
4938      buffer->need_line = false;
4939      /* Files always end in a newline or carriage return.  We rely on this for
4940	 character peeking safety.  */
4941      gcc_assert (buffer->rlimit[0] == '\n' || buffer->rlimit[0] == '\r');
4942
      /* BASE is the start of the text not yet handed to CB as
	 CPP_DO_print, LINE_COUNT the number of line ends seen since
	 BASE, and LINE_START the start of the current physical
	 line.  */
4943      const unsigned char *base = buffer->cur;
4944      unsigned line_count = 0;
4945      const unsigned char *line_start = base;
4946
      /* BOL stays true while only whitespace and comments have been
	 seen on the current line.  RAW is set when looking backwards
	 from a '"' finds a raw string prefix.  */
4947      bool bol = true;
4948      bool raw = false;
4949
      /* Low water mark: backwards peeking never goes below this.  It
	 is moved past each comment and to the end of each literal.  */
4950      const unsigned char *lwm = base;
4951      for (const unsigned char *pos = base, *limit = buffer->rlimit;
4952	   pos < limit;)
4953	{
4954	  unsigned char c = *pos++;
4955	  /* This matches the switch in _cpp_lex_direct.  */
4956	  switch (c)
4957	    {
4958	    case ' ': case '\t': case '\f': case '\v':
4959	      /* Whitespace, do nothing.  */
4960	      break;
4961
4962	    case '\r': /* MAC line ending, or Windows \r\n  */
4963	      if (*pos == '\n')
4964		pos++;
4965	      /* FALLTHROUGH */
4966
4967	    case '\n':
4968	      bol = true;
4969
	      /* Spliced lines jump here directly, so that they are
		 counted without touching BOL.  */
4970	    next_line:
4971	      CPP_INCREMENT_LINE (pfile, 0);
4972	      line_count++;
4973	      line_start = pos;
4974	      break;
4975
4976	    case '\\':
4977	      /* <backslash><newline> is removed, and doesn't undo any
4978		 preceding escape or whatnot.  */
4979	      if (*pos == '\n')
4980		{
4981		  pos++;
4982		  goto next_line;
4983		}
4984	      else if (*pos == '\r')
4985		{
4986		  if (pos[1] == '\n')
4987		    pos++;
4988		  pos++;
4989		  goto next_line;
4990		}
4991	      goto dflt;
4992
4993	    case '#':
4994	      if (bol)
4995		{
4996		  /* Line directive.  */
		  /* Report the text that precedes the '#' first.  */
4997		  if (pos - 1 > base && !pfile->state.skipping)
4998		    cb (pfile, CPP_DO_print, data,
4999			line_count, base, pos - 1 - base);
5000
5001		  /* Prep things for directive handling. */
5002		  buffer->next_line = pos;
5003		  buffer->need_line = true;
5004		  bool ok = _cpp_get_fresh_line (pfile);
5005		  gcc_checking_assert (ok);
5006
5007		  /* Ensure proper column numbering for generated
5008		     error messages. */
5009		  buffer->line_base -= pos - line_start;
5010
		  /* The second argument is true when something
		     precedes the '#' on its line.  */
5011		  _cpp_handle_directive (pfile, line_start + 1 != pos);
5012
5013		  /* Sanitize the line settings.  Duplicate #include's can
5014		     mess things up. */
5015		  // FIXME: Necessary?
5016		  pfile->line_table->highest_location
5017		    = pfile->line_table->highest_line;
5018
5019		  if (!pfile->state.skipping
5020		      && pfile->buffer->next_line < pfile->buffer->rlimit)
5021		    cb (pfile, CPP_DO_location, data,
5022			pfile->line_table->highest_line);
5023
5024		  goto restart;
5025		}
5026	      goto dflt;
5027
5028	    case '/':
5029	      {
5030		const unsigned char *peek = do_peek_next (pos, limit);
5031		if (!(*peek == '/' || *peek == '*'))
5032		  goto dflt;
5033
5034		/* Line or block comment  */
		/* STAR records that the previous character was a '*'
		   that may begin the end of a block comment; ESC
		   that it was a backslash, so that a following line
		   end is a splice.  */
5035		bool is_block = *peek == '*';
5036		bool star = false;
5037		bool esc = false;
5038		location_t sloc
5039		  = linemap_position_for_column (pfile->line_table,
5040						 pos - line_start);
5041
5042		while (pos < limit)
5043		  {
5044		    char c = *pos++;
5045		    switch (c)
5046		      {
5047		      case '\\':
5048			esc = true;
5049			break;
5050
5051		      case '\r':
5052			if (*pos == '\n')
5053			  pos++;
5054			/* FALLTHROUGH  */
5055
5056		      case '\n':
5057			{
5058			  CPP_INCREMENT_LINE (pfile, 0);
5059			  line_count++;
5060			  line_start = pos;
			  /* An unescaped line end finishes a line
			     comment.  */
5061			  if (!esc && !is_block)
5062			    {
5063			      bol = true;
5064			      goto done_comment;
5065			    }
5066			}
			/* A spliced line end keeps a pending star, so
			   the closing pair may straddle the
			   splice.  */
5067			if (!esc)
5068			  star = false;
5069			esc = false;
5070			break;
5071
5072		      case '*':
			/* PEEK is the '*' that opened a block comment.
			   It must not also count as the first half of
			   the closing pair, or the three characters
			   slash, star, slash would be taken as a
			   complete comment.  POS has already stepped
			   past the character just read.  */
5073			if (pos - 1 > peek)
5074			  star = is_block;
5075			esc = false;
5076			break;
5077
5078		      case '/':
5079			if (star)
5080			  goto done_comment;
5081			/* FALLTHROUGH  */
5082
5083		      default:
5084			star = false;
5085			esc = false;
5086			break;
5087		      }
5088		  }
		/* The loop above only falls out at LIMIT, so this
		   reports a block comment that was never closed.  */
5089		if (pos < limit || is_block)
5090		  cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
5091				       "unterminated comment");
5092	      done_comment:
5093		lwm = pos;
5094		break;
5095	      }
5096
5097	    case '\'':
5098	      if (!CPP_OPTION (pfile, digit_separators))
5099		goto delimited_string;
5100
5101	      /* Possibly a number punctuator.  */
5102	      if (!ISIDNUM (*do_peek_next (pos, limit)))
5103		goto delimited_string;
5104
5105	      goto quote_peek;
5106
5107	    case '\"':
5108	      if (!CPP_OPTION (pfile, rliterals))
5109		goto delimited_string;
5110
5111	    quote_peek:
5112	      {
5113		/* For ' see if it's a number punctuator
5114		   \.?<digit>(<digit>|<identifier-nondigit>
5115		   |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
5116		/* For " see if it's a raw string
5117		   {U,L,u,u8}R.  This includes CPP_NUMBER detection,
5118		   because that could be 0e+R.  */
5119		const unsigned char *peek = pos - 1;
5120		bool quote_first = c == '"';
5121		bool quote_eight = false;
5122		bool maybe_number_start = false;
5123		bool want_number = false;
5124
		/* Walk backwards from the quote, one character per
		   iteration, until do_peek_prev yields NULL or the
		   question is settled.  */
5125		while ((peek = do_peek_prev (peek, lwm)))
5126		  {
5127		    unsigned char p = *peek;
5128		    if (quote_first)
5129		      {
			/* First pass: the character before the '"'
			   must be 'R'.  Second pass: look at what
			   precedes that 'R'.  */
5130			if (!raw)
5131			  {
5132			    if (p != 'R')
5133			      break;
5134			    raw = true;
5135			    continue;
5136			  }
5137
5138			quote_first = false;
5139			if (p == 'L' || p == 'U' || p == 'u')
5140			  ;
5141			else if (p == '8')
5142			  quote_eight = true;
5143			else
5144			  goto second_raw;
5145		      }
5146		    else if (quote_eight)
5147		      {
			/* An '8' before the 'R' is only a prefix as
			   part of "u8".  */
5148			if (p != 'u')
5149			  {
5150			    raw = false;
5151			    break;
5152			  }
5153			quote_eight = false;
5154		      }
5155		    else if (c == '"')
5156		      {
5157		      second_raw:;
			/* More identifier characters in front mean the
			   'R' belongs to an identifier, unless we may
			   be inside a number.  */
5158			if (!want_number && ISIDNUM (p))
5159			  {
5160			    raw = false;
5161			    break;
5162			  }
5163		      }
5164
5165		    if (ISDIGIT (p))
5166		      maybe_number_start = true;
5167		    else if (p == '.')
5168		      want_number = true;
5169		    else if (ISIDNUM (p))
5170		      maybe_number_start = false;
5171		    else if (p == '+' || p == '-')
5172		      {
			/* A sign is only part of a number directly
			   after an exponent letter.  */
5173			if (const unsigned char *peek_prev
5174			    = do_peek_prev (peek, lwm))
5175			  {
5176			    p = *peek_prev;
5177			    if (p == 'e' || p == 'E'
5178				|| p == 'p' || p == 'P')
5179			      {
5180				want_number = true;
5181				maybe_number_start = false;
5182			      }
5183			    else
5184			      break;
5185			  }
5186			else
5187			  break;
5188		      }
5189		    else if (p == '\'' || p == '\"')
5190		      {
5191			/* If this is lwm, this must be the end of a
5192			   previous string.  So this is a trailing
5193			   literal type, (a) if those are allowed,
5194			     and (b) maybe_start is false.  Otherwise
5195			     this must be a CPP_NUMBER because we've
5196			     met another ', and we'd have checked that
5197			     in its own right.  */
5198			if (peek == lwm && CPP_OPTION (pfile, uliterals))
5199			  {
5200			    if  (!maybe_number_start && !want_number)
5201			      /* Must be a literal type.  */
5202			      raw = false;
5203			  }
5204			else if (p == '\''
5205				 && CPP_OPTION (pfile, digit_separators))
5206			  maybe_number_start = true;
5207			break;
5208		      }
5209		    else if (c == '\'')
5210		      break;
5211		    else if (!quote_first && !quote_eight)
5212		      break;
5213		  }
5214
5215		if (maybe_number_start)
5216		  {
5217		    if (c == '\'')
5218		      /* A CPP NUMBER.  */
5219		      goto dflt;
5220		    raw = false;
5221		  }
5222
5223		goto delimited_string;
5224	      }
5225
5226	    delimited_string:
5227	      {
5228		/* (Possibly raw) string or char literal.  */
		/* END is the quote that closes the literal.  ESC
		   counts the backslashes seen since the last ordinary
		   character; the next character is escaped when the
		   count is odd.  */
5229		unsigned char end = c;
5230		int delim_len = -1;
5231		const unsigned char *delim = NULL;
5232		location_t sloc = linemap_position_for_column (pfile->line_table,
5233							       pos - line_start);
5234		int esc = 0;
5235
5236		if (raw)
5237		  {
5238		    /* There can be no line breaks in the delimiter.  */
		    /* On a bad delimiter, diagnose it, rewind to just
		       after the quote and scan the literal as an
		       ordinary string.  */
5239		    delim = pos;
5240		    for (delim_len = 0; (c = *pos++) != '('; delim_len++)
5241		      {
5242			if (delim_len == 16)
5243			  {
5244			    cpp_error_with_line (pfile, CPP_DL_ERROR,
5245						 sloc, 0,
5246						 "raw string delimiter"
5247						 " longer than %d"
5248						 " characters",
5249						 delim_len);
5250			    raw = false;
5251			    pos = delim;
5252			    break;
5253			  }
5254			if (strchr (") \\\t\v\f\n", c))
5255			  {
5256			    cpp_error_with_line (pfile, CPP_DL_ERROR,
5257						 sloc, 0,
5258						 "invalid character '%c'"
5259						 " in raw string"
5260						 " delimiter", c);
5261			    raw = false;
5262			    pos = delim;
5263			    break;
5264			  }
5265			if (pos >= limit)
5266			  goto bad_string;
5267		      }
5268		  }
5269
5270		while (pos < limit)
5271		  {
5272		    char c = *pos++;
5273		    switch (c)
5274		      {
5275		      case '\\':
5276			if (!raw)
5277			  esc++;
5278			break;
5279
5280		      case '\r':
5281			if (*pos == '\n')
5282			  pos++;
5283			/* FALLTHROUGH  */
5284
5285		      case '\n':
5286			{
5287			  CPP_INCREMENT_LINE (pfile, 0);
5288			  line_count++;
5289			  line_start = pos;
5290			}
			/* A line end after a backslash is a splice: it
			   consumes that one backslash and leaves any
			   earlier ones pending.  */
5291			if (esc)
5292			  esc--;
5293			break;
5294
5295		      case ')':
			/* A raw string ends at ')' followed by the
			   delimiter and the closing quote.  */
5296			if (raw
5297			    && pos + delim_len + 1 < limit
5298			    && pos[delim_len] == end
5299			    && !memcmp (delim, pos, delim_len))
5300			  {
5301			    pos += delim_len + 1;
5302			    raw = false;
5303			    goto done_string;
5304			  }
			/* Otherwise ')' is an ordinary character and
			   must clear ESC like any other.  Leaving ESC
			   set would carry the backslash of "\)" over to
			   the next character and hide a closing quote.
			   C is never END here, so the default case
			   cannot finish the literal.  */
5305			/* FALLTHROUGH  */
5306
5307		      default:
5308			if (!raw && !(esc & 1) && c == end)
5309			  goto done_string;
5310			esc = 0;
5311			break;
5312		      }
5313		  }
5314	      bad_string:
5315		cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
5316				     "unterminated literal");
5317
5318	      done_string:
5319		raw = false;
5320		lwm = pos - 1;
5321	      }
5322	      goto dflt;
5323
	    /* The characters that do_peek_module is asked about.  */
5324	    case '_':
5325	    case 'e':
5326	    case 'i':
5327	    case 'm':
5328	      if (bol && module_p && !pfile->state.skipping
5329		  && do_peek_module (pfile, c, pos, limit))
5330		{
5331		  /* We've seen the start of a module control line.
5332		     Start up the tokenizer.  */
5333		  pos--; /* Backup over the first character.  */
5334
5335		  /* Backup over whitespace to start of line.  */
5336		  while (pos > line_start
5337			 && (pos[-1] == ' ' || pos[-1] == '\t'))
5338		    pos--;
5339
5340		  if (pos > base)
5341		    cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
5342
5343		  /* Prep things for directive handling. */
5344		  buffer->next_line = pos;
5345		  buffer->need_line = true;
5346
5347		  /* Now get tokens until the PRAGMA_EOL.  */
5348		  do
5349		    {
5350		      location_t spelling;
5351		      const cpp_token *tok
5352			= cpp_get_token_with_location (pfile, &spelling);
5353
5354		      gcc_assert (pfile->state.in_deferred_pragma
5355				  || tok->type == CPP_PRAGMA_EOL);
5356		      cb (pfile, CPP_DO_token, data, tok, spelling);
5357		    }
5358		  while (pfile->state.in_deferred_pragma);
5359
5360		  if (pfile->buffer->next_line < pfile->buffer->rlimit)
5361		    cb (pfile, CPP_DO_location, data,
5362			pfile->line_table->highest_line);
5363
5364		  pfile->mi_valid = false;
5365		  goto restart;
5366		}
5367	      goto dflt;
5368
5369	    default:
5370	    dflt:
	      /* Anything else is ordinary text: the line can no longer
		 start a directive.  */
5371	      bol = false;
5372	      pfile->mi_valid = false;
5373	      break;
5374	    }
5375	}
5376
      /* End of buffer: report whatever text is still pending.  */
5377      if (buffer->rlimit > base && !pfile->state.skipping)
5378	{
5379	  const unsigned char *limit = buffer->rlimit;
5380	  /* If the file was not newline terminated, add rlimit, which is
5381	     guaranteed to point to a newline, to the end of our range.  */
5382	  if (limit[-1] != '\n')
5383	    {
5384	      limit++;
5385	      CPP_INCREMENT_LINE (pfile, 0);
5386	      line_count++;
5387	    }
5388	  cb (pfile, CPP_DO_print, data, line_count, base, limit - base);
5389	}
5390
5391      _cpp_pop_buffer (pfile);
5392    }
5393  while (pfile->buffer);
5394}
5395