1169695Skan/* CPP Library - lexical analysis. 2169695Skan Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 3169695Skan Contributed by Per Bothner, 1994-95. 4169695Skan Based on CCCP program by Paul Rubin, June 1986 5169695Skan Adapted to ANSI C, Richard Stallman, Jan 1987 6169695Skan Broken out to separate file, Zack Weinberg, Mar 2000 7169695Skan 8169695SkanThis program is free software; you can redistribute it and/or modify it 9169695Skanunder the terms of the GNU General Public License as published by the 10169695SkanFree Software Foundation; either version 2, or (at your option) any 11169695Skanlater version. 12169695Skan 13169695SkanThis program is distributed in the hope that it will be useful, 14169695Skanbut WITHOUT ANY WARRANTY; without even the implied warranty of 15169695SkanMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16169695SkanGNU General Public License for more details. 17169695Skan 18169695SkanYou should have received a copy of the GNU General Public License 19169695Skanalong with this program; if not, write to the Free Software 20169695SkanFoundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 21169695Skan 22169695Skan#include "config.h" 23169695Skan#include "system.h" 24169695Skan#include "cpplib.h" 25169695Skan#include "internal.h" 26169695Skan 27169695Skanenum spell_type 28169695Skan{ 29169695Skan SPELL_OPERATOR = 0, 30169695Skan SPELL_IDENT, 31169695Skan SPELL_LITERAL, 32169695Skan SPELL_NONE 33169695Skan}; 34169695Skan 35169695Skanstruct token_spelling 36169695Skan{ 37169695Skan enum spell_type category; 38169695Skan const unsigned char *name; 39169695Skan}; 40169695Skan 41169695Skanstatic const unsigned char *const digraph_spellings[] = 42169695Skan{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" }; 43169695Skan 44169695Skan#define OP(e, s) { SPELL_OPERATOR, U s }, 45169695Skan#define TK(e, s) { SPELL_ ## s, U #e }, 46169695Skanstatic const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; 47169695Skan#undef OP 48169695Skan#undef TK 49169695Skan 50169695Skan#define TOKEN_SPELL(token) (token_spellings[(token)->type].category) 51169695Skan#define TOKEN_NAME(token) (token_spellings[(token)->type].name) 52169695Skan 53169695Skanstatic void add_line_note (cpp_buffer *, const uchar *, unsigned int); 54169695Skanstatic int skip_line_comment (cpp_reader *); 55169695Skanstatic void skip_whitespace (cpp_reader *, cppchar_t); 56169695Skanstatic void lex_string (cpp_reader *, cpp_token *, const uchar *); 57169695Skanstatic void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t); 58169695Skanstatic void create_literal (cpp_reader *, cpp_token *, const uchar *, 59169695Skan unsigned int, enum cpp_ttype); 60169695Skanstatic bool warn_in_comment (cpp_reader *, _cpp_line_note *); 61169695Skanstatic int name_p (cpp_reader *, const cpp_string *); 62169695Skanstatic tokenrun *next_tokenrun (tokenrun *); 63169695Skan 64169695Skanstatic _cpp_buff *new_buff (size_t); 65169695Skan 66169695Skan 67169695Skan/* Utility routine: 68169695Skan 69169695Skan Compares, the token TOKEN to the NUL-terminated string STRING. 70169695Skan TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */ 71169695Skanint 72169695Skancpp_ideq (const cpp_token *token, const char *string) 73169695Skan{ 74169695Skan if (token->type != CPP_NAME) 75169695Skan return 0; 76169695Skan 77169695Skan return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string); 78169695Skan} 79169695Skan 80169695Skan/* Record a note TYPE at byte POS into the current cleaned logical 81169695Skan line. */ 82169695Skanstatic void 83169695Skanadd_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type) 84169695Skan{ 85169695Skan if (buffer->notes_used == buffer->notes_cap) 86169695Skan { 87169695Skan buffer->notes_cap = buffer->notes_cap * 2 + 200; 88169695Skan buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes, 89169695Skan buffer->notes_cap); 90169695Skan } 91169695Skan 92169695Skan buffer->notes[buffer->notes_used].pos = pos; 93169695Skan buffer->notes[buffer->notes_used].type = type; 94169695Skan buffer->notes_used++; 95169695Skan} 96169695Skan 97169695Skan/* Returns with a logical line that contains no escaped newlines or 98169695Skan trigraphs. This is a time-critical inner loop. */ 99169695Skanvoid 100169695Skan_cpp_clean_line (cpp_reader *pfile) 101169695Skan{ 102169695Skan cpp_buffer *buffer; 103169695Skan const uchar *s; 104169695Skan uchar c, *d, *p; 105169695Skan 106169695Skan buffer = pfile->buffer; 107169695Skan buffer->cur_note = buffer->notes_used = 0; 108169695Skan buffer->cur = buffer->line_base = buffer->next_line; 109169695Skan buffer->need_line = false; 110169695Skan s = buffer->next_line - 1; 111169695Skan 112169695Skan if (!buffer->from_stage3) 113169695Skan { 114259268Spfg const uchar *pbackslash = NULL; 115259268Spfg 116169695Skan /* Short circuit for the common case of an un-escaped line with 117169695Skan no trigraphs. The primary win here is by not writing any 118169695Skan data back to memory until we have to. */ 119169695Skan for (;;) 120169695Skan { 121169695Skan c = *++s; 122259268Spfg if (__builtin_expect (c == '\n', false) 123259268Spfg || __builtin_expect (c == '\r', false)) 124169695Skan { 125169695Skan d = (uchar *) s; 126169695Skan 127259268Spfg if (__builtin_expect (s == buffer->rlimit, false)) 128169695Skan goto done; 129169695Skan 130169695Skan /* DOS line ending? */ 131259268Spfg if (__builtin_expect (c == '\r', false) 132259268Spfg && s[1] == '\n') 133259268Spfg { 134259268Spfg s++; 135259268Spfg if (s == buffer->rlimit) 136259268Spfg goto done; 137259268Spfg } 138169695Skan 139259268Spfg if (__builtin_expect (pbackslash == NULL, true)) 140169695Skan goto done; 141169695Skan 142259268Spfg /* Check for escaped newline. */ 143169695Skan p = d; 144259268Spfg while (is_nvspace (p[-1])) 145169695Skan p--; 146259268Spfg if (p - 1 != pbackslash) 147169695Skan goto done; 148169695Skan 149169695Skan /* Have an escaped newline; process it and proceed to 150169695Skan the slow path. */ 151169695Skan add_line_note (buffer, p - 1, p != d ? ' ' : '\\'); 152169695Skan d = p - 2; 153169695Skan buffer->next_line = p - 1; 154169695Skan break; 155169695Skan } 156259268Spfg if (__builtin_expect (c == '\\', false)) 157259268Spfg pbackslash = s; 158259268Spfg else if (__builtin_expect (c == '?', false) 159259268Spfg && __builtin_expect (s[1] == '?', false) 160259268Spfg && _cpp_trigraph_map[s[2]]) 161169695Skan { 162169695Skan /* Have a trigraph. We may or may not have to convert 163169695Skan it. Add a line note regardless, for -Wtrigraphs. */ 164169695Skan add_line_note (buffer, s, s[2]); 165169695Skan if (CPP_OPTION (pfile, trigraphs)) 166169695Skan { 167169695Skan /* We do, and that means we have to switch to the 168169695Skan slow path. */ 169169695Skan d = (uchar *) s; 170169695Skan *d = _cpp_trigraph_map[s[2]]; 171169695Skan s += 2; 172169695Skan break; 173169695Skan } 174169695Skan } 175169695Skan } 176169695Skan 177169695Skan 178169695Skan for (;;) 179169695Skan { 180169695Skan c = *++s; 181169695Skan *++d = c; 182169695Skan 183169695Skan if (c == '\n' || c == '\r') 184169695Skan { 185169695Skan /* Handle DOS line endings. */ 186169695Skan if (c == '\r' && s != buffer->rlimit && s[1] == '\n') 187169695Skan s++; 188169695Skan if (s == buffer->rlimit) 189169695Skan break; 190169695Skan 191169695Skan /* Escaped? */ 192169695Skan p = d; 193169695Skan while (p != buffer->next_line && is_nvspace (p[-1])) 194169695Skan p--; 195169695Skan if (p == buffer->next_line || p[-1] != '\\') 196169695Skan break; 197169695Skan 198169695Skan add_line_note (buffer, p - 1, p != d ? ' ': '\\'); 199169695Skan d = p - 2; 200169695Skan buffer->next_line = p - 1; 201169695Skan } 202169695Skan else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) 203169695Skan { 204169695Skan /* Add a note regardless, for the benefit of -Wtrigraphs. */ 205169695Skan add_line_note (buffer, d, s[2]); 206169695Skan if (CPP_OPTION (pfile, trigraphs)) 207169695Skan { 208169695Skan *d = _cpp_trigraph_map[s[2]]; 209169695Skan s += 2; 210169695Skan } 211169695Skan } 212169695Skan } 213169695Skan } 214169695Skan else 215169695Skan { 216169695Skan do 217169695Skan s++; 218169695Skan while (*s != '\n' && *s != '\r'); 219169695Skan d = (uchar *) s; 220169695Skan 221169695Skan /* Handle DOS line endings. */ 222169695Skan if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') 223169695Skan s++; 224169695Skan } 225169695Skan 226169695Skan done: 227169695Skan *d = '\n'; 228169695Skan /* A sentinel note that should never be processed. */ 229169695Skan add_line_note (buffer, d + 1, '\n'); 230169695Skan buffer->next_line = s + 1; 231169695Skan} 232169695Skan 233169695Skan/* Return true if the trigraph indicated by NOTE should be warned 234169695Skan about in a comment. */ 235169695Skanstatic bool 236169695Skanwarn_in_comment (cpp_reader *pfile, _cpp_line_note *note) 237169695Skan{ 238169695Skan const uchar *p; 239169695Skan 240169695Skan /* Within comments we don't warn about trigraphs, unless the 241169695Skan trigraph forms an escaped newline, as that may change 242169695Skan behavior. */ 243169695Skan if (note->type != '/') 244169695Skan return false; 245169695Skan 246169695Skan /* If -trigraphs, then this was an escaped newline iff the next note 247169695Skan is coincident. */ 248169695Skan if (CPP_OPTION (pfile, trigraphs)) 249169695Skan return note[1].pos == note->pos; 250169695Skan 251169695Skan /* Otherwise, see if this forms an escaped newline. */ 252169695Skan p = note->pos + 3; 253169695Skan while (is_nvspace (*p)) 254169695Skan p++; 255169695Skan 256169695Skan /* There might have been escaped newlines between the trigraph and the 257169695Skan newline we found. Hence the position test. */ 258169695Skan return (*p == '\n' && p < note[1].pos); 259169695Skan} 260169695Skan 261169695Skan/* Process the notes created by add_line_note as far as the current 262169695Skan location. */ 263169695Skanvoid 264169695Skan_cpp_process_line_notes (cpp_reader *pfile, int in_comment) 265169695Skan{ 266169695Skan cpp_buffer *buffer = pfile->buffer; 267169695Skan 268169695Skan for (;;) 269169695Skan { 270169695Skan _cpp_line_note *note = &buffer->notes[buffer->cur_note]; 271169695Skan unsigned int col; 272169695Skan 273169695Skan if (note->pos > buffer->cur) 274169695Skan break; 275169695Skan 276169695Skan buffer->cur_note++; 277169695Skan col = CPP_BUF_COLUMN (buffer, note->pos + 1); 278169695Skan 279169695Skan if (note->type == '\\' || note->type == ' ') 280169695Skan { 281169695Skan if (note->type == ' ' && !in_comment) 282169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 283169695Skan "backslash and newline separated by space"); 284169695Skan 285169695Skan if (buffer->next_line > buffer->rlimit) 286169695Skan { 287169695Skan cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col, 288169695Skan "backslash-newline at end of file"); 289169695Skan /* Prevent "no newline at end of file" warning. */ 290169695Skan buffer->next_line = buffer->rlimit; 291169695Skan } 292169695Skan 293169695Skan buffer->line_base = note->pos; 294169695Skan CPP_INCREMENT_LINE (pfile, 0); 295169695Skan } 296169695Skan else if (_cpp_trigraph_map[note->type]) 297169695Skan { 298169695Skan if (CPP_OPTION (pfile, warn_trigraphs) 299169695Skan && (!in_comment || warn_in_comment (pfile, note))) 300169695Skan { 301169695Skan if (CPP_OPTION (pfile, trigraphs)) 302169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 303169695Skan "trigraph ??%c converted to %c", 304169695Skan note->type, 305169695Skan (int) _cpp_trigraph_map[note->type]); 306169695Skan else 307169695Skan { 308169695Skan cpp_error_with_line 309169695Skan (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 310169695Skan "trigraph ??%c ignored, use -trigraphs to enable", 311169695Skan note->type); 312169695Skan } 313169695Skan } 314169695Skan } 315169695Skan else 316169695Skan abort (); 317169695Skan } 318169695Skan} 319169695Skan 320169695Skan/* Skip a C-style block comment. We find the end of the comment by 321169695Skan seeing if an asterisk is before every '/' we encounter. Returns 322169695Skan nonzero if comment terminated by EOF, zero otherwise. 323169695Skan 324169695Skan Buffer->cur points to the initial asterisk of the comment. */ 325169695Skanbool 326169695Skan_cpp_skip_block_comment (cpp_reader *pfile) 327169695Skan{ 328169695Skan cpp_buffer *buffer = pfile->buffer; 329169695Skan const uchar *cur = buffer->cur; 330169695Skan uchar c; 331169695Skan 332169695Skan cur++; 333169695Skan if (*cur == '/') 334169695Skan cur++; 335169695Skan 336169695Skan for (;;) 337169695Skan { 338169695Skan /* People like decorating comments with '*', so check for '/' 339169695Skan instead for efficiency. */ 340169695Skan c = *cur++; 341169695Skan 342169695Skan if (c == '/') 343169695Skan { 344169695Skan if (cur[-2] == '*') 345169695Skan break; 346169695Skan 347169695Skan /* Warn about potential nested comments, but not if the '/' 348169695Skan comes immediately before the true comment delimiter. 349169695Skan Don't bother to get it right across escaped newlines. */ 350169695Skan if (CPP_OPTION (pfile, warn_comments) 351169695Skan && cur[0] == '*' && cur[1] != '/') 352169695Skan { 353169695Skan buffer->cur = cur; 354169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, 355169695Skan pfile->line_table->highest_line, CPP_BUF_COL (buffer), 356169695Skan "\"/*\" within comment"); 357169695Skan } 358169695Skan } 359169695Skan else if (c == '\n') 360169695Skan { 361169695Skan unsigned int cols; 362169695Skan buffer->cur = cur - 1; 363169695Skan _cpp_process_line_notes (pfile, true); 364169695Skan if (buffer->next_line >= buffer->rlimit) 365169695Skan return true; 366169695Skan _cpp_clean_line (pfile); 367169695Skan 368169695Skan cols = buffer->next_line - buffer->line_base; 369169695Skan CPP_INCREMENT_LINE (pfile, cols); 370169695Skan 371169695Skan cur = buffer->cur; 372169695Skan } 373169695Skan } 374169695Skan 375169695Skan buffer->cur = cur; 376169695Skan _cpp_process_line_notes (pfile, true); 377169695Skan return false; 378169695Skan} 379169695Skan 380169695Skan/* Skip a C++ line comment, leaving buffer->cur pointing to the 381169695Skan terminating newline. Handles escaped newlines. Returns nonzero 382169695Skan if a multiline comment. */ 383169695Skanstatic int 384169695Skanskip_line_comment (cpp_reader *pfile) 385169695Skan{ 386169695Skan cpp_buffer *buffer = pfile->buffer; 387169695Skan unsigned int orig_line = pfile->line_table->highest_line; 388169695Skan 389169695Skan while (*buffer->cur != '\n') 390169695Skan buffer->cur++; 391169695Skan 392169695Skan _cpp_process_line_notes (pfile, true); 393169695Skan return orig_line != pfile->line_table->highest_line; 394169695Skan} 395169695Skan 396169695Skan/* Skips whitespace, saving the next non-whitespace character. */ 397169695Skanstatic void 398169695Skanskip_whitespace (cpp_reader *pfile, cppchar_t c) 399169695Skan{ 400169695Skan cpp_buffer *buffer = pfile->buffer; 401169695Skan bool saw_NUL = false; 402169695Skan 403169695Skan do 404169695Skan { 405169695Skan /* Horizontal space always OK. */ 406169695Skan if (c == ' ' || c == '\t') 407169695Skan ; 408169695Skan /* Just \f \v or \0 left. */ 409169695Skan else if (c == '\0') 410169695Skan saw_NUL = true; 411169695Skan else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) 412169695Skan cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, 413169695Skan CPP_BUF_COL (buffer), 414169695Skan "%s in preprocessing directive", 415169695Skan c == '\f' ? "form feed" : "vertical tab"); 416169695Skan 417169695Skan c = *buffer->cur++; 418169695Skan } 419169695Skan /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */ 420169695Skan while (is_nvspace (c)); 421169695Skan 422169695Skan if (saw_NUL) 423169695Skan cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); 424169695Skan 425169695Skan buffer->cur--; 426169695Skan} 427169695Skan 428169695Skan/* See if the characters of a number token are valid in a name (no 429169695Skan '.', '+' or '-'). */ 430169695Skanstatic int 431169695Skanname_p (cpp_reader *pfile, const cpp_string *string) 432169695Skan{ 433169695Skan unsigned int i; 434169695Skan 435169695Skan for (i = 0; i < string->len; i++) 436169695Skan if (!is_idchar (string->text[i])) 437169695Skan return 0; 438169695Skan 439169695Skan return 1; 440169695Skan} 441169695Skan 442169695Skan/* After parsing an identifier or other sequence, produce a warning about 443169695Skan sequences not in NFC/NFKC. */ 444169695Skanstatic void 445169695Skanwarn_about_normalization (cpp_reader *pfile, 446169695Skan const cpp_token *token, 447169695Skan const struct normalize_state *s) 448169695Skan{ 449169695Skan if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s) 450169695Skan && !pfile->state.skipping) 451169695Skan { 452169695Skan /* Make sure that the token is printed using UCNs, even 453169695Skan if we'd otherwise happily print UTF-8. */ 454169695Skan unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token)); 455169695Skan size_t sz; 456169695Skan 457169695Skan sz = cpp_spell_token (pfile, token, buf, false) - buf; 458169695Skan if (NORMALIZE_STATE_RESULT (s) == normalized_C) 459169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 460169695Skan "`%.*s' is not in NFKC", (int) sz, buf); 461169695Skan else 462169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 463169695Skan "`%.*s' is not in NFC", (int) sz, buf); 464169695Skan } 465169695Skan} 466169695Skan 467169695Skan/* Returns TRUE if the sequence starting at buffer->cur is invalid in 468169695Skan an identifier. FIRST is TRUE if this starts an identifier. */ 469169695Skanstatic bool 470169695Skanforms_identifier_p (cpp_reader *pfile, int first, 471169695Skan struct normalize_state *state) 472169695Skan{ 473169695Skan cpp_buffer *buffer = pfile->buffer; 474169695Skan 475169695Skan if (*buffer->cur == '$') 476169695Skan { 477169695Skan if (!CPP_OPTION (pfile, dollars_in_ident)) 478169695Skan return false; 479169695Skan 480169695Skan buffer->cur++; 481169695Skan if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) 482169695Skan { 483169695Skan CPP_OPTION (pfile, warn_dollars) = 0; 484169695Skan cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); 485169695Skan } 486169695Skan 487169695Skan return true; 488169695Skan } 489169695Skan 490169695Skan /* Is this a syntactically valid UCN? */ 491169695Skan if (CPP_OPTION (pfile, extended_identifiers) 492169695Skan && *buffer->cur == '\\' 493169695Skan && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) 494169695Skan { 495169695Skan buffer->cur += 2; 496169695Skan if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, 497169695Skan state)) 498169695Skan return true; 499169695Skan buffer->cur -= 2; 500169695Skan } 501169695Skan 502169695Skan return false; 503169695Skan} 504169695Skan 505169695Skan/* Lex an identifier starting at BUFFER->CUR - 1. */ 506169695Skanstatic cpp_hashnode * 507169695Skanlex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, 508169695Skan struct normalize_state *nst) 509169695Skan{ 510169695Skan cpp_hashnode *result; 511169695Skan const uchar *cur; 512169695Skan unsigned int len; 513169695Skan unsigned int hash = HT_HASHSTEP (0, *base); 514169695Skan 515169695Skan cur = pfile->buffer->cur; 516169695Skan if (! starts_ucn) 517169695Skan while (ISIDNUM (*cur)) 518169695Skan { 519169695Skan hash = HT_HASHSTEP (hash, *cur); 520169695Skan cur++; 521169695Skan } 522169695Skan pfile->buffer->cur = cur; 523169695Skan if (starts_ucn || forms_identifier_p (pfile, false, nst)) 524169695Skan { 525169695Skan /* Slower version for identifiers containing UCNs (or $). */ 526169695Skan do { 527169695Skan while (ISIDNUM (*pfile->buffer->cur)) 528169695Skan { 529169695Skan pfile->buffer->cur++; 530169695Skan NORMALIZE_STATE_UPDATE_IDNUM (nst); 531169695Skan } 532169695Skan } while (forms_identifier_p (pfile, false, nst)); 533169695Skan result = _cpp_interpret_identifier (pfile, base, 534169695Skan pfile->buffer->cur - base); 535169695Skan } 536169695Skan else 537169695Skan { 538169695Skan len = cur - base; 539169695Skan hash = HT_HASHFINISH (hash, len); 540169695Skan 541169695Skan result = (cpp_hashnode *) 542169695Skan ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC); 543169695Skan } 544169695Skan 545169695Skan /* Rarely, identifiers require diagnostics when lexed. */ 546169695Skan if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) 547169695Skan && !pfile->state.skipping, 0)) 548169695Skan { 549169695Skan /* It is allowed to poison the same identifier twice. */ 550169695Skan if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) 551169695Skan cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", 552169695Skan NODE_NAME (result)); 553169695Skan 554169695Skan /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 555169695Skan replacement list of a variadic macro. */ 556169695Skan if (result == pfile->spec_nodes.n__VA_ARGS__ 557169695Skan && !pfile->state.va_args_ok) 558169695Skan cpp_error (pfile, CPP_DL_PEDWARN, 559169695Skan "__VA_ARGS__ can only appear in the expansion" 560169695Skan " of a C99 variadic macro"); 561169695Skan } 562169695Skan 563169695Skan return result; 564169695Skan} 565169695Skan 566169695Skan/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */ 567169695Skanstatic void 568169695Skanlex_number (cpp_reader *pfile, cpp_string *number, 569169695Skan struct normalize_state *nst) 570169695Skan{ 571169695Skan const uchar *cur; 572169695Skan const uchar *base; 573169695Skan uchar *dest; 574169695Skan 575169695Skan base = pfile->buffer->cur - 1; 576169695Skan do 577169695Skan { 578169695Skan cur = pfile->buffer->cur; 579169695Skan 580169695Skan /* N.B. ISIDNUM does not include $. */ 581169695Skan while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) 582169695Skan { 583169695Skan cur++; 584169695Skan NORMALIZE_STATE_UPDATE_IDNUM (nst); 585169695Skan } 586169695Skan 587169695Skan pfile->buffer->cur = cur; 588169695Skan } 589169695Skan while (forms_identifier_p (pfile, false, nst)); 590169695Skan 591169695Skan number->len = cur - base; 592169695Skan dest = _cpp_unaligned_alloc (pfile, number->len + 1); 593169695Skan memcpy (dest, base, number->len); 594169695Skan dest[number->len] = '\0'; 595169695Skan number->text = dest; 596169695Skan} 597169695Skan 598169695Skan/* Create a token of type TYPE with a literal spelling. */ 599169695Skanstatic void 600169695Skancreate_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, 601169695Skan unsigned int len, enum cpp_ttype type) 602169695Skan{ 603169695Skan uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); 604169695Skan 605169695Skan memcpy (dest, base, len); 606169695Skan dest[len] = '\0'; 607169695Skan token->type = type; 608169695Skan token->val.str.len = len; 609169695Skan token->val.str.text = dest; 610169695Skan} 611169695Skan 612169695Skan/* Lexes a string, character constant, or angle-bracketed header file 613169695Skan name. The stored string contains the spelling, including opening 614169695Skan quote and leading any leading 'L'. It returns the type of the 615169695Skan literal, or CPP_OTHER if it was not properly terminated. 616169695Skan 617169695Skan The spelling is NUL-terminated, but it is not guaranteed that this 618169695Skan is the first NUL since embedded NULs are preserved. */ 619169695Skanstatic void 620169695Skanlex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) 621169695Skan{ 622169695Skan bool saw_NUL = false; 623169695Skan const uchar *cur; 624169695Skan cppchar_t terminator; 625169695Skan enum cpp_ttype type; 626169695Skan 627169695Skan cur = base; 628169695Skan terminator = *cur++; 629169695Skan if (terminator == 'L') 630169695Skan terminator = *cur++; 631169695Skan if (terminator == '\"') 632169695Skan type = *base == 'L' ? CPP_WSTRING: CPP_STRING; 633169695Skan else if (terminator == '\'') 634169695Skan type = *base == 'L' ? CPP_WCHAR: CPP_CHAR; 635169695Skan else 636169695Skan terminator = '>', type = CPP_HEADER_NAME; 637169695Skan 638169695Skan for (;;) 639169695Skan { 640169695Skan cppchar_t c = *cur++; 641169695Skan 642169695Skan /* In #include-style directives, terminators are not escapable. */ 643169695Skan if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') 644169695Skan cur++; 645169695Skan else if (c == terminator) 646169695Skan break; 647169695Skan else if (c == '\n') 648169695Skan { 649169695Skan cur--; 650169695Skan type = CPP_OTHER; 651169695Skan break; 652169695Skan } 653169695Skan else if (c == '\0') 654169695Skan saw_NUL = true; 655169695Skan } 656169695Skan 657169695Skan if (saw_NUL && !pfile->state.skipping) 658169695Skan cpp_error (pfile, CPP_DL_WARNING, 659169695Skan "null character(s) preserved in literal"); 660169695Skan 661260573Spfg /* APPLE LOCAL begin #error with unmatched quotes 5607574 */ 662260573Spfg if (type == CPP_OTHER 663260573Spfg && CPP_OPTION (pfile, lang) != CLK_ASM 664260573Spfg && !pfile->state.in_diagnostic 665260573Spfg && !pfile->state.skipping) 666260573Spfg /* APPLE LOCAL end #error with unmatched quotes 5607574 */ 667169695Skan cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character", 668169695Skan (int) terminator); 669169695Skan 670169695Skan pfile->buffer->cur = cur; 671169695Skan create_literal (pfile, token, base, cur - base, type); 672169695Skan} 673169695Skan 674169695Skan/* The stored comment includes the comment start and any terminator. */ 675169695Skanstatic void 676169695Skansave_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from, 677169695Skan cppchar_t type) 678169695Skan{ 679169695Skan unsigned char *buffer; 680169695Skan unsigned int len, clen; 681169695Skan 682169695Skan len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ 683169695Skan 684169695Skan /* C++ comments probably (not definitely) have moved past a new 685169695Skan line, which we don't want to save in the comment. */ 686169695Skan if (is_vspace (pfile->buffer->cur[-1])) 687169695Skan len--; 688169695Skan 689169695Skan /* If we are currently in a directive, then we need to store all 690169695Skan C++ comments as C comments internally, and so we need to 691169695Skan allocate a little extra space in that case. 692169695Skan 693169695Skan Note that the only time we encounter a directive here is 694169695Skan when we are saving comments in a "#define". */ 695169695Skan clen = (pfile->state.in_directive && type == '/') ? len + 2 : len; 696169695Skan 697169695Skan buffer = _cpp_unaligned_alloc (pfile, clen); 698169695Skan 699169695Skan token->type = CPP_COMMENT; 700169695Skan token->val.str.len = clen; 701169695Skan token->val.str.text = buffer; 702169695Skan 703169695Skan buffer[0] = '/'; 704169695Skan memcpy (buffer + 1, from, len - 1); 705169695Skan 706169695Skan /* Finish conversion to a C comment, if necessary. */ 707169695Skan if (pfile->state.in_directive && type == '/') 708169695Skan { 709169695Skan buffer[1] = '*'; 710169695Skan buffer[clen - 2] = '*'; 711169695Skan buffer[clen - 1] = '/'; 712169695Skan } 713169695Skan} 714169695Skan 715169695Skan/* Allocate COUNT tokens for RUN. */ 716169695Skanvoid 717169695Skan_cpp_init_tokenrun (tokenrun *run, unsigned int count) 718169695Skan{ 719169695Skan run->base = XNEWVEC (cpp_token, count); 720169695Skan run->limit = run->base + count; 721169695Skan run->next = NULL; 722169695Skan} 723169695Skan 724169695Skan/* Returns the next tokenrun, or creates one if there is none. */ 725169695Skanstatic tokenrun * 726169695Skannext_tokenrun (tokenrun *run) 727169695Skan{ 728169695Skan if (run->next == NULL) 729169695Skan { 730169695Skan run->next = XNEW (tokenrun); 731169695Skan run->next->prev = run; 732169695Skan _cpp_init_tokenrun (run->next, 250); 733169695Skan } 734169695Skan 735169695Skan return run->next; 736169695Skan} 737169695Skan 738169695Skan/* Allocate a single token that is invalidated at the same time as the 739169695Skan rest of the tokens on the line. Has its line and col set to the 740169695Skan same as the last lexed token, so that diagnostics appear in the 741169695Skan right place. */ 742169695Skancpp_token * 743169695Skan_cpp_temp_token (cpp_reader *pfile) 744169695Skan{ 745169695Skan cpp_token *old, *result; 746169695Skan 747169695Skan old = pfile->cur_token - 1; 748169695Skan if (pfile->cur_token == pfile->cur_run->limit) 749169695Skan { 750169695Skan pfile->cur_run = next_tokenrun (pfile->cur_run); 751169695Skan pfile->cur_token = pfile->cur_run->base; 752169695Skan } 753169695Skan 754169695Skan result = pfile->cur_token++; 755169695Skan result->src_loc = old->src_loc; 756169695Skan return result; 757169695Skan} 758169695Skan 759169695Skan/* Lex a token into RESULT (external interface). Takes care of issues 760169695Skan like directive handling, token lookahead, multiple include 761169695Skan optimization and skipping. */ 762169695Skanconst cpp_token * 763169695Skan_cpp_lex_token (cpp_reader *pfile) 764169695Skan{ 765169695Skan cpp_token *result; 766169695Skan 767169695Skan for (;;) 768169695Skan { 769169695Skan if (pfile->cur_token == pfile->cur_run->limit) 770169695Skan { 771169695Skan pfile->cur_run = next_tokenrun (pfile->cur_run); 772169695Skan pfile->cur_token = pfile->cur_run->base; 773169695Skan } 774259890Spfg /* We assume that the current token is somewhere in the current 775259890Spfg run. */ 776259890Spfg if (pfile->cur_token < pfile->cur_run->base 777259890Spfg || pfile->cur_token >= pfile->cur_run->limit) 778259890Spfg abort (); 779169695Skan 780169695Skan if (pfile->lookaheads) 781169695Skan { 782169695Skan pfile->lookaheads--; 783169695Skan result = pfile->cur_token++; 784169695Skan } 785169695Skan else 786169695Skan result = _cpp_lex_direct (pfile); 787169695Skan 788169695Skan if (result->flags & BOL) 789169695Skan { 790169695Skan /* Is this a directive. If _cpp_handle_directive returns 791169695Skan false, it is an assembler #. */ 792169695Skan if (result->type == CPP_HASH 793169695Skan /* 6.10.3 p 11: Directives in a list of macro arguments 794169695Skan gives undefined behavior. This implementation 795169695Skan handles the directive as normal. */ 796169695Skan && pfile->state.parsing_args != 1) 797169695Skan { 798169695Skan if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE)) 799169695Skan { 800169695Skan if (pfile->directive_result.type == CPP_PADDING) 801169695Skan continue; 802169695Skan result = &pfile->directive_result; 803169695Skan } 804169695Skan } 805169695Skan else if (pfile->state.in_deferred_pragma) 806169695Skan result = &pfile->directive_result; 807169695Skan 808169695Skan if (pfile->cb.line_change && !pfile->state.skipping) 809169695Skan pfile->cb.line_change (pfile, result, pfile->state.parsing_args); 810169695Skan } 811169695Skan 812169695Skan /* We don't skip tokens in directives. */ 813169695Skan if (pfile->state.in_directive || pfile->state.in_deferred_pragma) 814169695Skan break; 815169695Skan 816169695Skan /* Outside a directive, invalidate controlling macros. At file 817169695Skan EOF, _cpp_lex_direct takes care of popping the buffer, so we never 818169695Skan get here and MI optimization works. */ 819169695Skan pfile->mi_valid = false; 820169695Skan 821169695Skan if (!pfile->state.skipping || result->type == CPP_EOF) 822169695Skan break; 823169695Skan } 824169695Skan 825169695Skan return result; 826169695Skan} 827169695Skan 828169695Skan/* Returns true if a fresh line has been loaded. */ 829169695Skanbool 830169695Skan_cpp_get_fresh_line (cpp_reader *pfile) 831169695Skan{ 832169695Skan int return_at_eof; 833169695Skan 834169695Skan /* We can't get a new line until we leave the current directive. */ 835169695Skan if (pfile->state.in_directive) 836169695Skan return false; 837169695Skan 838169695Skan for (;;) 839169695Skan { 840169695Skan cpp_buffer *buffer = pfile->buffer; 841169695Skan 842169695Skan if (!buffer->need_line) 843169695Skan return true; 844169695Skan 845169695Skan if (buffer->next_line < buffer->rlimit) 846169695Skan { 847169695Skan _cpp_clean_line (pfile); 848169695Skan return true; 849169695Skan } 850169695Skan 851169695Skan /* First, get out of parsing arguments state. */ 852169695Skan if (pfile->state.parsing_args) 853169695Skan return false; 854169695Skan 855169695Skan /* End of buffer. Non-empty files should end in a newline. */ 856169695Skan if (buffer->buf != buffer->rlimit 857169695Skan && buffer->next_line > buffer->rlimit 858169695Skan && !buffer->from_stage3) 859169695Skan { 860259890Spfg /* Clip to buffer size. */ 861169695Skan buffer->next_line = buffer->rlimit; 862259890Spfg /* APPLE LOCAL begin suppress no newline warning. */ 863259890Spfg if ( CPP_OPTION (pfile, warn_newline_at_eof)) 864259890Spfg { 865259890Spfg cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, 866259890Spfg CPP_BUF_COLUMN (buffer, buffer->cur), 867259890Spfg "no newline at end of file"); 868259890Spfg } 869259890Spfg /* APPLE LOCAL end suppress no newline warning. */ 870169695Skan } 871169695Skan 872169695Skan return_at_eof = buffer->return_at_eof; 873169695Skan _cpp_pop_buffer (pfile); 874169695Skan if (pfile->buffer == NULL || return_at_eof) 875169695Skan return false; 876169695Skan } 877169695Skan} 878169695Skan 879169695Skan#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ 880169695Skan do \ 881169695Skan { \ 882169695Skan result->type = ELSE_TYPE; \ 883169695Skan if (*buffer->cur == CHAR) \ 884169695Skan buffer->cur++, result->type = THEN_TYPE; \ 885169695Skan } \ 886169695Skan while (0) 887169695Skan 888169695Skan/* Lex a token into pfile->cur_token, which is also incremented, to 889169695Skan get diagnostics pointing to the correct location. 890169695Skan 891169695Skan Does not handle issues such as token lookahead, multiple-include 892169695Skan optimization, directives, skipping etc. This function is only 893169695Skan suitable for use by _cpp_lex_token, and in special cases like 894169695Skan lex_expansion_token which doesn't care for any of these issues. 895169695Skan 896169695Skan When meeting a newline, returns CPP_EOF if parsing a directive, 897169695Skan otherwise returns to the start of the token buffer if permissible. 898169695Skan Returns the location of the lexed token. */ 899169695Skancpp_token * 900169695Skan_cpp_lex_direct (cpp_reader *pfile) 901169695Skan{ 902169695Skan cppchar_t c; 903169695Skan cpp_buffer *buffer; 904169695Skan const unsigned char *comment_start; 905169695Skan cpp_token *result = pfile->cur_token++; 906169695Skan 907169695Skan fresh_line: 908169695Skan result->flags = 0; 909169695Skan buffer = pfile->buffer; 910169695Skan if (buffer->need_line) 911169695Skan { 912169695Skan if (pfile->state.in_deferred_pragma) 913169695Skan { 914169695Skan result->type = CPP_PRAGMA_EOL; 915169695Skan pfile->state.in_deferred_pragma = false; 916169695Skan if (!pfile->state.pragma_allow_expansion) 917169695Skan pfile->state.prevent_expansion--; 918169695Skan return result; 919169695Skan } 920169695Skan if (!_cpp_get_fresh_line (pfile)) 921169695Skan { 922169695Skan result->type = CPP_EOF; 923169695Skan if (!pfile->state.in_directive) 924169695Skan { 925169695Skan /* Tell the compiler the line number of the EOF token. */ 926169695Skan result->src_loc = pfile->line_table->highest_line; 927169695Skan result->flags = BOL; 928169695Skan } 929169695Skan return result; 930169695Skan } 931169695Skan if (!pfile->keep_tokens) 932169695Skan { 933169695Skan pfile->cur_run = &pfile->base_run; 934169695Skan result = pfile->base_run.base; 935169695Skan pfile->cur_token = result + 1; 936169695Skan } 937169695Skan result->flags = BOL; 938169695Skan if (pfile->state.parsing_args == 2) 939169695Skan result->flags |= PREV_WHITE; 940169695Skan } 941169695Skan buffer = pfile->buffer; 942169695Skan update_tokens_line: 943169695Skan result->src_loc = pfile->line_table->highest_line; 944169695Skan 945169695Skan skipped_white: 946169695Skan if (buffer->cur >= buffer->notes[buffer->cur_note].pos 947169695Skan && !pfile->overlaid_buffer) 948169695Skan { 949169695Skan _cpp_process_line_notes (pfile, false); 950169695Skan result->src_loc = pfile->line_table->highest_line; 951169695Skan } 952169695Skan c = *buffer->cur++; 953169695Skan 954169695Skan LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table, 955169695Skan CPP_BUF_COLUMN (buffer, buffer->cur)); 956169695Skan 957169695Skan switch (c) 958169695Skan { 959169695Skan case ' ': case '\t': case '\f': case '\v': case '\0': 960169695Skan result->flags |= PREV_WHITE; 961169695Skan skip_whitespace (pfile, c); 962169695Skan goto skipped_white; 963169695Skan 964169695Skan case '\n': 965169695Skan if (buffer->cur < buffer->rlimit) 966169695Skan CPP_INCREMENT_LINE (pfile, 0); 967169695Skan buffer->need_line = true; 968169695Skan goto fresh_line; 969169695Skan 970169695Skan case '0': case '1': case '2': case '3': case '4': 971169695Skan case '5': case '6': case '7': case '8': case '9': 972169695Skan { 973169695Skan struct normalize_state nst = INITIAL_NORMALIZE_STATE; 974169695Skan result->type = CPP_NUMBER; 975169695Skan lex_number (pfile, &result->val.str, &nst); 976169695Skan warn_about_normalization (pfile, result, &nst); 977169695Skan break; 978169695Skan } 979169695Skan 980169695Skan case 'L': 981169695Skan /* 'L' may introduce wide characters or strings. */ 982169695Skan if (*buffer->cur == '\'' || *buffer->cur == '"') 983169695Skan { 984169695Skan lex_string (pfile, result, buffer->cur - 1); 985169695Skan break; 986169695Skan } 987169695Skan /* Fall through. */ 988169695Skan 989169695Skan case '_': 990169695Skan case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 991169695Skan case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 992169695Skan case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 993169695Skan case 's': case 't': case 'u': case 'v': case 'w': case 'x': 994169695Skan case 'y': case 'z': 995169695Skan case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 996169695Skan case 'G': case 'H': case 'I': case 'J': case 'K': 997169695Skan case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 998169695Skan case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 999169695Skan case 'Y': case 'Z': 1000169695Skan result->type = CPP_NAME; 1001169695Skan { 1002169695Skan struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1003169695Skan result->val.node = lex_identifier (pfile, buffer->cur - 1, false, 1004169695Skan &nst); 1005169695Skan warn_about_normalization (pfile, result, &nst); 1006169695Skan } 1007169695Skan 1008169695Skan /* Convert named operators to their proper types. */ 1009169695Skan if (result->val.node->flags & NODE_OPERATOR) 1010169695Skan { 1011169695Skan result->flags |= NAMED_OP; 1012169695Skan result->type = (enum cpp_ttype) result->val.node->directive_index; 1013169695Skan } 1014169695Skan break; 1015169695Skan 1016169695Skan case '\'': 1017169695Skan case '"': 1018169695Skan lex_string (pfile, result, buffer->cur - 1); 1019169695Skan break; 1020169695Skan 1021169695Skan case '/': 1022169695Skan /* A potential block or line comment. */ 1023169695Skan comment_start = buffer->cur; 1024169695Skan c = *buffer->cur; 1025169695Skan 1026169695Skan if (c == '*') 1027169695Skan { 1028169695Skan if (_cpp_skip_block_comment (pfile)) 1029169695Skan cpp_error (pfile, CPP_DL_ERROR, "unterminated comment"); 1030169695Skan } 1031169695Skan else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) 1032169695Skan || cpp_in_system_header (pfile))) 1033169695Skan { 1034169695Skan /* Warn about comments only if pedantically GNUC89, and not 1035169695Skan in system headers. */ 1036169695Skan if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) 1037169695Skan && ! buffer->warned_cplusplus_comments) 1038169695Skan { 1039169695Skan cpp_error (pfile, CPP_DL_PEDWARN, 1040169695Skan "C++ style comments are not allowed in ISO C90"); 1041169695Skan cpp_error (pfile, CPP_DL_PEDWARN, 1042169695Skan "(this will be reported only once per input file)"); 1043169695Skan buffer->warned_cplusplus_comments = 1; 1044169695Skan } 1045169695Skan 1046169695Skan if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) 1047169695Skan cpp_error (pfile, CPP_DL_WARNING, "multi-line comment"); 1048169695Skan } 1049169695Skan else if (c == '=') 1050169695Skan { 1051169695Skan buffer->cur++; 1052169695Skan result->type = CPP_DIV_EQ; 1053169695Skan break; 1054169695Skan } 1055169695Skan else 1056169695Skan { 1057169695Skan result->type = CPP_DIV; 1058169695Skan break; 1059169695Skan } 1060169695Skan 1061169695Skan if (!pfile->state.save_comments) 1062169695Skan { 1063169695Skan result->flags |= PREV_WHITE; 1064169695Skan goto update_tokens_line; 1065169695Skan } 1066169695Skan 1067169695Skan /* Save the comment as a token in its own right. */ 1068169695Skan save_comment (pfile, result, comment_start, c); 1069169695Skan break; 1070169695Skan 1071169695Skan case '<': 1072169695Skan if (pfile->state.angled_headers) 1073169695Skan { 1074169695Skan lex_string (pfile, result, buffer->cur - 1); 1075169695Skan break; 1076169695Skan } 1077169695Skan 1078169695Skan result->type = CPP_LESS; 1079169695Skan if (*buffer->cur == '=') 1080169695Skan buffer->cur++, result->type = CPP_LESS_EQ; 1081169695Skan else if (*buffer->cur == '<') 1082169695Skan { 1083169695Skan buffer->cur++; 1084169695Skan IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); 1085169695Skan } 1086169695Skan else if (CPP_OPTION (pfile, digraphs)) 1087169695Skan { 1088169695Skan if (*buffer->cur == ':') 1089169695Skan { 1090169695Skan buffer->cur++; 1091169695Skan result->flags |= DIGRAPH; 1092169695Skan result->type = CPP_OPEN_SQUARE; 1093169695Skan } 1094169695Skan else if (*buffer->cur == '%') 1095169695Skan { 1096169695Skan buffer->cur++; 1097169695Skan result->flags |= DIGRAPH; 1098169695Skan result->type = CPP_OPEN_BRACE; 1099169695Skan } 1100169695Skan } 1101169695Skan break; 1102169695Skan 1103169695Skan case '>': 1104169695Skan result->type = CPP_GREATER; 1105169695Skan if (*buffer->cur == '=') 1106169695Skan buffer->cur++, result->type = CPP_GREATER_EQ; 1107169695Skan else if (*buffer->cur == '>') 1108169695Skan { 1109169695Skan buffer->cur++; 1110169695Skan IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); 1111169695Skan } 1112169695Skan break; 1113169695Skan 1114169695Skan case '%': 1115169695Skan result->type = CPP_MOD; 1116169695Skan if (*buffer->cur == '=') 1117169695Skan buffer->cur++, result->type = CPP_MOD_EQ; 1118169695Skan else if (CPP_OPTION (pfile, digraphs)) 1119169695Skan { 1120169695Skan if (*buffer->cur == ':') 1121169695Skan { 1122169695Skan buffer->cur++; 1123169695Skan result->flags |= DIGRAPH; 1124169695Skan result->type = CPP_HASH; 1125169695Skan if (*buffer->cur == '%' && buffer->cur[1] == ':') 1126169695Skan buffer->cur += 2, result->type = CPP_PASTE; 1127169695Skan } 1128169695Skan else if (*buffer->cur == '>') 1129169695Skan { 1130169695Skan buffer->cur++; 1131169695Skan result->flags |= DIGRAPH; 1132169695Skan result->type = CPP_CLOSE_BRACE; 1133169695Skan } 1134169695Skan } 1135169695Skan break; 1136169695Skan 1137169695Skan case '.': 1138169695Skan result->type = CPP_DOT; 1139169695Skan if (ISDIGIT (*buffer->cur)) 1140169695Skan { 1141169695Skan struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1142169695Skan result->type = CPP_NUMBER; 1143169695Skan lex_number (pfile, &result->val.str, &nst); 1144169695Skan warn_about_normalization (pfile, result, &nst); 1145169695Skan } 1146169695Skan else if (*buffer->cur == '.' && buffer->cur[1] == '.') 1147169695Skan buffer->cur += 2, result->type = CPP_ELLIPSIS; 1148169695Skan else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) 1149169695Skan buffer->cur++, result->type = CPP_DOT_STAR; 1150169695Skan break; 1151169695Skan 1152169695Skan case '+': 1153169695Skan result->type = CPP_PLUS; 1154169695Skan if (*buffer->cur == '+') 1155169695Skan buffer->cur++, result->type = CPP_PLUS_PLUS; 1156169695Skan else if (*buffer->cur == '=') 1157169695Skan buffer->cur++, result->type = CPP_PLUS_EQ; 1158169695Skan break; 1159169695Skan 1160169695Skan case '-': 1161169695Skan result->type = CPP_MINUS; 1162169695Skan if (*buffer->cur == '>') 1163169695Skan { 1164169695Skan buffer->cur++; 1165169695Skan result->type = CPP_DEREF; 1166169695Skan if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) 1167169695Skan buffer->cur++, result->type = CPP_DEREF_STAR; 1168169695Skan } 1169169695Skan else if (*buffer->cur == '-') 1170169695Skan buffer->cur++, result->type = CPP_MINUS_MINUS; 1171169695Skan else if (*buffer->cur == '=') 1172169695Skan buffer->cur++, result->type = CPP_MINUS_EQ; 1173169695Skan break; 1174169695Skan 1175169695Skan case '&': 1176169695Skan result->type = CPP_AND; 1177169695Skan if (*buffer->cur == '&') 1178169695Skan buffer->cur++, result->type = CPP_AND_AND; 1179169695Skan else if (*buffer->cur == '=') 1180169695Skan buffer->cur++, result->type = CPP_AND_EQ; 1181169695Skan break; 1182169695Skan 1183169695Skan case '|': 1184169695Skan result->type = CPP_OR; 1185169695Skan if (*buffer->cur == '|') 1186169695Skan buffer->cur++, result->type = CPP_OR_OR; 1187169695Skan else if (*buffer->cur == '=') 1188169695Skan buffer->cur++, result->type = CPP_OR_EQ; 1189169695Skan break; 1190169695Skan 1191169695Skan case ':': 1192169695Skan result->type = CPP_COLON; 1193169695Skan if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus)) 1194169695Skan buffer->cur++, result->type = CPP_SCOPE; 1195169695Skan else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs)) 1196169695Skan { 1197169695Skan buffer->cur++; 1198169695Skan result->flags |= DIGRAPH; 1199169695Skan result->type = CPP_CLOSE_SQUARE; 1200169695Skan } 1201169695Skan break; 1202169695Skan 1203169695Skan case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break; 1204169695Skan case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break; 1205169695Skan case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break; 1206169695Skan case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break; 1207169695Skan case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break; 1208169695Skan 1209169695Skan case '?': result->type = CPP_QUERY; break; 1210169695Skan case '~': result->type = CPP_COMPL; break; 1211169695Skan case ',': result->type = CPP_COMMA; break; 1212169695Skan case '(': result->type = CPP_OPEN_PAREN; break; 1213169695Skan case ')': result->type = CPP_CLOSE_PAREN; break; 1214169695Skan case '[': result->type = CPP_OPEN_SQUARE; break; 1215169695Skan case ']': result->type = CPP_CLOSE_SQUARE; break; 1216169695Skan case '{': result->type = CPP_OPEN_BRACE; break; 1217169695Skan case '}': result->type = CPP_CLOSE_BRACE; break; 1218169695Skan case ';': result->type = CPP_SEMICOLON; break; 1219169695Skan 1220169695Skan /* @ is a punctuator in Objective-C. */ 1221169695Skan case '@': result->type = CPP_ATSIGN; break; 1222169695Skan 1223169695Skan case '$': 1224169695Skan case '\\': 1225169695Skan { 1226169695Skan const uchar *base = --buffer->cur; 1227169695Skan struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1228169695Skan 1229169695Skan if (forms_identifier_p (pfile, true, &nst)) 1230169695Skan { 1231169695Skan result->type = CPP_NAME; 1232169695Skan result->val.node = lex_identifier (pfile, base, true, &nst); 1233169695Skan warn_about_normalization (pfile, result, &nst); 1234169695Skan break; 1235169695Skan } 1236169695Skan buffer->cur++; 1237169695Skan } 1238169695Skan 1239169695Skan default: 1240169695Skan create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER); 1241169695Skan break; 1242169695Skan } 1243169695Skan 1244169695Skan return result; 1245169695Skan} 1246169695Skan 1247169695Skan/* An upper bound on the number of bytes needed to spell TOKEN. 1248169695Skan Does not include preceding whitespace. */ 1249169695Skanunsigned int 1250169695Skancpp_token_len (const cpp_token *token) 1251169695Skan{ 1252169695Skan unsigned int len; 1253169695Skan 1254169695Skan switch (TOKEN_SPELL (token)) 1255169695Skan { 1256169695Skan default: len = 4; break; 1257169695Skan case SPELL_LITERAL: len = token->val.str.len; break; 1258169695Skan case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break; 1259169695Skan } 1260169695Skan 1261169695Skan return len; 1262169695Skan} 1263169695Skan 1264169695Skan/* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER. 1265169695Skan Return the number of bytes read out of NAME. (There are always 1266169695Skan 10 bytes written to BUFFER.) */ 1267169695Skan 1268169695Skanstatic size_t 1269169695Skanutf8_to_ucn (unsigned char *buffer, const unsigned char *name) 1270169695Skan{ 1271169695Skan int j; 1272169695Skan int ucn_len = 0; 1273169695Skan int ucn_len_c; 1274169695Skan unsigned t; 1275169695Skan unsigned long utf32; 1276169695Skan 1277169695Skan /* Compute the length of the UTF-8 sequence. */ 1278169695Skan for (t = *name; t & 0x80; t <<= 1) 1279169695Skan ucn_len++; 1280169695Skan 1281169695Skan utf32 = *name & (0x7F >> ucn_len); 1282169695Skan for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++) 1283169695Skan { 1284169695Skan utf32 = (utf32 << 6) | (*++name & 0x3F); 1285169695Skan 1286169695Skan /* Ill-formed UTF-8. */ 1287169695Skan if ((*name & ~0x3F) != 0x80) 1288169695Skan abort (); 1289169695Skan } 1290169695Skan 1291169695Skan *buffer++ = '\\'; 1292169695Skan *buffer++ = 'U'; 1293169695Skan for (j = 7; j >= 0; j--) 1294169695Skan *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF]; 1295169695Skan return ucn_len; 1296169695Skan} 1297169695Skan 1298169695Skan 1299169695Skan/* Write the spelling of a token TOKEN to BUFFER. The buffer must 1300169695Skan already contain the enough space to hold the token's spelling. 1301169695Skan Returns a pointer to the character after the last character written. 1302169695Skan FORSTRING is true if this is to be the spelling after translation 1303169695Skan phase 1 (this is different for UCNs). 1304169695Skan FIXME: Would be nice if we didn't need the PFILE argument. */ 1305169695Skanunsigned char * 1306169695Skancpp_spell_token (cpp_reader *pfile, const cpp_token *token, 1307169695Skan unsigned char *buffer, bool forstring) 1308169695Skan{ 1309169695Skan switch (TOKEN_SPELL (token)) 1310169695Skan { 1311169695Skan case SPELL_OPERATOR: 1312169695Skan { 1313169695Skan const unsigned char *spelling; 1314169695Skan unsigned char c; 1315169695Skan 1316169695Skan if (token->flags & DIGRAPH) 1317169695Skan spelling 1318169695Skan = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1319169695Skan else if (token->flags & NAMED_OP) 1320169695Skan goto spell_ident; 1321169695Skan else 1322169695Skan spelling = TOKEN_NAME (token); 1323169695Skan 1324169695Skan while ((c = *spelling++) != '\0') 1325169695Skan *buffer++ = c; 1326169695Skan } 1327169695Skan break; 1328169695Skan 1329169695Skan spell_ident: 1330169695Skan case SPELL_IDENT: 1331169695Skan if (forstring) 1332169695Skan { 1333169695Skan memcpy (buffer, NODE_NAME (token->val.node), 1334169695Skan NODE_LEN (token->val.node)); 1335169695Skan buffer += NODE_LEN (token->val.node); 1336169695Skan } 1337169695Skan else 1338169695Skan { 1339169695Skan size_t i; 1340169695Skan const unsigned char * name = NODE_NAME (token->val.node); 1341169695Skan 1342169695Skan for (i = 0; i < NODE_LEN (token->val.node); i++) 1343169695Skan if (name[i] & ~0x7F) 1344169695Skan { 1345169695Skan i += utf8_to_ucn (buffer, name + i) - 1; 1346169695Skan buffer += 10; 1347169695Skan } 1348169695Skan else 1349169695Skan *buffer++ = NODE_NAME (token->val.node)[i]; 1350169695Skan } 1351169695Skan break; 1352169695Skan 1353169695Skan case SPELL_LITERAL: 1354169695Skan memcpy (buffer, token->val.str.text, token->val.str.len); 1355169695Skan buffer += token->val.str.len; 1356169695Skan break; 1357169695Skan 1358169695Skan case SPELL_NONE: 1359169695Skan cpp_error (pfile, CPP_DL_ICE, 1360169695Skan "unspellable token %s", TOKEN_NAME (token)); 1361169695Skan break; 1362169695Skan } 1363169695Skan 1364169695Skan return buffer; 1365169695Skan} 1366169695Skan 1367169695Skan/* Returns TOKEN spelt as a null-terminated string. The string is 1368169695Skan freed when the reader is destroyed. Useful for diagnostics. */ 1369169695Skanunsigned char * 1370169695Skancpp_token_as_text (cpp_reader *pfile, const cpp_token *token) 1371169695Skan{ 1372169695Skan unsigned int len = cpp_token_len (token) + 1; 1373169695Skan unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; 1374169695Skan 1375169695Skan end = cpp_spell_token (pfile, token, start, false); 1376169695Skan end[0] = '\0'; 1377169695Skan 1378169695Skan return start; 1379169695Skan} 1380169695Skan 1381169695Skan/* Used by C front ends, which really should move to using 1382169695Skan cpp_token_as_text. */ 1383169695Skanconst char * 1384169695Skancpp_type2name (enum cpp_ttype type) 1385169695Skan{ 1386169695Skan return (const char *) token_spellings[type].name; 1387169695Skan} 1388169695Skan 1389169695Skan/* Writes the spelling of token to FP, without any preceding space. 1390169695Skan Separated from cpp_spell_token for efficiency - to avoid stdio 1391169695Skan double-buffering. */ 1392169695Skanvoid 1393169695Skancpp_output_token (const cpp_token *token, FILE *fp) 1394169695Skan{ 1395169695Skan switch (TOKEN_SPELL (token)) 1396169695Skan { 1397169695Skan case SPELL_OPERATOR: 1398169695Skan { 1399169695Skan const unsigned char *spelling; 1400169695Skan int c; 1401169695Skan 1402169695Skan if (token->flags & DIGRAPH) 1403169695Skan spelling 1404169695Skan = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1405169695Skan else if (token->flags & NAMED_OP) 1406169695Skan goto spell_ident; 1407169695Skan else 1408169695Skan spelling = TOKEN_NAME (token); 1409169695Skan 1410169695Skan c = *spelling; 1411169695Skan do 1412169695Skan putc (c, fp); 1413169695Skan while ((c = *++spelling) != '\0'); 1414169695Skan } 1415169695Skan break; 1416169695Skan 1417169695Skan spell_ident: 1418169695Skan case SPELL_IDENT: 1419169695Skan { 1420169695Skan size_t i; 1421169695Skan const unsigned char * name = NODE_NAME (token->val.node); 1422169695Skan 1423169695Skan for (i = 0; i < NODE_LEN (token->val.node); i++) 1424169695Skan if (name[i] & ~0x7F) 1425169695Skan { 1426169695Skan unsigned char buffer[10]; 1427169695Skan i += utf8_to_ucn (buffer, name + i) - 1; 1428169695Skan fwrite (buffer, 1, 10, fp); 1429169695Skan } 1430169695Skan else 1431169695Skan fputc (NODE_NAME (token->val.node)[i], fp); 1432169695Skan } 1433169695Skan break; 1434169695Skan 1435169695Skan case SPELL_LITERAL: 1436169695Skan fwrite (token->val.str.text, 1, token->val.str.len, fp); 1437169695Skan break; 1438169695Skan 1439169695Skan case SPELL_NONE: 1440169695Skan /* An error, most probably. */ 1441169695Skan break; 1442169695Skan } 1443169695Skan} 1444169695Skan 1445169695Skan/* Compare two tokens. */ 1446169695Skanint 1447169695Skan_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) 1448169695Skan{ 1449169695Skan if (a->type == b->type && a->flags == b->flags) 1450169695Skan switch (TOKEN_SPELL (a)) 1451169695Skan { 1452169695Skan default: /* Keep compiler happy. */ 1453169695Skan case SPELL_OPERATOR: 1454169695Skan return 1; 1455169695Skan case SPELL_NONE: 1456169695Skan return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); 1457169695Skan case SPELL_IDENT: 1458169695Skan return a->val.node == b->val.node; 1459169695Skan case SPELL_LITERAL: 1460169695Skan return (a->val.str.len == b->val.str.len 1461169695Skan && !memcmp (a->val.str.text, b->val.str.text, 1462169695Skan a->val.str.len)); 1463169695Skan } 1464169695Skan 1465169695Skan return 0; 1466169695Skan} 1467169695Skan 1468169695Skan/* Returns nonzero if a space should be inserted to avoid an 1469169695Skan accidental token paste for output. For simplicity, it is 1470169695Skan conservative, and occasionally advises a space where one is not 1471169695Skan needed, e.g. "." and ".2". */ 1472169695Skanint 1473169695Skancpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1, 1474169695Skan const cpp_token *token2) 1475169695Skan{ 1476169695Skan enum cpp_ttype a = token1->type, b = token2->type; 1477169695Skan cppchar_t c; 1478169695Skan 1479169695Skan if (token1->flags & NAMED_OP) 1480169695Skan a = CPP_NAME; 1481169695Skan if (token2->flags & NAMED_OP) 1482169695Skan b = CPP_NAME; 1483169695Skan 1484169695Skan c = EOF; 1485169695Skan if (token2->flags & DIGRAPH) 1486169695Skan c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; 1487169695Skan else if (token_spellings[b].category == SPELL_OPERATOR) 1488169695Skan c = token_spellings[b].name[0]; 1489169695Skan 1490169695Skan /* Quickly get everything that can paste with an '='. */ 1491169695Skan if ((int) a <= (int) CPP_LAST_EQ && c == '=') 1492169695Skan return 1; 1493169695Skan 1494169695Skan switch (a) 1495169695Skan { 1496169695Skan case CPP_GREATER: return c == '>'; 1497169695Skan case CPP_LESS: return c == '<' || c == '%' || c == ':'; 1498169695Skan case CPP_PLUS: return c == '+'; 1499169695Skan case CPP_MINUS: return c == '-' || c == '>'; 1500169695Skan case CPP_DIV: return c == '/' || c == '*'; /* Comments. */ 1501169695Skan case CPP_MOD: return c == ':' || c == '>'; 1502169695Skan case CPP_AND: return c == '&'; 1503169695Skan case CPP_OR: return c == '|'; 1504169695Skan case CPP_COLON: return c == ':' || c == '>'; 1505169695Skan case CPP_DEREF: return c == '*'; 1506169695Skan case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER; 1507169695Skan case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */ 1508169695Skan case CPP_NAME: return ((b == CPP_NUMBER 1509169695Skan && name_p (pfile, &token2->val.str)) 1510169695Skan || b == CPP_NAME 1511169695Skan || b == CPP_CHAR || b == CPP_STRING); /* L */ 1512169695Skan case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME 1513169695Skan || c == '.' || c == '+' || c == '-'); 1514169695Skan /* UCNs */ 1515169695Skan case CPP_OTHER: return ((token1->val.str.text[0] == '\\' 1516169695Skan && b == CPP_NAME) 1517169695Skan || (CPP_OPTION (pfile, objc) 1518169695Skan && token1->val.str.text[0] == '@' 1519169695Skan && (b == CPP_NAME || b == CPP_STRING))); 1520169695Skan default: break; 1521169695Skan } 1522169695Skan 1523169695Skan return 0; 1524169695Skan} 1525169695Skan 1526169695Skan/* Output all the remaining tokens on the current line, and a newline 1527169695Skan character, to FP. Leading whitespace is removed. If there are 1528169695Skan macros, special token padding is not performed. */ 1529169695Skanvoid 1530169695Skancpp_output_line (cpp_reader *pfile, FILE *fp) 1531169695Skan{ 1532169695Skan const cpp_token *token; 1533169695Skan 1534169695Skan token = cpp_get_token (pfile); 1535169695Skan while (token->type != CPP_EOF) 1536169695Skan { 1537169695Skan cpp_output_token (token, fp); 1538169695Skan token = cpp_get_token (pfile); 1539169695Skan if (token->flags & PREV_WHITE) 1540169695Skan putc (' ', fp); 1541169695Skan } 1542169695Skan 1543169695Skan putc ('\n', fp); 1544169695Skan} 1545169695Skan 1546169695Skan/* Memory buffers. Changing these three constants can have a dramatic 1547169695Skan effect on performance. The values here are reasonable defaults, 1548169695Skan but might be tuned. If you adjust them, be sure to test across a 1549169695Skan range of uses of cpplib, including heavy nested function-like macro 1550169695Skan expansion. Also check the change in peak memory usage (NJAMD is a 1551169695Skan good tool for this). */ 1552169695Skan#define MIN_BUFF_SIZE 8000 1553169695Skan#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2) 1554169695Skan#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \ 1555169695Skan (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) 1556169695Skan 1557169695Skan#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0) 1558169695Skan #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE! 1559169695Skan#endif 1560169695Skan 1561169695Skan/* Create a new allocation buffer. Place the control block at the end 1562169695Skan of the buffer, so that buffer overflows will cause immediate chaos. */ 1563169695Skanstatic _cpp_buff * 1564169695Skannew_buff (size_t len) 1565169695Skan{ 1566169695Skan _cpp_buff *result; 1567169695Skan unsigned char *base; 1568169695Skan 1569169695Skan if (len < MIN_BUFF_SIZE) 1570169695Skan len = MIN_BUFF_SIZE; 1571169695Skan len = CPP_ALIGN (len); 1572169695Skan 1573169695Skan base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff)); 1574169695Skan result = (_cpp_buff *) (base + len); 1575169695Skan result->base = base; 1576169695Skan result->cur = base; 1577169695Skan result->limit = base + len; 1578169695Skan result->next = NULL; 1579169695Skan return result; 1580169695Skan} 1581169695Skan 1582169695Skan/* Place a chain of unwanted allocation buffers on the free list. */ 1583169695Skanvoid 1584169695Skan_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff) 1585169695Skan{ 1586169695Skan _cpp_buff *end = buff; 1587169695Skan 1588169695Skan while (end->next) 1589169695Skan end = end->next; 1590169695Skan end->next = pfile->free_buffs; 1591169695Skan pfile->free_buffs = buff; 1592169695Skan} 1593169695Skan 1594169695Skan/* Return a free buffer of size at least MIN_SIZE. */ 1595169695Skan_cpp_buff * 1596169695Skan_cpp_get_buff (cpp_reader *pfile, size_t min_size) 1597169695Skan{ 1598169695Skan _cpp_buff *result, **p; 1599169695Skan 1600169695Skan for (p = &pfile->free_buffs;; p = &(*p)->next) 1601169695Skan { 1602169695Skan size_t size; 1603169695Skan 1604169695Skan if (*p == NULL) 1605169695Skan return new_buff (min_size); 1606169695Skan result = *p; 1607169695Skan size = result->limit - result->base; 1608169695Skan /* Return a buffer that's big enough, but don't waste one that's 1609169695Skan way too big. */ 1610169695Skan if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) 1611169695Skan break; 1612169695Skan } 1613169695Skan 1614169695Skan *p = result->next; 1615169695Skan result->next = NULL; 1616169695Skan result->cur = result->base; 1617169695Skan return result; 1618169695Skan} 1619169695Skan 1620169695Skan/* Creates a new buffer with enough space to hold the uncommitted 1621169695Skan remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies 1622169695Skan the excess bytes to the new buffer. Chains the new buffer after 1623169695Skan BUFF, and returns the new buffer. */ 1624169695Skan_cpp_buff * 1625169695Skan_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra) 1626169695Skan{ 1627169695Skan size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); 1628169695Skan _cpp_buff *new_buff = _cpp_get_buff (pfile, size); 1629169695Skan 1630169695Skan buff->next = new_buff; 1631169695Skan memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); 1632169695Skan return new_buff; 1633169695Skan} 1634169695Skan 1635169695Skan/* Creates a new buffer with enough space to hold the uncommitted 1636169695Skan remaining bytes of the buffer pointed to by BUFF, and at least 1637169695Skan MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. 1638169695Skan Chains the new buffer before the buffer pointed to by BUFF, and 1639169695Skan updates the pointer to point to the new buffer. */ 1640169695Skanvoid 1641169695Skan_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra) 1642169695Skan{ 1643169695Skan _cpp_buff *new_buff, *old_buff = *pbuff; 1644169695Skan size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); 1645169695Skan 1646169695Skan new_buff = _cpp_get_buff (pfile, size); 1647169695Skan memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); 1648169695Skan new_buff->next = old_buff; 1649169695Skan *pbuff = new_buff; 1650169695Skan} 1651169695Skan 1652169695Skan/* Free a chain of buffers starting at BUFF. */ 1653169695Skanvoid 1654169695Skan_cpp_free_buff (_cpp_buff *buff) 1655169695Skan{ 1656169695Skan _cpp_buff *next; 1657169695Skan 1658169695Skan for (; buff; buff = next) 1659169695Skan { 1660169695Skan next = buff->next; 1661169695Skan free (buff->base); 1662169695Skan } 1663169695Skan} 1664169695Skan 1665169695Skan/* Allocate permanent, unaligned storage of length LEN. */ 1666169695Skanunsigned char * 1667169695Skan_cpp_unaligned_alloc (cpp_reader *pfile, size_t len) 1668169695Skan{ 1669169695Skan _cpp_buff *buff = pfile->u_buff; 1670169695Skan unsigned char *result = buff->cur; 1671169695Skan 1672169695Skan if (len > (size_t) (buff->limit - result)) 1673169695Skan { 1674169695Skan buff = _cpp_get_buff (pfile, len); 1675169695Skan buff->next = pfile->u_buff; 1676169695Skan pfile->u_buff = buff; 1677169695Skan result = buff->cur; 1678169695Skan } 1679169695Skan 1680169695Skan buff->cur = result + len; 1681169695Skan return result; 1682169695Skan} 1683169695Skan 1684169695Skan/* Allocate permanent, unaligned storage of length LEN from a_buff. 1685169695Skan That buffer is used for growing allocations when saving macro 1686169695Skan replacement lists in a #define, and when parsing an answer to an 1687169695Skan assertion in #assert, #unassert or #if (and therefore possibly 1688169695Skan whilst expanding macros). It therefore must not be used by any 1689169695Skan code that they might call: specifically the lexer and the guts of 1690169695Skan the macro expander. 1691169695Skan 1692169695Skan All existing other uses clearly fit this restriction: storing 1693169695Skan registered pragmas during initialization. */ 1694169695Skanunsigned char * 1695169695Skan_cpp_aligned_alloc (cpp_reader *pfile, size_t len) 1696169695Skan{ 1697169695Skan _cpp_buff *buff = pfile->a_buff; 1698169695Skan unsigned char *result = buff->cur; 1699169695Skan 1700169695Skan if (len > (size_t) (buff->limit - result)) 1701169695Skan { 1702169695Skan buff = _cpp_get_buff (pfile, len); 1703169695Skan buff->next = pfile->a_buff; 1704169695Skan pfile->a_buff = buff; 1705169695Skan result = buff->cur; 1706169695Skan } 1707169695Skan 1708169695Skan buff->cur = result + len; 1709169695Skan return result; 1710169695Skan} 1711169695Skan 1712169695Skan/* Say which field of TOK is in use. */ 1713169695Skan 1714169695Skanenum cpp_token_fld_kind 1715169695Skancpp_token_val_index (cpp_token *tok) 1716169695Skan{ 1717169695Skan switch (TOKEN_SPELL (tok)) 1718169695Skan { 1719169695Skan case SPELL_IDENT: 1720169695Skan return CPP_TOKEN_FLD_NODE; 1721169695Skan case SPELL_LITERAL: 1722169695Skan return CPP_TOKEN_FLD_STR; 1723169695Skan case SPELL_NONE: 1724169695Skan if (tok->type == CPP_MACRO_ARG) 1725169695Skan return CPP_TOKEN_FLD_ARG_NO; 1726169695Skan else if (tok->type == CPP_PADDING) 1727169695Skan return CPP_TOKEN_FLD_SOURCE; 1728169695Skan else if (tok->type == CPP_PRAGMA) 1729169695Skan return CPP_TOKEN_FLD_PRAGMA; 1730169695Skan /* else fall through */ 1731169695Skan default: 1732169695Skan return CPP_TOKEN_FLD_NONE; 1733169695Skan } 1734169695Skan} 1735