1169695Skan/* CPP Library - lexical analysis. 2169695Skan Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 3169695Skan Contributed by Per Bothner, 1994-95. 4169695Skan Based on CCCP program by Paul Rubin, June 1986 5169695Skan Adapted to ANSI C, Richard Stallman, Jan 1987 6169695Skan Broken out to separate file, Zack Weinberg, Mar 2000 7169695Skan 8169695SkanThis program is free software; you can redistribute it and/or modify it 9169695Skanunder the terms of the GNU General Public License as published by the 10169695SkanFree Software Foundation; either version 2, or (at your option) any 11169695Skanlater version. 12169695Skan 13169695SkanThis program is distributed in the hope that it will be useful, 14169695Skanbut WITHOUT ANY WARRANTY; without even the implied warranty of 15169695SkanMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16169695SkanGNU General Public License for more details. 17169695Skan 18169695SkanYou should have received a copy of the GNU General Public License 19169695Skanalong with this program; if not, write to the Free Software 20169695SkanFoundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 21169695Skan 22169695Skan#include "config.h" 23169695Skan#include "system.h" 24169695Skan#include "cpplib.h" 25169695Skan#include "internal.h" 26169695Skan 27169695Skanenum spell_type 28169695Skan{ 29169695Skan SPELL_OPERATOR = 0, 30169695Skan SPELL_IDENT, 31169695Skan SPELL_LITERAL, 32169695Skan SPELL_NONE 33169695Skan}; 34169695Skan 35169695Skanstruct token_spelling 36169695Skan{ 37169695Skan enum spell_type category; 38169695Skan const unsigned char *name; 39169695Skan}; 40169695Skan 41169695Skanstatic const unsigned char *const digraph_spellings[] = 42169695Skan{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" }; 43169695Skan 44169695Skan#define OP(e, s) { SPELL_OPERATOR, U s }, 45169695Skan#define TK(e, s) { SPELL_ ## s, U #e }, 46169695Skanstatic const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; 47169695Skan#undef OP 48169695Skan#undef TK 49169695Skan 50169695Skan#define TOKEN_SPELL(token) (token_spellings[(token)->type].category) 51169695Skan#define TOKEN_NAME(token) (token_spellings[(token)->type].name) 52169695Skan 53169695Skanstatic void add_line_note (cpp_buffer *, const uchar *, unsigned int); 54169695Skanstatic int skip_line_comment (cpp_reader *); 55169695Skanstatic void skip_whitespace (cpp_reader *, cppchar_t); 56169695Skanstatic void lex_string (cpp_reader *, cpp_token *, const uchar *); 57169695Skanstatic void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t); 58169695Skanstatic void create_literal (cpp_reader *, cpp_token *, const uchar *, 59169695Skan unsigned int, enum cpp_ttype); 60169695Skanstatic bool warn_in_comment (cpp_reader *, _cpp_line_note *); 61169695Skanstatic int name_p (cpp_reader *, const cpp_string *); 62169695Skanstatic tokenrun *next_tokenrun (tokenrun *); 63169695Skan 64169695Skanstatic _cpp_buff *new_buff (size_t); 65169695Skan 66169695Skan 67169695Skan/* Utility routine: 68169695Skan 69169695Skan Compares, the token TOKEN to the NUL-terminated string STRING. 70169695Skan TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */ 71169695Skanint 72169695Skancpp_ideq (const cpp_token *token, const char *string) 73169695Skan{ 74169695Skan if (token->type != CPP_NAME) 75169695Skan return 0; 76169695Skan 77169695Skan return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string); 78169695Skan} 79169695Skan 80169695Skan/* Record a note TYPE at byte POS into the current cleaned logical 81169695Skan line. */ 82169695Skanstatic void 83169695Skanadd_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type) 84169695Skan{ 85169695Skan if (buffer->notes_used == buffer->notes_cap) 86169695Skan { 87169695Skan buffer->notes_cap = buffer->notes_cap * 2 + 200; 88169695Skan buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes, 89169695Skan buffer->notes_cap); 90169695Skan } 91169695Skan 92169695Skan buffer->notes[buffer->notes_used].pos = pos; 93169695Skan buffer->notes[buffer->notes_used].type = type; 94169695Skan buffer->notes_used++; 95169695Skan} 96169695Skan 97169695Skan/* Returns with a logical line that contains no escaped newlines or 98169695Skan trigraphs. This is a time-critical inner loop. */ 99169695Skanvoid 100169695Skan_cpp_clean_line (cpp_reader *pfile) 101169695Skan{ 102169695Skan cpp_buffer *buffer; 103169695Skan const uchar *s; 104169695Skan uchar c, *d, *p; 105169695Skan 106169695Skan buffer = pfile->buffer; 107169695Skan buffer->cur_note = buffer->notes_used = 0; 108169695Skan buffer->cur = buffer->line_base = buffer->next_line; 109169695Skan buffer->need_line = false; 110169695Skan s = buffer->next_line - 1; 111169695Skan 112169695Skan if (!buffer->from_stage3) 113169695Skan { 114169695Skan /* Short circuit for the common case of an un-escaped line with 115169695Skan no trigraphs. The primary win here is by not writing any 116169695Skan data back to memory until we have to. */ 117169695Skan for (;;) 118169695Skan { 119169695Skan c = *++s; 120169695Skan if (c == '\n' || c == '\r') 121169695Skan { 122169695Skan d = (uchar *) s; 123169695Skan 124169695Skan if (s == buffer->rlimit) 125169695Skan goto done; 126169695Skan 127169695Skan /* DOS line ending? */ 128169695Skan if (c == '\r' && s[1] == '\n') 129169695Skan s++; 130169695Skan 131169695Skan if (s == buffer->rlimit) 132169695Skan goto done; 133169695Skan 134169695Skan /* check for escaped newline */ 135169695Skan p = d; 136169695Skan while (p != buffer->next_line && is_nvspace (p[-1])) 137169695Skan p--; 138169695Skan if (p == buffer->next_line || p[-1] != '\\') 139169695Skan goto done; 140169695Skan 141169695Skan /* Have an escaped newline; process it and proceed to 142169695Skan the slow path. */ 143169695Skan add_line_note (buffer, p - 1, p != d ? ' ' : '\\'); 144169695Skan d = p - 2; 145169695Skan buffer->next_line = p - 1; 146169695Skan break; 147169695Skan } 148169695Skan if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) 149169695Skan { 150169695Skan /* Have a trigraph. We may or may not have to convert 151169695Skan it. Add a line note regardless, for -Wtrigraphs. */ 152169695Skan add_line_note (buffer, s, s[2]); 153169695Skan if (CPP_OPTION (pfile, trigraphs)) 154169695Skan { 155169695Skan /* We do, and that means we have to switch to the 156169695Skan slow path. */ 157169695Skan d = (uchar *) s; 158169695Skan *d = _cpp_trigraph_map[s[2]]; 159169695Skan s += 2; 160169695Skan break; 161169695Skan } 162169695Skan } 163169695Skan } 164169695Skan 165169695Skan 166169695Skan for (;;) 167169695Skan { 168169695Skan c = *++s; 169169695Skan *++d = c; 170169695Skan 171169695Skan if (c == '\n' || c == '\r') 172169695Skan { 173169695Skan /* Handle DOS line endings. */ 174169695Skan if (c == '\r' && s != buffer->rlimit && s[1] == '\n') 175169695Skan s++; 176169695Skan if (s == buffer->rlimit) 177169695Skan break; 178169695Skan 179169695Skan /* Escaped? */ 180169695Skan p = d; 181169695Skan while (p != buffer->next_line && is_nvspace (p[-1])) 182169695Skan p--; 183169695Skan if (p == buffer->next_line || p[-1] != '\\') 184169695Skan break; 185169695Skan 186169695Skan add_line_note (buffer, p - 1, p != d ? ' ': '\\'); 187169695Skan d = p - 2; 188169695Skan buffer->next_line = p - 1; 189169695Skan } 190169695Skan else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) 191169695Skan { 192169695Skan /* Add a note regardless, for the benefit of -Wtrigraphs. */ 193169695Skan add_line_note (buffer, d, s[2]); 194169695Skan if (CPP_OPTION (pfile, trigraphs)) 195169695Skan { 196169695Skan *d = _cpp_trigraph_map[s[2]]; 197169695Skan s += 2; 198169695Skan } 199169695Skan } 200169695Skan } 201169695Skan } 202169695Skan else 203169695Skan { 204169695Skan do 205169695Skan s++; 206169695Skan while (*s != '\n' && *s != '\r'); 207169695Skan d = (uchar *) s; 208169695Skan 209169695Skan /* Handle DOS line endings. */ 210169695Skan if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') 211169695Skan s++; 212169695Skan } 213169695Skan 214169695Skan done: 215169695Skan *d = '\n'; 216169695Skan /* A sentinel note that should never be processed. */ 217169695Skan add_line_note (buffer, d + 1, '\n'); 218169695Skan buffer->next_line = s + 1; 219169695Skan} 220169695Skan 221169695Skan/* Return true if the trigraph indicated by NOTE should be warned 222169695Skan about in a comment. */ 223169695Skanstatic bool 224169695Skanwarn_in_comment (cpp_reader *pfile, _cpp_line_note *note) 225169695Skan{ 226169695Skan const uchar *p; 227169695Skan 228169695Skan /* Within comments we don't warn about trigraphs, unless the 229169695Skan trigraph forms an escaped newline, as that may change 230169695Skan behavior. */ 231169695Skan if (note->type != '/') 232169695Skan return false; 233169695Skan 234169695Skan /* If -trigraphs, then this was an escaped newline iff the next note 235169695Skan is coincident. */ 236169695Skan if (CPP_OPTION (pfile, trigraphs)) 237169695Skan return note[1].pos == note->pos; 238169695Skan 239169695Skan /* Otherwise, see if this forms an escaped newline. */ 240169695Skan p = note->pos + 3; 241169695Skan while (is_nvspace (*p)) 242169695Skan p++; 243169695Skan 244169695Skan /* There might have been escaped newlines between the trigraph and the 245169695Skan newline we found. Hence the position test. */ 246169695Skan return (*p == '\n' && p < note[1].pos); 247169695Skan} 248169695Skan 249169695Skan/* Process the notes created by add_line_note as far as the current 250169695Skan location. */ 251169695Skanvoid 252169695Skan_cpp_process_line_notes (cpp_reader *pfile, int in_comment) 253169695Skan{ 254169695Skan cpp_buffer *buffer = pfile->buffer; 255169695Skan 256169695Skan for (;;) 257169695Skan { 258169695Skan _cpp_line_note *note = &buffer->notes[buffer->cur_note]; 259169695Skan unsigned int col; 260169695Skan 261169695Skan if (note->pos > buffer->cur) 262169695Skan break; 263169695Skan 264169695Skan buffer->cur_note++; 265169695Skan col = CPP_BUF_COLUMN (buffer, note->pos + 1); 266169695Skan 267169695Skan if (note->type == '\\' || note->type == ' ') 268169695Skan { 269169695Skan if (note->type == ' ' && !in_comment) 270169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 271169695Skan "backslash and newline separated by space"); 272169695Skan 273169695Skan if (buffer->next_line > buffer->rlimit) 274169695Skan { 275169695Skan cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col, 276169695Skan "backslash-newline at end of file"); 277169695Skan /* Prevent "no newline at end of file" warning. */ 278169695Skan buffer->next_line = buffer->rlimit; 279169695Skan } 280169695Skan 281169695Skan buffer->line_base = note->pos; 282169695Skan CPP_INCREMENT_LINE (pfile, 0); 283169695Skan } 284169695Skan else if (_cpp_trigraph_map[note->type]) 285169695Skan { 286169695Skan if (CPP_OPTION (pfile, warn_trigraphs) 287169695Skan && (!in_comment || warn_in_comment (pfile, note))) 288169695Skan { 289169695Skan if (CPP_OPTION (pfile, trigraphs)) 290169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 291169695Skan "trigraph ??%c converted to %c", 292169695Skan note->type, 293169695Skan (int) _cpp_trigraph_map[note->type]); 294169695Skan else 295169695Skan { 296169695Skan cpp_error_with_line 297169695Skan (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 298169695Skan "trigraph ??%c ignored, use -trigraphs to enable", 299169695Skan note->type); 300169695Skan } 301169695Skan } 302169695Skan } 303169695Skan else 304169695Skan abort (); 305169695Skan } 306169695Skan} 307169695Skan 308169695Skan/* Skip a C-style block comment. We find the end of the comment by 309169695Skan seeing if an asterisk is before every '/' we encounter. Returns 310169695Skan nonzero if comment terminated by EOF, zero otherwise. 311169695Skan 312169695Skan Buffer->cur points to the initial asterisk of the comment. */ 313169695Skanbool 314169695Skan_cpp_skip_block_comment (cpp_reader *pfile) 315169695Skan{ 316169695Skan cpp_buffer *buffer = pfile->buffer; 317169695Skan const uchar *cur = buffer->cur; 318169695Skan uchar c; 319169695Skan 320169695Skan cur++; 321169695Skan if (*cur == '/') 322169695Skan cur++; 323169695Skan 324169695Skan for (;;) 325169695Skan { 326169695Skan /* People like decorating comments with '*', so check for '/' 327169695Skan instead for efficiency. */ 328169695Skan c = *cur++; 329169695Skan 330169695Skan if (c == '/') 331169695Skan { 332169695Skan if (cur[-2] == '*') 333169695Skan break; 334169695Skan 335169695Skan /* Warn about potential nested comments, but not if the '/' 336169695Skan comes immediately before the true comment delimiter. 337169695Skan Don't bother to get it right across escaped newlines. */ 338169695Skan if (CPP_OPTION (pfile, warn_comments) 339169695Skan && cur[0] == '*' && cur[1] != '/') 340169695Skan { 341169695Skan buffer->cur = cur; 342169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, 343169695Skan pfile->line_table->highest_line, CPP_BUF_COL (buffer), 344169695Skan "\"/*\" within comment"); 345169695Skan } 346169695Skan } 347169695Skan else if (c == '\n') 348169695Skan { 349169695Skan unsigned int cols; 350169695Skan buffer->cur = cur - 1; 351169695Skan _cpp_process_line_notes (pfile, true); 352169695Skan if (buffer->next_line >= buffer->rlimit) 353169695Skan return true; 354169695Skan _cpp_clean_line (pfile); 355169695Skan 356169695Skan cols = buffer->next_line - buffer->line_base; 357169695Skan CPP_INCREMENT_LINE (pfile, cols); 358169695Skan 359169695Skan cur = buffer->cur; 360169695Skan } 361169695Skan } 362169695Skan 363169695Skan buffer->cur = cur; 364169695Skan _cpp_process_line_notes (pfile, true); 365169695Skan return false; 366169695Skan} 367169695Skan 368169695Skan/* Skip a C++ line comment, leaving buffer->cur pointing to the 369169695Skan terminating newline. Handles escaped newlines. Returns nonzero 370169695Skan if a multiline comment. */ 371169695Skanstatic int 372169695Skanskip_line_comment (cpp_reader *pfile) 373169695Skan{ 374169695Skan cpp_buffer *buffer = pfile->buffer; 375169695Skan unsigned int orig_line = pfile->line_table->highest_line; 376169695Skan 377169695Skan while (*buffer->cur != '\n') 378169695Skan buffer->cur++; 379169695Skan 380169695Skan _cpp_process_line_notes (pfile, true); 381169695Skan return orig_line != pfile->line_table->highest_line; 382169695Skan} 383169695Skan 384169695Skan/* Skips whitespace, saving the next non-whitespace character. */ 385169695Skanstatic void 386169695Skanskip_whitespace (cpp_reader *pfile, cppchar_t c) 387169695Skan{ 388169695Skan cpp_buffer *buffer = pfile->buffer; 389169695Skan bool saw_NUL = false; 390169695Skan 391169695Skan do 392169695Skan { 393169695Skan /* Horizontal space always OK. */ 394169695Skan if (c == ' ' || c == '\t') 395169695Skan ; 396169695Skan /* Just \f \v or \0 left. */ 397169695Skan else if (c == '\0') 398169695Skan saw_NUL = true; 399169695Skan else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) 400169695Skan cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, 401169695Skan CPP_BUF_COL (buffer), 402169695Skan "%s in preprocessing directive", 403169695Skan c == '\f' ? "form feed" : "vertical tab"); 404169695Skan 405169695Skan c = *buffer->cur++; 406169695Skan } 407169695Skan /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */ 408169695Skan while (is_nvspace (c)); 409169695Skan 410169695Skan if (saw_NUL) 411169695Skan cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); 412169695Skan 413169695Skan buffer->cur--; 414169695Skan} 415169695Skan 416169695Skan/* See if the characters of a number token are valid in a name (no 417169695Skan '.', '+' or '-'). */ 418169695Skanstatic int 419169695Skanname_p (cpp_reader *pfile, const cpp_string *string) 420169695Skan{ 421169695Skan unsigned int i; 422169695Skan 423169695Skan for (i = 0; i < string->len; i++) 424169695Skan if (!is_idchar (string->text[i])) 425169695Skan return 0; 426169695Skan 427169695Skan return 1; 428169695Skan} 429169695Skan 430169695Skan/* After parsing an identifier or other sequence, produce a warning about 431169695Skan sequences not in NFC/NFKC. */ 432169695Skanstatic void 433169695Skanwarn_about_normalization (cpp_reader *pfile, 434169695Skan const cpp_token *token, 435169695Skan const struct normalize_state *s) 436169695Skan{ 437169695Skan if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s) 438169695Skan && !pfile->state.skipping) 439169695Skan { 440169695Skan /* Make sure that the token is printed using UCNs, even 441169695Skan if we'd otherwise happily print UTF-8. */ 442169695Skan unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token)); 443169695Skan size_t sz; 444169695Skan 445169695Skan sz = cpp_spell_token (pfile, token, buf, false) - buf; 446169695Skan if (NORMALIZE_STATE_RESULT (s) == normalized_C) 447169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 448169695Skan "`%.*s' is not in NFKC", (int) sz, buf); 449169695Skan else 450169695Skan cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 451169695Skan "`%.*s' is not in NFC", (int) sz, buf); 452169695Skan } 453169695Skan} 454169695Skan 455169695Skan/* Returns TRUE if the sequence starting at buffer->cur is invalid in 456169695Skan an identifier. FIRST is TRUE if this starts an identifier. */ 457169695Skanstatic bool 458169695Skanforms_identifier_p (cpp_reader *pfile, int first, 459169695Skan struct normalize_state *state) 460169695Skan{ 461169695Skan cpp_buffer *buffer = pfile->buffer; 462169695Skan 463169695Skan if (*buffer->cur == '$') 464169695Skan { 465169695Skan if (!CPP_OPTION (pfile, dollars_in_ident)) 466169695Skan return false; 467169695Skan 468169695Skan buffer->cur++; 469169695Skan if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) 470169695Skan { 471169695Skan CPP_OPTION (pfile, warn_dollars) = 0; 472169695Skan cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); 473169695Skan } 474169695Skan 475169695Skan return true; 476169695Skan } 477169695Skan 478169695Skan /* Is this a syntactically valid UCN? */ 479169695Skan if (CPP_OPTION (pfile, extended_identifiers) 480169695Skan && *buffer->cur == '\\' 481169695Skan && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) 482169695Skan { 483169695Skan buffer->cur += 2; 484169695Skan if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, 485169695Skan state)) 486169695Skan return true; 487169695Skan buffer->cur -= 2; 488169695Skan } 489169695Skan 490169695Skan return false; 491169695Skan} 492169695Skan 493169695Skan/* Lex an identifier starting at BUFFER->CUR - 1. */ 494169695Skanstatic cpp_hashnode * 495169695Skanlex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, 496169695Skan struct normalize_state *nst) 497169695Skan{ 498169695Skan cpp_hashnode *result; 499169695Skan const uchar *cur; 500169695Skan unsigned int len; 501169695Skan unsigned int hash = HT_HASHSTEP (0, *base); 502169695Skan 503169695Skan cur = pfile->buffer->cur; 504169695Skan if (! starts_ucn) 505169695Skan while (ISIDNUM (*cur)) 506169695Skan { 507169695Skan hash = HT_HASHSTEP (hash, *cur); 508169695Skan cur++; 509169695Skan } 510169695Skan pfile->buffer->cur = cur; 511169695Skan if (starts_ucn || forms_identifier_p (pfile, false, nst)) 512169695Skan { 513169695Skan /* Slower version for identifiers containing UCNs (or $). */ 514169695Skan do { 515169695Skan while (ISIDNUM (*pfile->buffer->cur)) 516169695Skan { 517169695Skan pfile->buffer->cur++; 518169695Skan NORMALIZE_STATE_UPDATE_IDNUM (nst); 519169695Skan } 520169695Skan } while (forms_identifier_p (pfile, false, nst)); 521169695Skan result = _cpp_interpret_identifier (pfile, base, 522169695Skan pfile->buffer->cur - base); 523169695Skan } 524169695Skan else 525169695Skan { 526169695Skan len = cur - base; 527169695Skan hash = HT_HASHFINISH (hash, len); 528169695Skan 529169695Skan result = (cpp_hashnode *) 530169695Skan ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC); 531169695Skan } 532169695Skan 533169695Skan /* Rarely, identifiers require diagnostics when lexed. */ 534169695Skan if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) 535169695Skan && !pfile->state.skipping, 0)) 536169695Skan { 537169695Skan /* It is allowed to poison the same identifier twice. */ 538169695Skan if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) 539169695Skan cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", 540169695Skan NODE_NAME (result)); 541169695Skan 542169695Skan /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 543169695Skan replacement list of a variadic macro. */ 544169695Skan if (result == pfile->spec_nodes.n__VA_ARGS__ 545169695Skan && !pfile->state.va_args_ok) 546169695Skan cpp_error (pfile, CPP_DL_PEDWARN, 547169695Skan "__VA_ARGS__ can only appear in the expansion" 548169695Skan " of a C99 variadic macro"); 549169695Skan } 550169695Skan 551169695Skan return result; 552169695Skan} 553169695Skan 554169695Skan/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */ 555169695Skanstatic void 556169695Skanlex_number (cpp_reader *pfile, cpp_string *number, 557169695Skan struct normalize_state *nst) 558169695Skan{ 559169695Skan const uchar *cur; 560169695Skan const uchar *base; 561169695Skan uchar *dest; 562169695Skan 563169695Skan base = pfile->buffer->cur - 1; 564169695Skan do 565169695Skan { 566169695Skan cur = pfile->buffer->cur; 567169695Skan 568169695Skan /* N.B. ISIDNUM does not include $. */ 569169695Skan while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) 570169695Skan { 571169695Skan cur++; 572169695Skan NORMALIZE_STATE_UPDATE_IDNUM (nst); 573169695Skan } 574169695Skan 575169695Skan pfile->buffer->cur = cur; 576169695Skan } 577169695Skan while (forms_identifier_p (pfile, false, nst)); 578169695Skan 579169695Skan number->len = cur - base; 580169695Skan dest = _cpp_unaligned_alloc (pfile, number->len + 1); 581169695Skan memcpy (dest, base, number->len); 582169695Skan dest[number->len] = '\0'; 583169695Skan number->text = dest; 584169695Skan} 585169695Skan 586169695Skan/* Create a token of type TYPE with a literal spelling. */ 587169695Skanstatic void 588169695Skancreate_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, 589169695Skan unsigned int len, enum cpp_ttype type) 590169695Skan{ 591169695Skan uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); 592169695Skan 593169695Skan memcpy (dest, base, len); 594169695Skan dest[len] = '\0'; 595169695Skan token->type = type; 596169695Skan token->val.str.len = len; 597169695Skan token->val.str.text = dest; 598169695Skan} 599169695Skan 600169695Skan/* Lexes a string, character constant, or angle-bracketed header file 601169695Skan name. The stored string contains the spelling, including opening 602169695Skan quote and leading any leading 'L'. It returns the type of the 603169695Skan literal, or CPP_OTHER if it was not properly terminated. 604169695Skan 605169695Skan The spelling is NUL-terminated, but it is not guaranteed that this 606169695Skan is the first NUL since embedded NULs are preserved. */ 607169695Skanstatic void 608169695Skanlex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) 609169695Skan{ 610169695Skan bool saw_NUL = false; 611169695Skan const uchar *cur; 612169695Skan cppchar_t terminator; 613169695Skan enum cpp_ttype type; 614169695Skan 615169695Skan cur = base; 616169695Skan terminator = *cur++; 617169695Skan if (terminator == 'L') 618169695Skan terminator = *cur++; 619169695Skan if (terminator == '\"') 620169695Skan type = *base == 'L' ? CPP_WSTRING: CPP_STRING; 621169695Skan else if (terminator == '\'') 622169695Skan type = *base == 'L' ? CPP_WCHAR: CPP_CHAR; 623169695Skan else 624169695Skan terminator = '>', type = CPP_HEADER_NAME; 625169695Skan 626169695Skan for (;;) 627169695Skan { 628169695Skan cppchar_t c = *cur++; 629169695Skan 630169695Skan /* In #include-style directives, terminators are not escapable. */ 631169695Skan if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') 632169695Skan cur++; 633169695Skan else if (c == terminator) 634169695Skan break; 635169695Skan else if (c == '\n') 636169695Skan { 637169695Skan cur--; 638169695Skan type = CPP_OTHER; 639169695Skan break; 640169695Skan } 641169695Skan else if (c == '\0') 642169695Skan saw_NUL = true; 643169695Skan } 644169695Skan 645169695Skan if (saw_NUL && !pfile->state.skipping) 646169695Skan cpp_error (pfile, CPP_DL_WARNING, 647169695Skan "null character(s) preserved in literal"); 648169695Skan 649169695Skan if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM) 650169695Skan cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character", 651169695Skan (int) terminator); 652169695Skan 653169695Skan pfile->buffer->cur = cur; 654169695Skan create_literal (pfile, token, base, cur - base, type); 655169695Skan} 656169695Skan 657169695Skan/* The stored comment includes the comment start and any terminator. */ 658169695Skanstatic void 659169695Skansave_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from, 660169695Skan cppchar_t type) 661169695Skan{ 662169695Skan unsigned char *buffer; 663169695Skan unsigned int len, clen; 664169695Skan 665169695Skan len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ 666169695Skan 667169695Skan /* C++ comments probably (not definitely) have moved past a new 668169695Skan line, which we don't want to save in the comment. */ 669169695Skan if (is_vspace (pfile->buffer->cur[-1])) 670169695Skan len--; 671169695Skan 672169695Skan /* If we are currently in a directive, then we need to store all 673169695Skan C++ comments as C comments internally, and so we need to 674169695Skan allocate a little extra space in that case. 675169695Skan 676169695Skan Note that the only time we encounter a directive here is 677169695Skan when we are saving comments in a "#define". */ 678169695Skan clen = (pfile->state.in_directive && type == '/') ? len + 2 : len; 679169695Skan 680169695Skan buffer = _cpp_unaligned_alloc (pfile, clen); 681169695Skan 682169695Skan token->type = CPP_COMMENT; 683169695Skan token->val.str.len = clen; 684169695Skan token->val.str.text = buffer; 685169695Skan 686169695Skan buffer[0] = '/'; 687169695Skan memcpy (buffer + 1, from, len - 1); 688169695Skan 689169695Skan /* Finish conversion to a C comment, if necessary. */ 690169695Skan if (pfile->state.in_directive && type == '/') 691169695Skan { 692169695Skan buffer[1] = '*'; 693169695Skan buffer[clen - 2] = '*'; 694169695Skan buffer[clen - 1] = '/'; 695169695Skan } 696169695Skan} 697169695Skan 698169695Skan/* Allocate COUNT tokens for RUN. */ 699169695Skanvoid 700169695Skan_cpp_init_tokenrun (tokenrun *run, unsigned int count) 701169695Skan{ 702169695Skan run->base = XNEWVEC (cpp_token, count); 703169695Skan run->limit = run->base + count; 704169695Skan run->next = NULL; 705169695Skan} 706169695Skan 707169695Skan/* Returns the next tokenrun, or creates one if there is none. */ 708169695Skanstatic tokenrun * 709169695Skannext_tokenrun (tokenrun *run) 710169695Skan{ 711169695Skan if (run->next == NULL) 712169695Skan { 713169695Skan run->next = XNEW (tokenrun); 714169695Skan run->next->prev = run; 715169695Skan _cpp_init_tokenrun (run->next, 250); 716169695Skan } 717169695Skan 718169695Skan return run->next; 719169695Skan} 720169695Skan 721169695Skan/* Allocate a single token that is invalidated at the same time as the 722169695Skan rest of the tokens on the line. Has its line and col set to the 723169695Skan same as the last lexed token, so that diagnostics appear in the 724169695Skan right place. */ 725169695Skancpp_token * 726169695Skan_cpp_temp_token (cpp_reader *pfile) 727169695Skan{ 728169695Skan cpp_token *old, *result; 729169695Skan 730169695Skan old = pfile->cur_token - 1; 731169695Skan if (pfile->cur_token == pfile->cur_run->limit) 732169695Skan { 733169695Skan pfile->cur_run = next_tokenrun (pfile->cur_run); 734169695Skan pfile->cur_token = pfile->cur_run->base; 735169695Skan } 736169695Skan 737169695Skan result = pfile->cur_token++; 738169695Skan result->src_loc = old->src_loc; 739169695Skan return result; 740169695Skan} 741169695Skan 742169695Skan/* Lex a token into RESULT (external interface). Takes care of issues 743169695Skan like directive handling, token lookahead, multiple include 744169695Skan optimization and skipping. */ 745169695Skanconst cpp_token * 746169695Skan_cpp_lex_token (cpp_reader *pfile) 747169695Skan{ 748169695Skan cpp_token *result; 749169695Skan 750169695Skan for (;;) 751169695Skan { 752169695Skan if (pfile->cur_token == pfile->cur_run->limit) 753169695Skan { 754169695Skan pfile->cur_run = next_tokenrun (pfile->cur_run); 755169695Skan pfile->cur_token = pfile->cur_run->base; 756169695Skan } 757169695Skan 758169695Skan if (pfile->lookaheads) 759169695Skan { 760169695Skan pfile->lookaheads--; 761169695Skan result = pfile->cur_token++; 762169695Skan } 763169695Skan else 764169695Skan result = _cpp_lex_direct (pfile); 765169695Skan 766169695Skan if (result->flags & BOL) 767169695Skan { 768169695Skan /* Is this a directive. If _cpp_handle_directive returns 769169695Skan false, it is an assembler #. */ 770169695Skan if (result->type == CPP_HASH 771169695Skan /* 6.10.3 p 11: Directives in a list of macro arguments 772169695Skan gives undefined behavior. This implementation 773169695Skan handles the directive as normal. */ 774169695Skan && pfile->state.parsing_args != 1) 775169695Skan { 776169695Skan if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE)) 777169695Skan { 778169695Skan if (pfile->directive_result.type == CPP_PADDING) 779169695Skan continue; 780169695Skan result = &pfile->directive_result; 781169695Skan } 782169695Skan } 783169695Skan else if (pfile->state.in_deferred_pragma) 784169695Skan result = &pfile->directive_result; 785169695Skan 786169695Skan if (pfile->cb.line_change && !pfile->state.skipping) 787169695Skan pfile->cb.line_change (pfile, result, pfile->state.parsing_args); 788169695Skan } 789169695Skan 790169695Skan /* We don't skip tokens in directives. */ 791169695Skan if (pfile->state.in_directive || pfile->state.in_deferred_pragma) 792169695Skan break; 793169695Skan 794169695Skan /* Outside a directive, invalidate controlling macros. At file 795169695Skan EOF, _cpp_lex_direct takes care of popping the buffer, so we never 796169695Skan get here and MI optimization works. */ 797169695Skan pfile->mi_valid = false; 798169695Skan 799169695Skan if (!pfile->state.skipping || result->type == CPP_EOF) 800169695Skan break; 801169695Skan } 802169695Skan 803169695Skan return result; 804169695Skan} 805169695Skan 806169695Skan/* Returns true if a fresh line has been loaded. */ 807169695Skanbool 808169695Skan_cpp_get_fresh_line (cpp_reader *pfile) 809169695Skan{ 810169695Skan int return_at_eof; 811169695Skan 812169695Skan /* We can't get a new line until we leave the current directive. */ 813169695Skan if (pfile->state.in_directive) 814169695Skan return false; 815169695Skan 816169695Skan for (;;) 817169695Skan { 818169695Skan cpp_buffer *buffer = pfile->buffer; 819169695Skan 820169695Skan if (!buffer->need_line) 821169695Skan return true; 822169695Skan 823169695Skan if (buffer->next_line < buffer->rlimit) 824169695Skan { 825169695Skan _cpp_clean_line (pfile); 826169695Skan return true; 827169695Skan } 828169695Skan 829169695Skan /* First, get out of parsing arguments state. */ 830169695Skan if (pfile->state.parsing_args) 831169695Skan return false; 832169695Skan 833169695Skan /* End of buffer. Non-empty files should end in a newline. */ 834169695Skan if (buffer->buf != buffer->rlimit 835169695Skan && buffer->next_line > buffer->rlimit 836169695Skan && !buffer->from_stage3) 837169695Skan { 838169695Skan /* Only warn once. */ 839169695Skan buffer->next_line = buffer->rlimit; 840169695Skan cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, 841169695Skan CPP_BUF_COLUMN (buffer, buffer->cur), 842169695Skan "no newline at end of file"); 843169695Skan } 844169695Skan 845169695Skan return_at_eof = buffer->return_at_eof; 846169695Skan _cpp_pop_buffer (pfile); 847169695Skan if (pfile->buffer == NULL || return_at_eof) 848169695Skan return false; 849169695Skan } 850169695Skan} 851169695Skan 852169695Skan#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ 853169695Skan do \ 854169695Skan { \ 855169695Skan result->type = ELSE_TYPE; \ 856169695Skan if (*buffer->cur == CHAR) \ 857169695Skan buffer->cur++, result->type = THEN_TYPE; \ 858169695Skan } \ 859169695Skan while (0) 860169695Skan 861169695Skan/* Lex a token into pfile->cur_token, which is also incremented, to 862169695Skan get diagnostics pointing to the correct location. 863169695Skan 864169695Skan Does not handle issues such as token lookahead, multiple-include 865169695Skan optimization, directives, skipping etc. This function is only 866169695Skan suitable for use by _cpp_lex_token, and in special cases like 867169695Skan lex_expansion_token which doesn't care for any of these issues. 868169695Skan 869169695Skan When meeting a newline, returns CPP_EOF if parsing a directive, 870169695Skan otherwise returns to the start of the token buffer if permissible. 871169695Skan Returns the location of the lexed token. */ 872169695Skancpp_token * 873169695Skan_cpp_lex_direct (cpp_reader *pfile) 874169695Skan{ 875169695Skan cppchar_t c; 876169695Skan cpp_buffer *buffer; 877169695Skan const unsigned char *comment_start; 878169695Skan cpp_token *result = pfile->cur_token++; 879169695Skan 880169695Skan fresh_line: 881169695Skan result->flags = 0; 882169695Skan buffer = pfile->buffer; 883169695Skan if (buffer->need_line) 884169695Skan { 885169695Skan if (pfile->state.in_deferred_pragma) 886169695Skan { 887169695Skan result->type = CPP_PRAGMA_EOL; 888169695Skan pfile->state.in_deferred_pragma = false; 889169695Skan if (!pfile->state.pragma_allow_expansion) 890169695Skan pfile->state.prevent_expansion--; 891169695Skan return result; 892169695Skan } 893169695Skan if (!_cpp_get_fresh_line (pfile)) 894169695Skan { 895169695Skan result->type = CPP_EOF; 896169695Skan if (!pfile->state.in_directive) 897169695Skan { 898169695Skan /* Tell the compiler the line number of the EOF token. */ 899169695Skan result->src_loc = pfile->line_table->highest_line; 900169695Skan result->flags = BOL; 901169695Skan } 902169695Skan return result; 903169695Skan } 904169695Skan if (!pfile->keep_tokens) 905169695Skan { 906169695Skan pfile->cur_run = &pfile->base_run; 907169695Skan result = pfile->base_run.base; 908169695Skan pfile->cur_token = result + 1; 909169695Skan } 910169695Skan result->flags = BOL; 911169695Skan if (pfile->state.parsing_args == 2) 912169695Skan result->flags |= PREV_WHITE; 913169695Skan } 914169695Skan buffer = pfile->buffer; 915169695Skan update_tokens_line: 916169695Skan result->src_loc = pfile->line_table->highest_line; 917169695Skan 918169695Skan skipped_white: 919169695Skan if (buffer->cur >= buffer->notes[buffer->cur_note].pos 920169695Skan && !pfile->overlaid_buffer) 921169695Skan { 922169695Skan _cpp_process_line_notes (pfile, false); 923169695Skan result->src_loc = pfile->line_table->highest_line; 924169695Skan } 925169695Skan c = *buffer->cur++; 926169695Skan 927169695Skan LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table, 928169695Skan CPP_BUF_COLUMN (buffer, buffer->cur)); 929169695Skan 930169695Skan switch (c) 931169695Skan { 932169695Skan case ' ': case '\t': case '\f': case '\v': case '\0': 933169695Skan result->flags |= PREV_WHITE; 934169695Skan skip_whitespace (pfile, c); 935169695Skan goto skipped_white; 936169695Skan 937169695Skan case '\n': 938169695Skan if (buffer->cur < buffer->rlimit) 939169695Skan CPP_INCREMENT_LINE (pfile, 0); 940169695Skan buffer->need_line = true; 941169695Skan goto fresh_line; 942169695Skan 943169695Skan case '0': case '1': case '2': case '3': case '4': 944169695Skan case '5': case '6': case '7': case '8': case '9': 945169695Skan { 946169695Skan struct normalize_state nst = INITIAL_NORMALIZE_STATE; 947169695Skan result->type = CPP_NUMBER; 948169695Skan lex_number (pfile, &result->val.str, &nst); 949169695Skan warn_about_normalization (pfile, result, &nst); 950169695Skan break; 951169695Skan } 952169695Skan 953169695Skan case 'L': 954169695Skan /* 'L' may introduce wide characters or strings. */ 955169695Skan if (*buffer->cur == '\'' || *buffer->cur == '"') 956169695Skan { 957169695Skan lex_string (pfile, result, buffer->cur - 1); 958169695Skan break; 959169695Skan } 960169695Skan /* Fall through. */ 961169695Skan 962169695Skan case '_': 963169695Skan case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 964169695Skan case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 965169695Skan case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 966169695Skan case 's': case 't': case 'u': case 'v': case 'w': case 'x': 967169695Skan case 'y': case 'z': 968169695Skan case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 969169695Skan case 'G': case 'H': case 'I': case 'J': case 'K': 970169695Skan case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 971169695Skan case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 972169695Skan case 'Y': case 'Z': 973169695Skan result->type = CPP_NAME; 974169695Skan { 975169695Skan struct normalize_state nst = INITIAL_NORMALIZE_STATE; 976169695Skan result->val.node = lex_identifier (pfile, buffer->cur - 1, false, 977169695Skan &nst); 978169695Skan warn_about_normalization (pfile, result, &nst); 979169695Skan } 980169695Skan 981169695Skan /* Convert named operators to their proper types. */ 982169695Skan if (result->val.node->flags & NODE_OPERATOR) 983169695Skan { 984169695Skan result->flags |= NAMED_OP; 985169695Skan result->type = (enum cpp_ttype) result->val.node->directive_index; 986169695Skan } 987169695Skan break; 988169695Skan 989169695Skan case '\'': 990169695Skan case '"': 991169695Skan lex_string (pfile, result, buffer->cur - 1); 992169695Skan break; 993169695Skan 994169695Skan case '/': 995169695Skan /* A potential block or line comment. */ 996169695Skan comment_start = buffer->cur; 997169695Skan c = *buffer->cur; 998169695Skan 999169695Skan if (c == '*') 1000169695Skan { 1001169695Skan if (_cpp_skip_block_comment (pfile)) 1002169695Skan cpp_error (pfile, CPP_DL_ERROR, "unterminated comment"); 1003169695Skan } 1004169695Skan else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) 1005169695Skan || cpp_in_system_header (pfile))) 1006169695Skan { 1007169695Skan /* Warn about comments only if pedantically GNUC89, and not 1008169695Skan in system headers. */ 1009169695Skan if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) 1010169695Skan && ! buffer->warned_cplusplus_comments) 1011169695Skan { 1012169695Skan cpp_error (pfile, CPP_DL_PEDWARN, 1013169695Skan "C++ style comments are not allowed in ISO C90"); 1014169695Skan cpp_error (pfile, CPP_DL_PEDWARN, 1015169695Skan "(this will be reported only once per input file)"); 1016169695Skan buffer->warned_cplusplus_comments = 1; 1017169695Skan } 1018169695Skan 1019169695Skan if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) 1020169695Skan cpp_error (pfile, CPP_DL_WARNING, "multi-line comment"); 1021169695Skan } 1022169695Skan else if (c == '=') 1023169695Skan { 1024169695Skan buffer->cur++; 1025169695Skan result->type = CPP_DIV_EQ; 1026169695Skan break; 1027169695Skan } 1028169695Skan else 1029169695Skan { 1030169695Skan result->type = CPP_DIV; 1031169695Skan break; 1032169695Skan } 1033169695Skan 1034169695Skan if (!pfile->state.save_comments) 1035169695Skan { 1036169695Skan result->flags |= PREV_WHITE; 1037169695Skan goto update_tokens_line; 1038169695Skan } 1039169695Skan 1040169695Skan /* Save the comment as a token in its own right. */ 1041169695Skan save_comment (pfile, result, comment_start, c); 1042169695Skan break; 1043169695Skan 1044169695Skan case '<': 1045169695Skan if (pfile->state.angled_headers) 1046169695Skan { 1047169695Skan lex_string (pfile, result, buffer->cur - 1); 1048169695Skan break; 1049169695Skan } 1050169695Skan 1051169695Skan result->type = CPP_LESS; 1052169695Skan if (*buffer->cur == '=') 1053169695Skan buffer->cur++, result->type = CPP_LESS_EQ; 1054169695Skan else if (*buffer->cur == '<') 1055169695Skan { 1056169695Skan buffer->cur++; 1057169695Skan IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); 1058169695Skan } 1059169695Skan else if (CPP_OPTION (pfile, digraphs)) 1060169695Skan { 1061169695Skan if (*buffer->cur == ':') 1062169695Skan { 1063169695Skan buffer->cur++; 1064169695Skan result->flags |= DIGRAPH; 1065169695Skan result->type = CPP_OPEN_SQUARE; 1066169695Skan } 1067169695Skan else if (*buffer->cur == '%') 1068169695Skan { 1069169695Skan buffer->cur++; 1070169695Skan result->flags |= DIGRAPH; 1071169695Skan result->type = CPP_OPEN_BRACE; 1072169695Skan } 1073169695Skan } 1074169695Skan break; 1075169695Skan 1076169695Skan case '>': 1077169695Skan result->type = CPP_GREATER; 1078169695Skan if (*buffer->cur == '=') 1079169695Skan buffer->cur++, result->type = CPP_GREATER_EQ; 1080169695Skan else if (*buffer->cur == '>') 1081169695Skan { 1082169695Skan buffer->cur++; 1083169695Skan IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); 1084169695Skan } 1085169695Skan break; 1086169695Skan 1087169695Skan case '%': 1088169695Skan result->type = CPP_MOD; 1089169695Skan if (*buffer->cur == '=') 1090169695Skan buffer->cur++, result->type = CPP_MOD_EQ; 1091169695Skan else if (CPP_OPTION (pfile, digraphs)) 1092169695Skan { 1093169695Skan if (*buffer->cur == ':') 1094169695Skan { 1095169695Skan buffer->cur++; 1096169695Skan result->flags |= DIGRAPH; 1097169695Skan result->type = CPP_HASH; 1098169695Skan if (*buffer->cur == '%' && buffer->cur[1] == ':') 1099169695Skan buffer->cur += 2, result->type = CPP_PASTE; 1100169695Skan } 1101169695Skan else if (*buffer->cur == '>') 1102169695Skan { 1103169695Skan buffer->cur++; 1104169695Skan result->flags |= DIGRAPH; 1105169695Skan result->type = CPP_CLOSE_BRACE; 1106169695Skan } 1107169695Skan } 1108169695Skan break; 1109169695Skan 1110169695Skan case '.': 1111169695Skan result->type = CPP_DOT; 1112169695Skan if (ISDIGIT (*buffer->cur)) 1113169695Skan { 1114169695Skan struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1115169695Skan result->type = CPP_NUMBER; 1116169695Skan lex_number (pfile, &result->val.str, &nst); 1117169695Skan warn_about_normalization (pfile, result, &nst); 1118169695Skan } 1119169695Skan else if (*buffer->cur == '.' && buffer->cur[1] == '.') 1120169695Skan buffer->cur += 2, result->type = CPP_ELLIPSIS; 1121169695Skan else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) 1122169695Skan buffer->cur++, result->type = CPP_DOT_STAR; 1123169695Skan break; 1124169695Skan 1125169695Skan case '+': 1126169695Skan result->type = CPP_PLUS; 1127169695Skan if (*buffer->cur == '+') 1128169695Skan buffer->cur++, result->type = CPP_PLUS_PLUS; 1129169695Skan else if (*buffer->cur == '=') 1130169695Skan buffer->cur++, result->type = CPP_PLUS_EQ; 1131169695Skan break; 1132169695Skan 1133169695Skan case '-': 1134169695Skan result->type = CPP_MINUS; 1135169695Skan if (*buffer->cur == '>') 1136169695Skan { 1137169695Skan buffer->cur++; 1138169695Skan result->type = CPP_DEREF; 1139169695Skan if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) 1140169695Skan buffer->cur++, result->type = CPP_DEREF_STAR; 1141169695Skan } 1142169695Skan else if (*buffer->cur == '-') 1143169695Skan buffer->cur++, result->type = CPP_MINUS_MINUS; 1144169695Skan else if (*buffer->cur == '=') 1145169695Skan buffer->cur++, result->type = CPP_MINUS_EQ; 1146169695Skan break; 1147169695Skan 1148169695Skan case '&': 1149169695Skan result->type = CPP_AND; 1150169695Skan if (*buffer->cur == '&') 1151169695Skan buffer->cur++, result->type = CPP_AND_AND; 1152169695Skan else if (*buffer->cur == '=') 1153169695Skan buffer->cur++, result->type = CPP_AND_EQ; 1154169695Skan break; 1155169695Skan 1156169695Skan case '|': 1157169695Skan result->type = CPP_OR; 1158169695Skan if (*buffer->cur == '|') 1159169695Skan buffer->cur++, result->type = CPP_OR_OR; 1160169695Skan else if (*buffer->cur == '=') 1161169695Skan buffer->cur++, result->type = CPP_OR_EQ; 1162169695Skan break; 1163169695Skan 1164169695Skan case ':': 1165169695Skan result->type = CPP_COLON; 1166169695Skan if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus)) 1167169695Skan buffer->cur++, result->type = CPP_SCOPE; 1168169695Skan else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs)) 1169169695Skan { 1170169695Skan buffer->cur++; 1171169695Skan result->flags |= DIGRAPH; 1172169695Skan result->type = CPP_CLOSE_SQUARE; 1173169695Skan } 1174169695Skan break; 1175169695Skan 1176169695Skan case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break; 1177169695Skan case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break; 1178169695Skan case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break; 1179169695Skan case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break; 1180169695Skan case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break; 1181169695Skan 1182169695Skan case '?': result->type = CPP_QUERY; break; 1183169695Skan case '~': result->type = CPP_COMPL; break; 1184169695Skan case ',': result->type = CPP_COMMA; break; 1185169695Skan case '(': result->type = CPP_OPEN_PAREN; break; 1186169695Skan case ')': result->type = CPP_CLOSE_PAREN; break; 1187169695Skan case '[': result->type = CPP_OPEN_SQUARE; break; 1188169695Skan case ']': result->type = CPP_CLOSE_SQUARE; break; 1189169695Skan case '{': result->type = CPP_OPEN_BRACE; break; 1190169695Skan case '}': result->type = CPP_CLOSE_BRACE; break; 1191169695Skan case ';': result->type = CPP_SEMICOLON; break; 1192169695Skan 1193169695Skan /* @ is a punctuator in Objective-C. */ 1194169695Skan case '@': result->type = CPP_ATSIGN; break; 1195169695Skan 1196169695Skan case '$': 1197169695Skan case '\\': 1198169695Skan { 1199169695Skan const uchar *base = --buffer->cur; 1200169695Skan struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1201169695Skan 1202169695Skan if (forms_identifier_p (pfile, true, &nst)) 1203169695Skan { 1204169695Skan result->type = CPP_NAME; 1205169695Skan result->val.node = lex_identifier (pfile, base, true, &nst); 1206169695Skan warn_about_normalization (pfile, result, &nst); 1207169695Skan break; 1208169695Skan } 1209169695Skan buffer->cur++; 1210169695Skan } 1211169695Skan 1212169695Skan default: 1213169695Skan create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER); 1214169695Skan break; 1215169695Skan } 1216169695Skan 1217169695Skan return result; 1218169695Skan} 1219169695Skan 1220169695Skan/* An upper bound on the number of bytes needed to spell TOKEN. 1221169695Skan Does not include preceding whitespace. */ 1222169695Skanunsigned int 1223169695Skancpp_token_len (const cpp_token *token) 1224169695Skan{ 1225169695Skan unsigned int len; 1226169695Skan 1227169695Skan switch (TOKEN_SPELL (token)) 1228169695Skan { 1229169695Skan default: len = 4; break; 1230169695Skan case SPELL_LITERAL: len = token->val.str.len; break; 1231169695Skan case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break; 1232169695Skan } 1233169695Skan 1234169695Skan return len; 1235169695Skan} 1236169695Skan 1237169695Skan/* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER. 1238169695Skan Return the number of bytes read out of NAME. (There are always 1239169695Skan 10 bytes written to BUFFER.) */ 1240169695Skan 1241169695Skanstatic size_t 1242169695Skanutf8_to_ucn (unsigned char *buffer, const unsigned char *name) 1243169695Skan{ 1244169695Skan int j; 1245169695Skan int ucn_len = 0; 1246169695Skan int ucn_len_c; 1247169695Skan unsigned t; 1248169695Skan unsigned long utf32; 1249169695Skan 1250169695Skan /* Compute the length of the UTF-8 sequence. */ 1251169695Skan for (t = *name; t & 0x80; t <<= 1) 1252169695Skan ucn_len++; 1253169695Skan 1254169695Skan utf32 = *name & (0x7F >> ucn_len); 1255169695Skan for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++) 1256169695Skan { 1257169695Skan utf32 = (utf32 << 6) | (*++name & 0x3F); 1258169695Skan 1259169695Skan /* Ill-formed UTF-8. */ 1260169695Skan if ((*name & ~0x3F) != 0x80) 1261169695Skan abort (); 1262169695Skan } 1263169695Skan 1264169695Skan *buffer++ = '\\'; 1265169695Skan *buffer++ = 'U'; 1266169695Skan for (j = 7; j >= 0; j--) 1267169695Skan *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF]; 1268169695Skan return ucn_len; 1269169695Skan} 1270169695Skan 1271169695Skan 1272169695Skan/* Write the spelling of a token TOKEN to BUFFER. The buffer must 1273169695Skan already contain the enough space to hold the token's spelling. 1274169695Skan Returns a pointer to the character after the last character written. 1275169695Skan FORSTRING is true if this is to be the spelling after translation 1276169695Skan phase 1 (this is different for UCNs). 1277169695Skan FIXME: Would be nice if we didn't need the PFILE argument. */ 1278169695Skanunsigned char * 1279169695Skancpp_spell_token (cpp_reader *pfile, const cpp_token *token, 1280169695Skan unsigned char *buffer, bool forstring) 1281169695Skan{ 1282169695Skan switch (TOKEN_SPELL (token)) 1283169695Skan { 1284169695Skan case SPELL_OPERATOR: 1285169695Skan { 1286169695Skan const unsigned char *spelling; 1287169695Skan unsigned char c; 1288169695Skan 1289169695Skan if (token->flags & DIGRAPH) 1290169695Skan spelling 1291169695Skan = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1292169695Skan else if (token->flags & NAMED_OP) 1293169695Skan goto spell_ident; 1294169695Skan else 1295169695Skan spelling = TOKEN_NAME (token); 1296169695Skan 1297169695Skan while ((c = *spelling++) != '\0') 1298169695Skan *buffer++ = c; 1299169695Skan } 1300169695Skan break; 1301169695Skan 1302169695Skan spell_ident: 1303169695Skan case SPELL_IDENT: 1304169695Skan if (forstring) 1305169695Skan { 1306169695Skan memcpy (buffer, NODE_NAME (token->val.node), 1307169695Skan NODE_LEN (token->val.node)); 1308169695Skan buffer += NODE_LEN (token->val.node); 1309169695Skan } 1310169695Skan else 1311169695Skan { 1312169695Skan size_t i; 1313169695Skan const unsigned char * name = NODE_NAME (token->val.node); 1314169695Skan 1315169695Skan for (i = 0; i < NODE_LEN (token->val.node); i++) 1316169695Skan if (name[i] & ~0x7F) 1317169695Skan { 1318169695Skan i += utf8_to_ucn (buffer, name + i) - 1; 1319169695Skan buffer += 10; 1320169695Skan } 1321169695Skan else 1322169695Skan *buffer++ = NODE_NAME (token->val.node)[i]; 1323169695Skan } 1324169695Skan break; 1325169695Skan 1326169695Skan case SPELL_LITERAL: 1327169695Skan memcpy (buffer, token->val.str.text, token->val.str.len); 1328169695Skan buffer += token->val.str.len; 1329169695Skan break; 1330169695Skan 1331169695Skan case SPELL_NONE: 1332169695Skan cpp_error (pfile, CPP_DL_ICE, 1333169695Skan "unspellable token %s", TOKEN_NAME (token)); 1334169695Skan break; 1335169695Skan } 1336169695Skan 1337169695Skan return buffer; 1338169695Skan} 1339169695Skan 1340169695Skan/* Returns TOKEN spelt as a null-terminated string. The string is 1341169695Skan freed when the reader is destroyed. Useful for diagnostics. */ 1342169695Skanunsigned char * 1343169695Skancpp_token_as_text (cpp_reader *pfile, const cpp_token *token) 1344169695Skan{ 1345169695Skan unsigned int len = cpp_token_len (token) + 1; 1346169695Skan unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; 1347169695Skan 1348169695Skan end = cpp_spell_token (pfile, token, start, false); 1349169695Skan end[0] = '\0'; 1350169695Skan 1351169695Skan return start; 1352169695Skan} 1353169695Skan 1354169695Skan/* Used by C front ends, which really should move to using 1355169695Skan cpp_token_as_text. */ 1356169695Skanconst char * 1357169695Skancpp_type2name (enum cpp_ttype type) 1358169695Skan{ 1359169695Skan return (const char *) token_spellings[type].name; 1360169695Skan} 1361169695Skan 1362169695Skan/* Writes the spelling of token to FP, without any preceding space. 1363169695Skan Separated from cpp_spell_token for efficiency - to avoid stdio 1364169695Skan double-buffering. */ 1365169695Skanvoid 1366169695Skancpp_output_token (const cpp_token *token, FILE *fp) 1367169695Skan{ 1368169695Skan switch (TOKEN_SPELL (token)) 1369169695Skan { 1370169695Skan case SPELL_OPERATOR: 1371169695Skan { 1372169695Skan const unsigned char *spelling; 1373169695Skan int c; 1374169695Skan 1375169695Skan if (token->flags & DIGRAPH) 1376169695Skan spelling 1377169695Skan = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1378169695Skan else if (token->flags & NAMED_OP) 1379169695Skan goto spell_ident; 1380169695Skan else 1381169695Skan spelling = TOKEN_NAME (token); 1382169695Skan 1383169695Skan c = *spelling; 1384169695Skan do 1385169695Skan putc (c, fp); 1386169695Skan while ((c = *++spelling) != '\0'); 1387169695Skan } 1388169695Skan break; 1389169695Skan 1390169695Skan spell_ident: 1391169695Skan case SPELL_IDENT: 1392169695Skan { 1393169695Skan size_t i; 1394169695Skan const unsigned char * name = NODE_NAME (token->val.node); 1395169695Skan 1396169695Skan for (i = 0; i < NODE_LEN (token->val.node); i++) 1397169695Skan if (name[i] & ~0x7F) 1398169695Skan { 1399169695Skan unsigned char buffer[10]; 1400169695Skan i += utf8_to_ucn (buffer, name + i) - 1; 1401169695Skan fwrite (buffer, 1, 10, fp); 1402169695Skan } 1403169695Skan else 1404169695Skan fputc (NODE_NAME (token->val.node)[i], fp); 1405169695Skan } 1406169695Skan break; 1407169695Skan 1408169695Skan case SPELL_LITERAL: 1409169695Skan fwrite (token->val.str.text, 1, token->val.str.len, fp); 1410169695Skan break; 1411169695Skan 1412169695Skan case SPELL_NONE: 1413169695Skan /* An error, most probably. */ 1414169695Skan break; 1415169695Skan } 1416169695Skan} 1417169695Skan 1418169695Skan/* Compare two tokens. */ 1419169695Skanint 1420169695Skan_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) 1421169695Skan{ 1422169695Skan if (a->type == b->type && a->flags == b->flags) 1423169695Skan switch (TOKEN_SPELL (a)) 1424169695Skan { 1425169695Skan default: /* Keep compiler happy. */ 1426169695Skan case SPELL_OPERATOR: 1427169695Skan return 1; 1428169695Skan case SPELL_NONE: 1429169695Skan return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); 1430169695Skan case SPELL_IDENT: 1431169695Skan return a->val.node == b->val.node; 1432169695Skan case SPELL_LITERAL: 1433169695Skan return (a->val.str.len == b->val.str.len 1434169695Skan && !memcmp (a->val.str.text, b->val.str.text, 1435169695Skan a->val.str.len)); 1436169695Skan } 1437169695Skan 1438169695Skan return 0; 1439169695Skan} 1440169695Skan 1441169695Skan/* Returns nonzero if a space should be inserted to avoid an 1442169695Skan accidental token paste for output. For simplicity, it is 1443169695Skan conservative, and occasionally advises a space where one is not 1444169695Skan needed, e.g. "." and ".2". */ 1445169695Skanint 1446169695Skancpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1, 1447169695Skan const cpp_token *token2) 1448169695Skan{ 1449169695Skan enum cpp_ttype a = token1->type, b = token2->type; 1450169695Skan cppchar_t c; 1451169695Skan 1452169695Skan if (token1->flags & NAMED_OP) 1453169695Skan a = CPP_NAME; 1454169695Skan if (token2->flags & NAMED_OP) 1455169695Skan b = CPP_NAME; 1456169695Skan 1457169695Skan c = EOF; 1458169695Skan if (token2->flags & DIGRAPH) 1459169695Skan c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; 1460169695Skan else if (token_spellings[b].category == SPELL_OPERATOR) 1461169695Skan c = token_spellings[b].name[0]; 1462169695Skan 1463169695Skan /* Quickly get everything that can paste with an '='. */ 1464169695Skan if ((int) a <= (int) CPP_LAST_EQ && c == '=') 1465169695Skan return 1; 1466169695Skan 1467169695Skan switch (a) 1468169695Skan { 1469169695Skan case CPP_GREATER: return c == '>'; 1470169695Skan case CPP_LESS: return c == '<' || c == '%' || c == ':'; 1471169695Skan case CPP_PLUS: return c == '+'; 1472169695Skan case CPP_MINUS: return c == '-' || c == '>'; 1473169695Skan case CPP_DIV: return c == '/' || c == '*'; /* Comments. */ 1474169695Skan case CPP_MOD: return c == ':' || c == '>'; 1475169695Skan case CPP_AND: return c == '&'; 1476169695Skan case CPP_OR: return c == '|'; 1477169695Skan case CPP_COLON: return c == ':' || c == '>'; 1478169695Skan case CPP_DEREF: return c == '*'; 1479169695Skan case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER; 1480169695Skan case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */ 1481169695Skan case CPP_NAME: return ((b == CPP_NUMBER 1482169695Skan && name_p (pfile, &token2->val.str)) 1483169695Skan || b == CPP_NAME 1484169695Skan || b == CPP_CHAR || b == CPP_STRING); /* L */ 1485169695Skan case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME 1486169695Skan || c == '.' || c == '+' || c == '-'); 1487169695Skan /* UCNs */ 1488169695Skan case CPP_OTHER: return ((token1->val.str.text[0] == '\\' 1489169695Skan && b == CPP_NAME) 1490169695Skan || (CPP_OPTION (pfile, objc) 1491169695Skan && token1->val.str.text[0] == '@' 1492169695Skan && (b == CPP_NAME || b == CPP_STRING))); 1493169695Skan default: break; 1494169695Skan } 1495169695Skan 1496169695Skan return 0; 1497169695Skan} 1498169695Skan 1499169695Skan/* Output all the remaining tokens on the current line, and a newline 1500169695Skan character, to FP. Leading whitespace is removed. If there are 1501169695Skan macros, special token padding is not performed. */ 1502169695Skanvoid 1503169695Skancpp_output_line (cpp_reader *pfile, FILE *fp) 1504169695Skan{ 1505169695Skan const cpp_token *token; 1506169695Skan 1507169695Skan token = cpp_get_token (pfile); 1508169695Skan while (token->type != CPP_EOF) 1509169695Skan { 1510169695Skan cpp_output_token (token, fp); 1511169695Skan token = cpp_get_token (pfile); 1512169695Skan if (token->flags & PREV_WHITE) 1513169695Skan putc (' ', fp); 1514169695Skan } 1515169695Skan 1516169695Skan putc ('\n', fp); 1517169695Skan} 1518169695Skan 1519169695Skan/* Memory buffers. Changing these three constants can have a dramatic 1520169695Skan effect on performance. The values here are reasonable defaults, 1521169695Skan but might be tuned. If you adjust them, be sure to test across a 1522169695Skan range of uses of cpplib, including heavy nested function-like macro 1523169695Skan expansion. Also check the change in peak memory usage (NJAMD is a 1524169695Skan good tool for this). */ 1525169695Skan#define MIN_BUFF_SIZE 8000 1526169695Skan#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2) 1527169695Skan#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \ 1528169695Skan (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) 1529169695Skan 1530169695Skan#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0) 1531169695Skan #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE! 1532169695Skan#endif 1533169695Skan 1534169695Skan/* Create a new allocation buffer. Place the control block at the end 1535169695Skan of the buffer, so that buffer overflows will cause immediate chaos. */ 1536169695Skanstatic _cpp_buff * 1537169695Skannew_buff (size_t len) 1538169695Skan{ 1539169695Skan _cpp_buff *result; 1540169695Skan unsigned char *base; 1541169695Skan 1542169695Skan if (len < MIN_BUFF_SIZE) 1543169695Skan len = MIN_BUFF_SIZE; 1544169695Skan len = CPP_ALIGN (len); 1545169695Skan 1546169695Skan base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff)); 1547169695Skan result = (_cpp_buff *) (base + len); 1548169695Skan result->base = base; 1549169695Skan result->cur = base; 1550169695Skan result->limit = base + len; 1551169695Skan result->next = NULL; 1552169695Skan return result; 1553169695Skan} 1554169695Skan 1555169695Skan/* Place a chain of unwanted allocation buffers on the free list. */ 1556169695Skanvoid 1557169695Skan_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff) 1558169695Skan{ 1559169695Skan _cpp_buff *end = buff; 1560169695Skan 1561169695Skan while (end->next) 1562169695Skan end = end->next; 1563169695Skan end->next = pfile->free_buffs; 1564169695Skan pfile->free_buffs = buff; 1565169695Skan} 1566169695Skan 1567169695Skan/* Return a free buffer of size at least MIN_SIZE. */ 1568169695Skan_cpp_buff * 1569169695Skan_cpp_get_buff (cpp_reader *pfile, size_t min_size) 1570169695Skan{ 1571169695Skan _cpp_buff *result, **p; 1572169695Skan 1573169695Skan for (p = &pfile->free_buffs;; p = &(*p)->next) 1574169695Skan { 1575169695Skan size_t size; 1576169695Skan 1577169695Skan if (*p == NULL) 1578169695Skan return new_buff (min_size); 1579169695Skan result = *p; 1580169695Skan size = result->limit - result->base; 1581169695Skan /* Return a buffer that's big enough, but don't waste one that's 1582169695Skan way too big. */ 1583169695Skan if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) 1584169695Skan break; 1585169695Skan } 1586169695Skan 1587169695Skan *p = result->next; 1588169695Skan result->next = NULL; 1589169695Skan result->cur = result->base; 1590169695Skan return result; 1591169695Skan} 1592169695Skan 1593169695Skan/* Creates a new buffer with enough space to hold the uncommitted 1594169695Skan remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies 1595169695Skan the excess bytes to the new buffer. Chains the new buffer after 1596169695Skan BUFF, and returns the new buffer. */ 1597169695Skan_cpp_buff * 1598169695Skan_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra) 1599169695Skan{ 1600169695Skan size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); 1601169695Skan _cpp_buff *new_buff = _cpp_get_buff (pfile, size); 1602169695Skan 1603169695Skan buff->next = new_buff; 1604169695Skan memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); 1605169695Skan return new_buff; 1606169695Skan} 1607169695Skan 1608169695Skan/* Creates a new buffer with enough space to hold the uncommitted 1609169695Skan remaining bytes of the buffer pointed to by BUFF, and at least 1610169695Skan MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. 1611169695Skan Chains the new buffer before the buffer pointed to by BUFF, and 1612169695Skan updates the pointer to point to the new buffer. */ 1613169695Skanvoid 1614169695Skan_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra) 1615169695Skan{ 1616169695Skan _cpp_buff *new_buff, *old_buff = *pbuff; 1617169695Skan size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); 1618169695Skan 1619169695Skan new_buff = _cpp_get_buff (pfile, size); 1620169695Skan memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); 1621169695Skan new_buff->next = old_buff; 1622169695Skan *pbuff = new_buff; 1623169695Skan} 1624169695Skan 1625169695Skan/* Free a chain of buffers starting at BUFF. */ 1626169695Skanvoid 1627169695Skan_cpp_free_buff (_cpp_buff *buff) 1628169695Skan{ 1629169695Skan _cpp_buff *next; 1630169695Skan 1631169695Skan for (; buff; buff = next) 1632169695Skan { 1633169695Skan next = buff->next; 1634169695Skan free (buff->base); 1635169695Skan } 1636169695Skan} 1637169695Skan 1638169695Skan/* Allocate permanent, unaligned storage of length LEN. */ 1639169695Skanunsigned char * 1640169695Skan_cpp_unaligned_alloc (cpp_reader *pfile, size_t len) 1641169695Skan{ 1642169695Skan _cpp_buff *buff = pfile->u_buff; 1643169695Skan unsigned char *result = buff->cur; 1644169695Skan 1645169695Skan if (len > (size_t) (buff->limit - result)) 1646169695Skan { 1647169695Skan buff = _cpp_get_buff (pfile, len); 1648169695Skan buff->next = pfile->u_buff; 1649169695Skan pfile->u_buff = buff; 1650169695Skan result = buff->cur; 1651169695Skan } 1652169695Skan 1653169695Skan buff->cur = result + len; 1654169695Skan return result; 1655169695Skan} 1656169695Skan 1657169695Skan/* Allocate permanent, unaligned storage of length LEN from a_buff. 1658169695Skan That buffer is used for growing allocations when saving macro 1659169695Skan replacement lists in a #define, and when parsing an answer to an 1660169695Skan assertion in #assert, #unassert or #if (and therefore possibly 1661169695Skan whilst expanding macros). It therefore must not be used by any 1662169695Skan code that they might call: specifically the lexer and the guts of 1663169695Skan the macro expander. 1664169695Skan 1665169695Skan All existing other uses clearly fit this restriction: storing 1666169695Skan registered pragmas during initialization. */ 1667169695Skanunsigned char * 1668169695Skan_cpp_aligned_alloc (cpp_reader *pfile, size_t len) 1669169695Skan{ 1670169695Skan _cpp_buff *buff = pfile->a_buff; 1671169695Skan unsigned char *result = buff->cur; 1672169695Skan 1673169695Skan if (len > (size_t) (buff->limit - result)) 1674169695Skan { 1675169695Skan buff = _cpp_get_buff (pfile, len); 1676169695Skan buff->next = pfile->a_buff; 1677169695Skan pfile->a_buff = buff; 1678169695Skan result = buff->cur; 1679169695Skan } 1680169695Skan 1681169695Skan buff->cur = result + len; 1682169695Skan return result; 1683169695Skan} 1684169695Skan 1685169695Skan/* Say which field of TOK is in use. */ 1686169695Skan 1687169695Skanenum cpp_token_fld_kind 1688169695Skancpp_token_val_index (cpp_token *tok) 1689169695Skan{ 1690169695Skan switch (TOKEN_SPELL (tok)) 1691169695Skan { 1692169695Skan case SPELL_IDENT: 1693169695Skan return CPP_TOKEN_FLD_NODE; 1694169695Skan case SPELL_LITERAL: 1695169695Skan return CPP_TOKEN_FLD_STR; 1696169695Skan case SPELL_NONE: 1697169695Skan if (tok->type == CPP_MACRO_ARG) 1698169695Skan return CPP_TOKEN_FLD_ARG_NO; 1699169695Skan else if (tok->type == CPP_PADDING) 1700169695Skan return CPP_TOKEN_FLD_SOURCE; 1701169695Skan else if (tok->type == CPP_PRAGMA) 1702169695Skan return CPP_TOKEN_FLD_PRAGMA; 1703169695Skan /* else fall through */ 1704169695Skan default: 1705169695Skan return CPP_TOKEN_FLD_NONE; 1706169695Skan } 1707169695Skan} 1708