1/* CPP Library - lexical analysis. 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009 3 Free Software Foundation, Inc. 4 Contributed by Per Bothner, 1994-95. 5 Based on CCCP program by Paul Rubin, June 1986 6 Adapted to ANSI C, Richard Stallman, Jan 1987 7 Broken out to separate file, Zack Weinberg, Mar 2000 8 9This program is free software; you can redistribute it and/or modify it 10under the terms of the GNU General Public License as published by the 11Free Software Foundation; either version 3, or (at your option) any 12later version. 13 14This program is distributed in the hope that it will be useful, 15but WITHOUT ANY WARRANTY; without even the implied warranty of 16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17GNU General Public License for more details. 18 19You should have received a copy of the GNU General Public License 20along with this program; see the file COPYING3. If not see 21<http://www.gnu.org/licenses/>. */ 22 23#include "config.h" 24#include "system.h" 25#include "cpplib.h" 26#include "internal.h" 27 28enum spell_type 29{ 30 SPELL_OPERATOR = 0, 31 SPELL_IDENT, 32 SPELL_LITERAL, 33 SPELL_NONE 34}; 35 36struct token_spelling 37{ 38 enum spell_type category; 39 const unsigned char *name; 40}; 41 42static const unsigned char *const digraph_spellings[] = 43{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" }; 44 45#define OP(e, s) { SPELL_OPERATOR, UC s }, 46#define TK(e, s) { SPELL_ ## s, UC #e }, 47static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; 48#undef OP 49#undef TK 50 51#define TOKEN_SPELL(token) (token_spellings[(token)->type].category) 52#define TOKEN_NAME(token) (token_spellings[(token)->type].name) 53 54static void add_line_note (cpp_buffer *, const uchar *, unsigned int); 55static int skip_line_comment (cpp_reader *); 56static void skip_whitespace (cpp_reader *, cppchar_t); 57static void lex_string (cpp_reader *, cpp_token *, const uchar *); 58static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t); 59static void store_comment (cpp_reader *, cpp_token *); 60static void create_literal (cpp_reader *, cpp_token *, const uchar *, 61 unsigned int, enum cpp_ttype); 62static bool warn_in_comment (cpp_reader *, _cpp_line_note *); 63static int name_p (cpp_reader *, const cpp_string *); 64static tokenrun *next_tokenrun (tokenrun *); 65 66static _cpp_buff *new_buff (size_t); 67 68 69/* Utility routine: 70 71 Compares, the token TOKEN to the NUL-terminated string STRING. 72 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */ 73int 74cpp_ideq (const cpp_token *token, const char *string) 75{ 76 if (token->type != CPP_NAME) 77 return 0; 78 79 return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string); 80} 81 82/* Record a note TYPE at byte POS into the current cleaned logical 83 line. */ 84static void 85add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type) 86{ 87 if (buffer->notes_used == buffer->notes_cap) 88 { 89 buffer->notes_cap = buffer->notes_cap * 2 + 200; 90 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes, 91 buffer->notes_cap); 92 } 93 94 buffer->notes[buffer->notes_used].pos = pos; 95 buffer->notes[buffer->notes_used].type = type; 96 buffer->notes_used++; 97} 98 99/* Returns with a logical line that contains no escaped newlines or 100 trigraphs. This is a time-critical inner loop. */ 101void 102_cpp_clean_line (cpp_reader *pfile) 103{ 104 cpp_buffer *buffer; 105 const uchar *s; 106 uchar c, *d, *p; 107 108 buffer = pfile->buffer; 109 buffer->cur_note = buffer->notes_used = 0; 110 buffer->cur = buffer->line_base = buffer->next_line; 111 buffer->need_line = false; 112 s = buffer->next_line - 1; 113 114 if (!buffer->from_stage3) 115 { 116 const uchar *pbackslash = NULL; 117 118 /* Short circuit for the common case of an un-escaped line with 119 no trigraphs. The primary win here is by not writing any 120 data back to memory until we have to. */ 121 for (;;) 122 { 123 c = *++s; 124 if (__builtin_expect (c == '\n', false) 125 || __builtin_expect (c == '\r', false)) 126 { 127 d = (uchar *) s; 128 129 if (__builtin_expect (s == buffer->rlimit, false)) 130 goto done; 131 132 /* DOS line ending? */ 133 if (__builtin_expect (c == '\r', false) 134 && s[1] == '\n') 135 { 136 s++; 137 if (s == buffer->rlimit) 138 goto done; 139 } 140 141 if (__builtin_expect (pbackslash == NULL, true)) 142 goto done; 143 144 /* Check for escaped newline. */ 145 p = d; 146 while (is_nvspace (p[-1])) 147 p--; 148 if (p - 1 != pbackslash) 149 goto done; 150 151 /* Have an escaped newline; process it and proceed to 152 the slow path. */ 153 add_line_note (buffer, p - 1, p != d ? ' ' : '\\'); 154 d = p - 2; 155 buffer->next_line = p - 1; 156 break; 157 } 158 if (__builtin_expect (c == '\\', false)) 159 pbackslash = s; 160 else if (__builtin_expect (c == '?', false) 161 && __builtin_expect (s[1] == '?', false) 162 && _cpp_trigraph_map[s[2]]) 163 { 164 /* Have a trigraph. We may or may not have to convert 165 it. Add a line note regardless, for -Wtrigraphs. */ 166 add_line_note (buffer, s, s[2]); 167 if (CPP_OPTION (pfile, trigraphs)) 168 { 169 /* We do, and that means we have to switch to the 170 slow path. */ 171 d = (uchar *) s; 172 *d = _cpp_trigraph_map[s[2]]; 173 s += 2; 174 break; 175 } 176 } 177 } 178 179 180 for (;;) 181 { 182 c = *++s; 183 *++d = c; 184 185 if (c == '\n' || c == '\r') 186 { 187 /* Handle DOS line endings. */ 188 if (c == '\r' && s != buffer->rlimit && s[1] == '\n') 189 s++; 190 if (s == buffer->rlimit) 191 break; 192 193 /* Escaped? */ 194 p = d; 195 while (p != buffer->next_line && is_nvspace (p[-1])) 196 p--; 197 if (p == buffer->next_line || p[-1] != '\\') 198 break; 199 200 add_line_note (buffer, p - 1, p != d ? ' ': '\\'); 201 d = p - 2; 202 buffer->next_line = p - 1; 203 } 204 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) 205 { 206 /* Add a note regardless, for the benefit of -Wtrigraphs. */ 207 add_line_note (buffer, d, s[2]); 208 if (CPP_OPTION (pfile, trigraphs)) 209 { 210 *d = _cpp_trigraph_map[s[2]]; 211 s += 2; 212 } 213 } 214 } 215 } 216 else 217 { 218 do 219 s++; 220 while (*s != '\n' && *s != '\r'); 221 d = (uchar *) s; 222 223 /* Handle DOS line endings. */ 224 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') 225 s++; 226 } 227 228 done: 229 *d = '\n'; 230 /* A sentinel note that should never be processed. */ 231 add_line_note (buffer, d + 1, '\n'); 232 buffer->next_line = s + 1; 233} 234 235/* Return true if the trigraph indicated by NOTE should be warned 236 about in a comment. */ 237static bool 238warn_in_comment (cpp_reader *pfile, _cpp_line_note *note) 239{ 240 const uchar *p; 241 242 /* Within comments we don't warn about trigraphs, unless the 243 trigraph forms an escaped newline, as that may change 244 behavior. */ 245 if (note->type != '/') 246 return false; 247 248 /* If -trigraphs, then this was an escaped newline iff the next note 249 is coincident. */ 250 if (CPP_OPTION (pfile, trigraphs)) 251 return note[1].pos == note->pos; 252 253 /* Otherwise, see if this forms an escaped newline. */ 254 p = note->pos + 3; 255 while (is_nvspace (*p)) 256 p++; 257 258 /* There might have been escaped newlines between the trigraph and the 259 newline we found. Hence the position test. */ 260 return (*p == '\n' && p < note[1].pos); 261} 262 263/* Process the notes created by add_line_note as far as the current 264 location. */ 265void 266_cpp_process_line_notes (cpp_reader *pfile, int in_comment) 267{ 268 cpp_buffer *buffer = pfile->buffer; 269 270 for (;;) 271 { 272 _cpp_line_note *note = &buffer->notes[buffer->cur_note]; 273 unsigned int col; 274 275 if (note->pos > buffer->cur) 276 break; 277 278 buffer->cur_note++; 279 col = CPP_BUF_COLUMN (buffer, note->pos + 1); 280 281 if (note->type == '\\' || note->type == ' ') 282 { 283 if (note->type == ' ' && !in_comment) 284 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 285 "backslash and newline separated by space"); 286 287 if (buffer->next_line > buffer->rlimit) 288 { 289 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col, 290 "backslash-newline at end of file"); 291 /* Prevent "no newline at end of file" warning. */ 292 buffer->next_line = buffer->rlimit; 293 } 294 295 buffer->line_base = note->pos; 296 CPP_INCREMENT_LINE (pfile, 0); 297 } 298 else if (_cpp_trigraph_map[note->type]) 299 { 300 if (CPP_OPTION (pfile, warn_trigraphs) 301 && (!in_comment || warn_in_comment (pfile, note))) 302 { 303 if (CPP_OPTION (pfile, trigraphs)) 304 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 305 "trigraph ??%c converted to %c", 306 note->type, 307 (int) _cpp_trigraph_map[note->type]); 308 else 309 { 310 cpp_error_with_line 311 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 312 "trigraph ??%c ignored, use -trigraphs to enable", 313 note->type); 314 } 315 } 316 } 317 else if (note->type == 0) 318 /* Already processed in lex_raw_string. */; 319 else 320 abort (); 321 } 322} 323 324/* Skip a C-style block comment. We find the end of the comment by 325 seeing if an asterisk is before every '/' we encounter. Returns 326 nonzero if comment terminated by EOF, zero otherwise. 327 328 Buffer->cur points to the initial asterisk of the comment. */ 329bool 330_cpp_skip_block_comment (cpp_reader *pfile) 331{ 332 cpp_buffer *buffer = pfile->buffer; 333 const uchar *cur = buffer->cur; 334 uchar c; 335 336 cur++; 337 if (*cur == '/') 338 cur++; 339 340 for (;;) 341 { 342 /* People like decorating comments with '*', so check for '/' 343 instead for efficiency. */ 344 c = *cur++; 345 346 if (c == '/') 347 { 348 if (cur[-2] == '*') 349 break; 350 351 /* Warn about potential nested comments, but not if the '/' 352 comes immediately before the true comment delimiter. 353 Don't bother to get it right across escaped newlines. */ 354 if (CPP_OPTION (pfile, warn_comments) 355 && cur[0] == '*' && cur[1] != '/') 356 { 357 buffer->cur = cur; 358 cpp_error_with_line (pfile, CPP_DL_WARNING, 359 pfile->line_table->highest_line, CPP_BUF_COL (buffer), 360 "\"/*\" within comment"); 361 } 362 } 363 else if (c == '\n') 364 { 365 unsigned int cols; 366 buffer->cur = cur - 1; 367 _cpp_process_line_notes (pfile, true); 368 if (buffer->next_line >= buffer->rlimit) 369 return true; 370 _cpp_clean_line (pfile); 371 372 cols = buffer->next_line - buffer->line_base; 373 CPP_INCREMENT_LINE (pfile, cols); 374 375 cur = buffer->cur; 376 } 377 } 378 379 buffer->cur = cur; 380 _cpp_process_line_notes (pfile, true); 381 return false; 382} 383 384/* Skip a C++ line comment, leaving buffer->cur pointing to the 385 terminating newline. Handles escaped newlines. Returns nonzero 386 if a multiline comment. */ 387static int 388skip_line_comment (cpp_reader *pfile) 389{ 390 cpp_buffer *buffer = pfile->buffer; 391 source_location orig_line = pfile->line_table->highest_line; 392 393 while (*buffer->cur != '\n') 394 buffer->cur++; 395 396 _cpp_process_line_notes (pfile, true); 397 return orig_line != pfile->line_table->highest_line; 398} 399 400/* Skips whitespace, saving the next non-whitespace character. */ 401static void 402skip_whitespace (cpp_reader *pfile, cppchar_t c) 403{ 404 cpp_buffer *buffer = pfile->buffer; 405 bool saw_NUL = false; 406 407 do 408 { 409 /* Horizontal space always OK. */ 410 if (c == ' ' || c == '\t') 411 ; 412 /* Just \f \v or \0 left. */ 413 else if (c == '\0') 414 saw_NUL = true; 415 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) 416 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, 417 CPP_BUF_COL (buffer), 418 "%s in preprocessing directive", 419 c == '\f' ? "form feed" : "vertical tab"); 420 421 c = *buffer->cur++; 422 } 423 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */ 424 while (is_nvspace (c)); 425 426 if (saw_NUL) 427 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); 428 429 buffer->cur--; 430} 431 432/* See if the characters of a number token are valid in a name (no 433 '.', '+' or '-'). */ 434static int 435name_p (cpp_reader *pfile, const cpp_string *string) 436{ 437 unsigned int i; 438 439 for (i = 0; i < string->len; i++) 440 if (!is_idchar (string->text[i])) 441 return 0; 442 443 return 1; 444} 445 446/* After parsing an identifier or other sequence, produce a warning about 447 sequences not in NFC/NFKC. */ 448static void 449warn_about_normalization (cpp_reader *pfile, 450 const cpp_token *token, 451 const struct normalize_state *s) 452{ 453 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s) 454 && !pfile->state.skipping) 455 { 456 /* Make sure that the token is printed using UCNs, even 457 if we'd otherwise happily print UTF-8. */ 458 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token)); 459 size_t sz; 460 461 sz = cpp_spell_token (pfile, token, buf, false) - buf; 462 if (NORMALIZE_STATE_RESULT (s) == normalized_C) 463 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 464 "`%.*s' is not in NFKC", (int) sz, buf); 465 else 466 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 467 "`%.*s' is not in NFC", (int) sz, buf); 468 } 469} 470 471/* Returns TRUE if the sequence starting at buffer->cur is invalid in 472 an identifier. FIRST is TRUE if this starts an identifier. */ 473static bool 474forms_identifier_p (cpp_reader *pfile, int first, 475 struct normalize_state *state) 476{ 477 cpp_buffer *buffer = pfile->buffer; 478 479 if (*buffer->cur == '$') 480 { 481 if (!CPP_OPTION (pfile, dollars_in_ident)) 482 return false; 483 484 buffer->cur++; 485 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) 486 { 487 CPP_OPTION (pfile, warn_dollars) = 0; 488 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); 489 } 490 491 return true; 492 } 493 494 /* Is this a syntactically valid UCN? */ 495 if (CPP_OPTION (pfile, extended_identifiers) 496 && *buffer->cur == '\\' 497 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) 498 { 499 buffer->cur += 2; 500 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, 501 state)) 502 return true; 503 buffer->cur -= 2; 504 } 505 506 return false; 507} 508 509/* Helper function to get the cpp_hashnode of the identifier BASE. */ 510static cpp_hashnode * 511lex_identifier_intern (cpp_reader *pfile, const uchar *base) 512{ 513 cpp_hashnode *result; 514 const uchar *cur; 515 unsigned int len; 516 unsigned int hash = HT_HASHSTEP (0, *base); 517 518 cur = base + 1; 519 while (ISIDNUM (*cur)) 520 { 521 hash = HT_HASHSTEP (hash, *cur); 522 cur++; 523 } 524 len = cur - base; 525 hash = HT_HASHFINISH (hash, len); 526 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, 527 base, len, hash, HT_ALLOC)); 528 529 /* Rarely, identifiers require diagnostics when lexed. */ 530 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) 531 && !pfile->state.skipping, 0)) 532 { 533 /* It is allowed to poison the same identifier twice. */ 534 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) 535 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", 536 NODE_NAME (result)); 537 538 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 539 replacement list of a variadic macro. */ 540 if (result == pfile->spec_nodes.n__VA_ARGS__ 541 && !pfile->state.va_args_ok) 542 cpp_error (pfile, CPP_DL_PEDWARN, 543 "__VA_ARGS__ can only appear in the expansion" 544 " of a C99 variadic macro"); 545 546 /* For -Wc++-compat, warn about use of C++ named operators. */ 547 if (result->flags & NODE_WARN_OPERATOR) 548 cpp_error (pfile, CPP_DL_WARNING, 549 "identifier \"%s\" is a special operator name in C++", 550 NODE_NAME (result)); 551 } 552 553 return result; 554} 555 556/* Get the cpp_hashnode of an identifier specified by NAME in 557 the current cpp_reader object. If none is found, NULL is returned. */ 558cpp_hashnode * 559_cpp_lex_identifier (cpp_reader *pfile, const char *name) 560{ 561 cpp_hashnode *result; 562 result = lex_identifier_intern (pfile, (uchar *) name); 563 return result; 564} 565 566/* Lex an identifier starting at BUFFER->CUR - 1. */ 567static cpp_hashnode * 568lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, 569 struct normalize_state *nst) 570{ 571 cpp_hashnode *result; 572 const uchar *cur; 573 unsigned int len; 574 unsigned int hash = HT_HASHSTEP (0, *base); 575 576 cur = pfile->buffer->cur; 577 if (! starts_ucn) 578 while (ISIDNUM (*cur)) 579 { 580 hash = HT_HASHSTEP (hash, *cur); 581 cur++; 582 } 583 pfile->buffer->cur = cur; 584 if (starts_ucn || forms_identifier_p (pfile, false, nst)) 585 { 586 /* Slower version for identifiers containing UCNs (or $). */ 587 do { 588 while (ISIDNUM (*pfile->buffer->cur)) 589 { 590 pfile->buffer->cur++; 591 NORMALIZE_STATE_UPDATE_IDNUM (nst); 592 } 593 } while (forms_identifier_p (pfile, false, nst)); 594 result = _cpp_interpret_identifier (pfile, base, 595 pfile->buffer->cur - base); 596 } 597 else 598 { 599 len = cur - base; 600 hash = HT_HASHFINISH (hash, len); 601 602 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, 603 base, len, hash, HT_ALLOC)); 604 } 605 606 /* Rarely, identifiers require diagnostics when lexed. */ 607 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) 608 && !pfile->state.skipping, 0)) 609 { 610 /* It is allowed to poison the same identifier twice. */ 611 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) 612 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", 613 NODE_NAME (result)); 614 615 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 616 replacement list of a variadic macro. */ 617 if (result == pfile->spec_nodes.n__VA_ARGS__ 618 && !pfile->state.va_args_ok) 619 cpp_error (pfile, CPP_DL_PEDWARN, 620 "__VA_ARGS__ can only appear in the expansion" 621 " of a C99 variadic macro"); 622 623 /* For -Wc++-compat, warn about use of C++ named operators. */ 624 if (result->flags & NODE_WARN_OPERATOR) 625 cpp_error (pfile, CPP_DL_WARNING, 626 "identifier \"%s\" is a special operator name in C++", 627 NODE_NAME (result)); 628 } 629 630 return result; 631} 632 633/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */ 634static void 635lex_number (cpp_reader *pfile, cpp_string *number, 636 struct normalize_state *nst) 637{ 638 const uchar *cur; 639 const uchar *base; 640 uchar *dest; 641 642 base = pfile->buffer->cur - 1; 643 do 644 { 645 cur = pfile->buffer->cur; 646 647 /* N.B. ISIDNUM does not include $. */ 648 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) 649 { 650 cur++; 651 NORMALIZE_STATE_UPDATE_IDNUM (nst); 652 } 653 654 pfile->buffer->cur = cur; 655 } 656 while (forms_identifier_p (pfile, false, nst)); 657 658 number->len = cur - base; 659 dest = _cpp_unaligned_alloc (pfile, number->len + 1); 660 memcpy (dest, base, number->len); 661 dest[number->len] = '\0'; 662 number->text = dest; 663} 664 665/* Create a token of type TYPE with a literal spelling. */ 666static void 667create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, 668 unsigned int len, enum cpp_ttype type) 669{ 670 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); 671 672 memcpy (dest, base, len); 673 dest[len] = '\0'; 674 token->type = type; 675 token->val.str.len = len; 676 token->val.str.text = dest; 677} 678 679/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer 680 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */ 681 682static void 683bufring_append (cpp_reader *pfile, const uchar *base, size_t len, 684 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p) 685{ 686 _cpp_buff *first_buff = *first_buff_p; 687 _cpp_buff *last_buff = *last_buff_p; 688 689 if (first_buff == NULL) 690 first_buff = last_buff = _cpp_get_buff (pfile, len); 691 else if (len > BUFF_ROOM (last_buff)) 692 { 693 size_t room = BUFF_ROOM (last_buff); 694 memcpy (BUFF_FRONT (last_buff), base, room); 695 BUFF_FRONT (last_buff) += room; 696 base += room; 697 len -= room; 698 last_buff = _cpp_append_extend_buff (pfile, last_buff, len); 699 } 700 701 memcpy (BUFF_FRONT (last_buff), base, len); 702 BUFF_FRONT (last_buff) += len; 703 704 *first_buff_p = first_buff; 705 *last_buff_p = last_buff; 706} 707 708/* Lexes a raw string. The stored string contains the spelling, including 709 double quotes, delimiter string, '(' and ')', any leading 710 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the 711 literal, or CPP_OTHER if it was not properly terminated. 712 713 The spelling is NUL-terminated, but it is not guaranteed that this 714 is the first NUL since embedded NULs are preserved. */ 715 716static void 717lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, 718 const uchar *cur) 719{ 720 source_location saw_NUL = 0; 721 const uchar *raw_prefix; 722 unsigned int raw_prefix_len = 0; 723 enum cpp_ttype type; 724 size_t total_len = 0; 725 _cpp_buff *first_buff = NULL, *last_buff = NULL; 726 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note]; 727 728 type = (*base == 'L' ? CPP_WSTRING : 729 *base == 'U' ? CPP_STRING32 : 730 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16) 731 : CPP_STRING); 732 733 raw_prefix = cur + 1; 734 while (raw_prefix_len < 16) 735 { 736 switch (raw_prefix[raw_prefix_len]) 737 { 738 case ' ': case '(': case ')': case '\\': case '\t': 739 case '\v': case '\f': case '\n': default: 740 break; 741 /* Basic source charset except the above chars. */ 742 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 743 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 744 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 745 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 746 case 'y': case 'z': 747 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 748 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 749 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 750 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 751 case 'Y': case 'Z': 752 case '0': case '1': case '2': case '3': case '4': case '5': 753 case '6': case '7': case '8': case '9': 754 case '_': case '{': case '}': case '#': case '[': case ']': 755 case '<': case '>': case '%': case ':': case ';': case '.': 756 case '?': case '*': case '+': case '-': case '/': case '^': 757 case '&': case '|': case '~': case '!': case '=': case ',': 758 case '"': case '\'': 759 raw_prefix_len++; 760 continue; 761 } 762 break; 763 } 764 765 if (raw_prefix[raw_prefix_len] != '(') 766 { 767 int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len) 768 + 1; 769 if (raw_prefix_len == 16) 770 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col, 771 "raw string delimiter longer than 16 characters"); 772 else 773 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col, 774 "invalid character '%c' in raw string delimiter", 775 (int) raw_prefix[raw_prefix_len]); 776 pfile->buffer->cur = raw_prefix - 1; 777 create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER); 778 return; 779 } 780 781 cur = raw_prefix + raw_prefix_len + 1; 782 for (;;) 783 { 784#define BUF_APPEND(STR,LEN) \ 785 do { \ 786 bufring_append (pfile, (const uchar *)(STR), (LEN), \ 787 &first_buff, &last_buff); \ 788 total_len += (LEN); \ 789 } while (0); 790 791 cppchar_t c; 792 793 /* If we previously performed any trigraph or line splicing 794 transformations, undo them within the body of the raw string. */ 795 while (note->pos < cur) 796 ++note; 797 for (; note->pos == cur; ++note) 798 { 799 switch (note->type) 800 { 801 case '\\': 802 case ' ': 803 /* Restore backslash followed by newline. */ 804 BUF_APPEND (base, cur - base); 805 base = cur; 806 BUF_APPEND ("\\", 1); 807 after_backslash: 808 if (note->type == ' ') 809 { 810 /* GNU backslash whitespace newline extension. FIXME 811 could be any sequence of non-vertical space. When we 812 can properly restore any such sequence, we should mark 813 this note as handled so _cpp_process_line_notes 814 doesn't warn. */ 815 BUF_APPEND (" ", 1); 816 } 817 818 BUF_APPEND ("\n", 1); 819 break; 820 821 case 0: 822 /* Already handled. */ 823 break; 824 825 default: 826 if (_cpp_trigraph_map[note->type]) 827 { 828 /* Don't warn about this trigraph in 829 _cpp_process_line_notes, since trigraphs show up as 830 trigraphs in raw strings. */ 831 uchar type = note->type; 832 note->type = 0; 833 834 if (!CPP_OPTION (pfile, trigraphs)) 835 /* If we didn't convert the trigraph in the first 836 place, don't do anything now either. */ 837 break; 838 839 BUF_APPEND (base, cur - base); 840 base = cur; 841 BUF_APPEND ("??", 2); 842 843 /* ??/ followed by newline gets two line notes, one for 844 the trigraph and one for the backslash/newline. */ 845 if (type == '/' && note[1].pos == cur) 846 { 847 if (note[1].type != '\\' 848 && note[1].type != ' ') 849 abort (); 850 BUF_APPEND ("/", 1); 851 ++note; 852 goto after_backslash; 853 } 854 /* The ) from ??) could be part of the suffix. */ 855 else if (type == ')' 856 && strncmp ((const char *) cur+1, 857 (const char *) raw_prefix, 858 raw_prefix_len) == 0 859 && cur[raw_prefix_len+1] == '"') 860 { 861 BUF_APPEND (")", 1); 862 base++; 863 cur += raw_prefix_len + 2; 864 goto break_outer_loop; 865 } 866 else 867 { 868 /* Skip the replacement character. */ 869 base = ++cur; 870 BUF_APPEND (&type, 1); 871 } 872 } 873 else 874 abort (); 875 break; 876 } 877 } 878 c = *cur++; 879 880 if (c == ')' 881 && strncmp ((const char *) cur, (const char *) raw_prefix, 882 raw_prefix_len) == 0 883 && cur[raw_prefix_len] == '"') 884 { 885 cur += raw_prefix_len + 1; 886 break; 887 } 888 else if (c == '\n') 889 { 890 if (pfile->state.in_directive 891 || pfile->state.parsing_args 892 || pfile->state.in_deferred_pragma) 893 { 894 cur--; 895 type = CPP_OTHER; 896 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0, 897 "unterminated raw string"); 898 break; 899 } 900 901 BUF_APPEND (base, cur - base); 902 903 if (pfile->buffer->cur < pfile->buffer->rlimit) 904 CPP_INCREMENT_LINE (pfile, 0); 905 pfile->buffer->need_line = true; 906 907 pfile->buffer->cur = cur-1; 908 _cpp_process_line_notes (pfile, false); 909 if (!_cpp_get_fresh_line (pfile)) 910 { 911 source_location src_loc = token->src_loc; 912 token->type = CPP_EOF; 913 /* Tell the compiler the line number of the EOF token. */ 914 token->src_loc = pfile->line_table->highest_line; 915 token->flags = BOL; 916 if (first_buff != NULL) 917 _cpp_release_buff (pfile, first_buff); 918 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0, 919 "unterminated raw string"); 920 return; 921 } 922 923 cur = base = pfile->buffer->cur; 924 note = &pfile->buffer->notes[pfile->buffer->cur_note]; 925 } 926 else if (c == '\0' && !saw_NUL) 927 LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table, 928 CPP_BUF_COLUMN (pfile->buffer, cur)); 929 } 930 break_outer_loop: 931 932 if (saw_NUL && !pfile->state.skipping) 933 cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0, 934 "null character(s) preserved in literal"); 935 936 pfile->buffer->cur = cur; 937 if (first_buff == NULL) 938 create_literal (pfile, token, base, cur - base, type); 939 else 940 { 941 uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1); 942 943 token->type = type; 944 token->val.str.len = total_len + (cur - base); 945 token->val.str.text = dest; 946 last_buff = first_buff; 947 while (last_buff != NULL) 948 { 949 memcpy (dest, last_buff->base, 950 BUFF_FRONT (last_buff) - last_buff->base); 951 dest += BUFF_FRONT (last_buff) - last_buff->base; 952 last_buff = last_buff->next; 953 } 954 _cpp_release_buff (pfile, first_buff); 955 memcpy (dest, base, cur - base); 956 dest[cur - base] = '\0'; 957 } 958} 959 960/* Lexes a string, character constant, or angle-bracketed header file 961 name. The stored string contains the spelling, including opening 962 quote and any leading 'L', 'u', 'U' or 'u8' and optional 963 'R' modifier. It returns the type of the literal, or CPP_OTHER 964 if it was not properly terminated, or CPP_LESS for an unterminated 965 header name which must be relexed as normal tokens. 966 967 The spelling is NUL-terminated, but it is not guaranteed that this 968 is the first NUL since embedded NULs are preserved. */ 969static void 970lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) 971{ 972 bool saw_NUL = false; 973 const uchar *cur; 974 cppchar_t terminator; 975 enum cpp_ttype type; 976 977 cur = base; 978 terminator = *cur++; 979 if (terminator == 'L' || terminator == 'U') 980 terminator = *cur++; 981 else if (terminator == 'u') 982 { 983 terminator = *cur++; 984 if (terminator == '8') 985 terminator = *cur++; 986 } 987 if (terminator == 'R') 988 { 989 lex_raw_string (pfile, token, base, cur); 990 return; 991 } 992 if (terminator == '"') 993 type = (*base == 'L' ? CPP_WSTRING : 994 *base == 'U' ? CPP_STRING32 : 995 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16) 996 : CPP_STRING); 997 else if (terminator == '\'') 998 type = (*base == 'L' ? CPP_WCHAR : 999 *base == 'U' ? CPP_CHAR32 : 1000 *base == 'u' ? CPP_CHAR16 : CPP_CHAR); 1001 else 1002 terminator = '>', type = CPP_HEADER_NAME; 1003 1004 for (;;) 1005 { 1006 cppchar_t c = *cur++; 1007 1008 /* In #include-style directives, terminators are not escapable. */ 1009 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') 1010 cur++; 1011 else if (c == terminator) 1012 break; 1013 else if (c == '\n') 1014 { 1015 cur--; 1016 /* Unmatched quotes always yield undefined behavior, but 1017 greedy lexing means that what appears to be an unterminated 1018 header name may actually be a legitimate sequence of tokens. */ 1019 if (terminator == '>') 1020 { 1021 token->type = CPP_LESS; 1022 return; 1023 } 1024 type = CPP_OTHER; 1025 break; 1026 } 1027 else if (c == '\0') 1028 saw_NUL = true; 1029 } 1030 1031 if (saw_NUL && !pfile->state.skipping) 1032 cpp_error (pfile, CPP_DL_WARNING, 1033 "null character(s) preserved in literal"); 1034 1035 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM) 1036 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character", 1037 (int) terminator); 1038 1039 pfile->buffer->cur = cur; 1040 create_literal (pfile, token, base, cur - base, type); 1041} 1042 1043/* Return the comment table. The client may not make any assumption 1044 about the ordering of the table. */ 1045cpp_comment_table * 1046cpp_get_comments (cpp_reader *pfile) 1047{ 1048 return &pfile->comments; 1049} 1050 1051/* Append a comment to the end of the comment table. */ 1052static void 1053store_comment (cpp_reader *pfile, cpp_token *token) 1054{ 1055 int len; 1056 1057 if (pfile->comments.allocated == 0) 1058 { 1059 pfile->comments.allocated = 256; 1060 pfile->comments.entries = (cpp_comment *) xmalloc 1061 (pfile->comments.allocated * sizeof (cpp_comment)); 1062 } 1063 1064 if (pfile->comments.count == pfile->comments.allocated) 1065 { 1066 pfile->comments.allocated *= 2; 1067 pfile->comments.entries = (cpp_comment *) xrealloc 1068 (pfile->comments.entries, 1069 pfile->comments.allocated * sizeof (cpp_comment)); 1070 } 1071 1072 len = token->val.str.len; 1073 1074 /* Copy comment. Note, token may not be NULL terminated. */ 1075 pfile->comments.entries[pfile->comments.count].comment = 1076 (char *) xmalloc (sizeof (char) * (len + 1)); 1077 memcpy (pfile->comments.entries[pfile->comments.count].comment, 1078 token->val.str.text, len); 1079 pfile->comments.entries[pfile->comments.count].comment[len] = '\0'; 1080 1081 /* Set source location. */ 1082 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc; 1083 1084 /* Increment the count of entries in the comment table. */ 1085 pfile->comments.count++; 1086} 1087 1088/* The stored comment includes the comment start and any terminator. */ 1089static void 1090save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from, 1091 cppchar_t type) 1092{ 1093 unsigned char *buffer; 1094 unsigned int len, clen; 1095 int convert_to_c = (pfile->state.in_directive || pfile->state.collecting_args) 1096 && type == '/'; 1097 1098 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ 1099 1100 /* C++ comments probably (not definitely) have moved past a new 1101 line, which we don't want to save in the comment. */ 1102 if (is_vspace (pfile->buffer->cur[-1])) 1103 len--; 1104 1105 /* If we are currently in a directive, then we need to store all 1106 C++ comments as C comments internally, and so we need to 1107 allocate a little extra space in that case. 1108 1109 Note that the only time we encounter a directive here is 1110 when we are saving comments in a "#define". */ 1111 clen = convert_to_c ? len + 2 : len; 1112 1113 buffer = _cpp_unaligned_alloc (pfile, clen); 1114 1115 token->type = CPP_COMMENT; 1116 token->val.str.len = clen; 1117 token->val.str.text = buffer; 1118 1119 buffer[0] = '/'; 1120 memcpy (buffer + 1, from, len - 1); 1121 1122 /* Finish conversion to a C comment, if necessary. */ 1123 if (convert_to_c) 1124 { 1125 buffer[1] = '*'; 1126 buffer[clen - 2] = '*'; 1127 buffer[clen - 1] = '/'; 1128 } 1129 1130 /* Finally store this comment for use by clients of libcpp. */ 1131 store_comment (pfile, token); 1132} 1133 1134/* Allocate COUNT tokens for RUN. */ 1135void 1136_cpp_init_tokenrun (tokenrun *run, unsigned int count) 1137{ 1138 run->base = XNEWVEC (cpp_token, count); 1139 run->limit = run->base + count; 1140 run->next = NULL; 1141} 1142 1143/* Returns the next tokenrun, or creates one if there is none. */ 1144static tokenrun * 1145next_tokenrun (tokenrun *run) 1146{ 1147 if (run->next == NULL) 1148 { 1149 run->next = XNEW (tokenrun); 1150 run->next->prev = run; 1151 _cpp_init_tokenrun (run->next, 250); 1152 } 1153 1154 return run->next; 1155} 1156 1157/* Look ahead in the input stream. */ 1158const cpp_token * 1159cpp_peek_token (cpp_reader *pfile, int index) 1160{ 1161 cpp_context *context = pfile->context; 1162 const cpp_token *peektok; 1163 int count; 1164 1165 /* First, scan through any pending cpp_context objects. */ 1166 while (context->prev) 1167 { 1168 ptrdiff_t sz = (context->direct_p 1169 ? LAST (context).token - FIRST (context).token 1170 : LAST (context).ptoken - FIRST (context).ptoken); 1171 1172 if (index < (int) sz) 1173 return (context->direct_p 1174 ? FIRST (context).token + index 1175 : *(FIRST (context).ptoken + index)); 1176 1177 index -= (int) sz; 1178 context = context->prev; 1179 } 1180 1181 /* We will have to read some new tokens after all (and do so 1182 without invalidating preceding tokens). */ 1183 count = index; 1184 pfile->keep_tokens++; 1185 1186 do 1187 { 1188 peektok = _cpp_lex_token (pfile); 1189 if (peektok->type == CPP_EOF) 1190 return peektok; 1191 } 1192 while (index--); 1193 1194 _cpp_backup_tokens_direct (pfile, count + 1); 1195 pfile->keep_tokens--; 1196 1197 return peektok; 1198} 1199 1200/* Allocate a single token that is invalidated at the same time as the 1201 rest of the tokens on the line. Has its line and col set to the 1202 same as the last lexed token, so that diagnostics appear in the 1203 right place. */ 1204cpp_token * 1205_cpp_temp_token (cpp_reader *pfile) 1206{ 1207 cpp_token *old, *result; 1208 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token; 1209 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads; 1210 1211 old = pfile->cur_token - 1; 1212 /* Any pre-existing lookaheads must not be clobbered. */ 1213 if (la) 1214 { 1215 if (sz <= la) 1216 { 1217 tokenrun *next = next_tokenrun (pfile->cur_run); 1218 1219 if (sz < la) 1220 memmove (next->base + 1, next->base, 1221 (la - sz) * sizeof (cpp_token)); 1222 1223 next->base[0] = pfile->cur_run->limit[-1]; 1224 } 1225 1226 if (sz > 1) 1227 memmove (pfile->cur_token + 1, pfile->cur_token, 1228 MIN (la, sz - 1) * sizeof (cpp_token)); 1229 } 1230 1231 if (!sz && pfile->cur_token == pfile->cur_run->limit) 1232 { 1233 pfile->cur_run = next_tokenrun (pfile->cur_run); 1234 pfile->cur_token = pfile->cur_run->base; 1235 } 1236 1237 result = pfile->cur_token++; 1238 result->src_loc = old->src_loc; 1239 return result; 1240} 1241 1242/* Lex a token into RESULT (external interface). Takes care of issues 1243 like directive handling, token lookahead, multiple include 1244 optimization and skipping. */ 1245const cpp_token * 1246_cpp_lex_token (cpp_reader *pfile) 1247{ 1248 cpp_token *result; 1249 1250 for (;;) 1251 { 1252 if (pfile->cur_token == pfile->cur_run->limit) 1253 { 1254 pfile->cur_run = next_tokenrun (pfile->cur_run); 1255 pfile->cur_token = pfile->cur_run->base; 1256 } 1257 /* We assume that the current token is somewhere in the current 1258 run. */ 1259 if (pfile->cur_token < pfile->cur_run->base 1260 || pfile->cur_token >= pfile->cur_run->limit) 1261 abort (); 1262 1263 if (pfile->lookaheads) 1264 { 1265 pfile->lookaheads--; 1266 result = pfile->cur_token++; 1267 } 1268 else 1269 result = _cpp_lex_direct (pfile); 1270 1271 if (result->flags & BOL) 1272 { 1273 /* Is this a directive. If _cpp_handle_directive returns 1274 false, it is an assembler #. */ 1275 if (result->type == CPP_HASH 1276 /* 6.10.3 p 11: Directives in a list of macro arguments 1277 gives undefined behavior. This implementation 1278 handles the directive as normal. */ 1279 && pfile->state.parsing_args != 1) 1280 { 1281 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE)) 1282 { 1283 if (pfile->directive_result.type == CPP_PADDING) 1284 continue; 1285 result = &pfile->directive_result; 1286 } 1287 } 1288 else if (pfile->state.in_deferred_pragma) 1289 result = &pfile->directive_result; 1290 1291 if (pfile->cb.line_change && !pfile->state.skipping) 1292 pfile->cb.line_change (pfile, result, pfile->state.parsing_args); 1293 } 1294 1295 /* We don't skip tokens in directives. */ 1296 if (pfile->state.in_directive || pfile->state.in_deferred_pragma) 1297 break; 1298 1299 /* Outside a directive, invalidate controlling macros. At file 1300 EOF, _cpp_lex_direct takes care of popping the buffer, so we never 1301 get here and MI optimization works. */ 1302 pfile->mi_valid = false; 1303 1304 if (!pfile->state.skipping || result->type == CPP_EOF) 1305 break; 1306 } 1307 1308 return result; 1309} 1310 1311/* Returns true if a fresh line has been loaded. */ 1312bool 1313_cpp_get_fresh_line (cpp_reader *pfile) 1314{ 1315 int return_at_eof; 1316 1317 /* We can't get a new line until we leave the current directive. */ 1318 if (pfile->state.in_directive) 1319 return false; 1320 1321 for (;;) 1322 { 1323 cpp_buffer *buffer = pfile->buffer; 1324 1325 if (!buffer->need_line) 1326 return true; 1327 1328 if (buffer->next_line < buffer->rlimit) 1329 { 1330 _cpp_clean_line (pfile); 1331 return true; 1332 } 1333 1334 /* First, get out of parsing arguments state. */ 1335 if (pfile->state.parsing_args) 1336 return false; 1337 1338 /* End of buffer. Non-empty files should end in a newline. */ 1339 if (buffer->buf != buffer->rlimit 1340 && buffer->next_line > buffer->rlimit 1341 && !buffer->from_stage3) 1342 { 1343 /* Clip to buffer size. */ 1344 buffer->next_line = buffer->rlimit; 1345 } 1346 1347 return_at_eof = buffer->return_at_eof; 1348 _cpp_pop_buffer (pfile); 1349 if (pfile->buffer == NULL || return_at_eof) 1350 return false; 1351 } 1352} 1353 1354#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ 1355 do \ 1356 { \ 1357 result->type = ELSE_TYPE; \ 1358 if (*buffer->cur == CHAR) \ 1359 buffer->cur++, result->type = THEN_TYPE; \ 1360 } \ 1361 while (0) 1362 1363/* Lex a token into pfile->cur_token, which is also incremented, to 1364 get diagnostics pointing to the correct location. 1365 1366 Does not handle issues such as token lookahead, multiple-include 1367 optimization, directives, skipping etc. This function is only 1368 suitable for use by _cpp_lex_token, and in special cases like 1369 lex_expansion_token which doesn't care for any of these issues. 1370 1371 When meeting a newline, returns CPP_EOF if parsing a directive, 1372 otherwise returns to the start of the token buffer if permissible. 1373 Returns the location of the lexed token. */ 1374cpp_token * 1375_cpp_lex_direct (cpp_reader *pfile) 1376{ 1377 cppchar_t c; 1378 cpp_buffer *buffer; 1379 const unsigned char *comment_start; 1380 cpp_token *result = pfile->cur_token++; 1381 1382 fresh_line: 1383 result->flags = 0; 1384 buffer = pfile->buffer; 1385 if (buffer->need_line) 1386 { 1387 if (pfile->state.in_deferred_pragma) 1388 { 1389 result->type = CPP_PRAGMA_EOL; 1390 pfile->state.in_deferred_pragma = false; 1391 if (!pfile->state.pragma_allow_expansion) 1392 pfile->state.prevent_expansion--; 1393 return result; 1394 } 1395 if (!_cpp_get_fresh_line (pfile)) 1396 { 1397 result->type = CPP_EOF; 1398 if (!pfile->state.in_directive) 1399 { 1400 /* Tell the compiler the line number of the EOF token. */ 1401 result->src_loc = pfile->line_table->highest_line; 1402 result->flags = BOL; 1403 } 1404 return result; 1405 } 1406 if (!pfile->keep_tokens) 1407 { 1408 pfile->cur_run = &pfile->base_run; 1409 result = pfile->base_run.base; 1410 pfile->cur_token = result + 1; 1411 } 1412 result->flags = BOL; 1413 if (pfile->state.parsing_args == 2) 1414 result->flags |= PREV_WHITE; 1415 } 1416 buffer = pfile->buffer; 1417 update_tokens_line: 1418 result->src_loc = pfile->line_table->highest_line; 1419 1420 skipped_white: 1421 if (buffer->cur >= buffer->notes[buffer->cur_note].pos 1422 && !pfile->overlaid_buffer) 1423 { 1424 _cpp_process_line_notes (pfile, false); 1425 result->src_loc = pfile->line_table->highest_line; 1426 } 1427 c = *buffer->cur++; 1428 1429 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table, 1430 CPP_BUF_COLUMN (buffer, buffer->cur)); 1431 1432 switch (c) 1433 { 1434 case ' ': case '\t': case '\f': case '\v': case '\0': 1435 result->flags |= PREV_WHITE; 1436 skip_whitespace (pfile, c); 1437 goto skipped_white; 1438 1439 case '\n': 1440 if (buffer->cur < buffer->rlimit) 1441 CPP_INCREMENT_LINE (pfile, 0); 1442 buffer->need_line = true; 1443 goto fresh_line; 1444 1445 case '0': case '1': case '2': case '3': case '4': 1446 case '5': case '6': case '7': case '8': case '9': 1447 { 1448 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1449 result->type = CPP_NUMBER; 1450 lex_number (pfile, &result->val.str, &nst); 1451 warn_about_normalization (pfile, result, &nst); 1452 break; 1453 } 1454 1455 case 'L': 1456 case 'u': 1457 case 'U': 1458 case 'R': 1459 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters, 1460 wide strings or raw strings. */ 1461 if (c == 'L' || CPP_OPTION (pfile, uliterals)) 1462 { 1463 if ((*buffer->cur == '\'' && c != 'R') 1464 || *buffer->cur == '"' 1465 || (*buffer->cur == 'R' 1466 && c != 'R' 1467 && buffer->cur[1] == '"' 1468 && CPP_OPTION (pfile, uliterals)) 1469 || (*buffer->cur == '8' 1470 && c == 'u' 1471 && (buffer->cur[1] == '"' 1472 || (buffer->cur[1] == 'R' && buffer->cur[2] == '"')))) 1473 { 1474 lex_string (pfile, result, buffer->cur - 1); 1475 break; 1476 } 1477 } 1478 /* Fall through. */ 1479 1480 case '_': 1481 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 1482 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 1483 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 1484 case 's': case 't': case 'v': case 'w': case 'x': 1485 case 'y': case 'z': 1486 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 1487 case 'G': case 'H': case 'I': case 'J': case 'K': 1488 case 'M': case 'N': case 'O': case 'P': case 'Q': 1489 case 'S': case 'T': case 'V': case 'W': case 'X': 1490 case 'Y': case 'Z': 1491 result->type = CPP_NAME; 1492 { 1493 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1494 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false, 1495 &nst); 1496 warn_about_normalization (pfile, result, &nst); 1497 } 1498 1499 /* Convert named operators to their proper types. */ 1500 if (result->val.node.node->flags & NODE_OPERATOR) 1501 { 1502 result->flags |= NAMED_OP; 1503 result->type = (enum cpp_ttype) result->val.node.node->directive_index; 1504 } 1505 break; 1506 1507 case '\'': 1508 case '"': 1509 lex_string (pfile, result, buffer->cur - 1); 1510 break; 1511 1512 case '/': 1513 /* A potential block or line comment. */ 1514 comment_start = buffer->cur; 1515 c = *buffer->cur; 1516 1517 if (c == '*') 1518 { 1519 if (_cpp_skip_block_comment (pfile)) 1520 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment"); 1521 } 1522 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) 1523 || cpp_in_system_header (pfile))) 1524 { 1525 /* Warn about comments only if pedantically GNUC89, and not 1526 in system headers. */ 1527 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) 1528 && ! buffer->warned_cplusplus_comments) 1529 { 1530 cpp_error (pfile, CPP_DL_PEDWARN, 1531 "C++ style comments are not allowed in ISO C90"); 1532 cpp_error (pfile, CPP_DL_PEDWARN, 1533 "(this will be reported only once per input file)"); 1534 buffer->warned_cplusplus_comments = 1; 1535 } 1536 1537 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) 1538 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment"); 1539 } 1540 else if (c == '=') 1541 { 1542 buffer->cur++; 1543 result->type = CPP_DIV_EQ; 1544 break; 1545 } 1546 else 1547 { 1548 result->type = CPP_DIV; 1549 break; 1550 } 1551 1552 if (!pfile->state.save_comments) 1553 { 1554 result->flags |= PREV_WHITE; 1555 goto update_tokens_line; 1556 } 1557 1558 /* Save the comment as a token in its own right. */ 1559 save_comment (pfile, result, comment_start, c); 1560 break; 1561 1562 case '<': 1563 if (pfile->state.angled_headers) 1564 { 1565 lex_string (pfile, result, buffer->cur - 1); 1566 if (result->type != CPP_LESS) 1567 break; 1568 } 1569 1570 result->type = CPP_LESS; 1571 if (*buffer->cur == '=') 1572 buffer->cur++, result->type = CPP_LESS_EQ; 1573 else if (*buffer->cur == '<') 1574 { 1575 buffer->cur++; 1576 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); 1577 } 1578 else if (CPP_OPTION (pfile, digraphs)) 1579 { 1580 if (*buffer->cur == ':') 1581 { 1582 buffer->cur++; 1583 result->flags |= DIGRAPH; 1584 result->type = CPP_OPEN_SQUARE; 1585 } 1586 else if (*buffer->cur == '%') 1587 { 1588 buffer->cur++; 1589 result->flags |= DIGRAPH; 1590 result->type = CPP_OPEN_BRACE; 1591 } 1592 } 1593 break; 1594 1595 case '>': 1596 result->type = CPP_GREATER; 1597 if (*buffer->cur == '=') 1598 buffer->cur++, result->type = CPP_GREATER_EQ; 1599 else if (*buffer->cur == '>') 1600 { 1601 buffer->cur++; 1602 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); 1603 } 1604 break; 1605 1606 case '%': 1607 result->type = CPP_MOD; 1608 if (*buffer->cur == '=') 1609 buffer->cur++, result->type = CPP_MOD_EQ; 1610 else if (CPP_OPTION (pfile, digraphs)) 1611 { 1612 if (*buffer->cur == ':') 1613 { 1614 buffer->cur++; 1615 result->flags |= DIGRAPH; 1616 result->type = CPP_HASH; 1617 if (*buffer->cur == '%' && buffer->cur[1] == ':') 1618 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0; 1619 } 1620 else if (*buffer->cur == '>') 1621 { 1622 buffer->cur++; 1623 result->flags |= DIGRAPH; 1624 result->type = CPP_CLOSE_BRACE; 1625 } 1626 } 1627 break; 1628 1629 case '.': 1630 result->type = CPP_DOT; 1631 if (ISDIGIT (*buffer->cur)) 1632 { 1633 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1634 result->type = CPP_NUMBER; 1635 lex_number (pfile, &result->val.str, &nst); 1636 warn_about_normalization (pfile, result, &nst); 1637 } 1638 else if (*buffer->cur == '.' && buffer->cur[1] == '.') 1639 buffer->cur += 2, result->type = CPP_ELLIPSIS; 1640 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) 1641 buffer->cur++, result->type = CPP_DOT_STAR; 1642 break; 1643 1644 case '+': 1645 result->type = CPP_PLUS; 1646 if (*buffer->cur == '+') 1647 buffer->cur++, result->type = CPP_PLUS_PLUS; 1648 else if (*buffer->cur == '=') 1649 buffer->cur++, result->type = CPP_PLUS_EQ; 1650 break; 1651 1652 case '-': 1653 result->type = CPP_MINUS; 1654 if (*buffer->cur == '>') 1655 { 1656 buffer->cur++; 1657 result->type = CPP_DEREF; 1658 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) 1659 buffer->cur++, result->type = CPP_DEREF_STAR; 1660 } 1661 else if (*buffer->cur == '-') 1662 buffer->cur++, result->type = CPP_MINUS_MINUS; 1663 else if (*buffer->cur == '=') 1664 buffer->cur++, result->type = CPP_MINUS_EQ; 1665 break; 1666 1667 case '&': 1668 result->type = CPP_AND; 1669 if (*buffer->cur == '&') 1670 buffer->cur++, result->type = CPP_AND_AND; 1671 else if (*buffer->cur == '=') 1672 buffer->cur++, result->type = CPP_AND_EQ; 1673 break; 1674 1675 case '|': 1676 result->type = CPP_OR; 1677 if (*buffer->cur == '|') 1678 buffer->cur++, result->type = CPP_OR_OR; 1679 else if (*buffer->cur == '=') 1680 buffer->cur++, result->type = CPP_OR_EQ; 1681 break; 1682 1683 case ':': 1684 result->type = CPP_COLON; 1685 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus)) 1686 buffer->cur++, result->type = CPP_SCOPE; 1687 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs)) 1688 { 1689 buffer->cur++; 1690 result->flags |= DIGRAPH; 1691 result->type = CPP_CLOSE_SQUARE; 1692 } 1693 break; 1694 1695 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break; 1696 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break; 1697 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break; 1698 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break; 1699 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break; 1700 1701 case '?': result->type = CPP_QUERY; break; 1702 case '~': result->type = CPP_COMPL; break; 1703 case ',': result->type = CPP_COMMA; break; 1704 case '(': result->type = CPP_OPEN_PAREN; break; 1705 case ')': result->type = CPP_CLOSE_PAREN; break; 1706 case '[': result->type = CPP_OPEN_SQUARE; break; 1707 case ']': result->type = CPP_CLOSE_SQUARE; break; 1708 case '{': result->type = CPP_OPEN_BRACE; break; 1709 case '}': result->type = CPP_CLOSE_BRACE; break; 1710 case ';': result->type = CPP_SEMICOLON; break; 1711 1712 /* @ is a punctuator in Objective-C. */ 1713 case '@': result->type = CPP_ATSIGN; break; 1714 1715 case '$': 1716 case '\\': 1717 { 1718 const uchar *base = --buffer->cur; 1719 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1720 1721 if (forms_identifier_p (pfile, true, &nst)) 1722 { 1723 result->type = CPP_NAME; 1724 result->val.node.node = lex_identifier (pfile, base, true, &nst); 1725 warn_about_normalization (pfile, result, &nst); 1726 break; 1727 } 1728 buffer->cur++; 1729 } 1730 1731 default: 1732 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER); 1733 break; 1734 } 1735 1736 return result; 1737} 1738 1739/* An upper bound on the number of bytes needed to spell TOKEN. 1740 Does not include preceding whitespace. */ 1741unsigned int 1742cpp_token_len (const cpp_token *token) 1743{ 1744 unsigned int len; 1745 1746 switch (TOKEN_SPELL (token)) 1747 { 1748 default: len = 6; break; 1749 case SPELL_LITERAL: len = token->val.str.len; break; 1750 case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break; 1751 } 1752 1753 return len; 1754} 1755 1756/* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER. 1757 Return the number of bytes read out of NAME. (There are always 1758 10 bytes written to BUFFER.) */ 1759 1760static size_t 1761utf8_to_ucn (unsigned char *buffer, const unsigned char *name) 1762{ 1763 int j; 1764 int ucn_len = 0; 1765 int ucn_len_c; 1766 unsigned t; 1767 unsigned long utf32; 1768 1769 /* Compute the length of the UTF-8 sequence. */ 1770 for (t = *name; t & 0x80; t <<= 1) 1771 ucn_len++; 1772 1773 utf32 = *name & (0x7F >> ucn_len); 1774 for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++) 1775 { 1776 utf32 = (utf32 << 6) | (*++name & 0x3F); 1777 1778 /* Ill-formed UTF-8. */ 1779 if ((*name & ~0x3F) != 0x80) 1780 abort (); 1781 } 1782 1783 *buffer++ = '\\'; 1784 *buffer++ = 'U'; 1785 for (j = 7; j >= 0; j--) 1786 *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF]; 1787 return ucn_len; 1788} 1789 1790/* Given a token TYPE corresponding to a digraph, return a pointer to 1791 the spelling of the digraph. */ 1792static const unsigned char * 1793cpp_digraph2name (enum cpp_ttype type) 1794{ 1795 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH]; 1796} 1797 1798/* Write the spelling of a token TOKEN to BUFFER. The buffer must 1799 already contain the enough space to hold the token's spelling. 1800 Returns a pointer to the character after the last character written. 1801 FORSTRING is true if this is to be the spelling after translation 1802 phase 1 (this is different for UCNs). 1803 FIXME: Would be nice if we didn't need the PFILE argument. */ 1804unsigned char * 1805cpp_spell_token (cpp_reader *pfile, const cpp_token *token, 1806 unsigned char *buffer, bool forstring) 1807{ 1808 switch (TOKEN_SPELL (token)) 1809 { 1810 case SPELL_OPERATOR: 1811 { 1812 const unsigned char *spelling; 1813 unsigned char c; 1814 1815 if (token->flags & DIGRAPH) 1816 spelling = cpp_digraph2name (token->type); 1817 else if (token->flags & NAMED_OP) 1818 goto spell_ident; 1819 else 1820 spelling = TOKEN_NAME (token); 1821 1822 while ((c = *spelling++) != '\0') 1823 *buffer++ = c; 1824 } 1825 break; 1826 1827 spell_ident: 1828 case SPELL_IDENT: 1829 if (forstring) 1830 { 1831 memcpy (buffer, NODE_NAME (token->val.node.node), 1832 NODE_LEN (token->val.node.node)); 1833 buffer += NODE_LEN (token->val.node.node); 1834 } 1835 else 1836 { 1837 size_t i; 1838 const unsigned char * name = NODE_NAME (token->val.node.node); 1839 1840 for (i = 0; i < NODE_LEN (token->val.node.node); i++) 1841 if (name[i] & ~0x7F) 1842 { 1843 i += utf8_to_ucn (buffer, name + i) - 1; 1844 buffer += 10; 1845 } 1846 else 1847 *buffer++ = NODE_NAME (token->val.node.node)[i]; 1848 } 1849 break; 1850 1851 case SPELL_LITERAL: 1852 memcpy (buffer, token->val.str.text, token->val.str.len); 1853 buffer += token->val.str.len; 1854 break; 1855 1856 case SPELL_NONE: 1857 cpp_error (pfile, CPP_DL_ICE, 1858 "unspellable token %s", TOKEN_NAME (token)); 1859 break; 1860 } 1861 1862 return buffer; 1863} 1864 1865/* Returns TOKEN spelt as a null-terminated string. The string is 1866 freed when the reader is destroyed. Useful for diagnostics. */ 1867unsigned char * 1868cpp_token_as_text (cpp_reader *pfile, const cpp_token *token) 1869{ 1870 unsigned int len = cpp_token_len (token) + 1; 1871 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; 1872 1873 end = cpp_spell_token (pfile, token, start, false); 1874 end[0] = '\0'; 1875 1876 return start; 1877} 1878 1879/* Returns a pointer to a string which spells the token defined by 1880 TYPE and FLAGS. Used by C front ends, which really should move to 1881 using cpp_token_as_text. */ 1882const char * 1883cpp_type2name (enum cpp_ttype type, unsigned char flags) 1884{ 1885 if (flags & DIGRAPH) 1886 return (const char *) cpp_digraph2name (type); 1887 else if (flags & NAMED_OP) 1888 return cpp_named_operator2name (type); 1889 1890 return (const char *) token_spellings[type].name; 1891} 1892 1893/* Writes the spelling of token to FP, without any preceding space. 1894 Separated from cpp_spell_token for efficiency - to avoid stdio 1895 double-buffering. */ 1896void 1897cpp_output_token (const cpp_token *token, FILE *fp) 1898{ 1899 switch (TOKEN_SPELL (token)) 1900 { 1901 case SPELL_OPERATOR: 1902 { 1903 const unsigned char *spelling; 1904 int c; 1905 1906 if (token->flags & DIGRAPH) 1907 spelling = cpp_digraph2name (token->type); 1908 else if (token->flags & NAMED_OP) 1909 goto spell_ident; 1910 else 1911 spelling = TOKEN_NAME (token); 1912 1913 c = *spelling; 1914 do 1915 putc (c, fp); 1916 while ((c = *++spelling) != '\0'); 1917 } 1918 break; 1919 1920 spell_ident: 1921 case SPELL_IDENT: 1922 { 1923 size_t i; 1924 const unsigned char * name = NODE_NAME (token->val.node.node); 1925 1926 for (i = 0; i < NODE_LEN (token->val.node.node); i++) 1927 if (name[i] & ~0x7F) 1928 { 1929 unsigned char buffer[10]; 1930 i += utf8_to_ucn (buffer, name + i) - 1; 1931 fwrite (buffer, 1, 10, fp); 1932 } 1933 else 1934 fputc (NODE_NAME (token->val.node.node)[i], fp); 1935 } 1936 break; 1937 1938 case SPELL_LITERAL: 1939 fwrite (token->val.str.text, 1, token->val.str.len, fp); 1940 break; 1941 1942 case SPELL_NONE: 1943 /* An error, most probably. */ 1944 break; 1945 } 1946} 1947 1948/* Compare two tokens. */ 1949int 1950_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) 1951{ 1952 if (a->type == b->type && a->flags == b->flags) 1953 switch (TOKEN_SPELL (a)) 1954 { 1955 default: /* Keep compiler happy. */ 1956 case SPELL_OPERATOR: 1957 /* token_no is used to track where multiple consecutive ## 1958 tokens were originally located. */ 1959 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no); 1960 case SPELL_NONE: 1961 return (a->type != CPP_MACRO_ARG 1962 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no); 1963 case SPELL_IDENT: 1964 return a->val.node.node == b->val.node.node; 1965 case SPELL_LITERAL: 1966 return (a->val.str.len == b->val.str.len 1967 && !memcmp (a->val.str.text, b->val.str.text, 1968 a->val.str.len)); 1969 } 1970 1971 return 0; 1972} 1973 1974/* Returns nonzero if a space should be inserted to avoid an 1975 accidental token paste for output. For simplicity, it is 1976 conservative, and occasionally advises a space where one is not 1977 needed, e.g. "." and ".2". */ 1978int 1979cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1, 1980 const cpp_token *token2) 1981{ 1982 enum cpp_ttype a = token1->type, b = token2->type; 1983 cppchar_t c; 1984 1985 if (token1->flags & NAMED_OP) 1986 a = CPP_NAME; 1987 if (token2->flags & NAMED_OP) 1988 b = CPP_NAME; 1989 1990 c = EOF; 1991 if (token2->flags & DIGRAPH) 1992 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; 1993 else if (token_spellings[b].category == SPELL_OPERATOR) 1994 c = token_spellings[b].name[0]; 1995 1996 /* Quickly get everything that can paste with an '='. */ 1997 if ((int) a <= (int) CPP_LAST_EQ && c == '=') 1998 return 1; 1999 2000 switch (a) 2001 { 2002 case CPP_GREATER: return c == '>'; 2003 case CPP_LESS: return c == '<' || c == '%' || c == ':'; 2004 case CPP_PLUS: return c == '+'; 2005 case CPP_MINUS: return c == '-' || c == '>'; 2006 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */ 2007 case CPP_MOD: return c == ':' || c == '>'; 2008 case CPP_AND: return c == '&'; 2009 case CPP_OR: return c == '|'; 2010 case CPP_COLON: return c == ':' || c == '>'; 2011 case CPP_DEREF: return c == '*'; 2012 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER; 2013 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */ 2014 case CPP_NAME: return ((b == CPP_NUMBER 2015 && name_p (pfile, &token2->val.str)) 2016 || b == CPP_NAME 2017 || b == CPP_CHAR || b == CPP_STRING); /* L */ 2018 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME 2019 || c == '.' || c == '+' || c == '-'); 2020 /* UCNs */ 2021 case CPP_OTHER: return ((token1->val.str.text[0] == '\\' 2022 && b == CPP_NAME) 2023 || (CPP_OPTION (pfile, objc) 2024 && token1->val.str.text[0] == '@' 2025 && (b == CPP_NAME || b == CPP_STRING))); 2026 default: break; 2027 } 2028 2029 return 0; 2030} 2031 2032/* Output all the remaining tokens on the current line, and a newline 2033 character, to FP. Leading whitespace is removed. If there are 2034 macros, special token padding is not performed. */ 2035void 2036cpp_output_line (cpp_reader *pfile, FILE *fp) 2037{ 2038 const cpp_token *token; 2039 2040 token = cpp_get_token (pfile); 2041 while (token->type != CPP_EOF) 2042 { 2043 cpp_output_token (token, fp); 2044 token = cpp_get_token (pfile); 2045 if (token->flags & PREV_WHITE) 2046 putc (' ', fp); 2047 } 2048 2049 putc ('\n', fp); 2050} 2051 2052/* Return a string representation of all the remaining tokens on the 2053 current line. The result is allocated using xmalloc and must be 2054 freed by the caller. */ 2055unsigned char * 2056cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name) 2057{ 2058 const cpp_token *token; 2059 unsigned int out = dir_name ? ustrlen (dir_name) : 0; 2060 unsigned int alloced = 120 + out; 2061 unsigned char *result = (unsigned char *) xmalloc (alloced); 2062 2063 /* If DIR_NAME is empty, there are no initial contents. */ 2064 if (dir_name) 2065 { 2066 sprintf ((char *) result, "#%s ", dir_name); 2067 out += 2; 2068 } 2069 2070 token = cpp_get_token (pfile); 2071 while (token->type != CPP_EOF) 2072 { 2073 unsigned char *last; 2074 /* Include room for a possible space and the terminating nul. */ 2075 unsigned int len = cpp_token_len (token) + 2; 2076 2077 if (out + len > alloced) 2078 { 2079 alloced *= 2; 2080 if (out + len > alloced) 2081 alloced = out + len; 2082 result = (unsigned char *) xrealloc (result, alloced); 2083 } 2084 2085 last = cpp_spell_token (pfile, token, &result[out], 0); 2086 out = last - result; 2087 2088 token = cpp_get_token (pfile); 2089 if (token->flags & PREV_WHITE) 2090 result[out++] = ' '; 2091 } 2092 2093 result[out] = '\0'; 2094 return result; 2095} 2096 2097/* Memory buffers. Changing these three constants can have a dramatic 2098 effect on performance. The values here are reasonable defaults, 2099 but might be tuned. If you adjust them, be sure to test across a 2100 range of uses of cpplib, including heavy nested function-like macro 2101 expansion. Also check the change in peak memory usage (NJAMD is a 2102 good tool for this). */ 2103#define MIN_BUFF_SIZE 8000 2104#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2) 2105#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \ 2106 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) 2107 2108#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0) 2109 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE! 2110#endif 2111 2112/* Create a new allocation buffer. Place the control block at the end 2113 of the buffer, so that buffer overflows will cause immediate chaos. */ 2114static _cpp_buff * 2115new_buff (size_t len) 2116{ 2117 _cpp_buff *result; 2118 unsigned char *base; 2119 2120 if (len < MIN_BUFF_SIZE) 2121 len = MIN_BUFF_SIZE; 2122 len = CPP_ALIGN (len); 2123 2124 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff)); 2125 result = (_cpp_buff *) (base + len); 2126 result->base = base; 2127 result->cur = base; 2128 result->limit = base + len; 2129 result->next = NULL; 2130 return result; 2131} 2132 2133/* Place a chain of unwanted allocation buffers on the free list. */ 2134void 2135_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff) 2136{ 2137 _cpp_buff *end = buff; 2138 2139 while (end->next) 2140 end = end->next; 2141 end->next = pfile->free_buffs; 2142 pfile->free_buffs = buff; 2143} 2144 2145/* Return a free buffer of size at least MIN_SIZE. */ 2146_cpp_buff * 2147_cpp_get_buff (cpp_reader *pfile, size_t min_size) 2148{ 2149 _cpp_buff *result, **p; 2150 2151 for (p = &pfile->free_buffs;; p = &(*p)->next) 2152 { 2153 size_t size; 2154 2155 if (*p == NULL) 2156 return new_buff (min_size); 2157 result = *p; 2158 size = result->limit - result->base; 2159 /* Return a buffer that's big enough, but don't waste one that's 2160 way too big. */ 2161 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) 2162 break; 2163 } 2164 2165 *p = result->next; 2166 result->next = NULL; 2167 result->cur = result->base; 2168 return result; 2169} 2170 2171/* Creates a new buffer with enough space to hold the uncommitted 2172 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies 2173 the excess bytes to the new buffer. Chains the new buffer after 2174 BUFF, and returns the new buffer. */ 2175_cpp_buff * 2176_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra) 2177{ 2178 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); 2179 _cpp_buff *new_buff = _cpp_get_buff (pfile, size); 2180 2181 buff->next = new_buff; 2182 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); 2183 return new_buff; 2184} 2185 2186/* Creates a new buffer with enough space to hold the uncommitted 2187 remaining bytes of the buffer pointed to by BUFF, and at least 2188 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. 2189 Chains the new buffer before the buffer pointed to by BUFF, and 2190 updates the pointer to point to the new buffer. */ 2191void 2192_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra) 2193{ 2194 _cpp_buff *new_buff, *old_buff = *pbuff; 2195 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); 2196 2197 new_buff = _cpp_get_buff (pfile, size); 2198 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); 2199 new_buff->next = old_buff; 2200 *pbuff = new_buff; 2201} 2202 2203/* Free a chain of buffers starting at BUFF. */ 2204void 2205_cpp_free_buff (_cpp_buff *buff) 2206{ 2207 _cpp_buff *next; 2208 2209 for (; buff; buff = next) 2210 { 2211 next = buff->next; 2212 free (buff->base); 2213 } 2214} 2215 2216/* Allocate permanent, unaligned storage of length LEN. */ 2217unsigned char * 2218_cpp_unaligned_alloc (cpp_reader *pfile, size_t len) 2219{ 2220 _cpp_buff *buff = pfile->u_buff; 2221 unsigned char *result = buff->cur; 2222 2223 if (len > (size_t) (buff->limit - result)) 2224 { 2225 buff = _cpp_get_buff (pfile, len); 2226 buff->next = pfile->u_buff; 2227 pfile->u_buff = buff; 2228 result = buff->cur; 2229 } 2230 2231 buff->cur = result + len; 2232 return result; 2233} 2234 2235/* Allocate permanent, unaligned storage of length LEN from a_buff. 2236 That buffer is used for growing allocations when saving macro 2237 replacement lists in a #define, and when parsing an answer to an 2238 assertion in #assert, #unassert or #if (and therefore possibly 2239 whilst expanding macros). It therefore must not be used by any 2240 code that they might call: specifically the lexer and the guts of 2241 the macro expander. 2242 2243 All existing other uses clearly fit this restriction: storing 2244 registered pragmas during initialization. */ 2245unsigned char * 2246_cpp_aligned_alloc (cpp_reader *pfile, size_t len) 2247{ 2248 _cpp_buff *buff = pfile->a_buff; 2249 unsigned char *result = buff->cur; 2250 2251 if (len > (size_t) (buff->limit - result)) 2252 { 2253 buff = _cpp_get_buff (pfile, len); 2254 buff->next = pfile->a_buff; 2255 pfile->a_buff = buff; 2256 result = buff->cur; 2257 } 2258 2259 buff->cur = result + len; 2260 return result; 2261} 2262 2263/* Say which field of TOK is in use. */ 2264 2265enum cpp_token_fld_kind 2266cpp_token_val_index (cpp_token *tok) 2267{ 2268 switch (TOKEN_SPELL (tok)) 2269 { 2270 case SPELL_IDENT: 2271 return CPP_TOKEN_FLD_NODE; 2272 case SPELL_LITERAL: 2273 return CPP_TOKEN_FLD_STR; 2274 case SPELL_OPERATOR: 2275 if (tok->type == CPP_PASTE) 2276 return CPP_TOKEN_FLD_TOKEN_NO; 2277 else 2278 return CPP_TOKEN_FLD_NONE; 2279 case SPELL_NONE: 2280 if (tok->type == CPP_MACRO_ARG) 2281 return CPP_TOKEN_FLD_ARG_NO; 2282 else if (tok->type == CPP_PADDING) 2283 return CPP_TOKEN_FLD_SOURCE; 2284 else if (tok->type == CPP_PRAGMA) 2285 return CPP_TOKEN_FLD_PRAGMA; 2286 /* else fall through */ 2287 default: 2288 return CPP_TOKEN_FLD_NONE; 2289 } 2290} 2291