c-lex.c revision 117415
1/* Mainly the interface between cpplib and the C front ends. 2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997 3 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 2, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to the Free 19Software Foundation, 59 Temple Place - Suite 330, Boston, MA 2002111-1307, USA. */ 21 22/* $FreeBSD: head/contrib/gcc/c-lex.c 117415 2003-07-11 04:49:30Z kan $ */ 23 24#include "config.h" 25#include "system.h" 26 27#include "real.h" 28#include "rtl.h" 29#include "tree.h" 30#include "expr.h" 31#include "input.h" 32#include "output.h" 33#include "c-tree.h" 34#include "c-common.h" 35#include "flags.h" 36#include "timevar.h" 37#include "cpplib.h" 38#include "c-pragma.h" 39#include "toplev.h" 40#include "intl.h" 41#include "tm_p.h" 42#include "splay-tree.h" 43#include "debug.h" 44 45#ifdef MULTIBYTE_CHARS 46#include "mbchar.h" 47#include <locale.h> 48#endif /* MULTIBYTE_CHARS */ 49 50/* The current line map. */ 51static const struct line_map *map; 52 53/* The line used to refresh the lineno global variable after each token. */ 54static unsigned int src_lineno; 55 56/* We may keep statistics about how long which files took to compile. */ 57static int header_time, body_time; 58static splay_tree file_info_tree; 59 60/* File used for outputting assembler code. */ 61extern FILE *asm_out_file; 62 63#undef WCHAR_TYPE_SIZE 64#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node) 65 66/* Number of bytes in a wide character. */ 67#define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT) 68 69int pending_lang_change; /* If we need to switch languages - C++ only */ 70int c_header_level; /* depth in C headers - C++ only */ 71 72/* Nonzero tells yylex to ignore \ in string constants. */ 73static int ignore_escape_flag; 74 75static tree interpret_integer PARAMS ((const cpp_token *, unsigned int)); 76static tree interpret_float PARAMS ((const cpp_token *, unsigned int)); 77static enum integer_type_kind 78 narrowest_unsigned_type PARAMS ((tree, unsigned int)); 79static enum integer_type_kind 80 narrowest_signed_type PARAMS ((tree, unsigned int)); 81static tree lex_string PARAMS ((const unsigned char *, unsigned int, 82 int)); 83static tree lex_charconst PARAMS ((const cpp_token *)); 84static void update_header_times PARAMS ((const char *)); 85static int dump_one_header PARAMS ((splay_tree_node, void *)); 86static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int)); 87static void cb_ident PARAMS ((cpp_reader *, unsigned int, 88 const cpp_string *)); 89static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *)); 90static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int)); 91static void cb_define PARAMS ((cpp_reader *, unsigned int, 92 cpp_hashnode *)); 93static void cb_undef PARAMS ((cpp_reader *, unsigned int, 94 cpp_hashnode *)); 95 96const char * 97init_c_lex (filename) 98 const char *filename; 99{ 100 struct cpp_callbacks *cb; 101 struct c_fileinfo *toplevel; 102 103 /* Set up filename timing. Must happen before cpp_read_main_file. */ 104 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp, 105 0, 106 (splay_tree_delete_value_fn)free); 107 toplevel = get_fileinfo ("<top level>"); 108 if (flag_detailed_statistics) 109 { 110 header_time = 0; 111 body_time = get_run_time (); 112 toplevel->time = body_time; 113 } 114 115#ifdef MULTIBYTE_CHARS 116 /* Change to the native locale for multibyte conversions. */ 117 setlocale (LC_CTYPE, ""); 118 GET_ENVIRONMENT (literal_codeset, "LANG"); 119#endif 120 121 cb = cpp_get_callbacks (parse_in); 122 123 cb->line_change = cb_line_change; 124 cb->ident = cb_ident; 125 cb->file_change = cb_file_change; 126 cb->def_pragma = cb_def_pragma; 127 128 /* Set the debug callbacks if we can use them. */ 129 if (debug_info_level == DINFO_LEVEL_VERBOSE 130 && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG 131 || write_symbols == VMS_AND_DWARF2_DEBUG)) 132 { 133 cb->define = cb_define; 134 cb->undef = cb_undef; 135 } 136 137 /* Start it at 0. */ 138 lineno = 0; 139 140 return cpp_read_main_file (parse_in, filename, ident_hash); 141} 142 143/* A thin wrapper around the real parser that initializes the 144 integrated preprocessor after debug output has been initialized. 145 Also, make sure the start_source_file debug hook gets called for 146 the primary source file. */ 147 148void 149c_common_parse_file (set_yydebug) 150 int set_yydebug ATTRIBUTE_UNUSED; 151{ 152#if YYDEBUG != 0 153 yydebug = set_yydebug; 154#else 155 warning ("YYDEBUG not defined"); 156#endif 157 158 (*debug_hooks->start_source_file) (lineno, input_filename); 159 cpp_finish_options (parse_in); 160 161 yyparse (); 162 free_parser_stacks (); 163} 164 165struct c_fileinfo * 166get_fileinfo (name) 167 const char *name; 168{ 169 splay_tree_node n; 170 struct c_fileinfo *fi; 171 172 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name); 173 if (n) 174 return (struct c_fileinfo *) n->value; 175 176 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo)); 177 fi->time = 0; 178 fi->interface_only = 0; 179 fi->interface_unknown = 1; 180 splay_tree_insert (file_info_tree, (splay_tree_key) name, 181 (splay_tree_value) fi); 182 return fi; 183} 184 185static void 186update_header_times (name) 187 const char *name; 188{ 189 /* Changing files again. This means currently collected time 190 is charged against header time, and body time starts back at 0. */ 191 if (flag_detailed_statistics) 192 { 193 int this_time = get_run_time (); 194 struct c_fileinfo *file = get_fileinfo (name); 195 header_time += this_time - body_time; 196 file->time += this_time - body_time; 197 body_time = this_time; 198 } 199} 200 201static int 202dump_one_header (n, dummy) 203 splay_tree_node n; 204 void *dummy ATTRIBUTE_UNUSED; 205{ 206 print_time ((const char *) n->key, 207 ((struct c_fileinfo *) n->value)->time); 208 return 0; 209} 210 211void 212dump_time_statistics () 213{ 214 struct c_fileinfo *file = get_fileinfo (input_filename); 215 int this_time = get_run_time (); 216 file->time += this_time - body_time; 217 218 fprintf (stderr, "\n******\n"); 219 print_time ("header files (total)", header_time); 220 print_time ("main file (total)", this_time - body_time); 221 fprintf (stderr, "ratio = %g : 1\n", 222 (double)header_time / (double)(this_time - body_time)); 223 fprintf (stderr, "\n******\n"); 224 225 splay_tree_foreach (file_info_tree, dump_one_header, 0); 226} 227 228static void 229cb_ident (pfile, line, str) 230 cpp_reader *pfile ATTRIBUTE_UNUSED; 231 unsigned int line ATTRIBUTE_UNUSED; 232 const cpp_string *str ATTRIBUTE_UNUSED; 233{ 234#ifdef ASM_OUTPUT_IDENT 235 if (! flag_no_ident) 236 { 237 /* Convert escapes in the string. */ 238 tree value = lex_string (str->text, str->len, 0); 239 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value)); 240 } 241#endif 242} 243 244/* Called at the start of every non-empty line. TOKEN is the first 245 lexed token on the line. Used for diagnostic line numbers. */ 246static void 247cb_line_change (pfile, token, parsing_args) 248 cpp_reader *pfile ATTRIBUTE_UNUSED; 249 const cpp_token *token; 250 int parsing_args ATTRIBUTE_UNUSED; 251{ 252 src_lineno = SOURCE_LINE (map, token->line); 253} 254 255static void 256cb_file_change (pfile, new_map) 257 cpp_reader *pfile ATTRIBUTE_UNUSED; 258 const struct line_map *new_map; 259{ 260 unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line); 261 262 if (new_map->reason == LC_ENTER) 263 { 264 /* Don't stack the main buffer on the input stack; 265 we already did in compile_file. */ 266 if (map == NULL) 267 main_input_filename = new_map->to_file; 268 else 269 { 270 int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1); 271 272 lineno = included_at; 273 push_srcloc (new_map->to_file, 1); 274 (*debug_hooks->start_source_file) (included_at, new_map->to_file); 275#ifndef NO_IMPLICIT_EXTERN_C 276 if (c_header_level) 277 ++c_header_level; 278 else if (new_map->sysp == 2) 279 { 280 c_header_level = 1; 281 ++pending_lang_change; 282 } 283#endif 284 } 285 } 286 else if (new_map->reason == LC_LEAVE) 287 { 288#ifndef NO_IMPLICIT_EXTERN_C 289 if (c_header_level && --c_header_level == 0) 290 { 291 if (new_map->sysp == 2) 292 warning ("badly nested C headers from preprocessor"); 293 --pending_lang_change; 294 } 295#endif 296 pop_srcloc (); 297 298 (*debug_hooks->end_source_file) (to_line); 299 } 300 301 update_header_times (new_map->to_file); 302#ifndef FREEBSD_NATIVE 303 in_system_header = new_map->sysp != 0; 304#else /* FREEBSD_NATIVE */ 305 in_system_header = 0; 306#endif /* FREEBSD_NATIVE */ 307 input_filename = new_map->to_file; 308 lineno = to_line; 309 map = new_map; 310 311 /* Hook for C++. */ 312 extract_interface_info (); 313} 314 315static void 316cb_def_pragma (pfile, line) 317 cpp_reader *pfile; 318 unsigned int line; 319{ 320 /* Issue a warning message if we have been asked to do so. Ignore 321 unknown pragmas in system headers unless an explicit 322 -Wunknown-pragmas has been given. */ 323 if (warn_unknown_pragmas > in_system_header) 324 { 325 const unsigned char *space, *name; 326 const cpp_token *s; 327 328 space = name = (const unsigned char *) ""; 329 s = cpp_get_token (pfile); 330 if (s->type != CPP_EOF) 331 { 332 space = cpp_token_as_text (pfile, s); 333 s = cpp_get_token (pfile); 334 if (s->type == CPP_NAME) 335 name = cpp_token_as_text (pfile, s); 336 } 337 338 lineno = SOURCE_LINE (map, line); 339 warning ("ignoring #pragma %s %s", space, name); 340 } 341} 342 343/* #define callback for DWARF and DWARF2 debug info. */ 344static void 345cb_define (pfile, line, node) 346 cpp_reader *pfile; 347 unsigned int line; 348 cpp_hashnode *node; 349{ 350 (*debug_hooks->define) (SOURCE_LINE (map, line), 351 (const char *) cpp_macro_definition (pfile, node)); 352} 353 354/* #undef callback for DWARF and DWARF2 debug info. */ 355static void 356cb_undef (pfile, line, node) 357 cpp_reader *pfile ATTRIBUTE_UNUSED; 358 unsigned int line; 359 cpp_hashnode *node; 360{ 361 (*debug_hooks->undef) (SOURCE_LINE (map, line), 362 (const char *) NODE_NAME (node)); 363} 364 365#if 0 /* not yet */ 366/* Returns nonzero if C is a universal-character-name. Give an error if it 367 is not one which may appear in an identifier, as per [extendid]. 368 369 Note that extended character support in identifiers has not yet been 370 implemented. It is my personal opinion that this is not a desirable 371 feature. Portable code cannot count on support for more than the basic 372 identifier character set. */ 373 374static inline int 375is_extended_char (c) 376 int c; 377{ 378#ifdef TARGET_EBCDIC 379 return 0; 380#else 381 /* ASCII. */ 382 if (c < 0x7f) 383 return 0; 384 385 /* None of the valid chars are outside the Basic Multilingual Plane (the 386 low 16 bits). */ 387 if (c > 0xffff) 388 { 389 error ("universal-character-name '\\U%08x' not valid in identifier", c); 390 return 1; 391 } 392 393 /* Latin */ 394 if ((c >= 0x00c0 && c <= 0x00d6) 395 || (c >= 0x00d8 && c <= 0x00f6) 396 || (c >= 0x00f8 && c <= 0x01f5) 397 || (c >= 0x01fa && c <= 0x0217) 398 || (c >= 0x0250 && c <= 0x02a8) 399 || (c >= 0x1e00 && c <= 0x1e9a) 400 || (c >= 0x1ea0 && c <= 0x1ef9)) 401 return 1; 402 403 /* Greek */ 404 if ((c == 0x0384) 405 || (c >= 0x0388 && c <= 0x038a) 406 || (c == 0x038c) 407 || (c >= 0x038e && c <= 0x03a1) 408 || (c >= 0x03a3 && c <= 0x03ce) 409 || (c >= 0x03d0 && c <= 0x03d6) 410 || (c == 0x03da) 411 || (c == 0x03dc) 412 || (c == 0x03de) 413 || (c == 0x03e0) 414 || (c >= 0x03e2 && c <= 0x03f3) 415 || (c >= 0x1f00 && c <= 0x1f15) 416 || (c >= 0x1f18 && c <= 0x1f1d) 417 || (c >= 0x1f20 && c <= 0x1f45) 418 || (c >= 0x1f48 && c <= 0x1f4d) 419 || (c >= 0x1f50 && c <= 0x1f57) 420 || (c == 0x1f59) 421 || (c == 0x1f5b) 422 || (c == 0x1f5d) 423 || (c >= 0x1f5f && c <= 0x1f7d) 424 || (c >= 0x1f80 && c <= 0x1fb4) 425 || (c >= 0x1fb6 && c <= 0x1fbc) 426 || (c >= 0x1fc2 && c <= 0x1fc4) 427 || (c >= 0x1fc6 && c <= 0x1fcc) 428 || (c >= 0x1fd0 && c <= 0x1fd3) 429 || (c >= 0x1fd6 && c <= 0x1fdb) 430 || (c >= 0x1fe0 && c <= 0x1fec) 431 || (c >= 0x1ff2 && c <= 0x1ff4) 432 || (c >= 0x1ff6 && c <= 0x1ffc)) 433 return 1; 434 435 /* Cyrillic */ 436 if ((c >= 0x0401 && c <= 0x040d) 437 || (c >= 0x040f && c <= 0x044f) 438 || (c >= 0x0451 && c <= 0x045c) 439 || (c >= 0x045e && c <= 0x0481) 440 || (c >= 0x0490 && c <= 0x04c4) 441 || (c >= 0x04c7 && c <= 0x04c8) 442 || (c >= 0x04cb && c <= 0x04cc) 443 || (c >= 0x04d0 && c <= 0x04eb) 444 || (c >= 0x04ee && c <= 0x04f5) 445 || (c >= 0x04f8 && c <= 0x04f9)) 446 return 1; 447 448 /* Armenian */ 449 if ((c >= 0x0531 && c <= 0x0556) 450 || (c >= 0x0561 && c <= 0x0587)) 451 return 1; 452 453 /* Hebrew */ 454 if ((c >= 0x05d0 && c <= 0x05ea) 455 || (c >= 0x05f0 && c <= 0x05f4)) 456 return 1; 457 458 /* Arabic */ 459 if ((c >= 0x0621 && c <= 0x063a) 460 || (c >= 0x0640 && c <= 0x0652) 461 || (c >= 0x0670 && c <= 0x06b7) 462 || (c >= 0x06ba && c <= 0x06be) 463 || (c >= 0x06c0 && c <= 0x06ce) 464 || (c >= 0x06e5 && c <= 0x06e7)) 465 return 1; 466 467 /* Devanagari */ 468 if ((c >= 0x0905 && c <= 0x0939) 469 || (c >= 0x0958 && c <= 0x0962)) 470 return 1; 471 472 /* Bengali */ 473 if ((c >= 0x0985 && c <= 0x098c) 474 || (c >= 0x098f && c <= 0x0990) 475 || (c >= 0x0993 && c <= 0x09a8) 476 || (c >= 0x09aa && c <= 0x09b0) 477 || (c == 0x09b2) 478 || (c >= 0x09b6 && c <= 0x09b9) 479 || (c >= 0x09dc && c <= 0x09dd) 480 || (c >= 0x09df && c <= 0x09e1) 481 || (c >= 0x09f0 && c <= 0x09f1)) 482 return 1; 483 484 /* Gurmukhi */ 485 if ((c >= 0x0a05 && c <= 0x0a0a) 486 || (c >= 0x0a0f && c <= 0x0a10) 487 || (c >= 0x0a13 && c <= 0x0a28) 488 || (c >= 0x0a2a && c <= 0x0a30) 489 || (c >= 0x0a32 && c <= 0x0a33) 490 || (c >= 0x0a35 && c <= 0x0a36) 491 || (c >= 0x0a38 && c <= 0x0a39) 492 || (c >= 0x0a59 && c <= 0x0a5c) 493 || (c == 0x0a5e)) 494 return 1; 495 496 /* Gujarati */ 497 if ((c >= 0x0a85 && c <= 0x0a8b) 498 || (c == 0x0a8d) 499 || (c >= 0x0a8f && c <= 0x0a91) 500 || (c >= 0x0a93 && c <= 0x0aa8) 501 || (c >= 0x0aaa && c <= 0x0ab0) 502 || (c >= 0x0ab2 && c <= 0x0ab3) 503 || (c >= 0x0ab5 && c <= 0x0ab9) 504 || (c == 0x0ae0)) 505 return 1; 506 507 /* Oriya */ 508 if ((c >= 0x0b05 && c <= 0x0b0c) 509 || (c >= 0x0b0f && c <= 0x0b10) 510 || (c >= 0x0b13 && c <= 0x0b28) 511 || (c >= 0x0b2a && c <= 0x0b30) 512 || (c >= 0x0b32 && c <= 0x0b33) 513 || (c >= 0x0b36 && c <= 0x0b39) 514 || (c >= 0x0b5c && c <= 0x0b5d) 515 || (c >= 0x0b5f && c <= 0x0b61)) 516 return 1; 517 518 /* Tamil */ 519 if ((c >= 0x0b85 && c <= 0x0b8a) 520 || (c >= 0x0b8e && c <= 0x0b90) 521 || (c >= 0x0b92 && c <= 0x0b95) 522 || (c >= 0x0b99 && c <= 0x0b9a) 523 || (c == 0x0b9c) 524 || (c >= 0x0b9e && c <= 0x0b9f) 525 || (c >= 0x0ba3 && c <= 0x0ba4) 526 || (c >= 0x0ba8 && c <= 0x0baa) 527 || (c >= 0x0bae && c <= 0x0bb5) 528 || (c >= 0x0bb7 && c <= 0x0bb9)) 529 return 1; 530 531 /* Telugu */ 532 if ((c >= 0x0c05 && c <= 0x0c0c) 533 || (c >= 0x0c0e && c <= 0x0c10) 534 || (c >= 0x0c12 && c <= 0x0c28) 535 || (c >= 0x0c2a && c <= 0x0c33) 536 || (c >= 0x0c35 && c <= 0x0c39) 537 || (c >= 0x0c60 && c <= 0x0c61)) 538 return 1; 539 540 /* Kannada */ 541 if ((c >= 0x0c85 && c <= 0x0c8c) 542 || (c >= 0x0c8e && c <= 0x0c90) 543 || (c >= 0x0c92 && c <= 0x0ca8) 544 || (c >= 0x0caa && c <= 0x0cb3) 545 || (c >= 0x0cb5 && c <= 0x0cb9) 546 || (c >= 0x0ce0 && c <= 0x0ce1)) 547 return 1; 548 549 /* Malayalam */ 550 if ((c >= 0x0d05 && c <= 0x0d0c) 551 || (c >= 0x0d0e && c <= 0x0d10) 552 || (c >= 0x0d12 && c <= 0x0d28) 553 || (c >= 0x0d2a && c <= 0x0d39) 554 || (c >= 0x0d60 && c <= 0x0d61)) 555 return 1; 556 557 /* Thai */ 558 if ((c >= 0x0e01 && c <= 0x0e30) 559 || (c >= 0x0e32 && c <= 0x0e33) 560 || (c >= 0x0e40 && c <= 0x0e46) 561 || (c >= 0x0e4f && c <= 0x0e5b)) 562 return 1; 563 564 /* Lao */ 565 if ((c >= 0x0e81 && c <= 0x0e82) 566 || (c == 0x0e84) 567 || (c == 0x0e87) 568 || (c == 0x0e88) 569 || (c == 0x0e8a) 570 || (c == 0x0e0d) 571 || (c >= 0x0e94 && c <= 0x0e97) 572 || (c >= 0x0e99 && c <= 0x0e9f) 573 || (c >= 0x0ea1 && c <= 0x0ea3) 574 || (c == 0x0ea5) 575 || (c == 0x0ea7) 576 || (c == 0x0eaa) 577 || (c == 0x0eab) 578 || (c >= 0x0ead && c <= 0x0eb0) 579 || (c == 0x0eb2) 580 || (c == 0x0eb3) 581 || (c == 0x0ebd) 582 || (c >= 0x0ec0 && c <= 0x0ec4) 583 || (c == 0x0ec6)) 584 return 1; 585 586 /* Georgian */ 587 if ((c >= 0x10a0 && c <= 0x10c5) 588 || (c >= 0x10d0 && c <= 0x10f6)) 589 return 1; 590 591 /* Hiragana */ 592 if ((c >= 0x3041 && c <= 0x3094) 593 || (c >= 0x309b && c <= 0x309e)) 594 return 1; 595 596 /* Katakana */ 597 if ((c >= 0x30a1 && c <= 0x30fe)) 598 return 1; 599 600 /* Bopmofo */ 601 if ((c >= 0x3105 && c <= 0x312c)) 602 return 1; 603 604 /* Hangul */ 605 if ((c >= 0x1100 && c <= 0x1159) 606 || (c >= 0x1161 && c <= 0x11a2) 607 || (c >= 0x11a8 && c <= 0x11f9)) 608 return 1; 609 610 /* CJK Unified Ideographs */ 611 if ((c >= 0xf900 && c <= 0xfa2d) 612 || (c >= 0xfb1f && c <= 0xfb36) 613 || (c >= 0xfb38 && c <= 0xfb3c) 614 || (c == 0xfb3e) 615 || (c >= 0xfb40 && c <= 0xfb41) 616 || (c >= 0xfb42 && c <= 0xfb44) 617 || (c >= 0xfb46 && c <= 0xfbb1) 618 || (c >= 0xfbd3 && c <= 0xfd3f) 619 || (c >= 0xfd50 && c <= 0xfd8f) 620 || (c >= 0xfd92 && c <= 0xfdc7) 621 || (c >= 0xfdf0 && c <= 0xfdfb) 622 || (c >= 0xfe70 && c <= 0xfe72) 623 || (c == 0xfe74) 624 || (c >= 0xfe76 && c <= 0xfefc) 625 || (c >= 0xff21 && c <= 0xff3a) 626 || (c >= 0xff41 && c <= 0xff5a) 627 || (c >= 0xff66 && c <= 0xffbe) 628 || (c >= 0xffc2 && c <= 0xffc7) 629 || (c >= 0xffca && c <= 0xffcf) 630 || (c >= 0xffd2 && c <= 0xffd7) 631 || (c >= 0xffda && c <= 0xffdc) 632 || (c >= 0x4e00 && c <= 0x9fa5)) 633 return 1; 634 635 error ("universal-character-name '\\u%04x' not valid in identifier", c); 636 return 1; 637#endif 638} 639 640/* Add the UTF-8 representation of C to the token_buffer. */ 641 642static void 643utf8_extend_token (c) 644 int c; 645{ 646 int shift, mask; 647 648 if (c <= 0x0000007f) 649 { 650 extend_token (c); 651 return; 652 } 653 else if (c <= 0x000007ff) 654 shift = 6, mask = 0xc0; 655 else if (c <= 0x0000ffff) 656 shift = 12, mask = 0xe0; 657 else if (c <= 0x001fffff) 658 shift = 18, mask = 0xf0; 659 else if (c <= 0x03ffffff) 660 shift = 24, mask = 0xf8; 661 else 662 shift = 30, mask = 0xfc; 663 664 extend_token (mask | (c >> shift)); 665 do 666 { 667 shift -= 6; 668 extend_token ((unsigned char) (0x80 | (c >> shift))); 669 } 670 while (shift); 671} 672#endif 673 674int 675c_lex (value) 676 tree *value; 677{ 678 const cpp_token *tok; 679 680 retry: 681 timevar_push (TV_CPP); 682 do 683 tok = cpp_get_token (parse_in); 684 while (tok->type == CPP_PADDING); 685 timevar_pop (TV_CPP); 686 687 /* The C++ front end does horrible things with the current line 688 number. To ensure an accurate line number, we must reset it 689 every time we return a token. */ 690 lineno = src_lineno; 691 692 *value = NULL_TREE; 693 switch (tok->type) 694 { 695 /* Issue this error here, where we can get at tok->val.c. */ 696 case CPP_OTHER: 697 if (ISGRAPH (tok->val.c)) 698 error ("stray '%c' in program", tok->val.c); 699 else 700 error ("stray '\\%o' in program", tok->val.c); 701 goto retry; 702 703 case CPP_NAME: 704 *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node)); 705 break; 706 707 case CPP_NUMBER: 708 { 709 unsigned int flags = cpp_classify_number (parse_in, tok); 710 711 switch (flags & CPP_N_CATEGORY) 712 { 713 case CPP_N_INVALID: 714 /* cpplib has issued an error. */ 715 *value = error_mark_node; 716 break; 717 718 case CPP_N_INTEGER: 719 *value = interpret_integer (tok, flags); 720 break; 721 722 case CPP_N_FLOATING: 723 *value = interpret_float (tok, flags); 724 break; 725 726 default: 727 abort (); 728 } 729 } 730 break; 731 732 case CPP_CHAR: 733 case CPP_WCHAR: 734 *value = lex_charconst (tok); 735 break; 736 737 case CPP_STRING: 738 case CPP_WSTRING: 739 *value = lex_string (tok->val.str.text, tok->val.str.len, 740 tok->type == CPP_WSTRING); 741 break; 742 743 /* These tokens should not be visible outside cpplib. */ 744 case CPP_HEADER_NAME: 745 case CPP_COMMENT: 746 case CPP_MACRO_ARG: 747 abort (); 748 749 default: break; 750 } 751 752 return tok->type; 753} 754 755/* Returns the narrowest C-visible unsigned type, starting with the 756 minimum specified by FLAGS, that can fit VALUE, or itk_none if 757 there isn't one. */ 758static enum integer_type_kind 759narrowest_unsigned_type (value, flags) 760 tree value; 761 unsigned int flags; 762{ 763 enum integer_type_kind itk; 764 765 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL) 766 itk = itk_unsigned_int; 767 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM) 768 itk = itk_unsigned_long; 769 else 770 itk = itk_unsigned_long_long; 771 772 /* int_fits_type_p must think the type of its first argument is 773 wider than its second argument, or it won't do the proper check. */ 774 TREE_TYPE (value) = widest_unsigned_literal_type_node; 775 776 for (; itk < itk_none; itk += 2 /* skip unsigned types */) 777 if (int_fits_type_p (value, integer_types[itk])) 778 return itk; 779 780 return itk_none; 781} 782 783/* Ditto, but narrowest signed type. */ 784static enum integer_type_kind 785narrowest_signed_type (value, flags) 786 tree value; 787 unsigned int flags; 788{ 789 enum integer_type_kind itk; 790 791 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL) 792 itk = itk_int; 793 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM) 794 itk = itk_long; 795 else 796 itk = itk_long_long; 797 798 /* int_fits_type_p must think the type of its first argument is 799 wider than its second argument, or it won't do the proper check. */ 800 TREE_TYPE (value) = widest_unsigned_literal_type_node; 801 802 for (; itk < itk_none; itk += 2 /* skip signed types */) 803 if (int_fits_type_p (value, integer_types[itk])) 804 return itk; 805 806 return itk_none; 807} 808 809/* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */ 810static tree 811interpret_integer (token, flags) 812 const cpp_token *token; 813 unsigned int flags; 814{ 815 tree value, type; 816 enum integer_type_kind itk; 817 cpp_num integer; 818 cpp_options *options = cpp_get_options (parse_in); 819 820 integer = cpp_interpret_integer (parse_in, token, flags); 821 integer = cpp_num_sign_extend (integer, options->precision); 822 value = build_int_2_wide (integer.low, integer.high); 823 824 /* The type of a constant with a U suffix is straightforward. */ 825 if (flags & CPP_N_UNSIGNED) 826 itk = narrowest_unsigned_type (value, flags); 827 else 828 { 829 /* The type of a potentially-signed integer constant varies 830 depending on the base it's in, the standard in use, and the 831 length suffixes. */ 832 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags); 833 enum integer_type_kind itk_s = narrowest_signed_type (value, flags); 834 835 /* In both C89 and C99, octal and hex constants may be signed or 836 unsigned, whichever fits tighter. We do not warn about this 837 choice differing from the traditional choice, as the constant 838 is probably a bit pattern and either way will work. */ 839 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL) 840 itk = MIN (itk_u, itk_s); 841 else 842 { 843 /* In C99, decimal constants are always signed. 844 In C89, decimal constants that don't fit in long have 845 undefined behavior; we try to make them unsigned long. 846 In GCC's extended C89, that last is true of decimal 847 constants that don't fit in long long, too. */ 848 849 itk = itk_s; 850 if (itk_s > itk_u && itk_s > itk_long) 851 { 852 if (!flag_isoc99) 853 { 854 if (itk_u < itk_unsigned_long) 855 itk_u = itk_unsigned_long; 856 itk = itk_u; 857 warning ("this decimal constant is unsigned only in ISO C90"); 858 } 859 else if (warn_traditional) 860 warning ("this decimal constant would be unsigned in ISO C90"); 861 } 862 } 863 } 864 865 if (itk == itk_none) 866 /* cpplib has already issued a warning for overflow. */ 867 type = ((flags & CPP_N_UNSIGNED) 868 ? widest_unsigned_literal_type_node 869 : widest_integer_literal_type_node); 870 else 871 type = integer_types[itk]; 872 873 if (itk > itk_unsigned_long 874 && (flags & CPP_N_WIDTH) != CPP_N_LARGE 875 && ! in_system_header && ! flag_isoc99) 876 pedwarn ("integer constant is too large for \"%s\" type", 877 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long"); 878 879 TREE_TYPE (value) = type; 880 881 /* Convert imaginary to a complex type. */ 882 if (flags & CPP_N_IMAGINARY) 883 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value); 884 885 return value; 886} 887 888/* Interpret TOKEN, a floating point number with FLAGS as classified 889 by cpplib. */ 890static tree 891interpret_float (token, flags) 892 const cpp_token *token; 893 unsigned int flags; 894{ 895 tree type; 896 tree value; 897 REAL_VALUE_TYPE real; 898 char *copy; 899 size_t copylen; 900 const char *typename; 901 902 /* FIXME: make %T work in error/warning, then we don't need typename. */ 903 if ((flags & CPP_N_WIDTH) == CPP_N_LARGE) 904 { 905 type = long_double_type_node; 906 typename = "long double"; 907 } 908 else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL 909 || flag_single_precision_constant) 910 { 911 type = float_type_node; 912 typename = "float"; 913 } 914 else 915 { 916 type = double_type_node; 917 typename = "double"; 918 } 919 920 /* Copy the constant to a nul-terminated buffer. If the constant 921 has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF 922 can't handle them. */ 923 copylen = token->val.str.len; 924 if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM) 925 /* Must be an F or L suffix. */ 926 copylen--; 927 if (flags & CPP_N_IMAGINARY) 928 /* I or J suffix. */ 929 copylen--; 930 931 copy = alloca (copylen + 1); 932 memcpy (copy, token->val.str.text, copylen); 933 copy[copylen] = '\0'; 934 935 real_from_string (&real, copy); 936 real_convert (&real, TYPE_MODE (type), &real); 937 938 /* A diagnostic is required for "soft" overflow by some ISO C 939 testsuites. This is not pedwarn, because some people don't want 940 an error for this. 941 ??? That's a dubious reason... is this a mandatory diagnostic or 942 isn't it? -- zw, 2001-08-21. */ 943 if (REAL_VALUE_ISINF (real) && pedantic) 944 warning ("floating constant exceeds range of \"%s\"", typename); 945 946 /* Create a node with determined type and value. */ 947 value = build_real (type, real); 948 if (flags & CPP_N_IMAGINARY) 949 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value); 950 951 return value; 952} 953 954static tree 955lex_string (str, len, wide) 956 const unsigned char *str; 957 unsigned int len; 958 int wide; 959{ 960 tree value; 961 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1)); 962 char *q = buf; 963 const unsigned char *p = str, *limit = str + len; 964 cppchar_t c; 965 966#ifdef MULTIBYTE_CHARS 967 /* Reset multibyte conversion state. */ 968 (void) local_mbtowc (NULL, NULL, 0); 969#endif 970 971 while (p < limit) 972 { 973#ifdef MULTIBYTE_CHARS 974 wchar_t wc; 975 int char_len; 976 977 char_len = local_mbtowc (&wc, (const char *) p, limit - p); 978 if (char_len == -1) 979 { 980 warning ("ignoring invalid multibyte character"); 981 char_len = 1; 982 c = *p++; 983 } 984 else 985 { 986 p += char_len; 987 c = wc; 988 } 989#else 990 c = *p++; 991#endif 992 993 if (c == '\\' && !ignore_escape_flag) 994 c = cpp_parse_escape (parse_in, &p, limit, wide); 995 996 /* Add this single character into the buffer either as a wchar_t, 997 a multibyte sequence, or as a single byte. */ 998 if (wide) 999 { 1000 unsigned charwidth = TYPE_PRECISION (char_type_node); 1001 unsigned bytemask = (1 << charwidth) - 1; 1002 int byte; 1003 1004 for (byte = 0; byte < WCHAR_BYTES; ++byte) 1005 { 1006 int n; 1007 if (byte >= (int) sizeof (c)) 1008 n = 0; 1009 else 1010 n = (c >> (byte * charwidth)) & bytemask; 1011 if (BYTES_BIG_ENDIAN) 1012 q[WCHAR_BYTES - byte - 1] = n; 1013 else 1014 q[byte] = n; 1015 } 1016 q += WCHAR_BYTES; 1017 } 1018#ifdef MULTIBYTE_CHARS 1019 else if (char_len > 1) 1020 { 1021 /* We're dealing with a multibyte character. */ 1022 for ( ; char_len >0; --char_len) 1023 { 1024 *q++ = *(p - char_len); 1025 } 1026 } 1027#endif 1028 else 1029 { 1030 *q++ = c; 1031 } 1032 } 1033 1034 /* Terminate the string value, either with a single byte zero 1035 or with a wide zero. */ 1036 1037 if (wide) 1038 { 1039 memset (q, 0, WCHAR_BYTES); 1040 q += WCHAR_BYTES; 1041 } 1042 else 1043 { 1044 *q++ = '\0'; 1045 } 1046 1047 value = build_string (q - buf, buf); 1048 1049 if (wide) 1050 TREE_TYPE (value) = wchar_array_type_node; 1051 else 1052 TREE_TYPE (value) = char_array_type_node; 1053 return value; 1054} 1055 1056/* Converts a (possibly wide) character constant token into a tree. */ 1057static tree 1058lex_charconst (token) 1059 const cpp_token *token; 1060{ 1061 cppchar_t result; 1062 tree type, value; 1063 unsigned int chars_seen; 1064 int unsignedp; 1065 1066 result = cpp_interpret_charconst (parse_in, token, 1067 &chars_seen, &unsignedp); 1068 1069 /* Cast to cppchar_signed_t to get correct sign-extension of RESULT 1070 before possibly widening to HOST_WIDE_INT for build_int_2. */ 1071 if (unsignedp || (cppchar_signed_t) result >= 0) 1072 value = build_int_2 (result, 0); 1073 else 1074 value = build_int_2 ((cppchar_signed_t) result, -1); 1075 1076 if (token->type == CPP_WCHAR) 1077 type = wchar_type_node; 1078 /* In C, a character constant has type 'int'. 1079 In C++ 'char', but multi-char charconsts have type 'int'. */ 1080 else if ((c_language == clk_c) || chars_seen > 1) 1081 type = integer_type_node; 1082 else 1083 type = char_type_node; 1084 1085 TREE_TYPE (value) = type; 1086 return value; 1087} 1088