c-lex.c revision 117415
1/* Mainly the interface between cpplib and the C front ends.
2   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3   1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to the Free
19Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2002111-1307, USA.  */
21
22/* $FreeBSD: head/contrib/gcc/c-lex.c 117415 2003-07-11 04:49:30Z kan $ */
23
24#include "config.h"
25#include "system.h"
26
27#include "real.h"
28#include "rtl.h"
29#include "tree.h"
30#include "expr.h"
31#include "input.h"
32#include "output.h"
33#include "c-tree.h"
34#include "c-common.h"
35#include "flags.h"
36#include "timevar.h"
37#include "cpplib.h"
38#include "c-pragma.h"
39#include "toplev.h"
40#include "intl.h"
41#include "tm_p.h"
42#include "splay-tree.h"
43#include "debug.h"
44
45#ifdef MULTIBYTE_CHARS
46#include "mbchar.h"
47#include <locale.h>
48#endif /* MULTIBYTE_CHARS */
49
50/* The current line map.  */
51static const struct line_map *map;
52
53/* The line used to refresh the lineno global variable after each token.  */
54static unsigned int src_lineno;
55
56/* We may keep statistics about how long which files took to compile.  */
57static int header_time, body_time;
58static splay_tree file_info_tree;
59
60/* File used for outputting assembler code.  */
61extern FILE *asm_out_file;
62
63#undef WCHAR_TYPE_SIZE
64#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
65
66/* Number of bytes in a wide character.  */
67#define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
68
69int pending_lang_change; /* If we need to switch languages - C++ only */
70int c_header_level;	 /* depth in C headers - C++ only */
71
/* Nonzero tells lex_string to ignore \ in string constants.  */
73static int ignore_escape_flag;
74
75static tree interpret_integer	PARAMS ((const cpp_token *, unsigned int));
76static tree interpret_float	PARAMS ((const cpp_token *, unsigned int));
77static enum integer_type_kind
78  narrowest_unsigned_type	PARAMS ((tree, unsigned int));
79static enum integer_type_kind
80  narrowest_signed_type		PARAMS ((tree, unsigned int));
81static tree lex_string		PARAMS ((const unsigned char *, unsigned int,
82					 int));
83static tree lex_charconst	PARAMS ((const cpp_token *));
84static void update_header_times	PARAMS ((const char *));
85static int dump_one_header	PARAMS ((splay_tree_node, void *));
86static void cb_line_change     PARAMS ((cpp_reader *, const cpp_token *, int));
87static void cb_ident		PARAMS ((cpp_reader *, unsigned int,
88					 const cpp_string *));
89static void cb_file_change    PARAMS ((cpp_reader *, const struct line_map *));
90static void cb_def_pragma	PARAMS ((cpp_reader *, unsigned int));
91static void cb_define		PARAMS ((cpp_reader *, unsigned int,
92					 cpp_hashnode *));
93static void cb_undef		PARAMS ((cpp_reader *, unsigned int,
94					 cpp_hashnode *));
95
96const char *
97init_c_lex (filename)
98     const char *filename;
99{
100  struct cpp_callbacks *cb;
101  struct c_fileinfo *toplevel;
102
103  /* Set up filename timing.  Must happen before cpp_read_main_file.  */
104  file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
105				   0,
106				   (splay_tree_delete_value_fn)free);
107  toplevel = get_fileinfo ("<top level>");
108  if (flag_detailed_statistics)
109    {
110      header_time = 0;
111      body_time = get_run_time ();
112      toplevel->time = body_time;
113    }
114
115#ifdef MULTIBYTE_CHARS
116  /* Change to the native locale for multibyte conversions.  */
117  setlocale (LC_CTYPE, "");
118  GET_ENVIRONMENT (literal_codeset, "LANG");
119#endif
120
121  cb = cpp_get_callbacks (parse_in);
122
123  cb->line_change = cb_line_change;
124  cb->ident = cb_ident;
125  cb->file_change = cb_file_change;
126  cb->def_pragma = cb_def_pragma;
127
128  /* Set the debug callbacks if we can use them.  */
129  if (debug_info_level == DINFO_LEVEL_VERBOSE
130      && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
131          || write_symbols == VMS_AND_DWARF2_DEBUG))
132    {
133      cb->define = cb_define;
134      cb->undef = cb_undef;
135    }
136
137  /* Start it at 0.  */
138  lineno = 0;
139
140  return cpp_read_main_file (parse_in, filename, ident_hash);
141}
142
143/* A thin wrapper around the real parser that initializes the
144   integrated preprocessor after debug output has been initialized.
145   Also, make sure the start_source_file debug hook gets called for
146   the primary source file.  */
147
148void
149c_common_parse_file (set_yydebug)
150     int set_yydebug ATTRIBUTE_UNUSED;
151{
152#if YYDEBUG != 0
153  yydebug = set_yydebug;
154#else
155  warning ("YYDEBUG not defined");
156#endif
157
158  (*debug_hooks->start_source_file) (lineno, input_filename);
159  cpp_finish_options (parse_in);
160
161  yyparse ();
162  free_parser_stacks ();
163}
164
165struct c_fileinfo *
166get_fileinfo (name)
167     const char *name;
168{
169  splay_tree_node n;
170  struct c_fileinfo *fi;
171
172  n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
173  if (n)
174    return (struct c_fileinfo *) n->value;
175
176  fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
177  fi->time = 0;
178  fi->interface_only = 0;
179  fi->interface_unknown = 1;
180  splay_tree_insert (file_info_tree, (splay_tree_key) name,
181		     (splay_tree_value) fi);
182  return fi;
183}
184
185static void
186update_header_times (name)
187     const char *name;
188{
189  /* Changing files again.  This means currently collected time
190     is charged against header time, and body time starts back at 0.  */
191  if (flag_detailed_statistics)
192    {
193      int this_time = get_run_time ();
194      struct c_fileinfo *file = get_fileinfo (name);
195      header_time += this_time - body_time;
196      file->time += this_time - body_time;
197      body_time = this_time;
198    }
199}
200
201static int
202dump_one_header (n, dummy)
203     splay_tree_node n;
204     void *dummy ATTRIBUTE_UNUSED;
205{
206  print_time ((const char *) n->key,
207	      ((struct c_fileinfo *) n->value)->time);
208  return 0;
209}
210
211void
212dump_time_statistics ()
213{
214  struct c_fileinfo *file = get_fileinfo (input_filename);
215  int this_time = get_run_time ();
216  file->time += this_time - body_time;
217
218  fprintf (stderr, "\n******\n");
219  print_time ("header files (total)", header_time);
220  print_time ("main file (total)", this_time - body_time);
221  fprintf (stderr, "ratio = %g : 1\n",
222	   (double)header_time / (double)(this_time - body_time));
223  fprintf (stderr, "\n******\n");
224
225  splay_tree_foreach (file_info_tree, dump_one_header, 0);
226}
227
228static void
229cb_ident (pfile, line, str)
230     cpp_reader *pfile ATTRIBUTE_UNUSED;
231     unsigned int line ATTRIBUTE_UNUSED;
232     const cpp_string *str ATTRIBUTE_UNUSED;
233{
234#ifdef ASM_OUTPUT_IDENT
235  if (! flag_no_ident)
236    {
237      /* Convert escapes in the string.  */
238      tree value = lex_string (str->text, str->len, 0);
239      ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
240    }
241#endif
242}
243
244/* Called at the start of every non-empty line.  TOKEN is the first
245   lexed token on the line.  Used for diagnostic line numbers.  */
246static void
247cb_line_change (pfile, token, parsing_args)
248     cpp_reader *pfile ATTRIBUTE_UNUSED;
249     const cpp_token *token;
250     int parsing_args ATTRIBUTE_UNUSED;
251{
252  src_lineno = SOURCE_LINE (map, token->line);
253}
254
255static void
256cb_file_change (pfile, new_map)
257     cpp_reader *pfile ATTRIBUTE_UNUSED;
258     const struct line_map *new_map;
259{
260  unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
261
262  if (new_map->reason == LC_ENTER)
263    {
264      /* Don't stack the main buffer on the input stack;
265	 we already did in compile_file.  */
266      if (map == NULL)
267	main_input_filename = new_map->to_file;
268      else
269	{
270          int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
271
272	  lineno = included_at;
273	  push_srcloc (new_map->to_file, 1);
274	  (*debug_hooks->start_source_file) (included_at, new_map->to_file);
275#ifndef NO_IMPLICIT_EXTERN_C
276	  if (c_header_level)
277	    ++c_header_level;
278	  else if (new_map->sysp == 2)
279	    {
280	      c_header_level = 1;
281	      ++pending_lang_change;
282	    }
283#endif
284	}
285    }
286  else if (new_map->reason == LC_LEAVE)
287    {
288#ifndef NO_IMPLICIT_EXTERN_C
289      if (c_header_level && --c_header_level == 0)
290	{
291	  if (new_map->sysp == 2)
292	    warning ("badly nested C headers from preprocessor");
293	  --pending_lang_change;
294	}
295#endif
296      pop_srcloc ();
297
298      (*debug_hooks->end_source_file) (to_line);
299    }
300
301  update_header_times (new_map->to_file);
302#ifndef FREEBSD_NATIVE
303  in_system_header = new_map->sysp != 0;
304#else /* FREEBSD_NATIVE */
305  in_system_header = 0;
306#endif /* FREEBSD_NATIVE */
307  input_filename = new_map->to_file;
308  lineno = to_line;
309  map = new_map;
310
311  /* Hook for C++.  */
312  extract_interface_info ();
313}
314
315static void
316cb_def_pragma (pfile, line)
317     cpp_reader *pfile;
318     unsigned int line;
319{
320  /* Issue a warning message if we have been asked to do so.  Ignore
321     unknown pragmas in system headers unless an explicit
322     -Wunknown-pragmas has been given.  */
323  if (warn_unknown_pragmas > in_system_header)
324    {
325      const unsigned char *space, *name;
326      const cpp_token *s;
327
328      space = name = (const unsigned char *) "";
329      s = cpp_get_token (pfile);
330      if (s->type != CPP_EOF)
331	{
332	  space = cpp_token_as_text (pfile, s);
333	  s = cpp_get_token (pfile);
334	  if (s->type == CPP_NAME)
335	    name = cpp_token_as_text (pfile, s);
336	}
337
338      lineno = SOURCE_LINE (map, line);
339      warning ("ignoring #pragma %s %s", space, name);
340    }
341}
342
343/* #define callback for DWARF and DWARF2 debug info.  */
344static void
345cb_define (pfile, line, node)
346     cpp_reader *pfile;
347     unsigned int line;
348     cpp_hashnode *node;
349{
350  (*debug_hooks->define) (SOURCE_LINE (map, line),
351			  (const char *) cpp_macro_definition (pfile, node));
352}
353
354/* #undef callback for DWARF and DWARF2 debug info.  */
355static void
356cb_undef (pfile, line, node)
357     cpp_reader *pfile ATTRIBUTE_UNUSED;
358     unsigned int line;
359     cpp_hashnode *node;
360{
361  (*debug_hooks->undef) (SOURCE_LINE (map, line),
362			 (const char *) NODE_NAME (node));
363}
364
365#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name.  Gives an error
   if it is not one which may appear in an identifier, as per
   [extendid].

   Extended character support in identifiers has not yet been
   implemented; this code is currently compiled out.  The original
   author considered the feature undesirable, since portable code
   cannot count on more than the basic identifier character set.

   The permitted code points are kept in a table of inclusive ranges;
   a single code point is a range whose two ends coincide.  */

static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  static const struct
  {
    unsigned short first;
    unsigned short last;
  } valid[] = {
    /* Latin */
    { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 },
    { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x1e00, 0x1e9a },
    { 0x1ea0, 0x1ef9 },

    /* Greek */
    { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
    { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
    { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
    { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x1f00, 0x1f15 },
    { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d },
    { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b },
    { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 },
    { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc },
    { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec },
    { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

    /* Cyrillic */
    { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
    { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
    { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
    { 0x04f8, 0x04f9 },

    /* Armenian */
    { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

    /* Hebrew */
    { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

    /* Arabic */
    { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
    { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

    /* Devanagari */
    { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

    /* Bengali */
    { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
    { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
    { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

    /* Gurmukhi */
    { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
    { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
    { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

    /* Gujarati */
    { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
    { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
    { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

    /* Oriya */
    { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
    { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
    { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

    /* Tamil */
    { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
    { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
    { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
    { 0x0bb7, 0x0bb9 },

    /* Telugu */
    { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
    { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

    /* Kannada */
    { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
    { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

    /* Malayalam */
    { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
    { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

    /* Thai */
    { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
    { 0x0e4f, 0x0e5b },

    /* Lao.  NOTE(review): 0x0e0d is listed with the Lao code points
       but already falls inside the first Thai range; 0x0e8d may have
       been intended -- confirm against the standard's table.  */
    { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
    { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e0d, 0x0e0d },
    { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
    { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
    { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
    { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
    { 0x0ec6, 0x0ec6 },

    /* Georgian */
    { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

    /* Hiragana */
    { 0x3041, 0x3094 }, { 0x309b, 0x309e },

    /* Katakana */
    { 0x30a1, 0x30fe },

    /* Bopomofo */
    { 0x3105, 0x312c },

    /* Hangul */
    { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

    /* CJK Unified Ideographs */
    { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 }, { 0xfb38, 0xfb3c },
    { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 }, { 0xfb42, 0xfb44 },
    { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f }, { 0xfd50, 0xfd8f },
    { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb }, { 0xfe70, 0xfe72 },
    { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc }, { 0xff21, 0xff3a },
    { 0xff41, 0xff5a }, { 0xff66, 0xffbe }, { 0xffc2, 0xffc7 },
    { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, { 0xffda, 0xffdc },
    { 0x4e00, 0x9fa5 }
  };
  unsigned int i;

  /* ASCII.  NOTE(review): 0x7f itself is not caught here and ends up
     reported as an invalid universal-character-name -- confirm that
     this boundary is intended.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane
     (the low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  for (i = 0; i < sizeof (valid) / sizeof (valid[0]); i++)
    if (c >= valid[i].first && c <= valid[i].last)
      return 1;

  /* Still a universal-character-name, just not one allowed here.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
639
/* Add the UTF-8 representation of C to the token_buffer, one byte at
   a time through extend_token.  Values up to 0x7f are passed through
   unchanged; larger values become a lead byte followed by one to five
   continuation bytes.  */

static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if      (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: length marker plus the most significant payload bits.  */
  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* Continuation byte: exactly six payload bits under the 10xxxxxx
         marker.  Without the 0x3f mask, higher bits of C would leak
         into bit 6 and yield an invalid sequence.  */
      extend_token (0x80 | ((c >> shift) & 0x3f));
    }
  while (shift);
}
672#endif
673
674int
675c_lex (value)
676     tree *value;
677{
678  const cpp_token *tok;
679
680  retry:
681  timevar_push (TV_CPP);
682  do
683    tok = cpp_get_token (parse_in);
684  while (tok->type == CPP_PADDING);
685  timevar_pop (TV_CPP);
686
687  /* The C++ front end does horrible things with the current line
688     number.  To ensure an accurate line number, we must reset it
689     every time we return a token.  */
690  lineno = src_lineno;
691
692  *value = NULL_TREE;
693  switch (tok->type)
694    {
695    /* Issue this error here, where we can get at tok->val.c.  */
696    case CPP_OTHER:
697      if (ISGRAPH (tok->val.c))
698	error ("stray '%c' in program", tok->val.c);
699      else
700	error ("stray '\\%o' in program", tok->val.c);
701      goto retry;
702
703    case CPP_NAME:
704      *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
705      break;
706
707    case CPP_NUMBER:
708      {
709	unsigned int flags = cpp_classify_number (parse_in, tok);
710
711	switch (flags & CPP_N_CATEGORY)
712	  {
713	  case CPP_N_INVALID:
714	    /* cpplib has issued an error.  */
715	    *value = error_mark_node;
716	    break;
717
718	  case CPP_N_INTEGER:
719	    *value = interpret_integer (tok, flags);
720	    break;
721
722	  case CPP_N_FLOATING:
723	    *value = interpret_float (tok, flags);
724	    break;
725
726	  default:
727	    abort ();
728	  }
729      }
730      break;
731
732    case CPP_CHAR:
733    case CPP_WCHAR:
734      *value = lex_charconst (tok);
735      break;
736
737    case CPP_STRING:
738    case CPP_WSTRING:
739      *value = lex_string (tok->val.str.text, tok->val.str.len,
740			   tok->type == CPP_WSTRING);
741      break;
742
743      /* These tokens should not be visible outside cpplib.  */
744    case CPP_HEADER_NAME:
745    case CPP_COMMENT:
746    case CPP_MACRO_ARG:
747      abort ();
748
749    default: break;
750    }
751
752  return tok->type;
753}
754
755/* Returns the narrowest C-visible unsigned type, starting with the
756   minimum specified by FLAGS, that can fit VALUE, or itk_none if
757   there isn't one.  */
758static enum integer_type_kind
759narrowest_unsigned_type (value, flags)
760     tree value;
761     unsigned int flags;
762{
763  enum integer_type_kind itk;
764
765  if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
766    itk = itk_unsigned_int;
767  else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
768    itk = itk_unsigned_long;
769  else
770    itk = itk_unsigned_long_long;
771
772  /* int_fits_type_p must think the type of its first argument is
773     wider than its second argument, or it won't do the proper check.  */
774  TREE_TYPE (value) = widest_unsigned_literal_type_node;
775
776  for (; itk < itk_none; itk += 2 /* skip unsigned types */)
777    if (int_fits_type_p (value, integer_types[itk]))
778      return itk;
779
780  return itk_none;
781}
782
783/* Ditto, but narrowest signed type.  */
784static enum integer_type_kind
785narrowest_signed_type (value, flags)
786     tree value;
787     unsigned int flags;
788{
789  enum integer_type_kind itk;
790
791  if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)
792    itk = itk_int;
793  else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)
794    itk = itk_long;
795  else
796    itk = itk_long_long;
797
798  /* int_fits_type_p must think the type of its first argument is
799     wider than its second argument, or it won't do the proper check.  */
800  TREE_TYPE (value) = widest_unsigned_literal_type_node;
801
802  for (; itk < itk_none; itk += 2 /* skip signed types */)
803    if (int_fits_type_p (value, integer_types[itk]))
804      return itk;
805
806  return itk_none;
807}
808
809/* Interpret TOKEN, an integer with FLAGS as classified by cpplib.  */
810static tree
811interpret_integer (token, flags)
812     const cpp_token *token;
813     unsigned int flags;
814{
815  tree value, type;
816  enum integer_type_kind itk;
817  cpp_num integer;
818  cpp_options *options = cpp_get_options (parse_in);
819
820  integer = cpp_interpret_integer (parse_in, token, flags);
821  integer = cpp_num_sign_extend (integer, options->precision);
822  value = build_int_2_wide (integer.low, integer.high);
823
824  /* The type of a constant with a U suffix is straightforward.  */
825  if (flags & CPP_N_UNSIGNED)
826    itk = narrowest_unsigned_type (value, flags);
827  else
828    {
829      /* The type of a potentially-signed integer constant varies
830	 depending on the base it's in, the standard in use, and the
831	 length suffixes.  */
832      enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
833      enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
834
835      /* In both C89 and C99, octal and hex constants may be signed or
836	 unsigned, whichever fits tighter.  We do not warn about this
837	 choice differing from the traditional choice, as the constant
838	 is probably a bit pattern and either way will work.  */
839      if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)
840	itk = MIN (itk_u, itk_s);
841      else
842	{
843	  /* In C99, decimal constants are always signed.
844	     In C89, decimal constants that don't fit in long have
845	     undefined behavior; we try to make them unsigned long.
846	     In GCC's extended C89, that last is true of decimal
847	     constants that don't fit in long long, too.  */
848
849	  itk = itk_s;
850	  if (itk_s > itk_u && itk_s > itk_long)
851	    {
852	      if (!flag_isoc99)
853		{
854		  if (itk_u < itk_unsigned_long)
855		    itk_u = itk_unsigned_long;
856		  itk = itk_u;
857		  warning ("this decimal constant is unsigned only in ISO C90");
858		}
859	      else if (warn_traditional)
860		warning ("this decimal constant would be unsigned in ISO C90");
861	    }
862	}
863    }
864
865  if (itk == itk_none)
866    /* cpplib has already issued a warning for overflow.  */
867    type = ((flags & CPP_N_UNSIGNED)
868	    ? widest_unsigned_literal_type_node
869	    : widest_integer_literal_type_node);
870  else
871    type = integer_types[itk];
872
873  if (itk > itk_unsigned_long
874      && (flags & CPP_N_WIDTH) != CPP_N_LARGE
875      && ! in_system_header && ! flag_isoc99)
876    pedwarn ("integer constant is too large for \"%s\" type",
877	     (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
878
879  TREE_TYPE (value) = type;
880
881  /* Convert imaginary to a complex type.  */
882  if (flags & CPP_N_IMAGINARY)
883    value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
884
885  return value;
886}
887
888/* Interpret TOKEN, a floating point number with FLAGS as classified
889   by cpplib.  */
890static tree
891interpret_float (token, flags)
892     const cpp_token *token;
893     unsigned int flags;
894{
895  tree type;
896  tree value;
897  REAL_VALUE_TYPE real;
898  char *copy;
899  size_t copylen;
900  const char *typename;
901
902  /* FIXME: make %T work in error/warning, then we don't need typename.  */
903  if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
904    {
905      type = long_double_type_node;
906      typename = "long double";
907    }
908  else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
909	   || flag_single_precision_constant)
910    {
911      type = float_type_node;
912      typename = "float";
913    }
914  else
915    {
916      type = double_type_node;
917      typename = "double";
918    }
919
920  /* Copy the constant to a nul-terminated buffer.  If the constant
921     has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF
922     can't handle them.  */
923  copylen = token->val.str.len;
924  if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM)
925    /* Must be an F or L suffix.  */
926    copylen--;
927  if (flags & CPP_N_IMAGINARY)
928    /* I or J suffix.  */
929    copylen--;
930
931  copy = alloca (copylen + 1);
932  memcpy (copy, token->val.str.text, copylen);
933  copy[copylen] = '\0';
934
935  real_from_string (&real, copy);
936  real_convert (&real, TYPE_MODE (type), &real);
937
938  /* A diagnostic is required for "soft" overflow by some ISO C
939     testsuites.  This is not pedwarn, because some people don't want
940     an error for this.
941     ??? That's a dubious reason... is this a mandatory diagnostic or
942     isn't it?   -- zw, 2001-08-21.  */
943  if (REAL_VALUE_ISINF (real) && pedantic)
944    warning ("floating constant exceeds range of \"%s\"", typename);
945
946  /* Create a node with determined type and value.  */
947  value = build_real (type, real);
948  if (flags & CPP_N_IMAGINARY)
949    value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
950
951  return value;
952}
953
954static tree
955lex_string (str, len, wide)
956     const unsigned char *str;
957     unsigned int len;
958     int wide;
959{
960  tree value;
961  char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
962  char *q = buf;
963  const unsigned char *p = str, *limit = str + len;
964  cppchar_t c;
965
966#ifdef MULTIBYTE_CHARS
967  /* Reset multibyte conversion state.  */
968  (void) local_mbtowc (NULL, NULL, 0);
969#endif
970
971  while (p < limit)
972    {
973#ifdef MULTIBYTE_CHARS
974      wchar_t wc;
975      int char_len;
976
977      char_len = local_mbtowc (&wc, (const char *) p, limit - p);
978      if (char_len == -1)
979	{
980	  warning ("ignoring invalid multibyte character");
981	  char_len = 1;
982	  c = *p++;
983	}
984      else
985	{
986	  p += char_len;
987	  c = wc;
988	}
989#else
990      c = *p++;
991#endif
992
993      if (c == '\\' && !ignore_escape_flag)
994	c = cpp_parse_escape (parse_in, &p, limit, wide);
995
996      /* Add this single character into the buffer either as a wchar_t,
997	 a multibyte sequence, or as a single byte.  */
998      if (wide)
999	{
1000	  unsigned charwidth = TYPE_PRECISION (char_type_node);
1001	  unsigned bytemask = (1 << charwidth) - 1;
1002	  int byte;
1003
1004	  for (byte = 0; byte < WCHAR_BYTES; ++byte)
1005	    {
1006	      int n;
1007	      if (byte >= (int) sizeof (c))
1008		n = 0;
1009	      else
1010		n = (c >> (byte * charwidth)) & bytemask;
1011	      if (BYTES_BIG_ENDIAN)
1012		q[WCHAR_BYTES - byte - 1] = n;
1013	      else
1014		q[byte] = n;
1015	    }
1016	  q += WCHAR_BYTES;
1017	}
1018#ifdef MULTIBYTE_CHARS
1019      else if (char_len > 1)
1020	{
1021	  /* We're dealing with a multibyte character.  */
1022	  for ( ; char_len >0; --char_len)
1023	    {
1024	      *q++ = *(p - char_len);
1025	    }
1026	}
1027#endif
1028      else
1029	{
1030	  *q++ = c;
1031	}
1032    }
1033
1034  /* Terminate the string value, either with a single byte zero
1035     or with a wide zero.  */
1036
1037  if (wide)
1038    {
1039      memset (q, 0, WCHAR_BYTES);
1040      q += WCHAR_BYTES;
1041    }
1042  else
1043    {
1044      *q++ = '\0';
1045    }
1046
1047  value = build_string (q - buf, buf);
1048
1049  if (wide)
1050    TREE_TYPE (value) = wchar_array_type_node;
1051  else
1052    TREE_TYPE (value) = char_array_type_node;
1053  return value;
1054}
1055
1056/* Converts a (possibly wide) character constant token into a tree.  */
1057static tree
1058lex_charconst (token)
1059     const cpp_token *token;
1060{
1061  cppchar_t result;
1062  tree type, value;
1063  unsigned int chars_seen;
1064  int unsignedp;
1065
1066  result = cpp_interpret_charconst (parse_in, token,
1067 				    &chars_seen, &unsignedp);
1068
1069  /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
1070     before possibly widening to HOST_WIDE_INT for build_int_2.  */
1071  if (unsignedp || (cppchar_signed_t) result >= 0)
1072    value = build_int_2 (result, 0);
1073  else
1074    value = build_int_2 ((cppchar_signed_t) result, -1);
1075
1076  if (token->type == CPP_WCHAR)
1077    type = wchar_type_node;
1078  /* In C, a character constant has type 'int'.
1079     In C++ 'char', but multi-char charconsts have type 'int'.  */
1080  else if ((c_language == clk_c) || chars_seen > 1)
1081    type = integer_type_node;
1082  else
1083    type = char_type_node;
1084
1085  TREE_TYPE (value) = type;
1086  return value;
1087}
1088