1133808Spjd/* Language lexer for the GNU compiler for the Java(TM) language.
2156878Spjd   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3133808Spjd   Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
4133808Spjd
5133808SpjdThis file is part of GNU CC.
6133808Spjd
7133808SpjdGNU CC is free software; you can redistribute it and/or modify
8133808Spjdit under the terms of the GNU General Public License as published by
9133808Spjdthe Free Software Foundation; either version 2, or (at your option)
10133808Spjdany later version.
11133808Spjd
12133808SpjdGNU CC is distributed in the hope that it will be useful,
13155174Spjdbut WITHOUT ANY WARRANTY; without even the implied warranty of
14133808SpjdMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15133808SpjdGNU General Public License for more details.
16133808Spjd
17133808SpjdYou should have received a copy of the GNU General Public License
18133808Spjdalong with GNU CC; see the file COPYING.  If not, write to
19133808Spjdthe Free Software Foundation, 59 Temple Place - Suite 330,
20133808SpjdBoston, MA 02111-1307, USA.
21133808Spjd
22133808SpjdJava and all Java-based marks are trademarks or registered trademarks
23133808Spjdof Sun Microsystems, Inc. in the United States and other countries.
24133808SpjdThe Free Software Foundation is independent of Sun Microsystems, Inc.  */
25133808Spjd
/* This file defines java_lex (yylex), which reads a Java source file
   possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found except comments,
   white space and line terminators.  When necessary, it also fills
   in the java_lval (yylval) union.  It is implemented to be called by
   a re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
36133808Spjd
37133808Spjd#include "keyword.h"
38133808Spjd#include "flags.h"
39133808Spjd#include "chartables.h"
40133808Spjd
41133808Spjd/* Function declarations.  */
42133808Spjdstatic char *java_sprint_unicode PARAMS ((struct java_line *, int));
43133808Spjdstatic void java_unicode_2_utf8 PARAMS ((unicode_t));
44133808Spjdstatic void java_lex_error PARAMS ((const char *, int));
45133808Spjd#ifndef JC1_LITE
46133808Spjdstatic int java_is_eol PARAMS ((FILE *, int));
47133808Spjdstatic tree build_wfl_node PARAMS ((tree));
48133808Spjd#endif
49133808Spjdstatic void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50133808Spjdstatic int java_parse_escape_sequence PARAMS ((void));
51133808Spjdstatic int java_start_char_p PARAMS ((unicode_t));
52133808Spjdstatic int java_part_char_p PARAMS ((unicode_t));
53133808Spjdstatic int java_parse_doc_section PARAMS ((int));
54156612Spjdstatic void java_parse_end_comment PARAMS ((int));
55133808Spjdstatic int java_get_unicode PARAMS ((void));
56133808Spjdstatic int java_read_unicode PARAMS ((java_lexer *, int *));
57133808Spjdstatic int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
58133808Spjd							     int *));
59133808Spjdstatic void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60133808Spjdstatic int java_read_char PARAMS ((java_lexer *));
61133808Spjdstatic void java_allocate_new_line PARAMS ((void));
62133808Spjdstatic void java_unget_unicode PARAMS ((void));
63156612Spjdstatic unicode_t java_sneak_unicode PARAMS ((void));
64156612Spjd#ifndef JC1_LITE
65133808Spjdstatic int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
66133808Spjd#endif
67133808Spjd
68156612Spjdjava_lexer *java_new_lexer PARAMS ((FILE *, const char *));
69133808Spjd#ifndef JC1_LITE
70133808Spjdstatic void error_if_numeric_overflow PARAMS ((tree));
71133808Spjd#endif
72133808Spjd
#ifdef HAVE_ICONV
/* Becomes nonzero once the probe that computes `need_byteswap' has
   been attempted, so that the probe runs at most once.  */
static int byteswap_init;

/* Result of that probe.  Some versions of iconv() (e.g., glibc 2.1.3)
   return UCS-2 in big-endian order rather than native-endian order.
   We find out by converting a known character once and looking at
   what comes back; nonzero here means converted output must have its
   bytes swapped.  */
static int need_byteswap;
#endif
83133808Spjd
84133808Spjdvoid
85133808Spjdjava_init_lex (finput, encoding)
86133808Spjd     FILE *finput;
87133808Spjd     const char *encoding;
88133808Spjd{
89133808Spjd#ifndef JC1_LITE
90133808Spjd  int java_lang_imported = 0;
91133808Spjd
92133808Spjd  if (!java_lang_id)
93133808Spjd    java_lang_id = get_identifier ("java.lang");
94133808Spjd  if (!inst_id)
95133808Spjd    inst_id = get_identifier ("inst$");
96133808Spjd  if (!wpv_id)
97133808Spjd    wpv_id = get_identifier ("write_parm_value$");
98133808Spjd
99133808Spjd  if (!java_lang_imported)
100133808Spjd    {
101163888Spjd      tree node = build_tree_list
102134168Spjd	(build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
103163888Spjd      read_import_dir (TREE_PURPOSE (node));
104134168Spjd      TREE_CHAIN (node) = ctxp->import_demand_list;
105134168Spjd      ctxp->import_demand_list = node;
106133808Spjd      java_lang_imported = 1;
107133808Spjd    }
108133808Spjd
109144142Spjd  if (!wfl_operator)
110144142Spjd    wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
111144142Spjd  if (!label_id)
112144142Spjd    label_id = get_identifier ("$L");
113133808Spjd  if (!wfl_append)
114133808Spjd    wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
115133808Spjd  if (!wfl_string_buffer)
116133808Spjd    wfl_string_buffer =
117133808Spjd      build_expr_wfl (get_identifier (flag_emit_class_files
118133808Spjd				      ? "java.lang.StringBuffer"
119133808Spjd				      : "gnu.gcj.runtime.StringBuffer"),
120133808Spjd		      NULL, 0, 0);
121133808Spjd  if (!wfl_to_string)
122133808Spjd    wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
123133808Spjd
124133808Spjd  CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
125133808Spjd    CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
126133808Spjd
127133808Spjd  memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
128133808Spjd  memset (current_jcf, 0, sizeof (JCF));
129133808Spjd  ctxp->current_parsed_class = NULL;
130133808Spjd  ctxp->package = NULL_TREE;
131133808Spjd#endif
132163888Spjd
133163888Spjd  ctxp->filename = input_filename;
134163888Spjd  ctxp->lineno = lineno = 0;
135163888Spjd  ctxp->p_line = NULL;
136163888Spjd  ctxp->c_line = NULL;
137163888Spjd  ctxp->java_error_flag = 0;
138163888Spjd  ctxp->lexer = java_new_lexer (finput, encoding);
139163888Spjd}
140163888Spjd
141163888Spjdstatic char *
142163888Spjdjava_sprint_unicode (line, i)
143163888Spjd    struct java_line *line;
144163888Spjd    int i;
145163888Spjd{
146163888Spjd  static char buffer [10];
147134124Spjd  if (line->unicode_escape_p [i] || line->line [i] > 128)
148134124Spjd    sprintf (buffer, "\\u%04x", line->line [i]);
149134124Spjd  else
150134124Spjd    {
151134124Spjd      buffer [0] = line->line [i];
152134124Spjd      buffer [1] = '\0';
153134124Spjd    }
154134124Spjd  return buffer;
155134124Spjd}
156134124Spjd
157134124Spjdstatic unicode_t
158134124Spjdjava_sneak_unicode ()
159134124Spjd{
160134124Spjd  return (ctxp->c_line->line [ctxp->c_line->current]);
161134124Spjd}
162134124Spjd
163134168Spjdstatic void
164134168Spjdjava_unget_unicode ()
165134168Spjd{
166134168Spjd  if (!ctxp->c_line->current)
167134168Spjd    /* Can't unget unicode.  */
168134168Spjd    abort ();
169134168Spjd
170134168Spjd  ctxp->c_line->current--;
171134168Spjd  ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
172134168Spjd}
173134168Spjd
174134168Spjdstatic void
175134168Spjdjava_allocate_new_line ()
176134168Spjd{
177134168Spjd  unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
178163888Spjd  char ahead_escape_p = (ctxp->c_line ?
179163888Spjd			 ctxp->c_line->unicode_escape_ahead_p : 0);
180134124Spjd
181134124Spjd  if (ctxp->c_line && !ctxp->c_line->white_space_only)
182134124Spjd    {
183156612Spjd      if (ctxp->p_line)
184156612Spjd	{
185156612Spjd	  free (ctxp->p_line->unicode_escape_p);
186156612Spjd	  free (ctxp->p_line->line);
187156612Spjd	  free (ctxp->p_line);
188156612Spjd	}
189156612Spjd      ctxp->p_line = ctxp->c_line;
190156612Spjd      ctxp->c_line = NULL;		/* Reallocated.  */
191156612Spjd    }
192156612Spjd
193156612Spjd  if (!ctxp->c_line)
194156612Spjd    {
195156612Spjd      ctxp->c_line = xmalloc (sizeof (struct java_line));
196156612Spjd      ctxp->c_line->max = JAVA_LINE_MAX;
197156612Spjd      ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
198133808Spjd      ctxp->c_line->unicode_escape_p =
199133808Spjd	xmalloc (sizeof (char)*ctxp->c_line->max);
200133808Spjd      ctxp->c_line->white_space_only = 0;
201133808Spjd    }
202133808Spjd
203133808Spjd  ctxp->c_line->line [0] = ctxp->c_line->size = 0;
204133808Spjd  ctxp->c_line->char_col = ctxp->c_line->current = 0;
205133808Spjd  if (ahead)
206133808Spjd    {
207163888Spjd      ctxp->c_line->line [ctxp->c_line->size] = ahead;
208163888Spjd      ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
209163888Spjd      ctxp->c_line->size++;
210163888Spjd    }
211163888Spjd  ctxp->c_line->ahead [0] = 0;
212163888Spjd  ctxp->c_line->unicode_escape_ahead_p = 0;
213163888Spjd  ctxp->c_line->lineno = ++lineno;
214163888Spjd  ctxp->c_line->white_space_only = 1;
215163888Spjd}
216134168Spjd
217134168Spjd/* Create a new lexer object.  */
218134168Spjd
219134168Spjdjava_lexer *
220134168Spjdjava_new_lexer (finput, encoding)
221134168Spjd     FILE *finput;
222134168Spjd     const char *encoding;
223134124Spjd{
224134124Spjd  java_lexer *lex = xmalloc (sizeof (java_lexer));
225134124Spjd  int enc_error = 0;
226134124Spjd
227134124Spjd  lex->finput = finput;
228134124Spjd  lex->bs_count = 0;
229134124Spjd  lex->unget_value = 0;
230134168Spjd  lex->hit_eof = 0;
231134168Spjd
232134168Spjd#ifdef HAVE_ICONV
233134168Spjd  lex->handle = iconv_open ("UCS-2", encoding);
234134168Spjd  if (lex->handle != (iconv_t) -1)
235134168Spjd    {
236134168Spjd      lex->first = -1;
237133808Spjd      lex->last = -1;
238133808Spjd      lex->out_first = -1;
239133808Spjd      lex->out_last = -1;
240133808Spjd      lex->read_anything = 0;
241133808Spjd      lex->use_fallback = 0;
242133808Spjd
243163888Spjd      /* Work around broken iconv() implementations by doing checking at
244163888Spjd	 runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
245133808Spjd	 then all UCS-2 encoders will be broken.  Perhaps not a valid
246133808Spjd	 assumption.  */
247133808Spjd      if (! byteswap_init)
248133808Spjd	{
249133808Spjd	  iconv_t handle;
250133808Spjd
251133808Spjd	  byteswap_init = 1;
252133808Spjd
253133808Spjd	  handle = iconv_open ("UCS-2", "UTF-8");
254133808Spjd	  if (handle != (iconv_t) -1)
255133808Spjd	    {
256133808Spjd	      unicode_t result;
257133808Spjd	      unsigned char in[3];
258156612Spjd	      char *inp, *outp;
259133808Spjd	      size_t inc, outc, r;
260133808Spjd
261133808Spjd	      /* This is the UTF-8 encoding of \ufeff.  */
262133808Spjd	      in[0] = 0xef;
263133808Spjd	      in[1] = 0xbb;
264139671Spjd	      in[2] = 0xbf;
265133808Spjd
266133808Spjd	      inp = in;
267139671Spjd	      inc = 3;
268133808Spjd	      outp = (char *) &result;
269139671Spjd	      outc = 2;
270133808Spjd
271133808Spjd	      r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
272133808Spjd			 &outp, &outc);
273133808Spjd	      iconv_close (handle);
274133808Spjd	      /* Conversion must be complete for us to use the result.  */
275133808Spjd	      if (r != (size_t) -1 && inc == 0 && outc == 0)
276133808Spjd		need_byteswap = (result != 0xfeff);
277133808Spjd	    }
278133808Spjd	}
279133808Spjd
280133808Spjd      lex->byte_swap = need_byteswap;
281133808Spjd    }
282133808Spjd  else
283133808Spjd#endif /* HAVE_ICONV */
284133808Spjd    {
285133808Spjd      /* If iconv failed, use the internal decoder if the default
286133808Spjd	 encoding was requested.  This code is used on platforms where
287133808Spjd	 iconv exists but is insufficient for our needs.  For
288133808Spjd	 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
289133808Spjd
290133808Spjd	 On Solaris the default encoding, as returned by nl_langinfo(),
291133808Spjd	 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
292133808Spjd	 understand that.  We work around that by pretending
293156612Spjd	 `646' to be the same as UTF-8.   */
294133808Spjd      if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
295133808Spjd	enc_error = 1;
296133808Spjd#ifdef HAVE_ICONV
297133808Spjd      else
298133808Spjd	lex->use_fallback = 1;
299156612Spjd#endif /* HAVE_ICONV */
300133808Spjd    }
301133808Spjd
302133808Spjd  if (enc_error)
303133808Spjd    fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
304162350Spjd
305156612Spjd  return lex;
306133808Spjd}
307133808Spjd
308133808Spjdvoid
309133808Spjdjava_destroy_lexer (lex)
310133808Spjd     java_lexer *lex;
311133808Spjd{
312133808Spjd#ifdef HAVE_ICONV
313133808Spjd  if (! lex->use_fallback)
314133808Spjd    iconv_close (lex->handle);
315133808Spjd#endif
316139671Spjd  free (lex);
317156612Spjd}
318139671Spjd
319156612Spjdstatic int
320133808Spjdjava_read_char (lex)
321133808Spjd     java_lexer *lex;
322139671Spjd{
323139671Spjd  if (lex->unget_value)
324156612Spjd    {
325139671Spjd      unicode_t r = lex->unget_value;
326139671Spjd      lex->unget_value = 0;
327139671Spjd      return r;
328156612Spjd    }
329139671Spjd
330156612Spjd#ifdef HAVE_ICONV
331133808Spjd  if (! lex->use_fallback)
332133808Spjd    {
333133808Spjd      size_t ir, inbytesleft, in_save, out_count, out_save;
334133808Spjd      char *inp, *outp;
335133808Spjd      unicode_t result;
336133808Spjd
337133808Spjd      /* If there is data which has already been converted, use it.  */
338133808Spjd      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
339133808Spjd	{
340133808Spjd	  lex->out_first = 0;
341157630Spjd	  lex->out_last = 0;
342133808Spjd
343133808Spjd	  while (1)
344133808Spjd	    {
345133808Spjd	      /* See if we need to read more data.  If FIRST == 0 then
346133808Spjd		 the previous conversion attempt ended in the middle of
347133808Spjd		 a character at the end of the buffer.  Otherwise we
348133808Spjd		 only have to read if the buffer is empty.  */
349133808Spjd	      if (lex->first == 0 || lex->first >= lex->last)
350133808Spjd		{
351133808Spjd		  int r;
352133808Spjd
353133808Spjd		  if (lex->first >= lex->last)
354133808Spjd		    {
355133808Spjd		      lex->first = 0;
356133808Spjd		      lex->last = 0;
357157630Spjd		    }
358157630Spjd		  if (feof (lex->finput))
359157630Spjd		    return UEOF;
360157630Spjd		  r = fread (&lex->buffer[lex->last], 1,
361133808Spjd			     sizeof (lex->buffer) - lex->last,
362133808Spjd			     lex->finput);
363133808Spjd		  lex->last += r;
364133808Spjd		}
365133808Spjd
366133808Spjd	      inbytesleft = lex->last - lex->first;
367133808Spjd	      out_count = sizeof (lex->out_buffer) - lex->out_last;
368133808Spjd
369133808Spjd	      if (inbytesleft == 0)
370133808Spjd		{
371133808Spjd		  /* We've tried to read and there is nothing left.  */
372133808Spjd		  return UEOF;
373133808Spjd		}
374157630Spjd
375157630Spjd	      in_save = inbytesleft;
376133808Spjd	      out_save = out_count;
377133808Spjd	      inp = &lex->buffer[lex->first];
378133808Spjd	      outp = &lex->out_buffer[lex->out_last];
379156612Spjd	      ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
380133808Spjd			  &inbytesleft, &outp, &out_count);
381133808Spjd
382156612Spjd	      /* If we haven't read any bytes, then look to see if we
383133808Spjd		 have read a BOM.  */
384133808Spjd	      if (! lex->read_anything && out_save - out_count >= 2)
385133808Spjd		{
386133808Spjd		  unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
387133808Spjd		  if (uc == 0xfeff)
388133808Spjd		    {
389133808Spjd		      lex->byte_swap = 0;
390133808Spjd		      lex->out_first += 2;
391133808Spjd		    }
392133808Spjd		  else if (uc == 0xfffe)
393133808Spjd		    {
394133808Spjd		      lex->byte_swap = 1;
395133808Spjd		      lex->out_first += 2;
396133808Spjd		    }
397133808Spjd		  lex->read_anything = 1;
398133808Spjd		}
399133808Spjd
400133808Spjd	      if (lex->byte_swap)
401133808Spjd		{
402133808Spjd		  unsigned int i;
403133808Spjd		  for (i = 0; i < out_save - out_count; i += 2)
404133808Spjd		    {
405134420Spjd		      char t = lex->out_buffer[lex->out_last + i];
406133808Spjd		      lex->out_buffer[lex->out_last + i]
407245456Smav			= lex->out_buffer[lex->out_last + i + 1];
408133808Spjd		      lex->out_buffer[lex->out_last + i + 1] = t;
409133808Spjd		    }
410133808Spjd		}
411133808Spjd
412133808Spjd	      lex->first += in_save - inbytesleft;
413133808Spjd	      lex->out_last += out_save - out_count;
414133808Spjd
415133808Spjd	      /* If we converted anything at all, move along.  */
416133808Spjd	      if (out_count != out_save)
417133808Spjd		break;
418156612Spjd
419156612Spjd	      if (ir == (size_t) -1)
420156612Spjd		{
421133808Spjd		  if (errno == EINVAL)
422133808Spjd		    {
423156612Spjd		      /* This is ok.  This means that the end of our buffer
424156612Spjd			 is in the middle of a character sequence.  We just
425156612Spjd			 move the valid part of the buffer to the beginning
426133808Spjd			 to force a read.  */
427133808Spjd		      memmove (&lex->buffer[0], &lex->buffer[lex->first],
428245456Smav			       lex->last - lex->first);
429245456Smav		      lex->last -= lex->first;
430245456Smav		      lex->first = 0;
431245456Smav		    }
432160330Spjd		  else
433160330Spjd		    {
434156612Spjd		      /* A more serious error.  */
435156612Spjd		      java_lex_error ("unrecognized character in input stream",
436156612Spjd				      0);
437156612Spjd		      return UEOF;
438156612Spjd		    }
439156612Spjd		}
440156612Spjd	    }
441156612Spjd	}
442156612Spjd
443156612Spjd      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
444156612Spjd	{
445156612Spjd	  /* Don't have any data.  */
446156612Spjd	  return UEOF;
447156612Spjd	}
448156612Spjd
449156612Spjd      /* Success.  */
450156612Spjd      result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
451156612Spjd      lex->out_first += 2;
452156612Spjd      return result;
453156612Spjd    }
454156612Spjd  else
455156612Spjd#endif /* HAVE_ICONV */
456156612Spjd    {
457156612Spjd      int c, c1, c2;
458156612Spjd      c = getc (lex->finput);
459156612Spjd
460156612Spjd      if (c == EOF)
461156612Spjd	return UEOF;
462156612Spjd      if (c < 128)
463156612Spjd	return (unicode_t) c;
464156612Spjd      else
465156612Spjd	{
466156612Spjd	  if ((c & 0xe0) == 0xc0)
467245456Smav	    {
468245456Smav	      c1 = getc (lex->finput);
469245456Smav	      if ((c1 & 0xc0) == 0x80)
470245456Smav		{
471245456Smav		  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
472245456Smav		  /* Check for valid 2-byte characters.  We explicitly
473245456Smav		     allow \0 because this encoding is common in the
474245456Smav		     Java world.  */
475245456Smav		  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
476245456Smav		    return r;
477245456Smav		}
478245456Smav	    }
479245456Smav	  else if ((c & 0xf0) == 0xe0)
480245456Smav	    {
481245456Smav	      c1 = getc (lex->finput);
482245456Smav	      if ((c1 & 0xc0) == 0x80)
483245456Smav		{
484245456Smav		  c2 = getc (lex->finput);
485245456Smav		  if ((c2 & 0xc0) == 0x80)
486245456Smav		    {
487245456Smav		      unicode_t r =  (unicode_t)(((c & 0xf) << 12) +
488245456Smav						 (( c1 & 0x3f) << 6)
489245456Smav						 + (c2 & 0x3f));
490245456Smav		      /* Check for valid 3-byte characters.
491133808Spjd			 Don't allow surrogate, \ufffe or \uffff.  */
492133808Spjd		      if (IN_RANGE (r, 0x800, 0xffff)
493156612Spjd			  && ! IN_RANGE (r, 0xd800, 0xdfff)
494133808Spjd			  && r != 0xfffe && r != 0xffff)
495133808Spjd			return r;
496133808Spjd		    }
497156612Spjd		}
498133808Spjd	    }
499134420Spjd
500134420Spjd	  /* We simply don't support invalid characters.  We also
501156612Spjd	     don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
502134420Spjd	     cannot be valid Java characters.  */
503156612Spjd	  java_lex_error ("malformed UTF-8 character", 0);
504134420Spjd	}
505134420Spjd    }
506134420Spjd
507134420Spjd  /* We only get here on error.  */
508134420Spjd  return UEOF;
509134420Spjd}
510134420Spjd
511133808Spjdstatic void
512156612Spjdjava_store_unicode (l, c, unicode_escape_p)
513133808Spjd    struct java_line *l;
514163886Spjd    unicode_t c;
515133808Spjd    int unicode_escape_p;
516163886Spjd{
517163886Spjd  if (l->size == l->max)
518163886Spjd    {
519156527Spjd      l->max += JAVA_LINE_MAX;
520133808Spjd      l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
521133808Spjd      l->unicode_escape_p = xrealloc (l->unicode_escape_p,
522133808Spjd				      sizeof (char)*l->max);
523133808Spjd    }
524133808Spjd  l->line [l->size] = c;
525133808Spjd  l->unicode_escape_p [l->size++] = unicode_escape_p;
526133808Spjd}
527133808Spjd
528156612Spjdstatic int
529146118Spjdjava_read_unicode (lex, unicode_escape_p)
530146118Spjd     java_lexer *lex;
531146118Spjd     int *unicode_escape_p;
532146118Spjd{
533146118Spjd  int c;
534146117Spjd
535156612Spjd  c = java_read_char (lex);
536133808Spjd  *unicode_escape_p = 0;
537133808Spjd
538133808Spjd  if (c != '\\')
539133808Spjd    {
540133808Spjd      lex->bs_count = 0;
541133808Spjd      return c;
542133808Spjd    }
543133808Spjd
544133808Spjd  ++lex->bs_count;
545133808Spjd  if ((lex->bs_count) % 2 == 1)
546133808Spjd    {
547133808Spjd      /* Odd number of \ seen.  */
548133808Spjd      c = java_read_char (lex);
549133808Spjd      if (c == 'u')
550133808Spjd        {
551133808Spjd	  unicode_t unicode = 0;
552133808Spjd	  int shift = 12;
553133808Spjd
554133808Spjd	  /* Recognize any number of `u's in \u.  */
555133808Spjd	  while ((c = java_read_char (lex)) == 'u')
556156612Spjd	    ;
557156612Spjd
558156612Spjd	  shift = 12;
559156612Spjd	  do
560156612Spjd	    {
561133808Spjd	      if (c == UEOF)
562133808Spjd		{
563133808Spjd		  java_lex_error ("prematurely terminated \\u sequence", 0);
564133808Spjd		  return UEOF;
565133808Spjd		}
566133808Spjd
567133808Spjd	      if (hex_p (c))
568133808Spjd		unicode |= (unicode_t)(hex_value (c) << shift);
569133808Spjd	      else
570133808Spjd		{
571133808Spjd		  java_lex_error ("non-hex digit in \\u sequence", 0);
572156612Spjd		  break;
573133808Spjd		}
574133808Spjd
575133808Spjd	      c = java_read_char (lex);
576133808Spjd	      shift -= 4;
577133808Spjd	    }
578133808Spjd	  while (shift >= 0);
579133808Spjd
580133808Spjd	  if (c != UEOF)
581133808Spjd	    lex->unget_value = c;
582133808Spjd
583133808Spjd	  lex->bs_count = 0;
584133808Spjd	  *unicode_escape_p = 1;
585162350Spjd	  return unicode;
586133808Spjd	}
587156612Spjd      lex->unget_value = c;
588133808Spjd    }
589133808Spjd  return (unicode_t) '\\';
590133808Spjd}
591133808Spjd
592133808Spjdstatic int
593133808Spjdjava_read_unicode_collapsing_terminators (lex, unicode_escape_p)
594133808Spjd     java_lexer *lex;
595139295Spjd     int *unicode_escape_p;
596139295Spjd{
597139295Spjd  int c = java_read_unicode (lex, unicode_escape_p);
598156612Spjd
599133808Spjd  if (c == '\r')
600133808Spjd    {
601133808Spjd      /* We have to read ahead to see if we got \r\n.  In that case we
602133808Spjd	 return a single line terminator.  */
603133808Spjd      int dummy;
604162350Spjd      c = java_read_unicode (lex, &dummy);
605156612Spjd      if (c != '\n' && c != UEOF)
606133808Spjd	lex->unget_value = c;
607156612Spjd      /* In either case we must return a newline.  */
608133808Spjd      c = '\n';
609133808Spjd    }
610133808Spjd
611133808Spjd  return c;
612133808Spjd}
613133808Spjd
614133808Spjdstatic int
615133808Spjdjava_get_unicode ()
616133808Spjd{
617133808Spjd  /* It's time to read a line when...  */
618133808Spjd  if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
619133808Spjd    {
620133808Spjd      int c;
621133808Spjd      int found_chars = 0;
622133808Spjd
623133808Spjd      if (ctxp->lexer->hit_eof)
624133808Spjd	return UEOF;
625133808Spjd
626133808Spjd      java_allocate_new_line ();
627156612Spjd      if (ctxp->c_line->line[0] != '\n')
628133808Spjd	{
629133808Spjd	  for (;;)
630133808Spjd	    {
631133808Spjd	      int unicode_escape_p;
632133808Spjd	      c = java_read_unicode_collapsing_terminators (ctxp->lexer,
633133808Spjd							    &unicode_escape_p);
634133808Spjd	      if (c != UEOF)
635133808Spjd		{
636133808Spjd		  found_chars = 1;
637133808Spjd		  java_store_unicode (ctxp->c_line, c, unicode_escape_p);
638133808Spjd		  if (ctxp->c_line->white_space_only
639133808Spjd		      && !JAVA_WHITE_SPACE_P (c)
640156612Spjd		      && c != '\n')
641133808Spjd		    ctxp->c_line->white_space_only = 0;
642		}
643	      if ((c == '\n') || (c == UEOF))
644		break;
645	    }
646
647	  if (c == UEOF && ! found_chars)
648	    {
649	      ctxp->lexer->hit_eof = 1;
650	      return UEOF;
651	    }
652	}
653    }
654  ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
655  JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
656  return ctxp->c_line->line [ctxp->c_line->current++];
657}
658
659/* Parse the end of a C style comment.
660 * C is the first character following the '/' and '*'.  */
661static void
662java_parse_end_comment (c)
663     int c;
664{
665  for ( ;; c = java_get_unicode ())
666    {
667      switch (c)
668	{
669	case UEOF:
670	  java_lex_error ("Comment not terminated at end of input", 0);
671	  return;
672	case '*':
673	  switch (c = java_get_unicode ())
674	    {
675	    case UEOF:
676	      java_lex_error ("Comment not terminated at end of input", 0);
677	      return;
678	    case '/':
679	      return;
680	    case '*':	/* Reparse only '*'.  */
681	      java_unget_unicode ();
682	    }
683	}
684    }
685}
686
687/* Parse the documentation section. Keywords must be at the beginning
688   of a documentation comment line (ignoring white space and any `*'
689   character). Parsed keyword(s): @DEPRECATED.  */
690
691static int
692java_parse_doc_section (c)
693     int c;
694{
695  int valid_tag = 0, seen_star = 0;
696
697  while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
698    {
699      switch (c)
700	{
701	case '*':
702	  seen_star = 1;
703	  break;
704	case '\n': /* ULT */
705	  valid_tag = 1;
706	default:
707	  seen_star = 0;
708	}
709      c = java_get_unicode();
710    }
711
712  if (c == UEOF)
713    java_lex_error ("Comment not terminated at end of input", 0);
714
715  if (seen_star && (c == '/'))
716    return 1;			/* Goto step1 in caller.  */
717
718  /* We're parsing `@deprecated'.  */
719  if (valid_tag && (c == '@'))
720    {
721      char tag [11];
722      int  tag_index = 0;
723
724      while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
725	{
726	  c = java_get_unicode ();
727	  tag [tag_index++] = c;
728	}
729
730      if (c == UEOF)
731	java_lex_error ("Comment not terminated at end of input", 0);
732      tag [tag_index] = '\0';
733
734      if (!strcmp (tag, "deprecated"))
735	ctxp->deprecated = 1;
736    }
737  java_unget_unicode ();
738  return 0;
739}
740
741/* Return true if C is a valid start character for a Java identifier.
742   This is only called if C >= 128 -- smaller values are handled
743   inline.  However, this function handles all values anyway.  */
744static int
745java_start_char_p (c)
746     unicode_t c;
747{
748  unsigned int hi = c / 256;
749  const char *const page = type_table[hi];
750  unsigned long val = (unsigned long) page;
751  int flags;
752
753  if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
754    flags = page[c & 255];
755  else
756    flags = val;
757
758  return flags & LETTER_START;
759}
760
761/* Return true if C is a valid part character for a Java identifier.
762   This is only called if C >= 128 -- smaller values are handled
763   inline.  However, this function handles all values anyway.  */
764static int
765java_part_char_p (c)
766     unicode_t c;
767{
768  unsigned int hi = c / 256;
769  const char *const page = type_table[hi];
770  unsigned long val = (unsigned long) page;
771  int flags;
772
773  if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
774    flags = page[c & 255];
775  else
776    flags = val;
777
778  return flags & LETTER_PART;
779}
780
781static int
782java_parse_escape_sequence ()
783{
784  unicode_t char_lit;
785  int c;
786
787  switch (c = java_get_unicode ())
788    {
789    case 'b':
790      return (unicode_t)0x8;
791    case 't':
792      return (unicode_t)0x9;
793    case 'n':
794      return (unicode_t)0xa;
795    case 'f':
796      return (unicode_t)0xc;
797    case 'r':
798      return (unicode_t)0xd;
799    case '"':
800      return (unicode_t)0x22;
801    case '\'':
802      return (unicode_t)0x27;
803    case '\\':
804      return (unicode_t)0x5c;
805    case '0': case '1': case '2': case '3': case '4':
806    case '5': case '6': case '7':
807      {
808	int octal_escape[3];
809	int octal_escape_index = 0;
810	int max = 3;
811	int i, shift;
812
813	for (; octal_escape_index < max && RANGE (c, '0', '7');
814	     c = java_get_unicode ())
815	  {
816	    if (octal_escape_index == 0 && c > '3')
817	      {
818		/* According to the grammar, `\477' has a well-defined
819		   meaning -- it is `\47' followed by `7'.  */
820		--max;
821	      }
822	    octal_escape [octal_escape_index++] = c;
823	  }
824
825	java_unget_unicode ();
826
827	for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
828	     i < octal_escape_index; i++, shift -= 3)
829	  char_lit |= (octal_escape [i] - '0') << shift;
830
831	return char_lit;
832      }
833    default:
834      java_lex_error ("Invalid character in escape sequence", 0);
835      return JAVA_CHAR_ERROR;
836    }
837}
838
839#ifndef JC1_LITE
840#define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
841
842/* Subroutine of java_lex: converts floating-point literals to tree
843   nodes.  LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
844   store the result.  FFLAG indicates whether the literal was tagged
845   with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
846   is the line number on which to report any error.  */
847
848static void java_perform_atof	PARAMS ((YYSTYPE *, char *, int, int));
849
850static void
851java_perform_atof (java_lval, literal_token, fflag, number_beginning)
852     YYSTYPE *java_lval;
853     char *literal_token;
854     int fflag;
855     int number_beginning;
856{
857  REAL_VALUE_TYPE value;
858  tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
859
860  SET_REAL_VALUE_ATOF (value,
861		       REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
862
863  if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
864    {
865      JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
866      value = DCONST0;
867    }
868  else if (IS_ZERO (value))
869    {
870      /* We check to see if the value is really 0 or if we've found an
871	 underflow.  We do this in the most primitive imaginable way.  */
872      int really_zero = 1;
873      char *p = literal_token;
874      if (*p == '-')
875	++p;
876      while (*p && *p != 'e' && *p != 'E')
877	{
878	  if (*p != '0' && *p != '.')
879	    {
880	      really_zero = 0;
881	      break;
882	    }
883	  ++p;
884	}
885      if (! really_zero)
886	{
887	  int i = ctxp->c_line->current;
888	  ctxp->c_line->current = number_beginning;
889	  java_lex_error ("Floating point literal underflow", 0);
890	  ctxp->c_line->current = i;
891	}
892    }
893
894  SET_LVAL_NODE_TYPE (build_real (type, value), type);
895}
896#endif
897
898static int yylex		PARAMS ((YYSTYPE *));
899
900static int
901#ifdef JC1_LITE
902yylex (java_lval)
903#else
904java_lex (java_lval)
905#endif
906     YYSTYPE *java_lval;
907{
908  int c;
909  unicode_t first_unicode;
910  int ascii_index, all_ascii;
911  char *string;
912
913  /* Translation of the Unicode escape in the raw stream of Unicode
914     characters. Takes care of line terminator.  */
915 step1:
916  /* Skip white spaces: SP, TAB and FF or ULT.  */
917  for (c = java_get_unicode ();
918       c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
919    if (c == '\n')
920      {
921	ctxp->elc.line = ctxp->c_line->lineno;
922	ctxp->elc.col  = ctxp->c_line->char_col-2;
923      }
924
925  ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
926
927  if (c == 0x1a)		/* CTRL-Z.  */
928    {
929      if ((c = java_get_unicode ()) == UEOF)
930	return 0;		/* Ok here.  */
931      else
932	java_unget_unicode ();	/* Caught later, at the end of the
933                                   function.  */
934    }
935  /* Handle EOF here.  */
936  if (c == UEOF)	/* Should probably do something here...  */
937    return 0;
938
939  /* Take care of eventual comments.  */
940  if (c == '/')
941    {
942      switch (c = java_get_unicode ())
943	{
944	case '/':
945	  for (;;)
946	    {
947	      c = java_get_unicode ();
948	      if (c == UEOF)
949		{
950		  /* It is ok to end a `//' comment with EOF, unless
951		     we're being pedantic.  */
952		  if (pedantic)
953		    java_lex_error ("Comment not terminated at end of input",
954				    0);
955		  return 0;
956		}
957	      if (c == '\n')	/* ULT */
958		goto step1;
959	    }
960	  break;
961
962	case '*':
963	  if ((c = java_get_unicode ()) == '*')
964	    {
965	      if ((c = java_get_unicode ()) == '/')
966		goto step1;	/* Empty documentation comment.  */
967	      else if (java_parse_doc_section (c))
968		goto step1;
969	    }
970
971	  java_parse_end_comment ((c = java_get_unicode ()));
972	  goto step1;
973	  break;
974	default:
975	  java_unget_unicode ();
976	  c = '/';
977	  break;
978	}
979    }
980
981  ctxp->elc.line = ctxp->c_line->lineno;
982  ctxp->elc.prev_col = ctxp->elc.col;
983  ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
984  if (ctxp->elc.col < 0)
985    abort ();
986
987  /* Numeric literals.  */
988  if (JAVA_ASCII_DIGIT (c) || (c == '.'))
989    {
990      /* This section of code is borrowed from gcc/c-lex.c.  */
991#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
992      int parts[TOTAL_PARTS];
993      HOST_WIDE_INT high, low;
994      /* End borrowed section.  */
995      char literal_token [256];
996      int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
997      int  found_hex_digits = 0, found_non_octal_digits = 0;
998      int  i;
999#ifndef JC1_LITE
1000      int  number_beginning = ctxp->c_line->current;
1001      tree value;
1002#endif
1003
1004      /* We might have a . separator instead of a FP like .[0-9]*.  */
1005      if (c == '.')
1006	{
1007	  unicode_t peep = java_sneak_unicode ();
1008
1009	  if (!JAVA_ASCII_DIGIT (peep))
1010	    {
1011	      JAVA_LEX_SEP('.');
1012	      BUILD_OPERATOR (DOT_TK);
1013	    }
1014	}
1015
1016      for (i = 0; i < TOTAL_PARTS; i++)
1017	parts [i] = 0;
1018
1019      if (c == '0')
1020	{
1021	  c = java_get_unicode ();
1022	  if (c == 'x' || c == 'X')
1023	    {
1024	      radix = 16;
1025	      c = java_get_unicode ();
1026	    }
1027	  else if (JAVA_ASCII_DIGIT (c))
1028	    radix = 8;
1029	  else if (c == '.' || c == 'e' || c =='E')
1030	    {
1031	      /* Push the '.', 'e', or 'E' back and prepare for a FP
1032		 parsing...  */
1033	      java_unget_unicode ();
1034	      c = '0';
1035	    }
1036	  else
1037	    {
1038	      /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}.  */
1039	      JAVA_LEX_LIT ("0", 10);
1040              switch (c)
1041		{
1042		case 'L': case 'l':
1043		  SET_LVAL_NODE (long_zero_node);
1044		  return (INT_LIT_TK);
1045		case 'f': case 'F':
1046		  SET_LVAL_NODE (float_zero_node);
1047		  return (FP_LIT_TK);
1048		case 'd': case 'D':
1049		  SET_LVAL_NODE (double_zero_node);
1050		  return (FP_LIT_TK);
1051		default:
1052		  java_unget_unicode ();
1053		  SET_LVAL_NODE (integer_zero_node);
1054		  return (INT_LIT_TK);
1055		}
1056	    }
1057	}
1058      /* Parse the first part of the literal, until we find something
1059	 which is not a number.  */
1060      while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1061	     JAVA_ASCII_DIGIT (c))
1062	{
1063	  /* We store in a string (in case it turns out to be a FP) and in
1064	     PARTS if we have to process a integer literal.  */
1065	  int numeric = hex_value (c);
1066	  int count;
1067
1068	  /* Remember when we find a valid hexadecimal digit.  */
1069	  if (radix == 16)
1070	    found_hex_digits = 1;
1071          /* Remember when we find an invalid octal digit.  */
1072          else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1073            found_non_octal_digits = 1;
1074
1075	  literal_token [literal_index++] = c;
1076	  /* This section of code if borrowed from gcc/c-lex.c.  */
1077	  for (count = 0; count < TOTAL_PARTS; count++)
1078	    {
1079	      parts[count] *= radix;
1080	      if (count)
1081		{
1082		  parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
1083		  parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1084		}
1085	      else
1086		parts[0] += numeric;
1087	    }
1088	  if (parts [TOTAL_PARTS-1] != 0)
1089	    overflow = 1;
1090	  /* End borrowed section.  */
1091	  c = java_get_unicode ();
1092	}
1093
1094      /* If we have something from the FP char set but not a digit, parse
1095	 a FP literal.  */
1096      if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1097	{
1098	  int stage = 0;
1099	  int seen_digit = (literal_index ? 1 : 0);
1100	  int seen_exponent = 0;
1101	  int fflag = 0;	/* 1 for {f,F}, 0 for {d,D}. FP literal are
1102				   double unless specified.  */
1103
1104	  /* It is ok if the radix is 8 because this just means we've
1105	     seen a leading `0'.  However, radix==16 is invalid.  */
1106	  if (radix == 16)
1107	    java_lex_error ("Can't express non-decimal FP literal", 0);
1108	  radix = 10;
1109
1110	  for (;;)
1111	    {
1112	      if (c == '.')
1113		{
1114		  if (stage < 1)
1115		    {
1116		      stage = 1;
1117		      literal_token [literal_index++ ] = c;
1118		      c = java_get_unicode ();
1119		    }
1120		  else
1121		    java_lex_error ("Invalid character in FP literal", 0);
1122		}
1123
1124	      if (c == 'e' || c == 'E')
1125		{
1126		  if (stage < 2)
1127		    {
1128		      /* {E,e} must have seen at least a digit.  */
1129		      if (!seen_digit)
1130			java_lex_error
1131                          ("Invalid FP literal, mantissa must have digit", 0);
1132		      seen_digit = 0;
1133		      seen_exponent = 1;
1134		      stage = 2;
1135		      literal_token [literal_index++] = c;
1136		      c = java_get_unicode ();
1137		    }
1138		  else
1139		    java_lex_error ("Invalid character in FP literal", 0);
1140		}
1141	      if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1142		{
1143		  fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1144		  stage = 4;	/* So we fall through.  */
1145		}
1146
1147	      if ((c=='-' || c =='+') && stage == 2)
1148		{
1149		  stage = 3;
1150		  literal_token [literal_index++] = c;
1151		  c = java_get_unicode ();
1152		}
1153
1154	      if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1155		  (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1156		  (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1157		  (stage == 3 && JAVA_ASCII_DIGIT (c)))
1158		{
1159		  if (JAVA_ASCII_DIGIT (c))
1160		    seen_digit = 1;
1161                  if (stage == 2)
1162                    stage = 3;
1163		  literal_token [literal_index++ ] = c;
1164		  c = java_get_unicode ();
1165		}
1166	      else
1167		{
1168		  if (stage != 4) /* Don't push back fF/dD.  */
1169		    java_unget_unicode ();
1170
1171		  /* An exponent (if any) must have seen a digit.  */
1172		  if (seen_exponent && !seen_digit)
1173		    java_lex_error
1174                      ("Invalid FP literal, exponent must have digit", 0);
1175
1176		  literal_token [literal_index] = '\0';
1177		  JAVA_LEX_LIT (literal_token, radix);
1178
1179#ifndef JC1_LITE
1180		  java_perform_atof (java_lval, literal_token,
1181				     fflag, number_beginning);
1182#endif
1183		  return FP_LIT_TK;
1184		}
1185	    }
1186	} /* JAVA_ASCII_FPCHAR (c) */
1187
1188      /* Here we get back to converting the integral literal.  */
1189      if (radix == 16 && ! found_hex_digits)
1190	java_lex_error
1191	  ("0x must be followed by at least one hexadecimal digit", 0);
1192      else if (radix == 8 && found_non_octal_digits)
1193	java_lex_error ("Octal literal contains digit out of range", 0);
1194      else if (c == 'L' || c == 'l')
1195	long_suffix = 1;
1196      else
1197	java_unget_unicode ();
1198
1199#ifdef JAVA_LEX_DEBUG
1200      literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe.  */
1201      JAVA_LEX_LIT (literal_token, radix);
1202#endif
1203      /* This section of code is borrowed from gcc/c-lex.c.  */
1204      if (!overflow)
1205	{
1206	  bytes = GET_TYPE_PRECISION (long_type_node);
1207	  for (i = bytes; i < TOTAL_PARTS; i++)
1208	    if (parts [i])
1209	      {
1210	        overflow = 1;
1211		break;
1212	      }
1213	}
1214      high = low = 0;
1215      for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1216	{
1217	  high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1218					      / HOST_BITS_PER_CHAR)]
1219		   << (i * HOST_BITS_PER_CHAR));
1220	  low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1221	}
1222      /* End borrowed section.  */
1223
1224#ifndef JC1_LITE
1225      /* Range checking.  */
1226      value = build_int_2 (low, high);
1227      /* Temporarily set type to unsigned.  */
1228      SET_LVAL_NODE_TYPE (value, (long_suffix
1229				  ? unsigned_long_type_node
1230				  : unsigned_int_type_node));
1231
1232      /* For base 10 numbers, only values up to the highest value
1233	 (plus one) can be written.  For instance, only ints up to
1234	 2147483648 can be written.  The special case of the largest
1235	 negative value is handled elsewhere.  For other bases, any
1236	 number can be represented.  */
1237      if (overflow || (radix == 10
1238		       && tree_int_cst_lt (long_suffix
1239					   ? decimal_long_max
1240					   : decimal_int_max,
1241					   value)))
1242	{
1243	  if (long_suffix)
1244	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1245	  else
1246	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1247	}
1248
1249      /* Sign extend the value.  */
1250      SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1251      force_fit_type (value, 0);
1252      JAVA_RADIX10_FLAG (value) = radix == 10;
1253#else
1254      SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1255			  long_suffix ? long_type_node : int_type_node);
1256#endif
1257      return INT_LIT_TK;
1258    }
1259
1260  /* Character literals.  */
1261  if (c == '\'')
1262    {
1263      int char_lit;
1264      if ((c = java_get_unicode ()) == '\\')
1265	char_lit = java_parse_escape_sequence ();
1266      else
1267	{
1268	  if (c == '\n' || c == '\'')
1269	    java_lex_error ("Invalid character literal", 0);
1270	  char_lit = c;
1271	}
1272
1273      c = java_get_unicode ();
1274
1275      if ((c == '\n') || (c == UEOF))
1276	java_lex_error ("Character literal not terminated at end of line", 0);
1277      if (c != '\'')
1278	java_lex_error ("Syntax error in character literal", 0);
1279
1280      if (char_lit == JAVA_CHAR_ERROR)
1281        char_lit = 0;		/* We silently convert it to zero.  */
1282
1283      JAVA_LEX_CHAR_LIT (char_lit);
1284      SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1285      return CHAR_LIT_TK;
1286    }
1287
1288  /* String literals.  */
1289  if (c == '"')
1290    {
1291      int no_error;
1292      char *string;
1293
1294      for (no_error = 1, c = java_get_unicode ();
1295	   c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1296	{
1297	  if (c == '\\')
1298	    c = java_parse_escape_sequence ();
1299	  if (c == JAVA_CHAR_ERROR)
1300	    {
1301	      no_error = 0;
1302	      c = 0;		/* We silently convert it to zero.  */
1303	    }
1304	  java_unicode_2_utf8 (c);
1305	}
1306      if (c == '\n' || c == UEOF) /* ULT.  */
1307	{
1308	  lineno--;	/* Refer to the line where the terminator was seen.  */
1309	  java_lex_error ("String not terminated at end of line", 0);
1310	  lineno++;
1311	}
1312
1313      obstack_1grow (&temporary_obstack, '\0');
1314      string = obstack_finish (&temporary_obstack);
1315#ifndef JC1_LITE
1316      if (!no_error || (c != '"'))
1317	java_lval->node = error_mark_node; /* FIXME: Requires futher
1318                                              testing.  */
1319      else
1320	java_lval->node = build_string (strlen (string), string);
1321#endif
1322      obstack_free (&temporary_obstack, string);
1323      return STRING_LIT_TK;
1324    }
1325
1326  /* Separator.  */
1327  switch (c)
1328    {
1329    case '(':
1330      JAVA_LEX_SEP (c);
1331      BUILD_OPERATOR (OP_TK);
1332    case ')':
1333      JAVA_LEX_SEP (c);
1334      return CP_TK;
1335    case '{':
1336      JAVA_LEX_SEP (c);
1337      if (ctxp->ccb_indent == 1)
1338	ctxp->first_ccb_indent1 = lineno;
1339      ctxp->ccb_indent++;
1340      BUILD_OPERATOR (OCB_TK);
1341    case '}':
1342      JAVA_LEX_SEP (c);
1343      ctxp->ccb_indent--;
1344      if (ctxp->ccb_indent == 1)
1345        ctxp->last_ccb_indent1 = lineno;
1346      BUILD_OPERATOR (CCB_TK);
1347    case '[':
1348      JAVA_LEX_SEP (c);
1349      BUILD_OPERATOR (OSB_TK);
1350    case ']':
1351      JAVA_LEX_SEP (c);
1352      return CSB_TK;
1353    case ';':
1354      JAVA_LEX_SEP (c);
1355      return SC_TK;
1356    case ',':
1357      JAVA_LEX_SEP (c);
1358      return C_TK;
1359    case '.':
1360      JAVA_LEX_SEP (c);
1361      BUILD_OPERATOR (DOT_TK);
1362      /*      return DOT_TK; */
1363    }
1364
1365  /* Operators.  */
1366  switch (c)
1367    {
1368    case '=':
1369      if ((c = java_get_unicode ()) == '=')
1370	{
1371	  BUILD_OPERATOR (EQ_TK);
1372	}
1373      else
1374	{
1375	  /* Equals is used in two different locations. In the
1376	     variable_declarator: rule, it has to be seen as '=' as opposed
1377	     to being seen as an ordinary assignment operator in
1378	     assignment_operators: rule.  */
1379	  java_unget_unicode ();
1380	  BUILD_OPERATOR (ASSIGN_TK);
1381	}
1382
1383    case '>':
1384      switch ((c = java_get_unicode ()))
1385	{
1386	case '=':
1387	  BUILD_OPERATOR (GTE_TK);
1388	case '>':
1389	  switch ((c = java_get_unicode ()))
1390	    {
1391	    case '>':
1392	      if ((c = java_get_unicode ()) == '=')
1393		{
1394		  BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1395		}
1396	      else
1397		{
1398		  java_unget_unicode ();
1399		  BUILD_OPERATOR (ZRS_TK);
1400		}
1401	    case '=':
1402	      BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1403	    default:
1404	      java_unget_unicode ();
1405	      BUILD_OPERATOR (SRS_TK);
1406	    }
1407	default:
1408	  java_unget_unicode ();
1409	  BUILD_OPERATOR (GT_TK);
1410	}
1411
1412    case '<':
1413      switch ((c = java_get_unicode ()))
1414	{
1415	case '=':
1416	  BUILD_OPERATOR (LTE_TK);
1417	case '<':
1418	  if ((c = java_get_unicode ()) == '=')
1419	    {
1420	      BUILD_OPERATOR2 (LS_ASSIGN_TK);
1421	    }
1422	  else
1423	    {
1424	      java_unget_unicode ();
1425	      BUILD_OPERATOR (LS_TK);
1426	    }
1427	default:
1428	  java_unget_unicode ();
1429	  BUILD_OPERATOR (LT_TK);
1430	}
1431
1432    case '&':
1433      switch ((c = java_get_unicode ()))
1434	{
1435	case '&':
1436	  BUILD_OPERATOR (BOOL_AND_TK);
1437	case '=':
1438	  BUILD_OPERATOR2 (AND_ASSIGN_TK);
1439	default:
1440	  java_unget_unicode ();
1441	  BUILD_OPERATOR (AND_TK);
1442	}
1443
1444    case '|':
1445      switch ((c = java_get_unicode ()))
1446	{
1447	case '|':
1448	  BUILD_OPERATOR (BOOL_OR_TK);
1449	case '=':
1450	  BUILD_OPERATOR2 (OR_ASSIGN_TK);
1451	default:
1452	  java_unget_unicode ();
1453	  BUILD_OPERATOR (OR_TK);
1454	}
1455
1456    case '+':
1457      switch ((c = java_get_unicode ()))
1458	{
1459	case '+':
1460	  BUILD_OPERATOR (INCR_TK);
1461	case '=':
1462	  BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1463	default:
1464	  java_unget_unicode ();
1465	  BUILD_OPERATOR (PLUS_TK);
1466	}
1467
1468    case '-':
1469      switch ((c = java_get_unicode ()))
1470	{
1471	case '-':
1472	  BUILD_OPERATOR (DECR_TK);
1473	case '=':
1474	  BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1475	default:
1476	  java_unget_unicode ();
1477	  BUILD_OPERATOR (MINUS_TK);
1478	}
1479
1480    case '*':
1481      if ((c = java_get_unicode ()) == '=')
1482	{
1483	  BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1484	}
1485      else
1486	{
1487	  java_unget_unicode ();
1488	  BUILD_OPERATOR (MULT_TK);
1489	}
1490
1491    case '/':
1492      if ((c = java_get_unicode ()) == '=')
1493	{
1494	  BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1495	}
1496      else
1497	{
1498	  java_unget_unicode ();
1499	  BUILD_OPERATOR (DIV_TK);
1500	}
1501
1502    case '^':
1503      if ((c = java_get_unicode ()) == '=')
1504	{
1505	  BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1506	}
1507      else
1508	{
1509	  java_unget_unicode ();
1510	  BUILD_OPERATOR (XOR_TK);
1511	}
1512
1513    case '%':
1514      if ((c = java_get_unicode ()) == '=')
1515	{
1516	  BUILD_OPERATOR2 (REM_ASSIGN_TK);
1517	}
1518      else
1519	{
1520	  java_unget_unicode ();
1521	  BUILD_OPERATOR (REM_TK);
1522	}
1523
1524    case '!':
1525      if ((c = java_get_unicode()) == '=')
1526	{
1527	  BUILD_OPERATOR (NEQ_TK);
1528	}
1529      else
1530	{
1531	  java_unget_unicode ();
1532	  BUILD_OPERATOR (NEG_TK);
1533	}
1534
1535    case '?':
1536      JAVA_LEX_OP ("?");
1537      BUILD_OPERATOR (REL_QM_TK);
1538    case ':':
1539      JAVA_LEX_OP (":");
1540      BUILD_OPERATOR (REL_CL_TK);
1541    case '~':
1542      BUILD_OPERATOR (NOT_TK);
1543    }
1544
1545  /* Keyword, boolean literal or null literal.  */
1546  for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1547       c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1548    {
1549      java_unicode_2_utf8 (c);
1550      if (all_ascii && c >= 128)
1551        all_ascii = 0;
1552      ascii_index++;
1553    }
1554
1555  obstack_1grow (&temporary_obstack, '\0');
1556  string = obstack_finish (&temporary_obstack);
1557  if (c != UEOF)
1558    java_unget_unicode ();
1559
1560  /* If we have something all ascii, we consider a keyword, a boolean
1561     literal, a null literal or an all ASCII identifier.  Otherwise,
1562     this is an identifier (possibly not respecting formation rule).  */
1563  if (all_ascii)
1564    {
1565      const struct java_keyword *kw;
1566      if ((kw=java_keyword (string, ascii_index)))
1567	{
1568	  JAVA_LEX_KW (string);
1569	  switch (kw->token)
1570	    {
1571	    case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1572	    case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1573	    case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1574	    case PRIVATE_TK:      case STRICT_TK:
1575	      SET_MODIFIER_CTX (kw->token);
1576	      return MODIFIER_TK;
1577	    case FLOAT_TK:
1578	      SET_LVAL_NODE (float_type_node);
1579	      return FP_TK;
1580	    case DOUBLE_TK:
1581	      SET_LVAL_NODE (double_type_node);
1582	      return FP_TK;
1583	    case BOOLEAN_TK:
1584	      SET_LVAL_NODE (boolean_type_node);
1585	      return BOOLEAN_TK;
1586	    case BYTE_TK:
1587	      SET_LVAL_NODE (byte_type_node);
1588	      return INTEGRAL_TK;
1589	    case SHORT_TK:
1590	      SET_LVAL_NODE (short_type_node);
1591	      return INTEGRAL_TK;
1592	    case INT_TK:
1593	      SET_LVAL_NODE (int_type_node);
1594	      return INTEGRAL_TK;
1595	    case LONG_TK:
1596	      SET_LVAL_NODE (long_type_node);
1597	      return INTEGRAL_TK;
1598	    case CHAR_TK:
1599	      SET_LVAL_NODE (char_type_node);
1600	      return INTEGRAL_TK;
1601
1602	      /* Keyword based literals.  */
1603	    case TRUE_TK:
1604	    case FALSE_TK:
1605	      SET_LVAL_NODE ((kw->token == TRUE_TK ?
1606			      boolean_true_node : boolean_false_node));
1607	      return BOOL_LIT_TK;
1608	    case NULL_TK:
1609	      SET_LVAL_NODE (null_pointer_node);
1610	      return NULL_TK;
1611
1612	    case ASSERT_TK:
1613	      if (flag_assert)
1614		{
1615		  BUILD_OPERATOR (kw->token);
1616		  return kw->token;
1617		}
1618	      else
1619		break;
1620
1621	      /* Some keyword we want to retain information on the location
1622		 they where found.  */
1623	    case CASE_TK:
1624	    case DEFAULT_TK:
1625	    case SUPER_TK:
1626	    case THIS_TK:
1627	    case RETURN_TK:
1628	    case BREAK_TK:
1629	    case CONTINUE_TK:
1630	    case TRY_TK:
1631	    case CATCH_TK:
1632	    case THROW_TK:
1633	    case INSTANCEOF_TK:
1634	      BUILD_OPERATOR (kw->token);
1635
1636	    default:
1637	      return kw->token;
1638	    }
1639	}
1640    }
1641
1642  /* We may have an ID here.  */
1643  if (JAVA_START_CHAR_P (first_unicode))
1644    {
1645      JAVA_LEX_ID (string);
1646      java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1647      return ID_TK;
1648    }
1649
1650  /* Everything else is an invalid character in the input.  */
1651  {
1652    char lex_error_buffer [128];
1653    sprintf (lex_error_buffer, "Invalid character `%s' in input",
1654	     java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1655    java_lex_error (lex_error_buffer, 1);
1656  }
1657  return 0;
1658}
1659
1660#ifndef JC1_LITE
1661/* This is called by the parser to see if an error should be generated
1662   due to numeric overflow.  This function only handles the particular
1663   case of the largest negative value, and is only called in the case
1664   where this value is not preceded by `-'.  */
1665static void
1666error_if_numeric_overflow (value)
1667     tree value;
1668{
1669  if (TREE_CODE (value) == INTEGER_CST
1670      && JAVA_RADIX10_FLAG (value)
1671      && tree_int_cst_sgn (value) < 0)
1672    {
1673      if (TREE_TYPE (value) == long_type_node)
1674	java_lex_error ("Numeric overflow for `long' literal", 0);
1675      else
1676	java_lex_error ("Numeric overflow for `int' literal", 0);
1677    }
1678}
1679#endif /* JC1_LITE */
1680
1681static void
1682java_unicode_2_utf8 (unicode)
1683    unicode_t unicode;
1684{
1685  if (RANGE (unicode, 0x01, 0x7f))
1686    obstack_1grow (&temporary_obstack, (char)unicode);
1687  else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1688    {
1689      obstack_1grow (&temporary_obstack,
1690		     (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1691      obstack_1grow (&temporary_obstack,
1692		     (unsigned char)(0x80 | (unicode & 0x3f)));
1693    }
1694  else				/* Range 0x800-0xffff.  */
1695    {
1696      obstack_1grow (&temporary_obstack,
1697		     (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1698      obstack_1grow (&temporary_obstack,
1699		     (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1700      obstack_1grow (&temporary_obstack,
1701		     (unsigned char)(0x80 | (unicode & 0x003f)));
1702    }
1703}
1704
1705#ifndef JC1_LITE
1706static tree
1707build_wfl_node (node)
1708     tree node;
1709{
1710  node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1711  /* Prevent java_complete_lhs from short-circuiting node (if constant).  */
1712  TREE_TYPE (node) = NULL_TREE;
1713  return node;
1714}
1715#endif
1716
1717static void
1718java_lex_error (msg, forward)
1719     const char *msg ATTRIBUTE_UNUSED;
1720     int forward ATTRIBUTE_UNUSED;
1721{
1722#ifndef JC1_LITE
1723  ctxp->elc.line = ctxp->c_line->lineno;
1724  ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1725
1726  /* Might be caught in the middle of some error report.  */
1727  ctxp->java_error_flag = 0;
1728  java_error (NULL);
1729  java_error (msg);
1730#endif
1731}
1732
1733#ifndef JC1_LITE
/* Return 1 if C, just read from FP, ends a line, and 0 otherwise.
   A carriage return also consumes an immediately following line
   feed, so that a CR/LF pair counts as a single line terminator.  */
static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  if (c == '\n')
    return 1;

  if (c != '\r')
    return 0;

  /* Carriage return: look one character ahead and give it back
     unless it is the LF of a CR/LF pair (or there is nothing left).  */
  {
    int lookahead = getc (fp);

    if (lookahead != '\n' && lookahead != EOF)
      ungetc (lookahead, fp);
  }
  return 1;
}
1753#endif
1754
1755char *
1756java_get_line_col (filename, line, col)
1757     const char *filename ATTRIBUTE_UNUSED;
1758     int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1759{
1760#ifdef JC1_LITE
1761  return 0;
1762#else
1763  /* Dumb implementation. Doesn't try to cache or optimize things.  */
1764  /* First line of the file is line 1, first column is 1.  */
1765
1766  /* COL == -1 means, at the CR/LF in LINE.  */
1767  /* COL == -2 means, at the first non space char in LINE.  */
1768
1769  FILE *fp;
1770  int c, ccol, cline = 1;
1771  int current_line_col = 0;
1772  int first_non_space = 0;
1773  char *base;
1774
1775  if (!(fp = fopen (filename, "r")))
1776    fatal_io_error ("can't open %s", filename);
1777
1778  while (cline != line)
1779    {
1780      c = getc (fp);
1781      if (c == EOF)
1782	{
1783	  static const char msg[] = "<<file too short - unexpected EOF>>";
1784	  obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1785	  goto have_line;
1786	}
1787      if (java_is_eol (fp, c))
1788	cline++;
1789    }
1790
1791  /* Gather the chars of the current line in a buffer.  */
1792  for (;;)
1793    {
1794      c = getc (fp);
1795      if (c < 0 || java_is_eol (fp, c))
1796	break;
1797      if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1798	first_non_space = current_line_col;
1799      obstack_1grow (&temporary_obstack, c);
1800      current_line_col++;
1801    }
1802 have_line:
1803
1804  obstack_1grow (&temporary_obstack, '\n');
1805
1806  if (col == -1)
1807    {
1808      col = current_line_col;
1809      first_non_space = 0;
1810    }
1811  else if (col == -2)
1812    col = first_non_space;
1813  else
1814    first_non_space = 0;
1815
1816  /* Place the '^' a the right position.  */
1817  base = obstack_base (&temporary_obstack);
1818  for (ccol = 1; ccol <= col+3; ccol++)
1819    {
1820      /* Compute \t when reaching first_non_space.  */
1821      char c = (first_non_space ?
1822		(base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1823      obstack_1grow (&temporary_obstack, c);
1824    }
1825  obstack_grow0 (&temporary_obstack, "^", 1);
1826
1827  fclose (fp);
1828  return obstack_finish (&temporary_obstack);
1829#endif
1830}
1831
1832#ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text at STR with the
   NUL-terminated string NAME, one decoded character at a time.
   Returns the difference at the first mismatch; if NAME is matched
   in full, returns 0 when STR is exhausted too and 1 otherwise.  */
static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; expected++)
    {
      /* UTF8_GET advances STR past the character it decodes.  */
      int ch = UTF8_GET (str, limit);
      int diff = ch - *expected;

      if (diff != 0)
	return diff;
    }

  if (str == limit)
    return 0;
  return 1;
}
1851
/* All C++ keywords, together with the GNU underscore spellings.  The
   entries are kept in ascending byte order because cxx_keyword_p
   looks names up by repeatedly halving the table; insert any new
   entry at its sorted position.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__", "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__", "__imag", "__imag__", "__inline", "__inline__",
  "__label__", "__null", "__real", "__real__",
  "__restrict", "__restrict__", "__signed", "__signed__",
  "__typeof", "__typeof__", "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl",
  "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast",
  "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
1961
1962/* Return true if NAME is a C++ keyword.  */
1963
1964int
1965cxx_keyword_p (name, length)
1966     const char *name;
1967     int length;
1968{
1969  int last = ARRAY_SIZE (cxx_keywords);
1970  int first = 0;
1971  int mid = (last + first) / 2;
1972  int old = -1;
1973
1974  for (mid = (last + first) / 2;
1975       mid != old;
1976       old = mid, mid = (last + first) / 2)
1977    {
1978      int kwl = strlen (cxx_keywords[mid]);
1979      int min_length = kwl > length ? length : kwl;
1980      int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1981
1982      if (r == 0)
1983	{
1984	  int i;
1985	  /* We've found a match if all the remaining characters are `$'.  */
1986	  for (i = min_length; i < length && name[i] == '$'; ++i)
1987	    ;
1988	  if (i == length)
1989	    return 1;
1990	  r = 1;
1991	}
1992
1993      if (r < 0)
1994	last = mid;
1995      else
1996	first = mid;
1997    }
1998  return 0;
1999}
2000#endif /* JC1_LITE */
2001