1/* FLEX lexer for Ada expressions, for GDB.
2   Copyright (C) 1994, 1997, 1998, 2000, 2001, 2002, 2003, 2007
3   Free Software Foundation, Inc.
4
5This file is part of GDB.
6
7This program is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2 of the License, or
10(at your option) any later version.
11
12This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with this program; if not, write to the Free Software
19Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA.  */
21
22/*----------------------------------------------------------------------*/
23
24/* The converted version of this file is to be included in ada-exp.y, */
25/* the Ada parser for gdb.  The function yylex obtains characters from */
26/* the global pointer lexptr.  It returns a syntactic category for */
27/* each successive token and places a semantic value into yylval */
28/* (ada-lval), defined by the parser.   */
29
/* Character classes and numeral shapes.  Letter classes list only lower
   case because the scanner is generated with %option case-insensitive.
   NUM10 and NUM16 allow '_' as a digit separator after the first digit.  */
DIG	[0-9]
NUM10	({DIG}({DIG}|_)*)
HEXDIG	[0-9a-f]
NUM16	({HEXDIG}({HEXDIG}|_)*)
OCTDIG	[0-7]
LETTER	[a-z_]
ID	({LETTER}({LETTER}|{DIG})*|"<"{LETTER}({LETTER}|{DIG})*">")
WHITE	[ \t\n]
TICK	("'"{WHITE}*)
GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
OPER    ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")

/* Exponents.  EXP is used by the real-literal rules and accepts an
   optional '+' or '-' (Ada: exponent ::= E [+] numeral | E - numeral),
   so that 1.0e5 is one literal.  POSEXP is used by the integer-literal
   rules, where a negative exponent is not permitted.  */
EXP	(e[+-]?{NUM10})
POSEXP  (e"+"?{NUM10})
44
%{

#define NUMERAL_WIDTH 256
#define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))

/* Temporary staging for numeric literals.  */
static char numbuf[NUMERAL_WIDTH];

/* Helpers used by the rule actions; each is defined after the rules
   section and declared exactly once here.  */
static void canonicalizeNumeral (char *s1, const char *s2);
static struct stoken processString (const char *text, int len);
static int processInt (const char *base0, const char *num0,
		       const char *exp0);
static int processReal (const char *num0);
static struct stoken processId (const char *name0, int len);
static int processAttribute (const char *str);
static int find_dot_all (const char *str);

#undef YY_DECL
#define YY_DECL static int yylex ( void )

/* Supply scanner input one character at a time from the global lexptr,
   advancing it as characters are handed over; a NUL byte at lexptr is
   reported as end of input.  */
#undef YY_INPUT
#define YY_INPUT(BUF, RESULT, MAX_SIZE) \
    if ( *lexptr == '\000' ) \
      (RESULT) = YY_NULL; \
    else \
      { \
        *(BUF) = *lexptr; \
        (RESULT) = 1; \
	lexptr += 1; \
      }

%}

%option case-insensitive interactive nodefault

%s BEFORE_QUAL_QUOTE
81
82%%
83
{WHITE}		 { }

"--".*		 { yyterminate(); }

{NUM10}{POSEXP}  {
		   char *e_ptr;

		   canonicalizeNumeral (numbuf, yytext);
		   /* Split the text at the exponent marker.  processInt
		      rejects a hex-digit character following the digits
		      it parsed, and 'e' is one, so the mantissa has to be
		      terminated before it is handed over.  */
		   e_ptr = strrchr (numbuf, 'e');
		   *e_ptr = '\000';
		   return processInt (NULL, numbuf, e_ptr + 1);
		 }

{NUM10}          {
		   canonicalizeNumeral (numbuf, yytext);
		   return processInt (NULL, numbuf, NULL);
		 }

{NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
		   canonicalizeNumeral (numbuf, yytext);
		   /* The exponent digits start two characters after the
		      closing '#': one for the '#' itself and one for the
		      'e', which processInt does not expect to see.  */
    		   return processInt (numbuf,
				      strchr (numbuf, '#') + 1,
				      strrchr(numbuf, '#') + 2);
		 }

{NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#" {
		   canonicalizeNumeral (numbuf, yytext);
    		   return processInt (numbuf, strchr (numbuf, '#') + 1, NULL);
		 }

"0x"{HEXDIG}+	{
		  /* C-style hexadecimal: drop the "0x" and treat the rest
		     as a base-16 literal.  */
		  canonicalizeNumeral (numbuf, yytext+2);
		  return processInt ("16#", numbuf, NULL);
		}


{NUM10}"."{NUM10}{EXP} {
		   canonicalizeNumeral (numbuf, yytext);
		   return processReal (numbuf);
		}

{NUM10}"."{NUM10} {
		   canonicalizeNumeral (numbuf, yytext);
		   return processReal (numbuf);
		}

{NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
                   error (_("Based real literals not implemented yet."));
		}

{NUM10}"#"{NUM16}"."{NUM16}"#" {
                   error (_("Based real literals not implemented yet."));
		}
133
<INITIAL>"'"({GRAPHIC}|\")"'" {
		   /* Plain character literal: the value is the single
		      character between the quotes.  */
		   yylval.typed_val.type = type_char ();
		   yylval.typed_val.val = yytext[1];
		   return CHARLIT;
		}

<INITIAL>"'[\""{HEXDIG}{2}"\"]'"   {
                   /* Character literal written as '["XX"]' with two hex
                      digits, which begin at yytext+3.  %x stores through
                      an unsigned int pointer, so V is unsigned; it is
                      pre-set in case the conversion stores nothing.  */
                   unsigned int v = 0;
                   yylval.typed_val.type = type_char ();
		   sscanf (yytext+3, "%2x", &v);
		   yylval.typed_val.val = v;
		   return CHARLIT;
		}

\"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\"   {
	           /* String literal: pass the text between the enclosing
	              quotes to processString.  */
	           yylval.sval = processString (yytext+1, yyleng-2);
		   return STRING;
		}

\"              {
                   error (_("ill-formed or non-terminated string literal"));
		}
156
157
if		{
		  /* The word "if" ends the expression.  Step lexptr back
		     until it points at the 'i', reset the scanner with
		     yyrestart, and return 0 so the caller sees end of
		     input with "if ..." still unconsumed at lexptr.  */
		  while (*lexptr != 'i' && *lexptr != 'I')
		    lexptr -= 1;
		  yyrestart(NULL);
		  return 0;
		}

	/* ADA KEYWORDS.  Each reserved word maps directly to its token.  */

abs		{ return ABS; }
and		{ return _AND_; }
else		{ return ELSE; }
in		{ return IN; }
mod		{ return MOD; }
new		{ return NEW; }
not		{ return NOT; }
null		{ return NULL_PTR; }
or		{ return OR; }
others          { return OTHERS; }
rem		{ return REM; }
then		{ return THEN; }
xor		{ return XOR; }
180
181        /* ATTRIBUTES */
182
183{TICK}[a-zA-Z][a-zA-Z]+ { return processAttribute (yytext+1); }
184
185	/* PUNCTUATION */
186
187"=>"		{ return ARROW; }
188".."		{ return DOTDOT; }
189"**"		{ return STARSTAR; }
190":="		{ return ASSIGN; }
191"/="		{ return NOTEQUAL; }
192"<="		{ return LEQ; }
193">="		{ return GEQ; }
194
195<BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }
196
197[-&*+./:<>=|;\[\]] { return yytext[0]; }
198
199","		{ if (paren_depth == 0 && comma_terminates)
200		    {
201		      lexptr -= 1;
202		      yyrestart(NULL);
203		      return 0;
204		    }
205		  else
206		    return ',';
207		}
208
209"("		{ paren_depth += 1; return '('; }
210")"		{ if (paren_depth == 0)
211		    {
212		      lexptr -= 1;
213		      yyrestart(NULL);
214		      return 0;
215		    }
216		  else
217 		    {
218		      paren_depth -= 1;
219		      return ')';
220		    }
221		}
222
"."{WHITE}*all  { return DOT_ALL; }

"."{WHITE}*{ID} {
		  /* Selected component: canonicalize everything after
		     the dot.  */
	 	  yylval.sval = processId (yytext+1, yyleng-1);
	          return DOT_ID;
		}

{ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")?  {
                  const int dot_all_at = find_dot_all (yytext);

                  if (dot_all_at >= 0)
		    {
		      /* Stop the name just before a ".all" component so
		         that it is scanned as a separate DOT_ALL.  */
		      yyless (dot_all_at);
		    }
                  else if (dot_all_at == -1 && yytext[yyleng-1] == '\'')
		    {
		      /* Give the trailing quote back and remember that
		         the next quote follows a name.  */
		      BEGIN BEFORE_QUAL_QUOTE;
		      yyless (yyleng-1);
		    }

                  yylval.sval = processId (yytext, yyleng);
                  return NAME;
               }
243
244
	/* GDB EXPRESSION CONSTRUCTS  */

	/* A single-quoted name followed by "::".  The "::" is pushed back
	   with yyless so it is scanned as its own token; the quoted text
	   is canonicalized by processId and returned as a NAME.  */

"'"[^']+"'"{WHITE}*:: {
                  yyless (yyleng - 2);
		  yylval.sval = processId (yytext, yyleng);
		  return NAME;
		}

"::"            { return COLONCOLON; }

[{}@]		{ return yytext[0]; }

	/* REGISTERS AND GDB CONVENIENCE VARIABLES */

	/* The token value points directly at yytext rather than at a
	   copy.  NOTE(review): presumably the parser consumes it before
	   the next token is scanned -- confirm against ada-exp.y.  */

"$"({LETTER}|{DIG}|"$")*  {
		  yylval.sval.ptr = yytext;
		  yylval.sval.length = yyleng;
		  return SPECIAL_VARIABLE;
		}

	/* CATCH-ALL ERROR CASE.  Required because of %option nodefault:
	   any character no other rule accepts is reported here.  */

.		{ error (_("Invalid character '%s' in expression."), yytext); }
268%%
269
270#include <ctype.h>
271#include "gdb_string.h"
272
/* Initialize the lexer for processing a new expression: put the scanner
   back into its INITIAL start condition and restart it on INP.  */

void
lexer_init (FILE *inp)
{
  BEGIN INITIAL;
  yyrestart (inp);
}
281
282
/* Copy S2 to S1, removing all underscores and downcasing all letters.
   S1 is always NUL-terminated.  The copy is not bounded: S1 must have
   room for strlen (S2) + 1 bytes.  */

static void
canonicalizeNumeral (char *s1, const char *s2)
{
  for (; *s2 != '\000'; s2 += 1)
    {
      if (*s2 == '_')
	continue;

      /* tolower is only defined for unsigned char values and EOF, so
	 convert before calling it.  */
      *s1 = tolower ((unsigned char) *s2);
      s1 += 1;
    }
  *s1 = '\000';
}
298
299/* Interprets the prefix of NUM that consists of digits of the given BASE
300   as an integer of that BASE, with the string EXP as an exponent.
301   Puts value in yylval, and returns INT, if the string is valid.  Causes
302   an error if the number is improperly formated.   BASE, if NULL, defaults
303   to "10", and EXP to "1".  The EXP does not contain a leading 'e' or 'E'.
304 */
305
306static int
307processInt (const char *base0, const char *num0, const char *exp0)
308{
309  ULONGEST result;
310  long exp;
311  int base;
312
313  char *trailer;
314
315  if (base0 == NULL)
316    base = 10;
317  else
318    {
319      base = strtol (base0, (char **) NULL, 10);
320      if (base < 2 || base > 16)
321	error (_("Invalid base: %d."), base);
322    }
323
324  if (exp0 == NULL)
325    exp = 0;
326  else
327    exp = strtol(exp0, (char **) NULL, 10);
328
329  errno = 0;
330  result = strtoulst (num0, (const char **) &trailer, base);
331  if (errno == ERANGE)
332    error (_("Integer literal out of range"));
333  if (isxdigit(*trailer))
334    error (_("Invalid digit `%c' in based literal"), *trailer);
335
336  while (exp > 0)
337    {
338      if (result > (ULONG_MAX / base))
339	error (_("Integer literal out of range"));
340      result *= base;
341      exp -= 1;
342    }
343
344  if ((result >> (gdbarch_int_bit (current_gdbarch)-1)) == 0)
345    yylval.typed_val.type = type_int ();
346  else if ((result >> (gdbarch_long_bit (current_gdbarch)-1)) == 0)
347    yylval.typed_val.type = type_long ();
348  else if (((result >> (gdbarch_long_bit (current_gdbarch)-1)) >> 1) == 0)
349    {
350      /* We have a number representable as an unsigned integer quantity.
351         For consistency with the C treatment, we will treat it as an
352	 anonymous modular (unsigned) quantity.  Alas, the types are such
353	 that we need to store .val as a signed quantity.  Sorry
354         for the mess, but C doesn't officially guarantee that a simple
355         assignment does the trick (no, it doesn't; read the reference manual).
356       */
357      yylval.typed_val.type = builtin_type_unsigned_long;
358      if (result & LONGEST_SIGN)
359	yylval.typed_val.val =
360	  (LONGEST) (result & ~LONGEST_SIGN)
361	  - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
362      else
363	yylval.typed_val.val = (LONGEST) result;
364      return INT;
365    }
366  else
367    yylval.typed_val.type = type_long_long ();
368
369  yylval.typed_val.val = (LONGEST) result;
370  return INT;
371}
372
373static int
374processReal (const char *num0)
375{
376  sscanf (num0, DOUBLEST_SCAN_FORMAT, &yylval.typed_val_float.dval);
377
378  yylval.typed_val_float.type = type_float ();
379  if (sizeof(DOUBLEST) >= gdbarch_double_bit (current_gdbarch)
380			    / TARGET_CHAR_BIT)
381    yylval.typed_val_float.type = type_double ();
382  if (sizeof(DOUBLEST) >= gdbarch_long_double_bit (current_gdbarch)
383			    / TARGET_CHAR_BIT)
384    yylval.typed_val_float.type = type_long_double ();
385
386  return FLOAT;
387}
388
389
390/* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym.  The
391   resulting string is valid until the next call to ada_parse.  It differs
392   from NAME0 in that:
393    + Characters between '...' or <...> are transfered verbatim to
394      yylval.ssym.
395    + <, >, and trailing "'" characters in quoted sequences are removed
396      (a leading quote is preserved to indicate that the name is not to be
397      GNAT-encoded).
398    + Unquoted whitespace is removed.
399    + Unquoted alphabetic characters are mapped to lower case.
400   Result is returned as a struct stoken, but for convenience, the string
401   is also null-terminated.  Result string valid until the next call of
402   ada_parse.
403 */
404static struct stoken
405processId (const char *name0, int len)
406{
407  char *name = obstack_alloc (&temp_parse_space, len + 11);
408  int i0, i;
409  struct stoken result;
410
411  while (len > 0 && isspace (name0[len-1]))
412    len -= 1;
413  i = i0 = 0;
414  while (i0 < len)
415    {
416      if (isalnum (name0[i0]))
417	{
418	  name[i] = tolower (name0[i0]);
419	  i += 1; i0 += 1;
420	}
421      else switch (name0[i0])
422	{
423	default:
424	  name[i] = name0[i0];
425	  i += 1; i0 += 1;
426	  break;
427	case ' ': case '\t':
428	  i0 += 1;
429	  break;
430	case '\'':
431	  do
432	    {
433	      name[i] = name0[i0];
434	      i += 1; i0 += 1;
435	    }
436	  while (i0 < len && name0[i0] != '\'');
437	  i0 += 1;
438	  break;
439	case '<':
440	  i0 += 1;
441	  while (i0 < len && name0[i0] != '>')
442	    {
443	      name[i] = name0[i0];
444	      i += 1; i0 += 1;
445	    }
446	  i0 += 1;
447	  break;
448	}
449    }
450  name[i] = '\000';
451
452  result.ptr = name;
453  result.length = i;
454  return result;
455}
456
457/* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
458   with special hex character notations replaced with characters.
459   Result valid until the next call to ada_parse.  */
460
461static struct stoken
462processString (const char *text, int len)
463{
464  const char *p;
465  char *q;
466  const char *lim = text + len;
467  struct stoken result;
468
469  q = result.ptr = obstack_alloc (&temp_parse_space, len);
470  p = text;
471  while (p < lim)
472    {
473      if (p[0] == '[' && p[1] == '"' && p+2 < lim)
474         {
475           if (p[2] == '"')  /* "...["""]... */
476             {
477               *q = '"';
478	       p += 4;
479	     }
480           else
481	     {
482               int chr;
483	       sscanf (p+2, "%2x", &chr);
484	       *q = (char) chr;
485	       p += 5;
486	     }
487         }
488       else
489         *q = *p;
490       q += 1;
491       p += 1;
492     }
493  result.length = q - result.ptr;
494  return result;
495}
496
/* Returns the position within STR of the '.' in a
   '.{WHITE}*all' component of a dotted name, or -1 if there is none.
   "all" is matched without regard to case, as the lexer itself is
   case-insensitive, and must not be followed by a letter, digit or '_'.
   Note: we actually don't need this routine, since 'all' can never be an
   Ada identifier.  Thus, looking up foo.all or foo.all.x as a name
   must fail, and will eventually be interpreted as (foo).all or
   (foo).all.x.  However, this does avoid an extraneous lookup. */

static int
find_dot_all (const char *str)
{
  int i;

  for (i = 0; str[i] != '\000'; i += 1)
    {
      int j;

      if (str[i] != '.')
	continue;

      /* Look past any whitespace after the dot using a separate index,
	 so the outer scan resumes right after the dot and can never
	 step beyond the terminating NUL.  */
      j = i + 1;
      while (isspace ((unsigned char) str[j]))
	j += 1;

      /* str[j+3] is only examined once three non-NUL characters have
	 matched, so it is at worst the terminator.  */
      if (strncasecmp (str + j, "all", 3) == 0
	  && ! isalnum ((unsigned char) str[j+3]) && str[j+3] != '_')
	return i;
    }
  return -1;
}
523
/* Returns non-zero (1) iff string SUBSEQ matches a subsequence of STR,
   ignoring case; otherwise returns 0.  An empty SUBSEQ matches any STR.

   A single left-to-right pass suffices: matching each character of
   SUBSEQ against its earliest possible occurrence in STR never rules
   out a match that a later occurrence would have allowed.  */

static int
subseqMatch (const char *subseq, const char *str)
{
  for (; *subseq != '\0'; str += 1)
    {
      if (*str == '\0')
	return 0;

      /* tolower is only defined for unsigned char values and EOF.  */
      if (tolower ((unsigned char) *subseq) == tolower ((unsigned char) *str))
	subseq += 1;
    }
  return 1;
}
539
540
541static struct { const char *name; int code; }
542attributes[] = {
543  { "address", TICK_ADDRESS },
544  { "unchecked_access", TICK_ACCESS },
545  { "unrestricted_access", TICK_ACCESS },
546  { "access", TICK_ACCESS },
547  { "first", TICK_FIRST },
548  { "last", TICK_LAST },
549  { "length", TICK_LENGTH },
550  { "max", TICK_MAX },
551  { "min", TICK_MIN },
552  { "modulus", TICK_MODULUS },
553  { "pos", TICK_POS },
554  { "range", TICK_RANGE },
555  { "size", TICK_SIZE },
556  { "tag", TICK_TAG },
557  { "val", TICK_VAL },
558  { NULL, -1 }
559};
560
561/* Return the syntactic code corresponding to the attribute name or
562   abbreviation STR.  */
563
564static int
565processAttribute (const char *str)
566{
567  int i, k;
568
569  for (i = 0; attributes[i].code != -1; i += 1)
570    if (strcasecmp (str, attributes[i].name) == 0)
571      return attributes[i].code;
572
573  for (i = 0, k = -1; attributes[i].code != -1; i += 1)
574    if (subseqMatch (str, attributes[i].name))
575      {
576	if (k == -1)
577	  k = i;
578	else
579	  error (_("ambiguous attribute name: `%s'"), str);
580      }
581  if (k == -1)
582    error (_("unrecognized attribute: `%s'"), str);
583
584  return attributes[k].code;
585}
586
/* End-of-input hook called by the generated scanner.  Always returns 1;
   per the flex manual a non-zero result tells the scanner that there is
   no further input to switch to.  */
int
yywrap(void)
{
  return 1;
}
592
/* Dummy definition to suppress warnings about unused static definitions.
   Taking the address of yyunput here counts as a use of it; nothing in
   this file reads the array.  */
typedef void (*dummy_function) ();
dummy_function ada_flex_use[] =
{
  (dummy_function) yyunput
};
599