ada-lex.l revision 1.6
1/* FLEX lexer for Ada expressions, for GDB.
2   Copyright (C) 1994-2016 Free Software Foundation, Inc.
3
4   This file is part of GDB.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19/*----------------------------------------------------------------------*/
20
21/* The converted version of this file is to be included in ada-exp.y, */
22/* the Ada parser for gdb.  The function yylex obtains characters from */
23/* the global pointer lexptr.  It returns a syntactic category for */
24/* each successive token and places a semantic value into yylval */
25/* (ada-lval), defined by the parser.   */
26
/* Named patterns used by the rules below.  The scanner is generated
   with %option case-insensitive, so the lower-case letters in these
   patterns also match upper case.  */

/* Decimal digits and numerals; underscores may follow the first digit.  */
DIG	[0-9]
NUM10	({DIG}({DIG}|_)*)
/* Digits of based literals (the base itself is validated later by
   processInt).  */
HEXDIG	[0-9a-f]
NUM16	({HEXDIG}({HEXDIG}|_)*)
/* Not referenced by any rule in this file.  */
OCTDIG	[0-7]
/* Identifiers: a plain name, or a name wrapped in angle brackets.  */
LETTER	[a-z_]
ID	({LETTER}({LETTER}|{DIG})*|"<"{LETTER}({LETTER}|{DIG})*">")
WHITE	[ \t\n]
/* A quote followed by optional whitespace; introduces an attribute.  */
TICK	("'"{WHITE}*)
/* Characters allowed inside character and string literals; the double
   quote is deliberately absent and is handled by the literal rules.  */
GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
/* Operator symbols that may appear, quoted, as a component of a name.  */
OPER    ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")

/* Exponent of a real literal.  The sign is optional, as in Ada
   (exponent ::= E [+] numeral | E - numeral), so that "1.0e5" is a
   single literal.  */
EXP	(e[+-]?{NUM10})
/* Exponent of an integer literal: never negative.  */
POSEXP  (e"+"?{NUM10})
41
%{

#define NUMERAL_WIDTH 256
#define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))

/* Temporary staging for numeric literals.  */
static char numbuf[NUMERAL_WIDTH];

/* Helpers used by the rule actions; they are defined in the user-code
   section after the rules.  */
static void canonicalizeNumeral (char *s1, const char *s2);
static struct stoken processString (const char *text, int len);
static int processInt (struct parser_state *par_state, const char *base0,
		       const char *num0, const char *exp0);
static int processReal (struct parser_state *par_state, const char *num0);
static struct stoken processId (const char *name0, int len);
static int processAttribute (const char *str);
static int find_dot_all (const char *str);
static void rewind_to_char (int ch);

#undef YY_DECL
#define YY_DECL static int yylex ( void )

/* Flex generates a static function "input" which is not used.
   Defining YY_NO_INPUT comments it out.  */
#define YY_NO_INPUT

/* Feed the scanner from the global lexptr, one character per call,
   reporting end of input at the terminating null.  MAX_SIZE is not
   used.  rewind_to_char depends on the scanner never having consumed
   more of lexptr than it needed for the current token.  */
#undef YY_INPUT
#define YY_INPUT(BUF, RESULT, MAX_SIZE) \
    if ( *lexptr == '\000' ) \
      (RESULT) = YY_NULL; \
    else \
      { \
        *(BUF) = *lexptr; \
        (RESULT) = 1; \
	lexptr += 1; \
      }

%}
80
81%option case-insensitive interactive nodefault
82
83%s BEFORE_QUAL_QUOTE
84
85%%
86
	/* Whitespace between tokens is discarded.  */
{WHITE}		 { }

	/* An Ada "--" comment runs to the end of the line; the scanner
	   terminates there instead of returning a token.  */
"--".*		 { yyterminate(); }
90
91{NUM10}{POSEXP}  {
92		   canonicalizeNumeral (numbuf, yytext);
93		   return processInt (pstate, NULL, numbuf,
94				      strrchr (numbuf, 'e') + 1);
95		 }
96
97{NUM10}          {
98		   canonicalizeNumeral (numbuf, yytext);
99		   return processInt (pstate, NULL, numbuf, NULL);
100		 }
101
102{NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
103		   canonicalizeNumeral (numbuf, yytext);
104		   return processInt (pstate, numbuf,
105				      strchr (numbuf, '#') + 1,
106				      strrchr(numbuf, '#') + 1);
107		 }
108
109{NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#" {
110		   canonicalizeNumeral (numbuf, yytext);
111		   return processInt (pstate, numbuf, strchr (numbuf, '#') + 1,
112				      NULL);
113		 }
114
115"0x"{HEXDIG}+	{
116		  canonicalizeNumeral (numbuf, yytext+2);
117		  return processInt (pstate, "16#", numbuf, NULL);
118		}
119
120
	/* Decimal real literals, with and without an exponent.  The
	   canonicalized text is converted by processReal.  */
{NUM10}"."{NUM10}{EXP} {
		   canonicalizeNumeral (numbuf, yytext);
		   return processReal (pstate, numbuf);
		}

{NUM10}"."{NUM10} {
		   canonicalizeNumeral (numbuf, yytext);
		   return processReal (pstate, numbuf);
		}

	/* Based real literals are recognized only so that they can be
	   rejected with a specific message.  */
{NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
                   error (_("Based real literals not implemented yet."));
		}

{NUM10}"#"{NUM16}"."{NUM16}"#" {
                   error (_("Based real literals not implemented yet."));
		}
138
139<INITIAL>"'"({GRAPHIC}|\")"'" {
140		   yylval.typed_val.type = type_char (pstate);
141		   yylval.typed_val.val = yytext[1];
142		   return CHARLIT;
143		}
144
145<INITIAL>"'[\""{HEXDIG}{2}"\"]'"   {
146                   int v;
147                   yylval.typed_val.type = type_char (pstate);
148		   sscanf (yytext+3, "%2x", &v);
149		   yylval.typed_val.val = v;
150		   return CHARLIT;
151		}
152
153\"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\"   {
154	           yylval.sval = processString (yytext+1, yyleng-2);
155		   return STRING;
156		}
157
158\"              {
159                   error (_("ill-formed or non-terminated string literal"));
160		}
161
162
	/* The next three rules end the expression: lexptr is moved back
	   to the start of the keyword and 0 is returned, so the keyword
	   is left in the input rather than being consumed here.  */

if		{
                  rewind_to_char ('i');
		  return 0;
		}

task            {
                  rewind_to_char ('t');
		  return 0;
		}

thread{WHITE}+{DIG} {
                  /* This keyword signals the end of the expression and
                     will be processed separately.  */
                  rewind_to_char ('t');
		  return 0;
		}

	/* ADA KEYWORDS */

abs		{ return ABS; }
and		{ return _AND_; }
else		{ return ELSE; }
in		{ return IN; }
mod		{ return MOD; }
new		{ return NEW; }
not		{ return NOT; }
null		{ return NULL_PTR; }
or		{ return OR; }
others          { return OTHERS; }
rem		{ return REM; }
then		{ return THEN; }
xor		{ return XOR; }

	/* BOOLEAN "KEYWORDS" */

 /* True and False are not keywords in Ada, but rather enumeration constants.
    However, the boolean type is no longer represented as an enum, so True
    and False are no longer defined in symbol tables.  We compromise by
    making them keywords (when bare). */

true		{ return TRUEKEYWORD; }
false		{ return FALSEKEYWORD; }
205
206        /* ATTRIBUTES */
207
208{TICK}[a-zA-Z][a-zA-Z]+ { BEGIN INITIAL; return processAttribute (yytext+1); }
209
210	/* PUNCTUATION */
211
	/* Multi-character operators.  */
"=>"		{ return ARROW; }
".."		{ return DOTDOT; }
"**"		{ return STARSTAR; }
":="		{ return ASSIGN; }
"/="		{ return NOTEQUAL; }
"<="		{ return LEQ; }
">="		{ return GEQ; }

	/* The name rule enters BEFORE_QUAL_QUOTE after pushing back a
	   quote that directly follows a name; here that quote is
	   returned as a token and the scanner goes back to INITIAL.  */
<BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }

	/* Single-character tokens are returned as themselves.  */
[-&*+./:<>=|;\[\]] { return yytext[0]; }

	/* A comma outside all parentheses ends the expression when
	   comma_terminates is set: it is left in the input and 0 is
	   returned.  */
","		{ if (paren_depth == 0 && comma_terminates)
		    {
		      rewind_to_char (',');
		      return 0;
		    }
		  else
		    return ',';
		}

	/* paren_depth counts the currently open parentheses.  A closing
	   parenthesis with none open ends the expression and is left in
	   the input.  */
"("		{ paren_depth += 1; return '('; }
")"		{ if (paren_depth == 0)
		    {
		      rewind_to_char (')');
		      return 0;
		    }
		  else
 		    {
		      paren_depth -= 1;
		      return ')';
		    }
		}
245
	/* ".all", possibly with whitespace after the dot.  */
"."{WHITE}*all  { return DOT_ALL; }

	/* A dot followed by an identifier; the identifier text after the
	   dot is canonicalized by processId.  */
"."{WHITE}*{ID} {
	 	  yylval.sval = processId (yytext+1, yyleng-1);
	          return DOT_ID;
		}

	/* A possibly dotted name whose components are identifiers or
	   quoted operator symbols, optionally followed by a quote.
	   If the name contains a ".all" component, everything from that
	   dot on is pushed back with yyless and only the prefix becomes
	   the NAME.  Otherwise a trailing quote is pushed back and the
	   scanner enters BEFORE_QUAL_QUOTE, where that quote is lexed
	   as a token of its own rather than as the start of a
	   character literal.  */
{ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")?  {
                  int all_posn = find_dot_all (yytext);

                  if (all_posn == -1 && yytext[yyleng-1] == '\'')
		    {
		      BEGIN BEFORE_QUAL_QUOTE;
		      yyless (yyleng-1);
		    }
                  else if (all_posn >= 0)
		    yyless (all_posn);
                  yylval.sval = processId (yytext, yyleng);
                  return NAME;
               }
266
267
268	/* GDB EXPRESSION CONSTRUCTS  */
269
	/* A quoted name followed by "::".  The "::" is pushed back with
	   yyless so that it is lexed separately, and the quoted text is
	   returned as a NAME.  */
"'"[^']+"'"{WHITE}*:: {
                  yyless (yyleng - 2);
		  yylval.sval = processId (yytext, yyleng);
		  return NAME;
		}

"::"            { return COLONCOLON; }

[{}@]		{ return yytext[0]; }

	/* REGISTERS AND GDB CONVENIENCE VARIABLES */

	/* The token value points directly at yytext (it is not copied)
	   and includes the leading '$'.  */
"$"({LETTER}|{DIG}|"$")*  {
		  yylval.sval.ptr = yytext;
		  yylval.sval.length = yyleng;
		  return SPECIAL_VARIABLE;
		}

	/* CATCH-ALL ERROR CASE */

	/* Any character not matched by an earlier rule is an error.  */
.		{ error (_("Invalid character '%s' in expression."), yytext); }
291%%
292
293#include <ctype.h>
/* Initialize the lexer for processing a new expression: put the
   scanner back in the INITIAL start condition and restart it on INP.
   The YY_INPUT macro in this file takes its characters from lexptr,
   not from a stream, so INP is only handed on to yyrestart.  */

static void
lexer_init (FILE *inp)
{
  BEGIN INITIAL;
  yyrestart (inp);
}
302
303
304/* Copy S2 to S1, removing all underscores, and downcasing all letters.  */
305
306static void
307canonicalizeNumeral (char *s1, const char *s2)
308{
309  for (; *s2 != '\000'; s2 += 1)
310    {
311      if (*s2 != '_')
312	{
313	  *s1 = tolower(*s2);
314	  s1 += 1;
315	}
316    }
317  s1[0] = '\000';
318}
319
320/* Interprets the prefix of NUM that consists of digits of the given BASE
321   as an integer of that BASE, with the string EXP as an exponent.
322   Puts value in yylval, and returns INT, if the string is valid.  Causes
323   an error if the number is improperly formated.   BASE, if NULL, defaults
324   to "10", and EXP to "1".  The EXP does not contain a leading 'e' or 'E'.
325 */
326
327static int
328processInt (struct parser_state *par_state, const char *base0,
329	    const char *num0, const char *exp0)
330{
331  ULONGEST result;
332  long exp;
333  int base;
334  const char *trailer;
335
336  if (base0 == NULL)
337    base = 10;
338  else
339    {
340      base = strtol (base0, (char **) NULL, 10);
341      if (base < 2 || base > 16)
342	error (_("Invalid base: %d."), base);
343    }
344
345  if (exp0 == NULL)
346    exp = 0;
347  else
348    exp = strtol(exp0, (char **) NULL, 10);
349
350  errno = 0;
351  result = strtoulst (num0, &trailer, base);
352  if (errno == ERANGE)
353    error (_("Integer literal out of range"));
354  if (isxdigit(*trailer))
355    error (_("Invalid digit `%c' in based literal"), *trailer);
356
357  while (exp > 0)
358    {
359      if (result > (ULONG_MAX / base))
360	error (_("Integer literal out of range"));
361      result *= base;
362      exp -= 1;
363    }
364
365  if ((result >> (gdbarch_int_bit (parse_gdbarch (par_state))-1)) == 0)
366    yylval.typed_val.type = type_int (par_state);
367  else if ((result >> (gdbarch_long_bit (parse_gdbarch (par_state))-1)) == 0)
368    yylval.typed_val.type = type_long (par_state);
369  else if (((result >> (gdbarch_long_bit (parse_gdbarch (par_state))-1)) >> 1) == 0)
370    {
371      /* We have a number representable as an unsigned integer quantity.
372         For consistency with the C treatment, we will treat it as an
373	 anonymous modular (unsigned) quantity.  Alas, the types are such
374	 that we need to store .val as a signed quantity.  Sorry
375         for the mess, but C doesn't officially guarantee that a simple
376         assignment does the trick (no, it doesn't; read the reference manual).
377       */
378      yylval.typed_val.type
379	= builtin_type (parse_gdbarch (par_state))->builtin_unsigned_long;
380      if (result & LONGEST_SIGN)
381	yylval.typed_val.val =
382	  (LONGEST) (result & ~LONGEST_SIGN)
383	  - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
384      else
385	yylval.typed_val.val = (LONGEST) result;
386      return INT;
387    }
388  else
389    yylval.typed_val.type = type_long_long (par_state);
390
391  yylval.typed_val.val = (LONGEST) result;
392  return INT;
393}
394
395static int
396processReal (struct parser_state *par_state, const char *num0)
397{
398  sscanf (num0, "%" DOUBLEST_SCAN_FORMAT, &yylval.typed_val_float.dval);
399
400  yylval.typed_val_float.type = type_float (par_state);
401  if (sizeof(DOUBLEST) >= gdbarch_double_bit (parse_gdbarch (par_state))
402			    / TARGET_CHAR_BIT)
403    yylval.typed_val_float.type = type_double (par_state);
404  if (sizeof(DOUBLEST) >= gdbarch_long_double_bit (parse_gdbarch (par_state))
405			    / TARGET_CHAR_BIT)
406    yylval.typed_val_float.type = type_long_double (par_state);
407
408  return FLOAT;
409}
410
411
412/* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym.  The
413   resulting string is valid until the next call to ada_parse.  If
414   NAME0 contains the substring "___", it is assumed to be already
415   encoded and the resulting name is equal to it.  Otherwise, it differs
416   from NAME0 in that:
417    + Characters between '...' or <...> are transfered verbatim to
418      yylval.ssym.
419    + <, >, and trailing "'" characters in quoted sequences are removed
420      (a leading quote is preserved to indicate that the name is not to be
421      GNAT-encoded).
422    + Unquoted whitespace is removed.
423    + Unquoted alphabetic characters are mapped to lower case.
424   Result is returned as a struct stoken, but for convenience, the string
425   is also null-terminated.  Result string valid until the next call of
426   ada_parse.
427 */
428static struct stoken
429processId (const char *name0, int len)
430{
431  char *name = (char *) obstack_alloc (&temp_parse_space, len + 11);
432  int i0, i;
433  struct stoken result;
434
435  result.ptr = name;
436  while (len > 0 && isspace (name0[len-1]))
437    len -= 1;
438
439  if (strstr (name0, "___") != NULL)
440    {
441      strncpy (name, name0, len);
442      name[len] = '\000';
443      result.length = len;
444      return result;
445    }
446
447  i = i0 = 0;
448  while (i0 < len)
449    {
450      if (isalnum (name0[i0]))
451	{
452	  name[i] = tolower (name0[i0]);
453	  i += 1; i0 += 1;
454	}
455      else switch (name0[i0])
456	{
457	default:
458	  name[i] = name0[i0];
459	  i += 1; i0 += 1;
460	  break;
461	case ' ': case '\t':
462	  i0 += 1;
463	  break;
464	case '\'':
465	  do
466	    {
467	      name[i] = name0[i0];
468	      i += 1; i0 += 1;
469	    }
470	  while (i0 < len && name0[i0] != '\'');
471	  i0 += 1;
472	  break;
473	case '<':
474	  i0 += 1;
475	  while (i0 < len && name0[i0] != '>')
476	    {
477	      name[i] = name0[i0];
478	      i += 1; i0 += 1;
479	    }
480	  i0 += 1;
481	  break;
482	}
483    }
484  name[i] = '\000';
485
486  result.length = i;
487  return result;
488}
489
490/* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
491   with special hex character notations replaced with characters.
492   Result valid until the next call to ada_parse.  */
493
494static struct stoken
495processString (const char *text, int len)
496{
497  const char *p;
498  char *q;
499  const char *lim = text + len;
500  struct stoken result;
501
502  q = (char *) obstack_alloc (&temp_parse_space, len);
503  result.ptr = q;
504  p = text;
505  while (p < lim)
506    {
507      if (p[0] == '[' && p[1] == '"' && p+2 < lim)
508         {
509           if (p[2] == '"')  /* "...["""]... */
510             {
511               *q = '"';
512	       p += 4;
513	     }
514           else
515	     {
516               int chr;
517	       sscanf (p+2, "%2x", &chr);
518	       *q = (char) chr;
519	       p += 5;
520	     }
521         }
522       else
523         *q = *p;
524       q += 1;
525       p += 1;
526     }
527  result.length = q - result.ptr;
528  return result;
529}
530
/* Look in STR for a '.{WHITE}*all' component of a dotted name and
   return the index of its '.', or -1 if STR has no such component.
   "all" is compared without regard to case and must not be followed by
   a letter, digit or underscore.
   Note: this routine is not strictly needed, since 'all' can never be
   an Ada identifier: looking up foo.all or foo.all.x as a name must
   fail, and would eventually be interpreted as (foo).all or
   (foo).all.x.  It does, however, avoid an extraneous lookup.  */

static int
find_dot_all (const char *str)
{
  const char *p = str;

  while (*p != '\000')
    {
      if (*p == '.')
	{
	  const char *dot = p;

	  /* Step over the dot and any whitespace after it.  */
	  p += 1;
	  while (isspace (*p))
	    p += 1;

	  if (strncasecmp (p, "all", 3) == 0
	      && !(isalnum (p[3]) || p[3] == '_'))
	    return (int) (dot - str);
	}
      p += 1;
    }
  return -1;
}
558
/* Returns 1 if string SUBSEQ matches a subsequence of STR, ignoring
   case, and 0 otherwise.  The empty string matches any STR.
   Each character of SUBSEQ is matched against the earliest possible
   character of STR; that is sufficient for a subsequence test, so the
   scan is a single pass over STR.  */

static int
subseqMatch (const char *subseq, const char *str)
{
  for (; *subseq != '\0'; str += 1)
    {
      if (*str == '\0')
	return 0;
      if (tolower ((unsigned char) *subseq)
	  == tolower ((unsigned char) *str))
	subseq += 1;
    }
  return 1;
}
574
575
/* Attribute names accepted after a tick, each paired with the token
   code processAttribute returns for it.  Three spellings share
   TICK_ACCESS.  The { NULL, -1 } entry ends the table: the loops in
   processAttribute stop at the first entry whose code is -1.  */
static struct { const char *name; int code; }
attributes[] = {
  { "address", TICK_ADDRESS },
  { "unchecked_access", TICK_ACCESS },
  { "unrestricted_access", TICK_ACCESS },
  { "access", TICK_ACCESS },
  { "first", TICK_FIRST },
  { "last", TICK_LAST },
  { "length", TICK_LENGTH },
  { "max", TICK_MAX },
  { "min", TICK_MIN },
  { "modulus", TICK_MODULUS },
  { "pos", TICK_POS },
  { "range", TICK_RANGE },
  { "size", TICK_SIZE },
  { "tag", TICK_TAG },
  { "val", TICK_VAL },
  { NULL, -1 }
};
595
596/* Return the syntactic code corresponding to the attribute name or
597   abbreviation STR.  */
598
599static int
600processAttribute (const char *str)
601{
602  int i, k;
603
604  for (i = 0; attributes[i].code != -1; i += 1)
605    if (strcasecmp (str, attributes[i].name) == 0)
606      return attributes[i].code;
607
608  for (i = 0, k = -1; attributes[i].code != -1; i += 1)
609    if (subseqMatch (str, attributes[i].name))
610      {
611	if (k == -1)
612	  k = i;
613	else
614	  error (_("ambiguous attribute name: `%s'"), str);
615      }
616  if (k == -1)
617    error (_("unrecognized attribute: `%s'"), str);
618
619  return attributes[k].code;
620}
621
622/* Back up lexptr by yyleng and then to the rightmost occurrence of
623   character CH, case-folded (there must be one).  WARNING: since
624   lexptr points to the next input character that Flex has not yet
625   transferred to its internal buffer, the use of this function
626   depends on the assumption that Flex calls YY_INPUT only when it is
627   logically necessary to do so (thus, there is no reading ahead
628   farther than needed to identify the next token.)  */
629
630static void
631rewind_to_char (int ch)
632{
633  lexptr -= yyleng;
634  while (toupper (*lexptr) != toupper (ch))
635    lexptr -= 1;
636  yyrestart (NULL);
637}
638
/* End-of-input hook required by the generated scanner.  Always
   returns 1, which in flex's yywrap convention means there is no
   further input to switch to.  */
int
yywrap(void)
{
  return 1;
}
644
/* Dummy definition to suppress warnings about unused static definitions.
   Taking the address of yyunput in this table counts as a use of it;
   nothing in this file reads the table.  */
typedef void (*dummy_function) ();
dummy_function ada_flex_use[] =
{
  (dummy_function) yyunput
};
651