ada-lex.l revision 1.8
1/* FLEX lexer for Ada expressions, for GDB.
2   Copyright (C) 1994-2019 Free Software Foundation, Inc.
3
4   This file is part of GDB.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19/*----------------------------------------------------------------------*/
20
21/* The converted version of this file is to be included in ada-exp.y, */
22/* the Ada parser for gdb.  The function yylex obtains characters from */
23/* the global pointer lexptr.  It returns a syntactic category for */
24/* each successive token and places a semantic value into yylval */
25/* (ada-lval), defined by the parser.   */
26
27DIG	[0-9]
28NUM10	({DIG}({DIG}|_)*)
29HEXDIG	[0-9a-f]
30NUM16	({HEXDIG}({HEXDIG}|_)*)
31OCTDIG	[0-7]
32LETTER	[a-z_]
33ID	({LETTER}({LETTER}|{DIG})*|"<"{LETTER}({LETTER}|{DIG})*">")
34WHITE	[ \t\n]
35TICK	("'"{WHITE}*)
36GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
37OPER    ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")
38
39EXP	(e[+-]{NUM10})
40POSEXP  (e"+"?{NUM10})
41
42%{
43
44#include "diagnostics.h"
45
46/* Some old versions of flex generate code that uses the "register" keyword,
47   which clang warns about.  This was observed for example with flex 2.5.35,
48   as shipped with macOS 10.12.  */
49DIAGNOSTIC_PUSH
50DIAGNOSTIC_IGNORE_DEPRECATED_REGISTER
51
52#define NUMERAL_WIDTH 256
53#define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))
54
55/* Temporary staging for numeric literals.  */
56static char numbuf[NUMERAL_WIDTH];
57 static void canonicalizeNumeral (char *s1, const char *);
58static struct stoken processString (const char*, int);
59static int processInt (struct parser_state *, const char *, const char *,
60		       const char *);
61static int processReal (struct parser_state *, const char *);
62static struct stoken processId (const char *, int);
63static int processAttribute (const char *);
64static int find_dot_all (const char *);
65static void rewind_to_char (int);
66
67#undef YY_DECL
68#define YY_DECL static int yylex ( void )
69
70/* Flex generates a static function "input" which is not used.
71   Defining YY_NO_INPUT comments it out.  */
72#define YY_NO_INPUT
73
74#undef YY_INPUT
75#define YY_INPUT(BUF, RESULT, MAX_SIZE) \
76    if ( *lexptr == '\000' ) \
77      (RESULT) = YY_NULL; \
78    else \
79      { \
80        *(BUF) = *lexptr; \
81        (RESULT) = 1; \
82	lexptr += 1; \
83      }
84
85static int find_dot_all (const char *);
86
87%}
88
89%option case-insensitive interactive nodefault
90
91%s BEFORE_QUAL_QUOTE
92
93%%
94
95{WHITE}		 { }
96
97"--".*		 { yyterminate(); }
98
99{NUM10}{POSEXP}  {
100		   canonicalizeNumeral (numbuf, yytext);
101		   return processInt (pstate, NULL, numbuf,
102				      strrchr (numbuf, 'e') + 1);
103		 }
104
105{NUM10}          {
106		   canonicalizeNumeral (numbuf, yytext);
107		   return processInt (pstate, NULL, numbuf, NULL);
108		 }
109
110{NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
111		   canonicalizeNumeral (numbuf, yytext);
112		   return processInt (pstate, numbuf,
113				      strchr (numbuf, '#') + 1,
114				      strrchr(numbuf, '#') + 1);
115		 }
116
117{NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#" {
118		   canonicalizeNumeral (numbuf, yytext);
119		   return processInt (pstate, numbuf, strchr (numbuf, '#') + 1,
120				      NULL);
121		 }
122
123"0x"{HEXDIG}+	{
124		  canonicalizeNumeral (numbuf, yytext+2);
125		  return processInt (pstate, "16#", numbuf, NULL);
126		}
127
128
129{NUM10}"."{NUM10}{EXP} {
130		   canonicalizeNumeral (numbuf, yytext);
131		   return processReal (pstate, numbuf);
132		}
133
134{NUM10}"."{NUM10} {
135		   canonicalizeNumeral (numbuf, yytext);
136		   return processReal (pstate, numbuf);
137		}
138
139{NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
140                   error (_("Based real literals not implemented yet."));
141		}
142
143{NUM10}"#"{NUM16}"."{NUM16}"#" {
144                   error (_("Based real literals not implemented yet."));
145		}
146
147<INITIAL>"'"({GRAPHIC}|\")"'" {
148		   yylval.typed_val.type = type_char (pstate);
149		   yylval.typed_val.val = yytext[1];
150		   return CHARLIT;
151		}
152
153<INITIAL>"'[\""{HEXDIG}{2}"\"]'"   {
154                   int v;
155                   yylval.typed_val.type = type_char (pstate);
156		   sscanf (yytext+3, "%2x", &v);
157		   yylval.typed_val.val = v;
158		   return CHARLIT;
159		}
160
161\"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\"   {
162	           yylval.sval = processString (yytext+1, yyleng-2);
163		   return STRING;
164		}
165
166\"              {
167                   error (_("ill-formed or non-terminated string literal"));
168		}
169
170
171if		{
172                  rewind_to_char ('i');
173		  return 0;
174		}
175
176task            {
177                  rewind_to_char ('t');
178		  return 0;
179		}
180
181thread{WHITE}+{DIG} {
182                  /* This keyword signals the end of the expression and
183                     will be processed separately.  */
184                  rewind_to_char ('t');
185		  return 0;
186		}
187
188	/* ADA KEYWORDS */
189
190abs		{ return ABS; }
191and		{ return _AND_; }
192else		{ return ELSE; }
193in		{ return IN; }
194mod		{ return MOD; }
195new		{ return NEW; }
196not		{ return NOT; }
197null		{ return NULL_PTR; }
198or		{ return OR; }
199others          { return OTHERS; }
200rem		{ return REM; }
201then		{ return THEN; }
202xor		{ return XOR; }
203
204	/* BOOLEAN "KEYWORDS" */
205
206 /* True and False are not keywords in Ada, but rather enumeration constants.
207    However, the boolean type is no longer represented as an enum, so True
208    and False are no longer defined in symbol tables.  We compromise by
209    making them keywords (when bare). */
210
211true		{ return TRUEKEYWORD; }
212false		{ return FALSEKEYWORD; }
213
214        /* ATTRIBUTES */
215
216{TICK}[a-zA-Z][a-zA-Z]+ { BEGIN INITIAL; return processAttribute (yytext+1); }
217
218	/* PUNCTUATION */
219
220"=>"		{ return ARROW; }
221".."		{ return DOTDOT; }
222"**"		{ return STARSTAR; }
223":="		{ return ASSIGN; }
224"/="		{ return NOTEQUAL; }
225"<="		{ return LEQ; }
226">="		{ return GEQ; }
227
228<BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }
229
230[-&*+./:<>=|;\[\]] { return yytext[0]; }
231
232","		{ if (paren_depth == 0 && comma_terminates)
233		    {
234		      rewind_to_char (',');
235		      return 0;
236		    }
237		  else
238		    return ',';
239		}
240
241"("		{ paren_depth += 1; return '('; }
242")"		{ if (paren_depth == 0)
243		    {
244		      rewind_to_char (')');
245		      return 0;
246		    }
247		  else
248 		    {
249		      paren_depth -= 1;
250		      return ')';
251		    }
252		}
253
254"."{WHITE}*all  { return DOT_ALL; }
255
256"."{WHITE}*{ID} {
257	 	  yylval.sval = processId (yytext+1, yyleng-1);
258	          return DOT_ID;
259		}
260
261{ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")?  {
262                  int all_posn = find_dot_all (yytext);
263
264                  if (all_posn == -1 && yytext[yyleng-1] == '\'')
265		    {
266		      BEGIN BEFORE_QUAL_QUOTE;
267		      yyless (yyleng-1);
268		    }
269                  else if (all_posn >= 0)
270		    yyless (all_posn);
271                  yylval.sval = processId (yytext, yyleng);
272                  return NAME;
273               }
274
275
276	/* GDB EXPRESSION CONSTRUCTS  */
277
278"'"[^']+"'"{WHITE}*:: {
279                  yyless (yyleng - 2);
280		  yylval.sval = processId (yytext, yyleng);
281		  return NAME;
282		}
283
284"::"            { return COLONCOLON; }
285
286[{}@]		{ return yytext[0]; }
287
288	/* REGISTERS AND GDB CONVENIENCE VARIABLES */
289
290"$"({LETTER}|{DIG}|"$")*  {
291		  yylval.sval.ptr = yytext;
292		  yylval.sval.length = yyleng;
293		  return DOLLAR_VARIABLE;
294		}
295
296	/* CATCH-ALL ERROR CASE */
297
298.		{ error (_("Invalid character '%s' in expression."), yytext); }
299%%
300
301#include <ctype.h>
302/* Initialize the lexer for processing new expression. */
303
304static void
305lexer_init (FILE *inp)
306{
307  BEGIN INITIAL;
308  yyrestart (inp);
309}
310
311
312/* Copy S2 to S1, removing all underscores, and downcasing all letters.  */
313
314static void
315canonicalizeNumeral (char *s1, const char *s2)
316{
317  for (; *s2 != '\000'; s2 += 1)
318    {
319      if (*s2 != '_')
320	{
321	  *s1 = tolower(*s2);
322	  s1 += 1;
323	}
324    }
325  s1[0] = '\000';
326}
327
328/* Interprets the prefix of NUM that consists of digits of the given BASE
329   as an integer of that BASE, with the string EXP as an exponent.
330   Puts value in yylval, and returns INT, if the string is valid.  Causes
331   an error if the number is improperly formated.   BASE, if NULL, defaults
332   to "10", and EXP to "1".  The EXP does not contain a leading 'e' or 'E'.
333 */
334
335static int
336processInt (struct parser_state *par_state, const char *base0,
337	    const char *num0, const char *exp0)
338{
339  ULONGEST result;
340  long exp;
341  int base;
342  const char *trailer;
343
344  if (base0 == NULL)
345    base = 10;
346  else
347    {
348      base = strtol (base0, (char **) NULL, 10);
349      if (base < 2 || base > 16)
350	error (_("Invalid base: %d."), base);
351    }
352
353  if (exp0 == NULL)
354    exp = 0;
355  else
356    exp = strtol(exp0, (char **) NULL, 10);
357
358  errno = 0;
359  result = strtoulst (num0, &trailer, base);
360  if (errno == ERANGE)
361    error (_("Integer literal out of range"));
362  if (isxdigit(*trailer))
363    error (_("Invalid digit `%c' in based literal"), *trailer);
364
365  while (exp > 0)
366    {
367      if (result > (ULONG_MAX / base))
368	error (_("Integer literal out of range"));
369      result *= base;
370      exp -= 1;
371    }
372
373  if ((result >> (gdbarch_int_bit (parse_gdbarch (par_state))-1)) == 0)
374    yylval.typed_val.type = type_int (par_state);
375  else if ((result >> (gdbarch_long_bit (parse_gdbarch (par_state))-1)) == 0)
376    yylval.typed_val.type = type_long (par_state);
377  else if (((result >> (gdbarch_long_bit (parse_gdbarch (par_state))-1)) >> 1) == 0)
378    {
379      /* We have a number representable as an unsigned integer quantity.
380         For consistency with the C treatment, we will treat it as an
381	 anonymous modular (unsigned) quantity.  Alas, the types are such
382	 that we need to store .val as a signed quantity.  Sorry
383         for the mess, but C doesn't officially guarantee that a simple
384         assignment does the trick (no, it doesn't; read the reference manual).
385       */
386      yylval.typed_val.type
387	= builtin_type (parse_gdbarch (par_state))->builtin_unsigned_long;
388      if (result & LONGEST_SIGN)
389	yylval.typed_val.val =
390	  (LONGEST) (result & ~LONGEST_SIGN)
391	  - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
392      else
393	yylval.typed_val.val = (LONGEST) result;
394      return INT;
395    }
396  else
397    yylval.typed_val.type = type_long_long (par_state);
398
399  yylval.typed_val.val = (LONGEST) result;
400  return INT;
401}
402
403static int
404processReal (struct parser_state *par_state, const char *num0)
405{
406  yylval.typed_val_float.type = type_long_double (par_state);
407
408  bool parsed = parse_float (num0, strlen (num0),
409			     yylval.typed_val_float.type,
410			     yylval.typed_val_float.val);
411  gdb_assert (parsed);
412  return FLOAT;
413}
414
415
416/* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym.  The
417   resulting string is valid until the next call to ada_parse.  If
418   NAME0 contains the substring "___", it is assumed to be already
419   encoded and the resulting name is equal to it.  Similarly, if the name
420   starts with '<', it is copied verbatim.  Otherwise, it differs
421   from NAME0 in that:
422    + Characters between '...' are transfered verbatim to yylval.ssym.
423    + Trailing "'" characters in quoted sequences are removed (a leading quote is
424      preserved to indicate that the name is not to be GNAT-encoded).
425    + Unquoted whitespace is removed.
426    + Unquoted alphabetic characters are mapped to lower case.
427   Result is returned as a struct stoken, but for convenience, the string
428   is also null-terminated.  Result string valid until the next call of
429   ada_parse.
430 */
431static struct stoken
432processId (const char *name0, int len)
433{
434  char *name = (char *) obstack_alloc (&temp_parse_space, len + 11);
435  int i0, i;
436  struct stoken result;
437
438  result.ptr = name;
439  while (len > 0 && isspace (name0[len-1]))
440    len -= 1;
441
442  if (name0[0] == '<' || strstr (name0, "___") != NULL)
443    {
444      strncpy (name, name0, len);
445      name[len] = '\000';
446      result.length = len;
447      return result;
448    }
449
450  i = i0 = 0;
451  while (i0 < len)
452    {
453      if (isalnum (name0[i0]))
454	{
455	  name[i] = tolower (name0[i0]);
456	  i += 1; i0 += 1;
457	}
458      else switch (name0[i0])
459	{
460	default:
461	  name[i] = name0[i0];
462	  i += 1; i0 += 1;
463	  break;
464	case ' ': case '\t':
465	  i0 += 1;
466	  break;
467	case '\'':
468	  do
469	    {
470	      name[i] = name0[i0];
471	      i += 1; i0 += 1;
472	    }
473	  while (i0 < len && name0[i0] != '\'');
474	  i0 += 1;
475	  break;
476	}
477    }
478  name[i] = '\000';
479
480  result.length = i;
481  return result;
482}
483
484/* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
485   with special hex character notations replaced with characters.
486   Result valid until the next call to ada_parse.  */
487
488static struct stoken
489processString (const char *text, int len)
490{
491  const char *p;
492  char *q;
493  const char *lim = text + len;
494  struct stoken result;
495
496  q = (char *) obstack_alloc (&temp_parse_space, len);
497  result.ptr = q;
498  p = text;
499  while (p < lim)
500    {
501      if (p[0] == '[' && p[1] == '"' && p+2 < lim)
502         {
503           if (p[2] == '"')  /* "...["""]... */
504             {
505               *q = '"';
506	       p += 4;
507	     }
508           else
509	     {
510               int chr;
511	       sscanf (p+2, "%2x", &chr);
512	       *q = (char) chr;
513	       p += 5;
514	     }
515         }
516       else
517         *q = *p;
518       q += 1;
519       p += 1;
520     }
521  result.length = q - result.ptr;
522  return result;
523}
524
525/* Returns the position within STR of the '.' in a
526   '.{WHITE}*all' component of a dotted name, or -1 if there is none.
527   Note: we actually don't need this routine, since 'all' can never be an
528   Ada identifier.  Thus, looking up foo.all or foo.all.x as a name
529   must fail, and will eventually be interpreted as (foo).all or
530   (foo).all.x.  However, this does avoid an extraneous lookup. */
531
532static int
533find_dot_all (const char *str)
534{
535  int i;
536
537  for (i = 0; str[i] != '\000'; i++)
538    if (str[i] == '.')
539      {
540	int i0 = i;
541
542	do
543	  i += 1;
544	while (isspace (str[i]));
545
546	if (strncasecmp (str + i, "all", 3) == 0
547	    && !isalnum (str[i + 3]) && str[i + 3] != '_')
548	  return i0;
549      }
550  return -1;
551}
552
553/* Returns non-zero iff string SUBSEQ matches a subsequence of STR, ignoring
554   case.  */
555
556static int
557subseqMatch (const char *subseq, const char *str)
558{
559  if (subseq[0] == '\0')
560    return 1;
561  else if (str[0] == '\0')
562    return 0;
563  else if (tolower (subseq[0]) == tolower (str[0]))
564    return subseqMatch (subseq+1, str+1) || subseqMatch (subseq, str+1);
565  else
566    return subseqMatch (subseq, str+1);
567}
568
569
570static struct { const char *name; int code; }
571attributes[] = {
572  { "address", TICK_ADDRESS },
573  { "unchecked_access", TICK_ACCESS },
574  { "unrestricted_access", TICK_ACCESS },
575  { "access", TICK_ACCESS },
576  { "first", TICK_FIRST },
577  { "last", TICK_LAST },
578  { "length", TICK_LENGTH },
579  { "max", TICK_MAX },
580  { "min", TICK_MIN },
581  { "modulus", TICK_MODULUS },
582  { "pos", TICK_POS },
583  { "range", TICK_RANGE },
584  { "size", TICK_SIZE },
585  { "tag", TICK_TAG },
586  { "val", TICK_VAL },
587  { NULL, -1 }
588};
589
590/* Return the syntactic code corresponding to the attribute name or
591   abbreviation STR.  */
592
593static int
594processAttribute (const char *str)
595{
596  int i, k;
597
598  for (i = 0; attributes[i].code != -1; i += 1)
599    if (strcasecmp (str, attributes[i].name) == 0)
600      return attributes[i].code;
601
602  for (i = 0, k = -1; attributes[i].code != -1; i += 1)
603    if (subseqMatch (str, attributes[i].name))
604      {
605	if (k == -1)
606	  k = i;
607	else
608	  error (_("ambiguous attribute name: `%s'"), str);
609      }
610  if (k == -1)
611    error (_("unrecognized attribute: `%s'"), str);
612
613  return attributes[k].code;
614}
615
616/* Back up lexptr by yyleng and then to the rightmost occurrence of
617   character CH, case-folded (there must be one).  WARNING: since
618   lexptr points to the next input character that Flex has not yet
619   transferred to its internal buffer, the use of this function
620   depends on the assumption that Flex calls YY_INPUT only when it is
621   logically necessary to do so (thus, there is no reading ahead
622   farther than needed to identify the next token.)  */
623
624static void
625rewind_to_char (int ch)
626{
627  lexptr -= yyleng;
628  while (toupper (*lexptr) != toupper (ch))
629    lexptr -= 1;
630  yyrestart (NULL);
631}
632
633int
634yywrap(void)
635{
636  return 1;
637}
638
639/* Dummy definition to suppress warnings about unused static definitions. */
640typedef void (*dummy_function) ();
641dummy_function ada_flex_use[] =
642{
643  (dummy_function) yyunput
644};
645
646DIAGNOSTIC_POP
647