/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.

   YY_DECL gives the flex-generated scanner the file-local name yylex1(),
   which leaves the name yylex() free for the hand-written second stage
   defined in the user-code section of this file.  */
#define YY_DECL		static int yylex1(void)

%}

/* Identifiers.  '$' is accepted anywhere a letter or underscore is.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hexadecimal, optionally followed
   by one of the suffixes U, L, UL or LU in either case.  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fraction (digits on at least one side of
   the '.') with an optional exponent, or plain digits with a mandatory
   exponent; both forms take an optional F or L suffix.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals with an optional L prefix.  A backslash
   always consumes the character after it, so an escaped quote does not
   terminate the literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: <op>= for each <op> in the bracket list, plus
   && || -> << >>.
   NOTE(review): the list contains "~=" but not "!="; the pattern is left
   exactly as it was because a different token split may change the text
   that is collected from the input -- confirm before altering it.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* Version 2 checksumming does proper tokenization; version 1 wasn't
   quite so pedantic.  */
%s V2_TOKENS

/* We don't do multiple input files.  */
%option noyywrap

%%


 /* Keep track of our location in the original source files.  A line of
    the form `# <integer> "<name>"' is handed up as FILENAME so that
    yylex() can record the new file name and line number; any other line
    that starts with '#' is discarded and merely counted.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


 /* Literals and identifiers are recognized in every start condition.  */
{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  These rules only apply while the V2_TOKENS
    start condition is active.  */
<V2_TOKENS>{MC_TOKEN}			return OTHER;
<V2_TOKENS>{INT}			return INT;
<V2_TOKENS>{REAL}			return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%

97/* Bring in the keyword recognizer.  */
98
99#include "keywords.c"
100
101
/* Macros to append to our phrase collection list.

   _APP(T, L) stores the token text T of length L in a list node:

     - the spare node `next_node' becomes `cur_node';
     - a new spare is allocated whose `next' points back at `cur_node',
       so nodes are chained newest-first;
     - `cur_node' receives a private copy of T and the tag SYM_NORMAL.

   L+1 bytes are copied so that the byte at T[L] (the terminating NUL)
   comes along.  `cur_node' and `next_node' must be in scope where the
   macro is used.  L is expanded twice, so it must have no side effects;
   both arguments are parenthesized so that any expression may be passed.  */

#define _APP(T,L)	do {						       \
			  cur_node = next_node;				       \
			  next_node = xmalloc(sizeof(*next_node));	       \
			  next_node->next = cur_node;			       \
			  cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1); \
			  cur_node->tag = SYM_NORMAL;			       \
			} while (0)

/* Append the text of the token most recently matched by the scanner.  */
#define APP		_APP(yytext, yyleng)


115/* The second stage lexer.  Here we incorporate knowledge of the state
116   of the parser to tailor the tokens that are returned.  */
117
118int
119yylex(void)
120{
121  static enum {
122    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
123    ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
124    ST_TABLE_5, ST_TABLE_6
125  } lexstate = ST_NOTSTARTED;
126
127  static int suppress_type_lookup, dont_want_brace_phrase;
128  static struct string_list *next_node;
129
130  int token, count = 0;
131  struct string_list *cur_node;
132
133  if (lexstate == ST_NOTSTARTED)
134    {
135      BEGIN(V2_TOKENS);
136      next_node = xmalloc(sizeof(*next_node));
137      next_node->next = NULL;
138      lexstate = ST_NORMAL;
139    }
140
141repeat:
142  token = yylex1();
143
144  if (token == 0)
145    return 0;
146  else if (token == FILENAME)
147    {
148      char *file, *e;
149
150      /* Save the filename and line number for later error messages.  */
151
152      if (cur_filename)
153	free(cur_filename);
154
155      file = strchr(yytext, '\"')+1;
156      e = strchr(file, '\"');
157      *e = '\0';
158      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
159      cur_line = atoi(yytext+2);
160
161      goto repeat;
162    }
163
164  switch (lexstate)
165    {
166    case ST_NORMAL:
167      switch (token)
168	{
169	case IDENT:
170	  APP;
171	  {
172	    const struct resword *r = is_reserved_word(yytext, yyleng);
173	    if (r)
174	      {
175		switch (token = r->token)
176		  {
177		  case ATTRIBUTE_KEYW:
178		    lexstate = ST_ATTRIBUTE;
179		    count = 0;
180		    goto repeat;
181		  case ASM_KEYW:
182		    lexstate = ST_ASM;
183		    count = 0;
184		    goto repeat;
185
186		  case STRUCT_KEYW:
187		  case UNION_KEYW:
188		    dont_want_brace_phrase = 3;
189		  case ENUM_KEYW:
190		    suppress_type_lookup = 2;
191		    goto fini;
192
193		  case EXPORT_SYMBOL_KEYW:
194		      goto fini;
195		  }
196	      }
197	    if (!suppress_type_lookup)
198	      {
199		struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
200		if (sym && sym->type == SYM_TYPEDEF)
201		  token = TYPE;
202	      }
203	  }
204	  break;
205
206	case '[':
207	  APP;
208	  lexstate = ST_BRACKET;
209	  count = 1;
210	  goto repeat;
211
212	case '{':
213	  APP;
214	  if (dont_want_brace_phrase)
215	    break;
216	  lexstate = ST_BRACE;
217	  count = 1;
218	  goto repeat;
219
220	case '=': case ':':
221	  APP;
222	  lexstate = ST_EXPRESSION;
223	  break;
224
225	case DOTS:
226	default:
227	  APP;
228	  break;
229	}
230      break;
231
232    case ST_ATTRIBUTE:
233      APP;
234      switch (token)
235	{
236	case '(':
237	  ++count;
238	  goto repeat;
239	case ')':
240	  if (--count == 0)
241	    {
242	      lexstate = ST_NORMAL;
243	      token = ATTRIBUTE_PHRASE;
244	      break;
245	    }
246	  goto repeat;
247	default:
248	  goto repeat;
249	}
250      break;
251
252    case ST_ASM:
253      APP;
254      switch (token)
255	{
256	case '(':
257	  ++count;
258	  goto repeat;
259	case ')':
260	  if (--count == 0)
261	    {
262	      lexstate = ST_NORMAL;
263	      token = ASM_PHRASE;
264	      break;
265	    }
266	  goto repeat;
267	default:
268	  goto repeat;
269	}
270      break;
271
272    case ST_BRACKET:
273      APP;
274      switch (token)
275	{
276	case '[':
277	  ++count;
278	  goto repeat;
279	case ']':
280	  if (--count == 0)
281	    {
282	      lexstate = ST_NORMAL;
283	      token = BRACKET_PHRASE;
284	      break;
285	    }
286	  goto repeat;
287	default:
288	  goto repeat;
289	}
290      break;
291
292    case ST_BRACE:
293      APP;
294      switch (token)
295	{
296	case '{':
297	  ++count;
298	  goto repeat;
299	case '}':
300	  if (--count == 0)
301	    {
302	      lexstate = ST_NORMAL;
303	      token = BRACE_PHRASE;
304	      break;
305	    }
306	  goto repeat;
307	default:
308	  goto repeat;
309	}
310      break;
311
312    case ST_EXPRESSION:
313      switch (token)
314	{
315	case '(': case '[': case '{':
316	  ++count;
317	  APP;
318	  goto repeat;
319	case ')': case ']': case '}':
320	  --count;
321	  APP;
322	  goto repeat;
323	case ',': case ';':
324	  if (count == 0)
325	    {
326	      /* Put back the token we just read so's we can find it again
327		 after registering the expression.  */
328	      unput(token);
329
330	      lexstate = ST_NORMAL;
331	      token = EXPRESSION_PHRASE;
332	      break;
333	    }
334	  APP;
335	  goto repeat;
336	default:
337	  APP;
338	  goto repeat;
339	}
340      break;
341
342    case ST_TABLE_1:
343      goto repeat;
344
345    case ST_TABLE_2:
346      if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
347	{
348	  token = EXPORT_SYMBOL_KEYW;
349	  lexstate = ST_TABLE_5;
350	  APP;
351	  break;
352	}
353      lexstate = ST_TABLE_6;
354      /* FALLTHRU */
355
356    case ST_TABLE_6:
357      switch (token)
358	{
359	case '{': case '[': case '(':
360	  ++count;
361	  break;
362	case '}': case ']': case ')':
363	  --count;
364	  break;
365	case ',':
366	  if (count == 0)
367	    lexstate = ST_TABLE_2;
368	  break;
369	};
370      goto repeat;
371
372    case ST_TABLE_3:
373      goto repeat;
374
375    case ST_TABLE_4:
376      if (token == ';')
377	lexstate = ST_NORMAL;
378      goto repeat;
379
380    case ST_TABLE_5:
381      switch (token)
382	{
383	case ',':
384	  token = ';';
385	  lexstate = ST_TABLE_2;
386	  APP;
387	  break;
388	default:
389	  APP;
390	  break;
391	}
392      break;
393
394    default:
395      exit(1);
396    }
397fini:
398
399  if (suppress_type_lookup > 0)
400    --suppress_type_lookup;
401  if (dont_want_brace_phrase > 0)
402    --dont_want_brace_phrase;
403
404  yylval = &next_node->next;
405
406  return token;
407}
408