1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * Lexical analysis for genksyms.
4 * Copyright 1996, 1997 Linux International.
5 *
6 * New implementation contributed by Richard Henderson <rth@tamu.edu>
7 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
8 *
9 * Taken from Linux modutils 2.4.22.
10 */
11
12%{
13
14#include <limits.h>
15#include <stdlib.h>
16#include <string.h>
17#include <ctype.h>
18
19#include "genksyms.h"
20#include "parse.tab.h"
21
/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.

   YY_DECL overrides the prototype flex gives its generated scanner, so
   the raw tokenizer is emitted as the file-local yylex1() and the name
   yylex() stays free for the second-stage wrapper defined further down
   in this file.  */
#define YY_DECL		static int yylex1(void)
25
26%}
27
/* Identifiers: letters, digits, underscore and '$'; no leading digit.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal (this also covers a lone "0"), decimal and
   hexadecimal, optionally followed by one suffix out of U, L, UL or LU
   in either letter case.
   NOTE(review): LL/ULL style suffixes are not part of I_SUF, so their
   trailing letters are left for a following token -- confirm intended.  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fraction with an optional exponent, or a
   digit string with a mandatory exponent; an F or L suffix may follow.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals with an optional wide 'L' prefix.  A
   backslash always consumes the character after it, so an escaped quote
   does not end the literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: <op>= forms, "&&", "||", "->", "<<", ">>".  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

/* Do not generate the input() helper.  */
%option noinput
50
51%%
52
53
 /* Keep track of our location in the original source files.
    A line of the form  # <integer> "<file>" ...  is handed to the second
    stage as FILENAME; this rule does not touch cur_line itself.  Any
    other line starting with '#' is swallowed whole and, like a bare
    newline, only advances the line counter.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


 /* Literals and identifiers.  Keywords are matched as IDENT here; they
    are told apart later by the second stage lexer.  */
{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

 /* The ellipsis is three characters long, so it wins over the
    single-character catch-all rule below by longest match.  */
"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];
79
80
81%%
82
83/* Bring in the keyword recognizer.  */
84
85#include "keywords.c"
86
87
/* Macros to append to our phrase collection list.  */

/*
 * _APP(T, L) records the L-character token text T (plus its terminating
 * NUL, which must be present at T[L]) in the node list used by yylex().
 *
 * The list always ends in an empty placeholder, next_node.  Appending
 * fills that placeholder in, which makes it cur_node, and allocates a
 * fresh placeholder whose ->next points back at the node just filled.
 * The macro expects cur_node, next_node and in_source_file to be visible
 * at the point of use.
 *
 * Both arguments are parenthesized in the expansion and the length is
 * evaluated exactly once, so expressions such as "c ? 2 : 5" are safe
 * to pass as L.
 *
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case the only effect will be that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare;
 * so far this was only observed in include/linux/telephony.h.
 */
#define _APP(T,L)	do {						   \
			  size_t app_size_ = (size_t)(L) + 1;		   \
			  cur_node = next_node;				   \
			  next_node = xmalloc(sizeof(*next_node));	   \
			  next_node->next = cur_node;			   \
			  cur_node->string =				   \
			    memcpy(xmalloc(app_size_), (T), app_size_);	   \
			  cur_node->tag =				   \
			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
			  cur_node->in_source_file = in_source_file;       \
			} while (0)

/* Append the text of the token flex matched most recently.  */
#define APP		_APP(yytext, yyleng)
111
112
113/* The second stage lexer.  Here we incorporate knowledge of the state
114   of the parser to tailor the tokens that are returned.  */
115
116int
117yylex(void)
118{
119  static enum {
120    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
121    ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
122  } lexstate = ST_NOTSTARTED;
123
124  static int suppress_type_lookup, dont_want_brace_phrase;
125  static struct string_list *next_node;
126  static char *source_file;
127
128  int token, count = 0;
129  struct string_list *cur_node;
130
131  if (lexstate == ST_NOTSTARTED)
132    {
133      next_node = xmalloc(sizeof(*next_node));
134      next_node->next = NULL;
135      lexstate = ST_NORMAL;
136    }
137
138repeat:
139  token = yylex1();
140
141  if (token == 0)
142    return 0;
143  else if (token == FILENAME)
144    {
145      char *file, *e;
146
147      /* Save the filename and line number for later error messages.  */
148
149      if (cur_filename)
150	free(cur_filename);
151
152      file = strchr(yytext, '\"')+1;
153      e = strchr(file, '\"');
154      *e = '\0';
155      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
156      cur_line = atoi(yytext+2);
157
158      if (!source_file) {
159        source_file = xstrdup(cur_filename);
160        in_source_file = 1;
161      } else {
162        in_source_file = (strcmp(cur_filename, source_file) == 0);
163      }
164
165      goto repeat;
166    }
167
168  switch (lexstate)
169    {
170    case ST_NORMAL:
171      switch (token)
172	{
173	case IDENT:
174	  APP;
175	  {
176	    int r = is_reserved_word(yytext, yyleng);
177	    if (r >= 0)
178	      {
179		switch (token = r)
180		  {
181		  case ATTRIBUTE_KEYW:
182		    lexstate = ST_ATTRIBUTE;
183		    count = 0;
184		    goto repeat;
185		  case ASM_KEYW:
186		    lexstate = ST_ASM;
187		    count = 0;
188		    goto repeat;
189		  case TYPEOF_KEYW:
190		    lexstate = ST_TYPEOF;
191		    count = 0;
192		    goto repeat;
193
194		  case STRUCT_KEYW:
195		  case UNION_KEYW:
196		  case ENUM_KEYW:
197		    dont_want_brace_phrase = 3;
198		    suppress_type_lookup = 2;
199		    goto fini;
200
201		  case EXPORT_SYMBOL_KEYW:
202		      goto fini;
203
204		  case STATIC_ASSERT_KEYW:
205		    lexstate = ST_STATIC_ASSERT;
206		    count = 0;
207		    goto repeat;
208		  }
209	      }
210	    if (!suppress_type_lookup)
211	      {
212		if (find_symbol(yytext, SYM_TYPEDEF, 1))
213		  token = TYPE;
214	      }
215	  }
216	  break;
217
218	case '[':
219	  APP;
220	  lexstate = ST_BRACKET;
221	  count = 1;
222	  goto repeat;
223
224	case '{':
225	  APP;
226	  if (dont_want_brace_phrase)
227	    break;
228	  lexstate = ST_BRACE;
229	  count = 1;
230	  goto repeat;
231
232	case '=': case ':':
233	  APP;
234	  lexstate = ST_EXPRESSION;
235	  break;
236
237	default:
238	  APP;
239	  break;
240	}
241      break;
242
243    case ST_ATTRIBUTE:
244      APP;
245      switch (token)
246	{
247	case '(':
248	  ++count;
249	  goto repeat;
250	case ')':
251	  if (--count == 0)
252	    {
253	      lexstate = ST_NORMAL;
254	      token = ATTRIBUTE_PHRASE;
255	      break;
256	    }
257	  goto repeat;
258	default:
259	  goto repeat;
260	}
261      break;
262
263    case ST_ASM:
264      APP;
265      switch (token)
266	{
267	case '(':
268	  ++count;
269	  goto repeat;
270	case ')':
271	  if (--count == 0)
272	    {
273	      lexstate = ST_NORMAL;
274	      token = ASM_PHRASE;
275	      break;
276	    }
277	  goto repeat;
278	default:
279	  goto repeat;
280	}
281      break;
282
283    case ST_TYPEOF_1:
284      if (token == IDENT)
285	{
286	  if (is_reserved_word(yytext, yyleng) >= 0
287	      || find_symbol(yytext, SYM_TYPEDEF, 1))
288	    {
289	      yyless(0);
290	      unput('(');
291	      lexstate = ST_NORMAL;
292	      token = TYPEOF_KEYW;
293	      break;
294	    }
295	  _APP("(", 1);
296	}
297	lexstate = ST_TYPEOF;
298	/* FALLTHRU */
299
300    case ST_TYPEOF:
301      switch (token)
302	{
303	case '(':
304	  if ( ++count == 1 )
305	    lexstate = ST_TYPEOF_1;
306	  else
307	    APP;
308	  goto repeat;
309	case ')':
310	  APP;
311	  if (--count == 0)
312	    {
313	      lexstate = ST_NORMAL;
314	      token = TYPEOF_PHRASE;
315	      break;
316	    }
317	  goto repeat;
318	default:
319	  APP;
320	  goto repeat;
321	}
322      break;
323
324    case ST_BRACKET:
325      APP;
326      switch (token)
327	{
328	case '[':
329	  ++count;
330	  goto repeat;
331	case ']':
332	  if (--count == 0)
333	    {
334	      lexstate = ST_NORMAL;
335	      token = BRACKET_PHRASE;
336	      break;
337	    }
338	  goto repeat;
339	default:
340	  goto repeat;
341	}
342      break;
343
344    case ST_BRACE:
345      APP;
346      switch (token)
347	{
348	case '{':
349	  ++count;
350	  goto repeat;
351	case '}':
352	  if (--count == 0)
353	    {
354	      lexstate = ST_NORMAL;
355	      token = BRACE_PHRASE;
356	      break;
357	    }
358	  goto repeat;
359	default:
360	  goto repeat;
361	}
362      break;
363
364    case ST_EXPRESSION:
365      switch (token)
366	{
367	case '(': case '[': case '{':
368	  ++count;
369	  APP;
370	  goto repeat;
371	case '}':
372	  /* is this the last line of an enum declaration? */
373	  if (count == 0)
374	    {
375	      /* Put back the token we just read so's we can find it again
376		 after registering the expression.  */
377	      unput(token);
378
379	      lexstate = ST_NORMAL;
380	      token = EXPRESSION_PHRASE;
381	      break;
382	    }
383	  /* FALLTHRU */
384	case ')': case ']':
385	  --count;
386	  APP;
387	  goto repeat;
388	case ',': case ';':
389	  if (count == 0)
390	    {
391	      /* Put back the token we just read so's we can find it again
392		 after registering the expression.  */
393	      unput(token);
394
395	      lexstate = ST_NORMAL;
396	      token = EXPRESSION_PHRASE;
397	      break;
398	    }
399	  APP;
400	  goto repeat;
401	default:
402	  APP;
403	  goto repeat;
404	}
405      break;
406
407    case ST_STATIC_ASSERT:
408      APP;
409      switch (token)
410	{
411	case '(':
412	  ++count;
413	  goto repeat;
414	case ')':
415	  if (--count == 0)
416	    {
417	      lexstate = ST_NORMAL;
418	      token = STATIC_ASSERT_PHRASE;
419	      break;
420	    }
421	  goto repeat;
422	default:
423	  goto repeat;
424	}
425      break;
426
427    default:
428      exit(1);
429    }
430fini:
431
432  if (suppress_type_lookup > 0)
433    --suppress_type_lookup;
434  if (dont_want_brace_phrase > 0)
435    --dont_want_brace_phrase;
436
437  yylval = &next_node->next;
438
439  return token;
440}
441