1%{
2/*
3 *  FILE: lexer.l
4 *  AUTH: Michael John Radwin <mjr@acm.org>
5 *
6 *  DESC: stubgen lexer.  Portions borrowed from Newcastle
7 *  University's Arjuna project (http://arjuna.ncl.ac.uk/), and
8 *  Jeff Lee's ANSI Grammar
9 *  (ftp://ftp.uu.net/usenet/net.sources/ansi.c.grammar.Z)
10 *
11 *  DATE: Thu Aug 15 13:10:06 EDT 1996
12 *   $Id: lexer.l 10 2002-07-09 12:24:59Z ejakowatz $
13 *
14 *  Copyright (c) 1996-1998  Michael John Radwin
15 *
16 *  This program is free software; you can redistribute it and/or modify
17 *  it under the terms of the GNU General Public License as published by
18 *  the Free Software Foundation; either version 2 of the License, or
19 *  (at your option) any later version.
20 *
21 *  This program is distributed in the hope that it will be useful,
22 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24 *  GNU General Public License for more details.
25 *
26 *  You should have received a copy of the GNU General Public License
27 *  along with this program; if not, write to the Free Software
28 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
29 *
30 *  Modification history:
31 *  $Log: lexer.l,v $
32 *  Revision 1.1  2002/07/09 12:24:59  ejakowatz
33 *  It is accomplished ...
34 *
35 *  Revision 1.1  2001/11/07 10:06:07  ithamar
36 *  Added stubgen to CVS
37 *
38 *  Revision 1.33  1998/07/27 19:16:57  mradwin
39 *  added some c++ keywords
40 *  need to handle typename, using, and namespace
41 *
42 *  Revision 1.32  1998/05/11 19:49:11  mradwin
43 *  Version 2.03 (updated copyright information).
44 *
45 *  Revision 1.31  1998/04/07 23:39:55  mradwin
46 *  changed error-handling code significantly.  functions
47 *  like count() are now contributing to linebuf so we get correct
48 *  parse error messages during lineno == 1 and other situations.
49 *  also, instead of calling fatal() for collect*() functions,
50 *  we return -1 and let the parser recover more gracefully.
51 *
52 *  Revision 1.30  1998/01/12 19:39:11  mradwin
53 *  modified rcsid
54 *
55 *  Revision 1.29  1997/11/13 22:37:31  mradwin
56 *  changed char[] to char * to make non-gcc compilers
57 *  a little happier.  We need to #define const to nothing
58 *  for other compilers as well.
59 *
60 *  Revision 1.28  1997/11/13 21:29:30  mradwin
61 *  moved code from parser.y to main.c
62 *
63 *  Revision 1.27  1997/11/13 21:10:17  mradwin
64 *  renamed stubgen.[ly] to parser.y lexer.l
65 *
66 *  Revision 1.26  1997/11/11 03:52:06  mradwin
67 *  changed fatal()
68 *
69 *  Revision 1.25  1997/11/05 03:02:02  mradwin
70 *  Modified logging routines.
71 *
72 *  Revision 1.24  1997/11/01 23:12:43  mradwin
73 *  greatly improved error-recovery.  errors no longer spill over
74 *  into other files because the yyerror state is properly reset.
75 *
76 *  Revision 1.23  1997/10/26 23:16:32  mradwin
77 *  changed inform_user and fatal functions to use varargs
78 *
79 *  Revision 1.22  1997/10/26 22:46:48  mradwin
80 *  support macros within comments, etc.
81 *
82 *  Revision 1.21  1997/10/16 19:42:48  mradwin
83 *  added support for elipses, static member/array initializers,
84 *  and bitfields.
85 *
86 *  Revision 1.20  1997/10/16 17:36:06  mradwin
87 *  Fixed compiler warning on win32 from <ctype.h> and isspace()
88 *
89 *  Revision 1.19  1997/10/16 17:12:59  mradwin
90 *  handle extern "C" blocks better now, and support multi-line
91 *  macros.  still need error-checking.
92 *
93 *  Revision 1.18  1997/10/15 22:09:06  mradwin
94 *  changed tons of names.  stubelem -> sytaxelem,
95 *  stubin -> infile, stubout -> outfile, stublog -> logfile.
96 *
97 *  Revision 1.17  1997/10/15 21:45:13  mradwin
98 *  rearranged table.[ch] and util.[ch] so that util pkg
99 *  knows nothing about syntaxelems.
100 *
101 *  Revision 1.16  1997/10/15 17:42:37  mradwin
102 *  added support for 'extern "C" { ... }' blocks.
103 *
104 *  Revision 1.15  1997/09/05 19:17:06  mradwin
105 *  works for scanning old versions, except for parameter
106 *  names that differ between .H and .C files.
107 *
108 *  Revision 1.14  1997/09/05 16:37:41  mradwin
109 *  rcsid
110 *
111 *  Revision 1.13  1997/09/05 16:34:36  mradwin
112 *  GPL-ized code.
113 *
114 *  Revision 1.12  1997/09/05 16:13:18  mradwin
115 *  changed email address to acm.org
116 *
117 *  Revision 1.11  1996/09/12 14:44:49  mjr
118 *  Added throw decl recognition (great, another 4 bytes in syntaxelem)
119 *  and cleaned up the grammar so that const_opt appears in far fewer
120 *  places.  const_opt is by default 0 as well, so we don't need to
121 *  pass it as an arg to new_elem().
122 *
123 *  I also added a fix to a potential bug with the MINIT and INLIN
124 *  exclusive start states.  I think they could have been confused
125 *  by braces within comments, so now I'm grabbing comments in those
126 *  states as well.
127 *
128 *  Revision 1.10  1996/09/12 03:46:10  mjr
129 *  No concrete changes in code.  Just added some sanity by
130 *  factoring out code into util.[ch] and putting some prototypes
131 *  that were in table.h into stubgen.y where they belong.
132 *
133 *  Revision 1.9  1996/09/01 20:59:48  mjr
134 *  Added collectMemberInitList() function, which is similar
135 *  to collectInlineDef() and also the exclusive state MINIT
136 *
137 *  Revision 1.8  1996/08/23 05:09:19  mjr
138 *  fixed up some more portability things
139 *
140 *  Revision 1.7  1996/08/22 02:43:47  mjr
141 *  added parse error message (using O'Reilly p. 274)
142 *
143 *  Revision 1.6  1996/08/21 18:33:50  mjr
144 *  removed the buffer for inlines.  we don't care anyway.
145 *  now we can't overflow on inlines!
146 *
147 *  Revision 1.5  1996/08/21 17:40:56  mjr
148 *  added some cpp directives for porting to WIN32
149 *
150 *  Revision 1.4  1996/08/19 17:01:33  mjr
151 *  no echo now
152 *
153 *  Revision 1.3  1996/08/15 21:24:58  mjr
154 *  *** empty log message ***
155 */
156%}
157
D			[0-9]
L			[A-Za-z_]
H			[0-9A-Fa-f]
E			[eE][-+]?{D}+
FS			[fFlL]
IS			[uUlL]*
	/* Shorthands used by the token rules: D is a decimal digit, L a
	   character that may start an identifier, H a hexadecimal digit,
	   E an exponent part, FS one floating-point suffix letter and IS
	   any run of integer suffix letters. */
164
165%{
166#include <stdio.h>
167#include <string.h>
168#include <ctype.h>
169#include "table.h"
170#include "util.h"
171
172#ifdef WIN32
173/* definitions of exit, malloc, realloc, and free */
174#include <stdlib.h>
175#endif
176
177#if 0 /* #ifdef WIN32 */
178#include "y_tab.h"
179#else
180#include "y.tab.h"
181#endif
182
183#ifdef __cplusplus
184#define STUB_INPUT() yyinput()
185#else
186#define STUB_INPUT() input()
187#endif
188
189/* when we return a string, duplicate it so we can free it later.
190   we always allocate memory so we can uniformly free() it. */
191#define RETURN_STR(x) tokens_seen++; yylval.string = strdup(yytext); return(x)
192
193/* make that nasty union a value that will bus error if we misinterpret
194   the value as a pointer */
195#define RETURN_VAL(x) tokens_seen++; yylval.flag = 37; return(x)
196
197static const char rcsid[] = "$Id: lexer.l 10 2002-07-09 12:24:59Z ejakowatz $";
198
199static void count();
200static void comment();
201static void macro();
202
203char linebuf[1024]; /* null-terminated at beginning of each file */
204int lineno;         /* set to 1        at beginning of each file */
205int column;         /* set to 0        at beginning of each file */
206int tokens_seen;    /* set to 0        at beginning of each file */
207
208%}
209
%x INLIN MINIT
%%
\n.*			{ /*
                           * for debugging purposes, we grab an entire
                           * line and buffer it, then grab tokens out of
                           * it.  This lets us have more informative
                           * error messages.  See yyerror() in parser.y
                           *
                           * strncpy() does not terminate the destination
                           * when the source fills it, so copy at most
                           * sizeof(linebuf) - 1 characters and terminate
                           * explicitly.  Its zero padding also clears
                           * whatever a previous line left in the buffer.
                           */
			  strncpy(linebuf, yytext + 1, sizeof(linebuf) - 1);
			  linebuf[sizeof(linebuf) - 1] = '\0';
			  lineno++;
			  column = 0;
			  yyless(1); /* give back everything but \n */
			}
"/*"			{ comment(); /* comment() reads on until the closing delimiter or end of input */ }

"//".*                  { count(); /* line comment: only the column is advanced, no token is returned */ }
"#"                     { macro(); /* was #.* { count(); } */ }

"static"		{ count(); tokens_seen++; /* counted, but no token is returned to the parser */ }
"volatile"		{ count(); tokens_seen++; }
"auto"			{ count(); tokens_seen++; }
"extern"		{ count(); RETURN_VAL(EXTERN); }
"register"		{ count(); tokens_seen++; }
"typedef"		{ count(); tokens_seen++; }
"struct"		{ count(); RETURN_VAL(STRUCT); }
"union"			{ count(); RETURN_VAL(UNION); }
"enum"			{ count(); RETURN_VAL(ENUM); }
"const"			{ count(); RETURN_VAL(CONST); }
"template"		{ count(); RETURN_VAL(TEMPLATE); }

"typename"              { count(); tokens_seen++; /* FIXME */ }
"using"                 { count(); tokens_seen++; /* FIXME */ }
"namespace"             { count(); RETURN_VAL(CLASS); /* FIXME */ /* currently yields the same token as the class keyword */ }

"dllexport"             { count(); tokens_seen++; }
"dllimport"             { count(); tokens_seen++; }

"explicit"              { count(); tokens_seen++; }
"mutable"               { count(); tokens_seen++; }
"inline"                { count(); tokens_seen++; }
"virtual"               { count(); tokens_seen++; }
"class"                 { count(); RETURN_VAL(CLASS); }
"delete"                { count(); RETURN_VAL(DELETE); }
"new"                   { count(); RETURN_VAL(NEW); }
"friend"                { count(); RETURN_VAL(FRIEND); }
"operator"              { count(); RETURN_VAL(OPERATOR); }
"protected"             { count(); RETURN_VAL(PROTECTED); }
"private"               { count(); RETURN_VAL(PRIVATE); }
"public"                { count(); RETURN_VAL(PUBLIC); }
"throw"                 { count(); RETURN_VAL(THROW); }

"char"			{ count(); RETURN_STR(CHAR); /* RETURN_STR hands the parser a strdup copy of the matched text */ }
"short"			{ count(); RETURN_STR(SHORT); }
"int"			{ count(); RETURN_STR(INT); }
"long"			{ count(); RETURN_STR(LONG); }
"signed"		{ count(); RETURN_STR(SIGNED); }
"unsigned"		{ count(); RETURN_STR(UNSIGNED); }
"float"			{ count(); RETURN_STR(FLOAT); }
"double"		{ count(); RETURN_STR(DOUBLE); }
"void"			{ count(); RETURN_STR(VOID); }

{L}({L}|{D})*		{ count(); RETURN_STR(IDENTIFIER); /* must stay below the keyword rules: on equal-length matches the earlier rule wins */ }

0[xX]{H}+{IS}?		{ count(); RETURN_STR(CONSTANT); }
0{D}+{IS}?		{ count(); RETURN_STR(CONSTANT); /* anything matched here is also matched by the next rule; both return CONSTANT */ }
{D}+{IS}?		{ count(); RETURN_STR(CONSTANT); }
'(\\.|[^\\'])+'		{ count(); RETURN_STR(CONSTANT); /* 'fontlck */ }

{D}+{E}{FS}?		{ count(); RETURN_STR(CONSTANT); }
{D}*"."{D}+({E})?{FS}?	{ count(); RETURN_STR(CONSTANT); }
{D}+"."{D}*({E})?{FS}?	{ count(); RETURN_STR(CONSTANT); }

\"(\\.|[^\\"])*\"	{ count(); RETURN_STR(STRING_LITERAL); /* "fontlck */ /* NOTE(review): the dot does not match a newline, so a literal continued with backslash-newline does not appear to match this rule - confirm */ }

">>="			{ count(); RETURN_VAL(RIGHT_ASSIGN); /* operators: RETURN_VAL stores a dummy flag value in yylval */ }
"<<="			{ count(); RETURN_VAL(LEFT_ASSIGN); }
"+="			{ count(); RETURN_VAL(ADD_ASSIGN); }
"-="			{ count(); RETURN_VAL(SUB_ASSIGN); }
"*="			{ count(); RETURN_VAL(MUL_ASSIGN); }
"/="			{ count(); RETURN_VAL(DIV_ASSIGN); }
"%="			{ count(); RETURN_VAL(MOD_ASSIGN); }
"&="			{ count(); RETURN_VAL(AND_ASSIGN); }
"^="			{ count(); RETURN_VAL(XOR_ASSIGN); }
"|="			{ count(); RETURN_VAL(OR_ASSIGN); }
">>"			{ count(); RETURN_VAL(RIGHT_OP); }
"<<"			{ count(); RETURN_VAL(LEFT_OP); }
"++"			{ count(); RETURN_VAL(INC_OP); }
"--"			{ count(); RETURN_VAL(DEC_OP); }
"->"			{ count(); RETURN_VAL(PTR_OP); }
"->*"			{ count(); RETURN_VAL(MEM_PTR_OP); /* listed after the shorter arrow rule, but the longest match is preferred */ }
"&&"			{ count(); RETURN_VAL(AND_OP); }
"||"			{ count(); RETURN_VAL(OR_OP); }
"<="			{ count(); RETURN_VAL(LE_OP); }
">="			{ count(); RETURN_VAL(GE_OP); }
"=="			{ count(); RETURN_VAL(EQ_OP); }
"!="			{ count(); RETURN_VAL(NE_OP); }
";"			{ count(); RETURN_VAL(';'); /* single-character tokens are returned as their own character code */ }
"{"			{ count(); RETURN_VAL('{'); }
"}"			{ count(); RETURN_VAL('}'); }
","			{ count(); RETURN_VAL(','); }
":"			{ count(); RETURN_VAL(':'); }
"="			{ count(); RETURN_VAL('='); }
"("			{ count(); RETURN_VAL('('); }
")"			{ count(); RETURN_VAL(')'); }
"["			{ count(); RETURN_VAL('['); }
"]"			{ count(); RETURN_VAL(']'); }
"."			{ count(); RETURN_VAL('.'); }
"&"			{ count(); RETURN_VAL('&'); }
"!"			{ count(); RETURN_VAL('!'); }
"~"			{ count(); RETURN_VAL('~'); }
"-"			{ count(); RETURN_VAL('-'); }
"+"			{ count(); RETURN_VAL('+'); }
"*"			{ count(); RETURN_VAL('*'); }
"/"			{ count(); RETURN_VAL('/'); }
"%"			{ count(); RETURN_VAL('%'); }
"<"			{ count(); RETURN_VAL('<'); }
">"			{ count(); RETURN_VAL('>'); }
"^"			{ count(); RETURN_VAL('^'); }
"|"			{ count(); RETURN_VAL('|'); }
"?"			{ count(); RETURN_VAL('?'); }
"::"			{ count(); RETURN_VAL(CLCL); }
"..."			{ count(); RETURN_VAL(ELIPSIS); }
332
<INLIN>"/*"		{ comment(); }
<INLIN>"//".*           { count(); }
<INLIN>"#"              { macro(); /* was #.* { count(); } */ }
<INLIN>. |
<INLIN>\n               { /*
                           * Inside an inline body every remaining
                           * character is returned as its own token.
                           * Convert through unsigned char first: where
                           * plain char is signed, a byte above 0x7f
                           * would otherwise come out negative, and
                           * collectInlineDef() treats any token that
                           * is not positive as end of file.
                           */
                          RETURN_VAL((int) (unsigned char) yytext[0]);
                        }

<MINIT>"/*"		{ comment(); }
<MINIT>"//".*           { count(); }
<MINIT>"#"              { macro(); /* was #.* { count(); } */ }
<MINIT>. |
<MINIT>\n               { /*
                           * Same conversion as in the INLIN state, for
                           * collectMemberInitList().
                           */
                          RETURN_VAL((int) (unsigned char) yytext[0]);
                        }

[ \t\v\f]		{ count(); }
.			{ count(); /* ignore bad characters */ }
347
348%%
349
/*
 * End-of-input hook invoked by the scanner.  There is never a further
 * input source to switch to here, so always answer 1; the scanner then
 * hands the zero token to its caller to signal end-of-file.
 */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
358
359static void comment()
360{
361    int c1 = 0, c2 = STUB_INPUT();
362
363    linebuf[column] = c2;
364    column++;
365    for(;;) {
366	if (c2 == EOF)
367	    break;
368	if (c1 == '*' && c2 == '/')
369	    break;
370	if (c2 == '\n') {
371	    linebuf[0] = '\0';
372	    column = 0;
373	    lineno++;
374	}
375
376	c1 = c2;
377	c2 = STUB_INPUT();
378	linebuf[column] = c2;
379	column++;
380    }
381}
382
383
384static void macro()
385{
386  int c1 = 0, c2 = STUB_INPUT(), nonws = 0;
387
388  log_printf("MACRO reading begining...\n#");
389  log_printf("%c", c2);
390
391  linebuf[column] = c2;
392  column++;
393  for(;;) {
394    if (c2 == EOF)
395      break;
396    if (!isspace(c1))
397      nonws = c1;
398    if (nonws == '\\' && c2 == '\n') {
399      linebuf[0] = '\0';
400      column = 0;
401      lineno++;
402    } else if (c2 == '\n') {
403      linebuf[0] = '\0';
404      column = 0;
405      lineno++;
406      break;
407    }
408
409    c1 = c2;
410    c2 = STUB_INPUT();
411    linebuf[column] = c2;
412    log_printf("%c", c2);
413    column++;
414  }
415  log_printf("MACRO reading done.\n");
416}
417
418
419static void count()
420{
421    int i;
422
423    if (lineno == 1)
424	strcat(linebuf, yytext);
425
426    for (i = 0; yytext[i] != '\0'; i++)
427	if (yytext[i] == '\n')
428	    column = 0;
429	else if (yytext[i] == '\t')
430	    column += 8 - (column % 8);
431	else
432	    column++;
433
434    /* equiv to fprintf(yyout, "%s", yytext); */
435    /* ECHO; */
436}
437
438/*
439 * Collect the contents of inline functions, reading them char by char.
440 * thanks to the arjuna stubgen project for this one
441 */
442int collectInlineDef()
443{
444    int bracelevel = 1;
445    int token;
446
447    /* the magic of exclusive start states makes it all possible */
448    BEGIN INLIN;
449
450    while (bracelevel > 0) {
451        token = yylex();
452	column++;
453/*	fprintf(stderr, "INLIN: read token %c\n", token); */
454	if (token > 0) {
455	    /* Assume single char */
456	    switch (token) {
457	    case '{':
458		bracelevel++;
459		break;
460	    case '}':
461		bracelevel--;
462		if (bracelevel == 0)
463		{
464		    column--;
465		    unput(token);
466		    break;
467		}
468		break;
469	    case '\n':
470		column = 0;
471		lineno++;
472		break;
473	    }
474	} else {
475	    /* fatal error: Unexpected EOF reading inline function */
476	    return -1;
477	}
478    }
479
480    /* we now return you to your regularly scheduled start state */
481    BEGIN 0;
482
483    return 0;
484}
485
486
487/*
488 * hmmm... looks familiar.  more control-y programming.
489 */
490int collectMemberInitList()
491{
492    int token;
493    int insideList = 1;
494
495    /* the magic of exclusive start states makes it all possible */
496    BEGIN MINIT;
497
498    while(insideList) {
499        token = yylex();
500	column++;
501/*	fprintf(stderr, "MINIT: read token %c\n", token); */
502	if (token > 0) {
503	    /* Assume single char */
504	    switch (token)
505	    {
506	    case '{':
507		insideList = 0;
508		unput(token);
509		break;
510	    case '\n':
511		column = 0;
512		lineno++;
513		break;
514	    }
515	} else {
516	    /* fatal error: Unexpected EOF reading member initialization */
517	    return -1;
518	}
519    }
520
521    /* we now return you to your regularly scheduled start state */
522    BEGIN 0;
523
524    return 0;
525}
526