1%{ 2/* 3 * FILE: lexer.l 4 * AUTH: Michael John Radwin <mjr@acm.org> 5 * 6 * DESC: stubgen lexer. Portions borrowed from Newcastle 7 * University's Arjuna project (http://arjuna.ncl.ac.uk/), and 8 * Jeff Lee's ANSI Grammar 9 * (ftp://ftp.uu.net/usenet/net.sources/ansi.c.grammar.Z) 10 * 11 * DATE: Thu Aug 15 13:10:06 EDT 1996 12 * $Id: lexer.l 10 2002-07-09 12:24:59Z ejakowatz $ 13 * 14 * Copyright (c) 1996-1998 Michael John Radwin 15 * 16 * This program is free software; you can redistribute it and/or modify 17 * it under the terms of the GNU General Public License as published by 18 * the Free Software Foundation; either version 2 of the License, or 19 * (at your option) any later version. 20 * 21 * This program is distributed in the hope that it will be useful, 22 * but WITHOUT ANY WARRANTY; without even the implied warranty of 23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 24 * GNU General Public License for more details. 25 * 26 * You should have received a copy of the GNU General Public License 27 * along with this program; if not, write to the Free Software 28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 29 * 30 * Modification history: 31 * $Log: lexer.l,v $ 32 * Revision 1.1 2002/07/09 12:24:59 ejakowatz 33 * It is accomplished ... 34 * 35 * Revision 1.1 2001/11/07 10:06:07 ithamar 36 * Added stubgen to CVS 37 * 38 * Revision 1.33 1998/07/27 19:16:57 mradwin 39 * added some c++ keywords 40 * need to handle typename, using, and namespace 41 * 42 * Revision 1.32 1998/05/11 19:49:11 mradwin 43 * Version 2.03 (updated copyright information). 44 * 45 * Revision 1.31 1998/04/07 23:39:55 mradwin 46 * changed error-handling code significantly. functions 47 * like count() are now contributing to linebuf so we get correct 48 * parse error messages during lineno == 1 and other situations. 49 * also, instead of calling fatal() for collect*() functions, 50 * we return -1 and let the parser recover more gracefully. 
51 * 52 * Revision 1.30 1998/01/12 19:39:11 mradwin 53 * modified rcsid 54 * 55 * Revision 1.29 1997/11/13 22:37:31 mradwin 56 * changed char[] to char * to make non-gcc compilers 57 * a little happier. We need to #define const to nothing 58 * for other compilers as well. 59 * 60 * Revision 1.28 1997/11/13 21:29:30 mradwin 61 * moved code from parser.y to main.c 62 * 63 * Revision 1.27 1997/11/13 21:10:17 mradwin 64 * renamed stubgen.[ly] to parser.y lexer.l 65 * 66 * Revision 1.26 1997/11/11 03:52:06 mradwin 67 * changed fatal() 68 * 69 * Revision 1.25 1997/11/05 03:02:02 mradwin 70 * Modified logging routines. 71 * 72 * Revision 1.24 1997/11/01 23:12:43 mradwin 73 * greatly improved error-recovery. errors no longer spill over 74 * into other files because the yyerror state is properly reset. 75 * 76 * Revision 1.23 1997/10/26 23:16:32 mradwin 77 * changed inform_user and fatal functions to use varargs 78 * 79 * Revision 1.22 1997/10/26 22:46:48 mradwin 80 * support macros within comments, etc. 81 * 82 * Revision 1.21 1997/10/16 19:42:48 mradwin 83 * added support for elipses, static member/array initializers, 84 * and bitfields. 85 * 86 * Revision 1.20 1997/10/16 17:36:06 mradwin 87 * Fixed compiler warning on win32 from <ctype.h> and isspace() 88 * 89 * Revision 1.19 1997/10/16 17:12:59 mradwin 90 * handle extern "C" blocks better now, and support multi-line 91 * macros. still need error-checking. 92 * 93 * Revision 1.18 1997/10/15 22:09:06 mradwin 94 * changed tons of names. stubelem -> sytaxelem, 95 * stubin -> infile, stubout -> outfile, stublog -> logfile. 96 * 97 * Revision 1.17 1997/10/15 21:45:13 mradwin 98 * rearranged table.[ch] and util.[ch] so that util pkg 99 * knows nothing about syntaxelems. 100 * 101 * Revision 1.16 1997/10/15 17:42:37 mradwin 102 * added support for 'extern "C" { ... }' blocks. 
103 * 104 * Revision 1.15 1997/09/05 19:17:06 mradwin 105 * works for scanning old versions, except for parameter 106 * names that differ between .H and .C files. 107 * 108 * Revision 1.14 1997/09/05 16:37:41 mradwin 109 * rcsid 110 * 111 * Revision 1.13 1997/09/05 16:34:36 mradwin 112 * GPL-ized code. 113 * 114 * Revision 1.12 1997/09/05 16:13:18 mradwin 115 * changed email address to acm.org 116 * 117 * Revision 1.11 1996/09/12 14:44:49 mjr 118 * Added throw decl recognition (great, another 4 bytes in syntaxelem) 119 * and cleaned up the grammar so that const_opt appears in far fewer 120 * places. const_opt is by default 0 as well, so we don't need to 121 * pass it as an arg to new_elem(). 122 * 123 * I also added a fix to a potential bug with the MINIT and INLIN 124 * exclusive start states. I think they could have been confused 125 * by braces within comments, so now I'm grabbing comments in those 126 * states as well. 127 * 128 * Revision 1.10 1996/09/12 03:46:10 mjr 129 * No concrete changes in code. Just added some sanity by 130 * factoring out code into util.[ch] and putting some prototypes 131 * that were in table.h into stubgen.y where they belong. 132 * 133 * Revision 1.9 1996/09/01 20:59:48 mjr 134 * Added collectMemberInitList() function, which is similar 135 * to collectInlineDef() and also the exclusive state MINIT 136 * 137 * Revision 1.8 1996/08/23 05:09:19 mjr 138 * fixed up some more portability things 139 * 140 * Revision 1.7 1996/08/22 02:43:47 mjr 141 * added parse error message (using O'Reilly p. 274) 142 * 143 * Revision 1.6 1996/08/21 18:33:50 mjr 144 * removed the buffer for inlines. we don't care anyway. 145 * now we can't overflow on inlines! 
*
 * Revision 1.5 1996/08/21 17:40:56 mjr
 * added some cpp directives for porting to WIN32
 *
 * Revision 1.4 1996/08/19 17:01:33 mjr
 * no echo now
 *
 * Revision 1.3 1996/08/15 21:24:58 mjr
 * *** empty log message ***
 */
%}

D                       [0-9]
L                       [a-zA-Z_]
H                       [a-fA-F0-9]
E                       [Ee][+-]?{D}+
FS                      (f|F|l|L)
IS                      (u|U|l|L)*

%{
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "table.h"
#include "util.h"

#ifdef WIN32
/* definitions of exit, malloc, realloc, and free */
#include <stdlib.h>
#endif

#if 0 /* #ifdef WIN32 */
#include "y_tab.h"
#else
#include "y.tab.h"
#endif

#ifdef __cplusplus
#define STUB_INPUT() yyinput()
#else
#define STUB_INPUT() input()
#endif

/* when we return a string, duplicate it so we can free it later.
   we always allocate memory so we can uniformly free() it. */
#define RETURN_STR(x) tokens_seen++; yylval.string = strdup(yytext); return(x)

/* make that nasty union a value that will bus error if we misinterpret
   the value as a pointer */
#define RETURN_VAL(x) tokens_seen++; yylval.flag = 37; return(x)

static const char rcsid[] = "$Id: lexer.l 10 2002-07-09 12:24:59Z ejakowatz $";

static void count();
static void comment();
static void macro();

char linebuf[1024];     /* null-terminated at beginning of each file */
int lineno;             /* set to 1 at beginning of each file */
int column;             /* set to 0 at beginning of each file */
int tokens_seen;        /* set to 0 at beginning of each file */

%}

%x INLIN MINIT
%%
\n.*                    { /*
                           * for debugging purposes, we grab an entire
                           * line and buffer it, then grab tokens out of
                           * it. This lets us have more informative
                           * error messages. See yyerror() in parser.y
                           */
                          /*
                           * strncpy() does not terminate the destination
                           * when the source fills it, so copy one byte
                           * less than the buffer size and terminate
                           * explicitly; over-long lines are truncated.
                           */
                          strncpy(linebuf, yytext+1, sizeof(linebuf) - 1);
                          linebuf[sizeof(linebuf) - 1] = '\0';
                          lineno++;
                          column = 0;
                          yyless(1);    /* give back everything but \n */
                        }
"/*"                    { comment(); }

"//".*                  { count(); }
"#"                     { macro(); /* was #.* { count(); } */ }

"static"                { count(); tokens_seen++; }
"volatile"              { count(); tokens_seen++; }
"auto"                  { count(); tokens_seen++; }
"extern"                { count(); RETURN_VAL(EXTERN); }
"register"              { count(); tokens_seen++; }
"typedef"               { count(); tokens_seen++; }
"struct"                { count(); RETURN_VAL(STRUCT); }
"union"                 { count(); RETURN_VAL(UNION); }
"enum"                  { count(); RETURN_VAL(ENUM); }
"const"                 { count(); RETURN_VAL(CONST); }
"template"              { count(); RETURN_VAL(TEMPLATE); }

"typename"              { count(); tokens_seen++; /* FIXME */ }
"using"                 { count(); tokens_seen++; /* FIXME */ }
"namespace"             { count(); RETURN_VAL(CLASS); /* FIXME */ }

"dllexport"             { count(); tokens_seen++; }
"dllimport"             { count(); tokens_seen++; }

"explicit"              { count(); tokens_seen++; }
"mutable"               { count(); tokens_seen++; }
"inline"                { count(); tokens_seen++; }
"virtual"               { count(); tokens_seen++; }
"class"                 { count(); RETURN_VAL(CLASS); }
"delete"                { count(); RETURN_VAL(DELETE); }
"new"                   { count(); RETURN_VAL(NEW); }
"friend"                { count(); RETURN_VAL(FRIEND); }
"operator"              { count(); RETURN_VAL(OPERATOR); }
"protected"             { count(); RETURN_VAL(PROTECTED); }
"private"               { count(); RETURN_VAL(PRIVATE); }
"public"                { count(); RETURN_VAL(PUBLIC); }
"throw"                 { count(); RETURN_VAL(THROW); }

"char"                  { count(); RETURN_STR(CHAR); }
"short"                 { count(); RETURN_STR(SHORT); }
"int"                   { count(); RETURN_STR(INT); }
"long"                  { count(); RETURN_STR(LONG); }
"signed"                { count(); RETURN_STR(SIGNED); }
"unsigned"              { count(); RETURN_STR(UNSIGNED); }
"float"                 { count(); RETURN_STR(FLOAT); }
"double"                { count(); RETURN_STR(DOUBLE); }
"void"                  { count(); RETURN_STR(VOID); }

{L}({L}|{D})*           { count(); RETURN_STR(IDENTIFIER); }

0[xX]{H}+{IS}?          { count(); RETURN_STR(CONSTANT); }
0{D}+{IS}?              { count(); RETURN_STR(CONSTANT); }
{D}+{IS}?               { count(); RETURN_STR(CONSTANT); }
'(\\.|[^\\'])+'         { count(); RETURN_STR(CONSTANT); /* 'fontlck */ }

{D}+{E}{FS}?            { count(); RETURN_STR(CONSTANT); }
{D}*"."{D}+({E})?{FS}?  { count(); RETURN_STR(CONSTANT); }
{D}+"."{D}*({E})?{FS}?  { count(); RETURN_STR(CONSTANT); }

\"(\\.|[^\\"])*\"       { count(); RETURN_STR(STRING_LITERAL); /* "fontlck */ }

">>="                   { count(); RETURN_VAL(RIGHT_ASSIGN); }
"<<="                   { count(); RETURN_VAL(LEFT_ASSIGN); }
"+="                    { count(); RETURN_VAL(ADD_ASSIGN); }
"-="                    { count(); RETURN_VAL(SUB_ASSIGN); }
"*="                    { count(); RETURN_VAL(MUL_ASSIGN); }
"/="                    { count(); RETURN_VAL(DIV_ASSIGN); }
"%="                    { count(); RETURN_VAL(MOD_ASSIGN); }
"&="                    { count(); RETURN_VAL(AND_ASSIGN); }
"^="                    { count(); RETURN_VAL(XOR_ASSIGN); }
"|="                    { count(); RETURN_VAL(OR_ASSIGN); }
">>"                    { count(); RETURN_VAL(RIGHT_OP); }
"<<"                    { count(); RETURN_VAL(LEFT_OP); }
"++"                    { count(); RETURN_VAL(INC_OP); }
"--"                    { count(); RETURN_VAL(DEC_OP); }
"->"                    { count(); RETURN_VAL(PTR_OP); }
"->*"                   { count(); RETURN_VAL(MEM_PTR_OP); }
"&&"                    { count(); RETURN_VAL(AND_OP); }
"||"                    { count(); RETURN_VAL(OR_OP); }
"<="                    { count(); RETURN_VAL(LE_OP); }
">="                    { count(); RETURN_VAL(GE_OP); }
"=="                    { count(); RETURN_VAL(EQ_OP); }
"!="                    { count(); RETURN_VAL(NE_OP); }
";"                     { count(); RETURN_VAL(';'); }
"{"                     { count(); RETURN_VAL('{'); }
"}"                     { count(); RETURN_VAL('}'); }
","                     { count(); RETURN_VAL(','); }
":"                     { count(); RETURN_VAL(':'); }
"="                     { count(); RETURN_VAL('='); }
"("                     { count(); RETURN_VAL('('); }
")"                     { count(); RETURN_VAL(')'); }
"["                     { count(); RETURN_VAL('['); }
"]"                     { count(); RETURN_VAL(']'); }
"."                     { count(); RETURN_VAL('.'); }
"&"                     { count(); RETURN_VAL('&'); }
"!"                     { count(); RETURN_VAL('!'); }
"~"                     { count(); RETURN_VAL('~'); }
"-"                     { count(); RETURN_VAL('-'); }
"+"                     { count(); RETURN_VAL('+'); }
"*"                     { count(); RETURN_VAL('*'); }
"/"                     { count(); RETURN_VAL('/'); }
"%"                     { count(); RETURN_VAL('%'); }
"<"                     { count(); RETURN_VAL('<'); }
">"                     { count(); RETURN_VAL('>'); }
"^"                     { count(); RETURN_VAL('^'); }
"|"                     { count(); RETURN_VAL('|'); }
"?"                     { count(); RETURN_VAL('?'); }
"::"                    { count(); RETURN_VAL(CLCL); }
"..."                   { count(); RETURN_VAL(ELIPSIS); }

<INLIN>"/*"             { comment(); }
<INLIN>"//".*           { count(); }
<INLIN>"#"              { macro(); /* was #.* { count(); } */ }
<INLIN>.                |
<INLIN>\n               { /* go through unsigned char so that bytes >= 0x80
                             are never returned as a negative token, which
                             collectInlineDef() would take for end-of-file */
                          RETURN_VAL((int) (unsigned char) yytext[0]); }

<MINIT>"/*"             { comment(); }
<MINIT>"//".*           { count(); }
<MINIT>"#"              { macro(); /* was #.* { count(); } */ }
<MINIT>.                |
<MINIT>\n               { /* go through unsigned char so that bytes >= 0x80
                             are never returned as a negative token, which
                             collectMemberInitList() would take for
                             end-of-file */
                          RETURN_VAL((int) (unsigned char) yytext[0]); }

[ \t\v\f]               { count(); }
.                       { count(); /* ignore bad characters */ }

%%

/*
 * called when EOF is encountered. Return 1 so the scanner will return
 * the zero token to report end-of-file.
*/
int yywrap()
{
    return(1);
}

/*
 * Record one character consumed by comment() or macro() in linebuf at
 * index `column`.
 *
 * The write is dropped when `column` lies outside the buffer (the last
 * byte is never touched, so an existing terminator there survives) and
 * when c is EOF, which is a sentinel rather than input text.  The caller
 * remains responsible for advancing `column`.
 */
static void linebuf_store(int c)
{
    if (c == EOF)
        return;
    if (column < 0 || (size_t) column >= sizeof(linebuf) - 1)
        return;
    linebuf[column] = (char) c;
}

/*
 * Consume the body of a C comment.  Called after the opening slash-star
 * has been matched; reads characters directly from the scanner input
 * until the closing star-slash or EOF.  Each character read is mirrored
 * into linebuf (bounds-checked) and advances `column`; every newline
 * empties linebuf, resets `column` and bumps `lineno`.
 */
static void comment()
{
    int prev = 0;
    int cur = STUB_INPUT();

    linebuf_store(cur);
    column++;

    while (cur != EOF && !(prev == '*' && cur == '/')) {
        if (cur == '\n') {
            linebuf[0] = '\0';
            column = 0;
            lineno++;
        }

        prev = cur;
        cur = STUB_INPUT();
        linebuf_store(cur);
        column++;
    }
}


/*
 * Consume a preprocessor directive.  Called after '#' has been matched;
 * reads up to and including the newline that ends the directive, or EOF.
 * A newline whose most recent preceding non-whitespace character is a
 * backslash is treated as a continuation and does not end the directive.
 * Every character read is echoed through log_printf() and mirrored into
 * linebuf (bounds-checked); every newline empties linebuf, resets
 * `column` and bumps `lineno`.
 */
static void macro()
{
    int prev = 0;
    int cur = STUB_INPUT();
    int nonws = 0;              /* last non-whitespace char before cur */

    log_printf("MACRO reading begining...\n#");
    log_printf("%c", cur);

    linebuf_store(cur);
    column++;

    while (cur != EOF) {
        if (!isspace(prev))
            nonws = prev;

        if (cur == '\n') {
            linebuf[0] = '\0';
            column = 0;
            lineno++;
            if (nonws != '\\')
                break;          /* plain newline: directive is complete */
        }

        prev = cur;
        cur = STUB_INPUT();
        linebuf_store(cur);
        log_printf("%c", cur);
        column++;
    }
    log_printf("MACRO reading done.\n");
}


/*
 * Account for the token currently held in yytext.
 *
 * While scanning the first line (lineno == 1) the token text is appended
 * to linebuf.  The append is bounded by the space left in the buffer, and
 * is skipped altogether if linebuf holds no terminator to append after.
 * `column` is then advanced over the token: a newline resets it to 0, a
 * tab moves it to the next multiple of 8, anything else adds one.
 */
static void count()
{
    int i;

    if (lineno == 1) {
        const char *end = (const char *) memchr(linebuf, '\0', sizeof(linebuf));

        if (end != NULL) {
            size_t used = (size_t) (end - linebuf);

            strncat(linebuf, yytext, sizeof(linebuf) - 1 - used);
        }
    }

    for (i = 0; yytext[i] != '\0'; i++) {
        if (yytext[i] == '\n')
            column = 0;
        else if (yytext[i] == '\t')
            column += 8 - (column % 8);
        else
            column++;
    }

    /* equiv to fprintf(yyout, "%s", yytext); */
    /* ECHO; */
}

/*
 * Collect the contents of inline functions, reading them char by char.
* thanks to the arjuna stubgen project for this one
 *
 * Switches the scanner to the exclusive INLIN start state and pulls
 * single-character tokens until the brace that closes the body is seen
 * (the caller has already consumed the opening brace, hence the initial
 * nesting level of 1).  The closing brace is pushed back with unput() so
 * that the normal rules see it again.
 *
 * Returns 0 on success, or -1 if end-of-file is reached before the body
 * is closed.  The start state is restored on both paths, so a truncated
 * file cannot leave the scanner stuck in INLIN.
 */
int collectInlineDef()
{
    int bracelevel = 1;
    int status = 0;
    int token;

    /* the magic of exclusive start states makes it all possible */
    BEGIN INLIN;

    while (bracelevel > 0) {
        token = yylex();
        column++;

        if (token <= 0) {
            /* fatal error: Unexpected EOF reading inline function */
            status = -1;
            break;
        }

        /* Assume single char */
        switch (token) {
        case '{':
            bracelevel++;
            break;
        case '}':
            bracelevel--;
            if (bracelevel == 0) {
                /* hand the closing brace back to the parser */
                column--;
                unput(token);
            }
            break;
        case '\n':
            column = 0;
            lineno++;
            break;
        default:
            break;
        }
    }

    /* we now return you to your regularly scheduled start state */
    BEGIN 0;

    return status;
}


/*
 * hmmm... looks familiar. more control-y programming.
 *
 * Switches the scanner to the exclusive MINIT start state and discards
 * single-character tokens until the first '{' is seen; that brace is
 * pushed back with unput() so the normal rules see it again.
 *
 * Returns 0 on success, or -1 if end-of-file is reached first.  The start
 * state is restored on both paths.
 */
int collectMemberInitList()
{
    int status = 0;
    int insideList = 1;
    int token;

    /* the magic of exclusive start states makes it all possible */
    BEGIN MINIT;

    while (insideList) {
        token = yylex();
        column++;

        if (token <= 0) {
            /* fatal error: Unexpected EOF reading member initialization */
            status = -1;
            break;
        }

        /* Assume single char */
        switch (token) {
        case '{':
            insideList = 0;
            unput(token);
            break;
        case '\n':
            column = 0;
            lineno++;
            break;
        default:
            break;
        }
    }

    /* we now return you to your regularly scheduled start state */
    BEGIN 0;

    return status;
}