1/* 2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. 3 * 4 * This file is part of Jam - see jam.c for Copyright information. 5 */ 6 7/* 8 * scan.c - the jam yacc scanner 9 * 10 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk. 11 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc. 12 * Also handle tokens abutting EOF by remembering 13 * to return EOF now matter how many times yylex() 14 * reinvokes yyline(). 15 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT. 16 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is 17 * defined before Linux's yacc tries to redefine it. 18 * 01/10/01 (seiwald) - \ can now escape any whitespace char 19 * 11/04/02 (seiwald) - const-ing for string literals 20 */ 21 22# include "jam.h" 23# include "lists.h" 24# include "parse.h" 25# include "scan.h" 26# include "jamgram.h" 27# include "jambase.h" 28# include "jcache.h" 29# include "newstr.h" 30 31struct keyword { 32 const char *word; 33 int type; 34} keywords[] = { 35# include "jamgramtab.h" 36 { 0, 0 } 37} ; 38 39struct include { 40 struct include *next; /* next serial include file */ 41 const char *string; /* pointer into current line */ 42 char **strings; /* for yyfparse() -- text to parse */ 43 FILE *file; /* for yyfparse() -- file being read */ 44 const char *fname; /* for yyfparse() -- file name */ 45 int line; /* line counter for error messages */ 46 char buf[ 512 ]; /* for yyfparse() -- line buffer */ 47} ; 48 49static struct include *incp = 0; /* current file; head of chain */ 50 51static int scanmode = SCAN_NORMAL; 52static int anyerrors = 0; 53static char *symdump( YYSTYPE *s ); 54 55# define BIGGEST_TOKEN 10240 /* no single token can be larger */ 56 57/* 58 * Set parser mode: normal, string, or keyword 59 */ 60 61void 62yymode( int n ) 63{ 64 scanmode = n; 65} 66 67void 68yyerror( const char *s ) 69{ 70 if( incp ) 71 printf( "%s: line %d: ", incp->fname, incp->line ); 72 73 printf( "%s at %s\n", s, symdump( &yylval ) ); 74 75 ++anyerrors; 76} 77 78int 79yyanyerrors() 80{ 81 return anyerrors != 0; 82} 83 84void 85yyfparse( const char *s ) 86{ 87 struct include *i = (struct include *)malloc( sizeof( *i ) ); 88 89 /* Push this onto the incp chain. */ 90 91 i->string = ""; 92 i->strings = 0; 93 i->file = 0; 94 i->fname = copystr( s ); 95 i->line = 0; 96 i->next = incp; 97 incp = i; 98 99 /* If the filename is "+", it means use the internal jambase. */ 100 101 if( !strcmp( s, "+" ) ) 102 i->strings = jambase; 103} 104 105/* 106 * yyline() - read new line and return first character 107 * 108 * Fabricates a continuous stream of characters across include files, 109 * returning EOF at the bitter end. 110 */ 111 112int 113yyline() 114{ 115 struct include *i = incp; 116 117 if( !incp ) 118 return EOF; 119 120 /* Once we start reading from the input stream, we reset the */ 121 /* include insertion point so that the next include file becomes */ 122 /* the head of the list. */ 123 124 /* If there is more data in this line, return it. */ 125 126 if( *i->string ) 127 return *i->string++; 128 129 /* If we're reading from an internal string list, go to the */ 130 /* next string. */ 131 132 if( i->strings ) 133 { 134 if( !*i->strings ) 135 goto next; 136 137 i->line++; 138 i->string = *(i->strings++); 139 return *i->string++; 140 } 141 142 /* If necessary, open the file */ 143 144#ifdef OPT_JAMFILE_CACHE_EXT 145 if( !i->file ) 146 { 147 if ( strcmp( i->fname, "-" ) ) 148 { 149 i->strings = jcache((char*)i->fname); 150 if (!i->strings || !*i->strings) 151 goto next; 152 i->line++; 153 i->string = *(i->strings++); 154 return *i->string++; 155 } 156 else 157 { 158 i->file = stdin; 159 if( fgets( i->buf, sizeof( i->buf ), i->file ) ) 160 { 161 i->line++; 162 i->string = i->buf; 163 return *i->string++; 164 } 165 } 166 } 167#else 168 if( !i->file ) 169 { 170 FILE *f = stdin; 171 172 if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) ) 173 perror( i->fname ); 174 175 i->file = f; 176 } 177 178 /* If there's another line in this file, start it. */ 179 180 if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) ) 181 { 182 i->line++; 183 i->string = i->buf; 184 return *i->string++; 185 } 186#endif 187 188 next: 189 /* This include is done. */ 190 /* Free it up and return EOF so yyparse() returns to parse_file(). */ 191 192 incp = i->next; 193 194 /* Close file, free name */ 195 196 if( i->file && i->file != stdin ) 197 fclose( i->file ); 198 freestr( i->fname ); 199 free( (char *)i ); 200 201 return EOF; 202} 203 204/* 205 * yylex() - set yylval to current token; return its type 206 * 207 * Macros to move things along: 208 * 209 * yychar() - return and advance character; invalid after EOF 210 * yyprev() - back up one character; invalid before yychar() 211 * 212 * yychar() returns a continuous stream of characters, until it hits 213 * the EOF of the current include file. 214 */ 215 216# define yychar() ( *incp->string ? *incp->string++ : yyline() ) 217# define yyprev() ( incp->string-- ) 218 219int 220yylex() 221{ 222 int c; 223 char buf[BIGGEST_TOKEN]; 224 char *b = buf; 225 226 if( !incp ) 227 goto eof; 228 229 /* Get first character (whitespace or of token) */ 230 231 c = yychar(); 232 233 if( scanmode == SCAN_STRING ) 234 { 235 /* If scanning for a string (action's {}'s), look for the */ 236 /* closing brace. We handle matching braces, if they match! */ 237 238 int nest = 1; 239 240 while( c != EOF && b < buf + sizeof( buf ) ) 241 { 242 if( c == '{' ) 243 nest++; 244 245 if( c == '}' && !--nest ) 246 break; 247 248 *b++ = c; 249 250 c = yychar(); 251 } 252 253 /* We ate the ending brace -- regurgitate it. */ 254 255 if( c != EOF ) 256 yyprev(); 257 258 /* Check obvious errors. */ 259 260 if( b == buf + sizeof( buf ) ) 261 { 262 yyerror( "action block too big" ); 263 goto eof; 264 } 265 266 if( nest ) 267 { 268 yyerror( "unmatched {} in action block" ); 269 goto eof; 270 } 271 272 *b = 0; 273 yylval.type = STRING; 274 yylval.string = newstr( buf ); 275 276 } 277 else 278 { 279 char *b = buf; 280 struct keyword *k; 281 int inquote = 0; 282 int notkeyword; 283 284 /* Eat white space */ 285 286 for( ;; ) 287 { 288 /* Skip past white space */ 289 290 while( c != EOF && isspace( c ) ) 291 c = yychar(); 292 293 /* Not a comment? Swallow up comment line. */ 294 295 if( c != '#' ) 296 break; 297 while( ( c = yychar() ) != EOF && c != '\n' ) 298 ; 299 } 300 301 /* c now points to the first character of a token. */ 302 303 if( c == EOF ) 304 goto eof; 305 306 /* While scanning the word, disqualify it for (expensive) */ 307 /* keyword lookup when we can: $anything, "anything", \anything */ 308 309 notkeyword = c == '$'; 310 311 /* look for white space to delimit word */ 312 /* "'s get stripped but preserve white space */ 313 /* \ protects next character */ 314 315 while( 316 c != EOF && 317 b < buf + sizeof( buf ) && 318 ( inquote || !isspace( c ) ) ) 319 { 320 if( c == '"' ) 321 { 322 /* begin or end " */ 323 inquote = !inquote; 324 notkeyword = 1; 325 } 326 else if( c != '\\' ) 327 { 328 /* normal char */ 329 *b++ = c; 330 } 331 else if( ( c = yychar()) != EOF ) 332 { 333 /* \c */ 334 *b++ = c; 335 notkeyword = 1; 336 } 337 else 338 { 339 /* \EOF */ 340 break; 341 } 342 343 c = yychar(); 344 } 345 346 /* Check obvious errors. */ 347 348 if( b == buf + sizeof( buf ) ) 349 { 350 yyerror( "string too big" ); 351 goto eof; 352 } 353 354 if( inquote ) 355 { 356 yyerror( "unmatched \" in string" ); 357 goto eof; 358 } 359 360 /* We looked ahead a character - back up. */ 361 362 if( c != EOF ) 363 yyprev(); 364 365 /* scan token table */ 366 /* don't scan if it's obviously not a keyword or if its */ 367 /* an alphabetic when were looking for punctuation */ 368 369 *b = 0; 370 yylval.type = ARG; 371 372 if( !notkeyword && !( isalpha( *buf ) && scanmode == SCAN_PUNCT ) ) 373 { 374 for( k = keywords; k->word; k++ ) 375 if( *buf == *k->word && !strcmp( k->word, buf ) ) 376 { 377 yylval.type = k->type; 378 yylval.string = k->word; /* used by symdump */ 379 break; 380 } 381 } 382 383 if( yylval.type == ARG ) 384 yylval.string = newstr( buf ); 385 } 386 387 if( DEBUG_SCAN ) 388 printf( "scan %s\n", symdump( &yylval ) ); 389 390 return yylval.type; 391 392eof: 393 yylval.type = EOF; 394 return yylval.type; 395} 396 397static char * 398symdump( YYSTYPE *s ) 399{ 400 static char buf[ BIGGEST_TOKEN + 20 ]; 401 402 switch( s->type ) 403 { 404 case EOF: 405 sprintf( buf, "EOF" ); 406 break; 407 case 0: 408 sprintf( buf, "unknown symbol %s", s->string ); 409 break; 410 case ARG: 411 sprintf( buf, "argument %s", s->string ); 412 break; 413 case STRING: 414 sprintf( buf, "string \"%s\"", s->string ); 415 break; 416 default: 417 sprintf( buf, "keyword %s", s->string ); 418 break; 419 } 420 return buf; 421} 422