lexer.c revision 170268
1/* $FreeBSD: head/contrib/ipfilter/tools/lexer.c 170268 2007-06-04 02:54:36Z darrenr $ */ 2 3/* 4 * Copyright (C) 2002-2006 by Darren Reed. 5 * 6 * See the IPFILTER.LICENCE file for details on licencing. 7 */ 8#include <ctype.h> 9#include "ipf.h" 10#ifdef IPFILTER_SCAN 11# include "netinet/ip_scan.h" 12#endif 13#include <sys/ioctl.h> 14#include <syslog.h> 15#ifdef TEST_LEXER 16# define NO_YACC 17union { 18 int num; 19 char *str; 20 struct in_addr ipa; 21 i6addr_t ip6; 22} yylval; 23#endif 24#include "lexer.h" 25#include "y.tab.h" 26 27FILE *yyin; 28 29#define ishex(c) (ISDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || \ 30 ((c) >= 'A' && (c) <= 'F')) 31#define TOOLONG -3 32 33extern int string_start; 34extern int string_end; 35extern char *string_val; 36extern int pos; 37extern int yydebug; 38 39char *yystr = NULL; 40int yytext[YYBUFSIZ+1]; 41int yylineNum = 1; 42int yypos = 0; 43int yylast = -1; 44int yyexpectaddr = 0; 45int yybreakondot = 0; 46int yyvarnext = 0; 47int yytokentype = 0; 48wordtab_t *yywordtab = NULL; 49int yysavedepth = 0; 50wordtab_t *yysavewords[30]; 51 52 53static wordtab_t *yyfindkey __P((char *)); 54static int yygetc __P((void)); 55static void yyunputc __P((int)); 56static int yyswallow __P((int)); 57static char *yytexttostr __P((int, int)); 58static void yystrtotext __P((char *)); 59 60static int yygetc() 61{ 62 int c; 63 64 if (yypos < yylast) { 65 c = yytext[yypos++]; 66 if (c == '\n') 67 yylineNum++; 68 return c; 69 } 70 71 if (yypos == YYBUFSIZ) 72 return TOOLONG; 73 74 if (pos >= string_start && pos <= string_end) { 75 c = string_val[pos - string_start]; 76 yypos++; 77 } else { 78 c = fgetc(yyin); 79 } 80 if (c == '\n') 81 yylineNum++; 82 yytext[yypos++] = c; 83 yylast = yypos; 84 yytext[yypos] = '\0'; 85 86 return c; 87} 88 89 90static void yyunputc(c) 91int c; 92{ 93 if (c == '\n') 94 yylineNum--; 95 yytext[--yypos] = c; 96} 97 98 99static int yyswallow(last) 100int last; 101{ 102 int c; 103 104 while (((c = yygetc()) > '\0') && (c != last)) 105 ; 106 107 if (c != EOF) 108 yyunputc(c); 109 if (c == last) 110 return 0; 111 return -1; 112} 113 114 115static void yystrtotext(str) 116char *str; 117{ 118 int len; 119 char *s; 120 121 len = strlen(str); 122 if (len > YYBUFSIZ) 123 len = YYBUFSIZ; 124 125 for (s = str; *s != '\0' && len > 0; s++, len--) 126 yytext[yylast++] = *s; 127 yytext[yylast] = '\0'; 128} 129 130 131static char *yytexttostr(offset, max) 132int offset, max; 133{ 134 char *str; 135 int i; 136 137 if ((yytext[offset] == '\'' || yytext[offset] == '"') && 138 (yytext[offset] == yytext[offset + max - 1])) { 139 offset++; 140 max--; 141 } 142 143 if (max > yylast) 144 max = yylast; 145 str = malloc(max + 1); 146 if (str != NULL) { 147 for (i = offset; i < max; i++) 148 str[i - offset] = (char)(yytext[i] & 0xff); 149 str[i - offset] = '\0'; 150 } 151 return str; 152} 153 154 155int yylex() 156{ 157 int c, n, isbuilding, rval, lnext, nokey = 0; 158 char *name; 159 160 isbuilding = 0; 161 lnext = 0; 162 rval = 0; 163 164 if (yystr != NULL) { 165 free(yystr); 166 yystr = NULL; 167 } 168 169nextchar: 170 c = yygetc(); 171 172 switch (c) 173 { 174 case '\n' : 175 lnext = 0; 176 nokey = 0; 177 case '\t' : 178 case '\r' : 179 case ' ' : 180 if (isbuilding == 1) { 181 yyunputc(c); 182 goto done; 183 } 184 if (yylast > yypos) { 185 bcopy(yytext + yypos, yytext, 186 sizeof(yytext[0]) * (yylast - yypos + 1)); 187 } 188 yylast -= yypos; 189 yypos = 0; 190 lnext = 0; 191 nokey = 0; 192 goto nextchar; 193 194 case '\\' : 195 if (lnext == 0) { 196 lnext = 1; 197 if (yylast == yypos) { 198 yylast--; 199 yypos--; 200 } else 201 yypos--; 202 if (yypos == 0) 203 nokey = 1; 204 goto nextchar; 205 } 206 break; 207 } 208 209 if (lnext == 1) { 210 lnext = 0; 211 if ((isbuilding == 0) && !ISALNUM(c)) { 212 return c; 213 } 214 goto nextchar; 215 } 216 217 switch (c) 218 { 219 case '#' : 220 if (isbuilding == 1) { 221 yyunputc(c); 222 goto done; 223 } 224 yyswallow('\n'); 225 rval = YY_COMMENT; 226 goto nextchar; 227 228 case '$' : 229 if (isbuilding == 1) { 230 yyunputc(c); 231 goto done; 232 } 233 n = yygetc(); 234 if (n == '{') { 235 if (yyswallow('}') == -1) { 236 rval = -2; 237 goto done; 238 } 239 (void) yygetc(); 240 } else { 241 if (!ISALPHA(n)) { 242 yyunputc(n); 243 break; 244 } 245 do { 246 n = yygetc(); 247 } while (ISALPHA(n) || ISDIGIT(n) || n == '_'); 248 yyunputc(n); 249 } 250 251 name = yytexttostr(1, yypos); /* skip $ */ 252 253 if (name != NULL) { 254 string_val = get_variable(name, NULL, yylineNum); 255 free(name); 256 if (string_val != NULL) { 257 name = yytexttostr(yypos, yylast); 258 if (name != NULL) { 259 yypos = 0; 260 yylast = 0; 261 yystrtotext(string_val); 262 yystrtotext(name); 263 free(string_val); 264 free(name); 265 goto nextchar; 266 } 267 free(string_val); 268 } 269 } 270 break; 271 272 case '\'': 273 case '"' : 274 if (isbuilding == 1) { 275 goto done; 276 } 277 do { 278 n = yygetc(); 279 if (n == EOF || n == TOOLONG) { 280 rval = -2; 281 goto done; 282 } 283 if (n == '\n') { 284 yyunputc(' '); 285 yypos++; 286 } 287 } while (n != c); 288 rval = YY_STR; 289 goto done; 290 /* NOTREACHED */ 291 292 case EOF : 293 yylineNum = 1; 294 yypos = 0; 295 yylast = -1; 296 yyexpectaddr = 0; 297 yybreakondot = 0; 298 yyvarnext = 0; 299 yytokentype = 0; 300 return 0; 301 } 302 303 if (strchr("=,/;{}()@", c) != NULL) { 304 if (isbuilding == 1) { 305 yyunputc(c); 306 goto done; 307 } 308 rval = c; 309 goto done; 310 } else if (c == '.') { 311 if (isbuilding == 0) { 312 rval = c; 313 goto done; 314 } 315 if (yybreakondot != 0) { 316 yyunputc(c); 317 goto done; 318 } 319 } 320 321 switch (c) 322 { 323 case '-' : 324 if (yyexpectaddr) 325 break; 326 if (isbuilding == 1) 327 break; 328 n = yygetc(); 329 if (n == '>') { 330 isbuilding = 1; 331 goto done; 332 } 333 yyunputc(n); 334 rval = '-'; 335 goto done; 336 337 case '!' : 338 if (isbuilding == 1) { 339 yyunputc(c); 340 goto done; 341 } 342 n = yygetc(); 343 if (n == '=') { 344 rval = YY_CMP_NE; 345 goto done; 346 } 347 yyunputc(n); 348 rval = '!'; 349 goto done; 350 351 case '<' : 352 if (yyexpectaddr) 353 break; 354 if (isbuilding == 1) { 355 yyunputc(c); 356 goto done; 357 } 358 n = yygetc(); 359 if (n == '=') { 360 rval = YY_CMP_LE; 361 goto done; 362 } 363 if (n == '>') { 364 rval = YY_RANGE_OUT; 365 goto done; 366 } 367 yyunputc(n); 368 rval = YY_CMP_LT; 369 goto done; 370 371 case '>' : 372 if (yyexpectaddr) 373 break; 374 if (isbuilding == 1) { 375 yyunputc(c); 376 goto done; 377 } 378 n = yygetc(); 379 if (n == '=') { 380 rval = YY_CMP_GE; 381 goto done; 382 } 383 if (n == '<') { 384 rval = YY_RANGE_IN; 385 goto done; 386 } 387 yyunputc(n); 388 rval = YY_CMP_GT; 389 goto done; 390 } 391 392 /* 393 * Now for the reason this is here...IPv6 address parsing. 394 * The longest string we can expect is of this form: 395 * 0000:0000:0000:0000:0000:0000:000.000.000.000 396 * not: 397 * 0000:0000:0000:0000:0000:0000:0000:0000 398 */ 399#ifdef USE_INET6 400 if (yyexpectaddr == 1 && isbuilding == 0 && (ishex(c) || c == ':')) { 401 char ipv6buf[45 + 1], *s, oc; 402 int start; 403 404 start = yypos; 405 s = ipv6buf; 406 oc = c; 407 408 /* 409 * Perhaps we should implement stricter controls on what we 410 * swallow up here, but surely it would just be duplicating 411 * the code in inet_pton() anyway. 412 */ 413 do { 414 *s++ = c; 415 c = yygetc(); 416 } while ((ishex(c) || c == ':' || c == '.') && 417 (s - ipv6buf < 46)); 418 yyunputc(c); 419 *s = '\0'; 420 421 if (inet_pton(AF_INET6, ipv6buf, &yylval.ip6) == 1) { 422 rval = YY_IPV6; 423 yyexpectaddr = 0; 424 goto done; 425 } 426 yypos = start; 427 c = oc; 428 } 429#endif 430 431 if (c == ':') { 432 if (isbuilding == 1) { 433 yyunputc(c); 434 goto done; 435 } 436 rval = ':'; 437 goto done; 438 } 439 440 if (isbuilding == 0 && c == '0') { 441 n = yygetc(); 442 if (n == 'x') { 443 do { 444 n = yygetc(); 445 } while (ishex(n)); 446 yyunputc(n); 447 rval = YY_HEX; 448 goto done; 449 } 450 yyunputc(n); 451 } 452 453 /* 454 * No negative numbers with leading - sign.. 455 */ 456 if (isbuilding == 0 && ISDIGIT(c)) { 457 do { 458 n = yygetc(); 459 } while (ISDIGIT(n)); 460 yyunputc(n); 461 rval = YY_NUMBER; 462 goto done; 463 } 464 465 isbuilding = 1; 466 goto nextchar; 467 468done: 469 yystr = yytexttostr(0, yypos); 470 471 if (yydebug) 472 printf("isbuilding %d yyvarnext %d nokey %d\n", 473 isbuilding, yyvarnext, nokey); 474 if (isbuilding == 1) { 475 wordtab_t *w; 476 477 w = NULL; 478 isbuilding = 0; 479 480 if ((yyvarnext == 0) && (nokey == 0)) { 481 w = yyfindkey(yystr); 482 if (w == NULL && yywordtab != NULL) { 483 yyresetdict(); 484 w = yyfindkey(yystr); 485 } 486 } else 487 yyvarnext = 0; 488 if (w != NULL) 489 rval = w->w_value; 490 else 491 rval = YY_STR; 492 } 493 494 if (rval == YY_STR && yysavedepth > 0) 495 yyresetdict(); 496 497 yytokentype = rval; 498 499 if (yydebug) 500 printf("lexed(%s) [%d,%d,%d] => %d @%d\n", yystr, string_start, 501 string_end, pos, rval, yysavedepth); 502 503 switch (rval) 504 { 505 case YY_NUMBER : 506 sscanf(yystr, "%u", &yylval.num); 507 break; 508 509 case YY_HEX : 510 sscanf(yystr, "0x%x", (u_int *)&yylval.num); 511 break; 512 513 case YY_STR : 514 yylval.str = strdup(yystr); 515 break; 516 517 default : 518 break; 519 } 520 521 if (yylast > 0) { 522 bcopy(yytext + yypos, yytext, 523 sizeof(yytext[0]) * (yylast - yypos + 1)); 524 yylast -= yypos; 525 yypos = 0; 526 } 527 528 return rval; 529} 530 531 532static wordtab_t *yyfindkey(key) 533char *key; 534{ 535 wordtab_t *w; 536 537 if (yywordtab == NULL) 538 return NULL; 539 540 for (w = yywordtab; w->w_word != 0; w++) 541 if (strcasecmp(key, w->w_word) == 0) 542 return w; 543 return NULL; 544} 545 546 547char *yykeytostr(num) 548int num; 549{ 550 wordtab_t *w; 551 552 if (yywordtab == NULL) 553 return "<unknown>"; 554 555 for (w = yywordtab; w->w_word; w++) 556 if (w->w_value == num) 557 return w->w_word; 558 return "<unknown>"; 559} 560 561 562wordtab_t *yysettab(words) 563wordtab_t *words; 564{ 565 wordtab_t *save; 566 567 save = yywordtab; 568 yywordtab = words; 569 return save; 570} 571 572 573void yyerror(msg) 574char *msg; 575{ 576 char *txt, letter[2]; 577 int freetxt = 0; 578 579 if (yytokentype < 256) { 580 letter[0] = yytokentype; 581 letter[1] = '\0'; 582 txt = letter; 583 } else if (yytokentype == YY_STR || yytokentype == YY_HEX || 584 yytokentype == YY_NUMBER) { 585 if (yystr == NULL) { 586 txt = yytexttostr(yypos, YYBUFSIZ); 587 freetxt = 1; 588 } else 589 txt = yystr; 590 } else { 591 txt = yykeytostr(yytokentype); 592 } 593 fprintf(stderr, "%s error at \"%s\", line %d\n", msg, txt, yylineNum); 594 if (freetxt == 1) 595 free(txt); 596 exit(1); 597} 598 599 600void yysetdict(newdict) 601wordtab_t *newdict; 602{ 603 if (yysavedepth == sizeof(yysavewords)/sizeof(yysavewords[0])) { 604 fprintf(stderr, "%d: at maximum dictionary depth\n", 605 yylineNum); 606 return; 607 } 608 609 yysavewords[yysavedepth++] = yysettab(newdict); 610 if (yydebug) 611 printf("yysavedepth++ => %d\n", yysavedepth); 612} 613 614void yyresetdict() 615{ 616 if (yydebug) 617 printf("yyresetdict(%d)\n", yysavedepth); 618 if (yysavedepth > 0) { 619 yysettab(yysavewords[--yysavedepth]); 620 if (yydebug) 621 printf("yysavedepth-- => %d\n", yysavedepth); 622 } 623} 624 625 626 627#ifdef TEST_LEXER 628int main(argc, argv) 629int argc; 630char *argv[]; 631{ 632 int n; 633 634 yyin = stdin; 635 636 while ((n = yylex()) != 0) 637 printf("%d.n = %d [%s] %d %d\n", 638 yylineNum, n, yystr, yypos, yylast); 639} 640#endif 641