/* lexer.c revision 145510 */
1/* $NetBSD$ */ 2 3/* 4 * Copyright (C) 2003 by Darren Reed. 5 * 6 * See the IPFILTER.LICENCE file for details on licencing. 7 */ 8#include <ctype.h> 9#include "ipf.h" 10#ifdef IPFILTER_SCAN 11# include "netinet/ip_scan.h" 12#endif 13#include <sys/ioctl.h> 14#include <syslog.h> 15#ifdef TEST_LEXER 16# define NO_YACC 17union { 18 int num; 19 char *str; 20 struct in_addr ipa; 21 i6addr_t ip6; 22} yylval; 23#endif 24#include "lexer.h" 25#include "y.tab.h" 26 27FILE *yyin; 28 29#define ishex(c) (ISDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || \ 30 ((c) >= 'A' && (c) <= 'F')) 31#define TOOLONG -3 32 33extern int string_start; 34extern int string_end; 35extern char *string_val; 36extern int pos; 37extern int yydebug; 38 39char *yystr = NULL; 40int yytext[YYBUFSIZ+1]; 41int yylineNum = 1; 42int yypos = 0; 43int yylast = -1; 44int yyexpectaddr = 0; 45int yybreakondot = 0; 46int yyvarnext = 0; 47int yytokentype = 0; 48wordtab_t *yywordtab = NULL; 49int yysavedepth = 0; 50wordtab_t *yysavewords[30]; 51 52 53static wordtab_t *yyfindkey __P((char *)); 54static int yygetc __P((void)); 55static void yyunputc __P((int)); 56static int yyswallow __P((int)); 57static char *yytexttostr __P((int, int)); 58static void yystrtotext __P((char *)); 59 60static int yygetc() 61{ 62 int c; 63 64 if (yypos < yylast) { 65 c = yytext[yypos++]; 66 if (c == '\n') 67 yylineNum++; 68 return c; 69 } 70 71 if (yypos == YYBUFSIZ) 72 return TOOLONG; 73 74 if (pos >= string_start && pos <= string_end) { 75 c = string_val[pos - string_start]; 76 yypos++; 77 } else { 78 c = fgetc(yyin); 79 } 80 if (c == '\n') 81 yylineNum++; 82 yytext[yypos++] = c; 83 yylast = yypos; 84 yytext[yypos] = '\0'; 85 86 return c; 87} 88 89 90static void yyunputc(c) 91int c; 92{ 93 if (c == '\n') 94 yylineNum--; 95 yytext[--yypos] = c; 96} 97 98 99static int yyswallow(last) 100int last; 101{ 102 int c; 103 104 while (((c = yygetc()) > '\0') && (c != last)) 105 ; 106 107 if (c != EOF) 108 yyunputc(c); 109 if (c == last) 110 return 0; 
111 return -1; 112} 113 114 115static void yystrtotext(str) 116char *str; 117{ 118 int len; 119 char *s; 120 121 len = strlen(str); 122 if (len > YYBUFSIZ) 123 len = YYBUFSIZ; 124 125 for (s = str; *s != '\0' && len > 0; s++, len--) 126 yytext[yylast++] = *s; 127 yytext[yylast] = '\0'; 128} 129 130 131static char *yytexttostr(offset, max) 132int offset, max; 133{ 134 char *str; 135 int i; 136 137 if ((yytext[offset] == '\'' || yytext[offset] == '"') && 138 (yytext[offset] == yytext[offset + max - 1])) { 139 offset++; 140 max--; 141 } 142 143 if (max > yylast) 144 max = yylast; 145 str = malloc(max + 1); 146 if (str != NULL) { 147 for (i = offset; i < max; i++) 148 str[i - offset] = (char)(yytext[i] & 0xff); 149 str[i - offset] = '\0'; 150 } 151 return str; 152} 153 154 155int yylex() 156{ 157 int c, n, isbuilding, rval, lnext, nokey = 0; 158 char *name; 159 160 isbuilding = 0; 161 lnext = 0; 162 rval = 0; 163 164 if (yystr != NULL) { 165 free(yystr); 166 yystr = NULL; 167 } 168 169nextchar: 170 c = yygetc(); 171 172 switch (c) 173 { 174 case '\n' : 175 case '\t' : 176 case '\r' : 177 case ' ' : 178 if (isbuilding == 1) { 179 yyunputc(c); 180 goto done; 181 } 182 if (yylast > yypos) { 183 bcopy(yytext + yypos, yytext, 184 sizeof(yytext[0]) * (yylast - yypos + 1)); 185 } 186 yylast -= yypos; 187 yypos = 0; 188 lnext = 0; 189 nokey = 0; 190 goto nextchar; 191 192 case '\\' : 193 if (lnext == 0) { 194 lnext = 1; 195 if (yylast == yypos) { 196 yylast--; 197 yypos--; 198 } else 199 yypos--; 200 if (yypos == 0) 201 nokey = 1; 202 goto nextchar; 203 } 204 break; 205 } 206 207 if (lnext == 1) { 208 lnext = 0; 209 if ((isbuilding == 0) && !ISALNUM(c)) { 210 return c; 211 } 212 goto nextchar; 213 } 214 215 switch (c) 216 { 217 case '#' : 218 if (isbuilding == 1) { 219 yyunputc(c); 220 goto done; 221 } 222 yyswallow('\n'); 223 rval = YY_COMMENT; 224 goto nextchar; 225 226 case '$' : 227 if (isbuilding == 1) { 228 yyunputc(c); 229 goto done; 230 } 231 n = yygetc(); 232 if (n == 
'{') { 233 if (yyswallow('}') == -1) { 234 rval = -2; 235 goto done; 236 } 237 (void) yygetc(); 238 } else { 239 if (!ISALPHA(n)) { 240 yyunputc(n); 241 break; 242 } 243 do { 244 n = yygetc(); 245 } while (ISALPHA(n) || ISDIGIT(n) || n == '_'); 246 yyunputc(n); 247 } 248 249 name = yytexttostr(1, yypos); /* skip $ */ 250 251 if (name != NULL) { 252 string_val = get_variable(name, NULL, yylineNum); 253 free(name); 254 if (string_val != NULL) { 255 name = yytexttostr(yypos, yylast); 256 if (name != NULL) { 257 yypos = 0; 258 yylast = 0; 259 yystrtotext(string_val); 260 yystrtotext(name); 261 free(string_val); 262 free(name); 263 goto nextchar; 264 } 265 free(string_val); 266 } 267 } 268 break; 269 270 case '\'': 271 case '"' : 272 if (isbuilding == 1) { 273 goto done; 274 } 275 do { 276 n = yygetc(); 277 if (n == EOF || n == TOOLONG) { 278 rval = -2; 279 goto done; 280 } 281 if (n == '\n') { 282 yyunputc(' '); 283 yypos++; 284 } 285 } while (n != c); 286 yyunputc(n); 287 break; 288 289 case EOF : 290 yylineNum = 1; 291 yypos = 0; 292 yylast = -1; 293 yyexpectaddr = 0; 294 yybreakondot = 0; 295 yyvarnext = 0; 296 yytokentype = 0; 297 return 0; 298 } 299 300 if (strchr("=,/;{}()@", c) != NULL) { 301 if (isbuilding == 1) { 302 yyunputc(c); 303 goto done; 304 } 305 rval = c; 306 goto done; 307 } else if (c == '.') { 308 if (isbuilding == 0) { 309 rval = c; 310 goto done; 311 } 312 if (yybreakondot != 0) { 313 yyunputc(c); 314 goto done; 315 } 316 } 317 318 switch (c) 319 { 320 case '-' : 321 if (yyexpectaddr) 322 break; 323 if (isbuilding == 1) 324 break; 325 n = yygetc(); 326 if (n == '>') { 327 isbuilding = 1; 328 goto done; 329 } 330 yyunputc(n); 331 rval = '-'; 332 goto done; 333 334 case '!' 
: 335 if (isbuilding == 1) { 336 yyunputc(c); 337 goto done; 338 } 339 n = yygetc(); 340 if (n == '=') { 341 rval = YY_CMP_NE; 342 goto done; 343 } 344 yyunputc(n); 345 rval = '!'; 346 goto done; 347 348 case '<' : 349 if (yyexpectaddr) 350 break; 351 if (isbuilding == 1) { 352 yyunputc(c); 353 goto done; 354 } 355 n = yygetc(); 356 if (n == '=') { 357 rval = YY_CMP_LE; 358 goto done; 359 } 360 if (n == '>') { 361 rval = YY_RANGE_OUT; 362 goto done; 363 } 364 yyunputc(n); 365 rval = YY_CMP_LT; 366 goto done; 367 368 case '>' : 369 if (yyexpectaddr) 370 break; 371 if (isbuilding == 1) { 372 yyunputc(c); 373 goto done; 374 } 375 n = yygetc(); 376 if (n == '=') { 377 rval = YY_CMP_GE; 378 goto done; 379 } 380 if (n == '<') { 381 rval = YY_RANGE_IN; 382 goto done; 383 } 384 yyunputc(n); 385 rval = YY_CMP_GT; 386 goto done; 387 } 388 389 /* 390 * Now for the reason this is here...IPv6 address parsing. 391 * The longest string we can expect is of this form: 392 * 0000:0000:0000:0000:0000:0000:000.000.000.000 393 * not: 394 * 0000:0000:0000:0000:0000:0000:0000:0000 395 */ 396#ifdef USE_INET6 397 if (yyexpectaddr == 1 && isbuilding == 0 && (ishex(c) || c == ':')) { 398 char ipv6buf[45 + 1], *s, oc; 399 int start; 400 401 start = yypos; 402 s = ipv6buf; 403 oc = c; 404 405 /* 406 * Perhaps we should implement stricter controls on what we 407 * swallow up here, but surely it would just be duplicating 408 * the code in inet_pton() anyway. 
409 */ 410 do { 411 *s++ = c; 412 c = yygetc(); 413 } while ((ishex(c) || c == ':' || c == '.') && 414 (s - ipv6buf < 46)); 415 yyunputc(c); 416 *s = '\0'; 417 418 if (inet_pton(AF_INET6, ipv6buf, &yylval.ip6) == 1) { 419 rval = YY_IPV6; 420 yyexpectaddr = 0; 421 goto done; 422 } 423 yypos = start; 424 c = oc; 425 } 426#endif 427 428 if (c == ':') { 429 if (isbuilding == 1) { 430 yyunputc(c); 431 goto done; 432 } 433 rval = ':'; 434 goto done; 435 } 436 437 if (isbuilding == 0 && c == '0') { 438 n = yygetc(); 439 if (n == 'x') { 440 do { 441 n = yygetc(); 442 } while (ishex(n)); 443 yyunputc(n); 444 rval = YY_HEX; 445 goto done; 446 } 447 yyunputc(n); 448 } 449 450 /* 451 * No negative numbers with leading - sign.. 452 */ 453 if (isbuilding == 0 && ISDIGIT(c)) { 454 do { 455 n = yygetc(); 456 } while (ISDIGIT(n)); 457 yyunputc(n); 458 rval = YY_NUMBER; 459 goto done; 460 } 461 462 isbuilding = 1; 463 goto nextchar; 464 465done: 466 yystr = yytexttostr(0, yypos); 467 468 if (isbuilding == 1) { 469 wordtab_t *w; 470 471 w = NULL; 472 isbuilding = 0; 473 474 if ((yyvarnext == 0) && (nokey == 0)) { 475 w = yyfindkey(yystr); 476 if (w == NULL && yywordtab != NULL) { 477 yyresetdict(); 478 w = yyfindkey(yystr); 479 } 480 } else 481 yyvarnext = 0; 482 if (w != NULL) 483 rval = w->w_value; 484 else 485 rval = YY_STR; 486 } 487 488 if (rval == YY_STR && yysavedepth > 0) 489 yyresetdict(); 490 491 yytokentype = rval; 492 493 if (yydebug) 494 printf("lexed(%s) [%d,%d,%d] => %d\n", yystr, string_start, 495 string_end, pos, rval); 496 497 switch (rval) 498 { 499 case YY_NUMBER : 500 sscanf(yystr, "%u", &yylval.num); 501 break; 502 503 case YY_HEX : 504 sscanf(yystr, "0x%x", (u_int *)&yylval.num); 505 break; 506 507 case YY_STR : 508 yylval.str = strdup(yystr); 509 break; 510 511 default : 512 break; 513 } 514 515 if (yylast > 0) { 516 bcopy(yytext + yypos, yytext, 517 sizeof(yytext[0]) * (yylast - yypos + 1)); 518 yylast -= yypos; 519 yypos = 0; 520 } 521 522 return rval; 523} 
524 525 526static wordtab_t *yyfindkey(key) 527char *key; 528{ 529 wordtab_t *w; 530 531 if (yywordtab == NULL) 532 return NULL; 533 534 for (w = yywordtab; w->w_word != 0; w++) 535 if (strcasecmp(key, w->w_word) == 0) 536 return w; 537 return NULL; 538} 539 540 541char *yykeytostr(num) 542int num; 543{ 544 wordtab_t *w; 545 546 if (yywordtab == NULL) 547 return "<unknown>"; 548 549 for (w = yywordtab; w->w_word; w++) 550 if (w->w_value == num) 551 return w->w_word; 552 return "<unknown>"; 553} 554 555 556wordtab_t *yysettab(words) 557wordtab_t *words; 558{ 559 wordtab_t *save; 560 561 save = yywordtab; 562 yywordtab = words; 563 return save; 564} 565 566 567void yyerror(msg) 568char *msg; 569{ 570 char *txt, letter[2]; 571 int freetxt = 0; 572 573 if (yytokentype < 256) { 574 letter[0] = yytokentype; 575 letter[1] = '\0'; 576 txt = letter; 577 } else if (yytokentype == YY_STR || yytokentype == YY_HEX || 578 yytokentype == YY_NUMBER) { 579 if (yystr == NULL) { 580 txt = yytexttostr(yypos, YYBUFSIZ); 581 freetxt = 1; 582 } else 583 txt = yystr; 584 } else { 585 txt = yykeytostr(yytokentype); 586 } 587 fprintf(stderr, "%s error at \"%s\", line %d\n", msg, txt, yylineNum); 588 if (freetxt == 1) 589 free(txt); 590 exit(1); 591} 592 593 594void yysetdict(newdict) 595wordtab_t *newdict; 596{ 597 if (yysavedepth == sizeof(yysavewords)/sizeof(yysavewords[0])) { 598 fprintf(stderr, "%d: at maximum dictionary depth\n", 599 yylineNum); 600 return; 601 } 602 603 yysavewords[yysavedepth++] = yysettab(newdict); 604 if (yydebug) 605 printf("yysavedepth++ => %d\n", yysavedepth); 606} 607 608void yyresetdict() 609{ 610 if (yysavedepth > 0) { 611 yysettab(yysavewords[--yysavedepth]); 612 if (yydebug) 613 printf("yysavedepth-- => %d\n", yysavedepth); 614 } 615} 616 617 618 619#ifdef TEST_LEXER 620int main(argc, argv) 621int argc; 622char *argv[]; 623{ 624 int n; 625 626 yyin = stdin; 627 628 while ((n = yylex()) != 0) 629 printf("%d.n = %d [%s] %d %d\n", 630 yylineNum, n, yystr, 
yypos, yylast); 631} 632#endif 633