1/**************************************************************** 2Copyright (C) Lucent Technologies 1997 3All Rights Reserved 4 5Permission to use, copy, modify, and distribute this software and 6its documentation for any purpose and without fee is hereby 7granted, provided that the above copyright notice appear in all 8copies and that both that the copyright notice and this 9permission notice and warranty disclaimer appear in supporting 10documentation, and that the name Lucent Technologies or any of 11its entities not be used in advertising or publicity pertaining 12to distribution of the software without specific, written prior 13permission. 14 15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22THIS SOFTWARE. 23****************************************************************/ 24 25#if HAVE_NBTOOL_CONFIG_H 26#include "nbtool_config.h" 27#endif 28 29#include <stdio.h> 30#include <stdlib.h> 31#include <string.h> 32#include <ctype.h> 33#include "awk.h" 34#include "awkgram.h" 35 36extern YYSTYPE yylval; 37extern int infunc; 38 39int lineno = 1; 40int bracecnt = 0; 41int brackcnt = 0; 42int parencnt = 0; 43 44typedef struct Keyword { 45 const char *word; 46 int sub; 47 int type; 48} Keyword; 49 50int peek(void); 51int gettok(char **, int *); 52int binsearch(const char *, const Keyword *, int); 53 54const Keyword keywords[] ={ /* keep sorted: binary searched */ 55 { "BEGIN", XBEGIN, XBEGIN }, 56 { "END", XEND, XEND }, 57 { "NF", VARNF, VARNF }, 58 { "atan2", FATAN, BLTIN }, 59 { "break", BREAK, BREAK }, 60 { "close", CLOSE, CLOSE }, 61 { "continue", CONTINUE, CONTINUE }, 62 { "cos", FCOS, BLTIN }, 63 { "delete", DELETE, DELETE }, 64 { "do", DO, DO }, 65 { "else", ELSE, ELSE }, 66 { "exit", EXIT, EXIT }, 67 { "exp", FEXP, BLTIN }, 68 { "fflush", FFLUSH, BLTIN }, 69 { "for", FOR, FOR }, 70 { "func", FUNC, FUNC }, 71 { "function", FUNC, FUNC }, 72 { "gensub", GENSUB, GENSUB }, 73 { "getline", GETLINE, GETLINE }, 74 { "gsub", GSUB, GSUB }, 75 { "if", IF, IF }, 76 { "in", IN, IN }, 77 { "index", INDEX, INDEX }, 78 { "int", FINT, BLTIN }, 79 { "length", FLENGTH, BLTIN }, 80 { "log", FLOG, BLTIN }, 81 { "match", MATCHFCN, MATCHFCN }, 82 { "next", NEXT, NEXT }, 83 { "nextfile", NEXTFILE, NEXTFILE }, 84 { "print", PRINT, PRINT }, 85 { "printf", PRINTF, PRINTF }, 86 { "rand", FRAND, BLTIN }, 87 { "return", RETURN, RETURN }, 88 { "sin", FSIN, BLTIN }, 89 { "split", SPLIT, SPLIT }, 90 { "sprintf", SPRINTF, SPRINTF }, 91 { "sqrt", FSQRT, BLTIN }, 92 { "srand", FSRAND, BLTIN }, 93 { "strftime", FSTRFTIME, BLTIN }, 94 { "sub", SUB, SUB }, 95 { "substr", SUBSTR, SUBSTR }, 96 { "system", FSYSTEM, BLTIN }, 97 { "systime", FSYSTIME, BLTIN }, 98 { "tolower", FTOLOWER, BLTIN }, 99 { "toupper", FTOUPPER, BLTIN }, 100 { "while", WHILE, WHILE }, 101}; 102 103#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } 104 105int peek(void) 106{ 107 int c = input(); 108 unput(c); 109 return c; 110} 111 112int gettok(char **pbuf, int *psz) /* get next input token */ 113{ 114 int c, retc; 115 uschar *buf = (uschar *) *pbuf; 116 int sz = *psz; 117 uschar *bp = buf; 118 119 c = input(); 120 if (c == 0) 121 return 0; 122 buf[0] = c; 123 buf[1] = 0; 124 if (!isalnum(c) && c != '.' && c != '_') 125 return c; 126 127 *bp++ = c; 128 if (isalpha(c) || c == '_') { /* it's a varname */ 129 for ( ; (c = input()) != 0; ) { 130 if (bp-buf >= sz) 131 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) 132 FATAL( "out of space for name %.10s...", buf ); 133 if (isalnum(c) || c == '_') 134 *bp++ = c; 135 else { 136 *bp = 0; 137 unput(c); 138 break; 139 } 140 } 141 *bp = 0; 142 retc = 'a'; /* alphanumeric */ 143 } else { /* maybe it's a number, but could be . */ 144 char *rem; 145 /* read input until can't be a number */ 146 for ( ; (c = input()) != 0; ) { 147 if (bp-buf >= sz) 148 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) 149 FATAL( "out of space for number %.10s...", buf ); 150 if (isdigit(c) || c == 'e' || c == 'E' 151 || c == '.' || c == '+' || c == '-') 152 *bp++ = c; 153 else { 154 unput(c); 155 break; 156 } 157 } 158 *bp = 0; 159 strtod(buf, &rem); /* parse the number */ 160 if (rem == (char *)buf) { /* it wasn't a valid number at all */ 161 buf[1] = 0; /* return one character as token */ 162 retc = buf[0]; /* character is its own type */ 163 unputstr(rem+1); /* put rest back for later */ 164 } else { /* some prefix was a number */ 165 unputstr(rem); /* put rest back for later */ 166 rem[0] = 0; /* truncate buf after number part */ 167 retc = '0'; /* type is number */ 168 } 169 } 170 *pbuf = buf; 171 *psz = sz; 172 return retc; 173} 174 175int word(char *); 176int string(void); 177int regexpr(void); 178int sc = 0; /* 1 => return a } right now */ 179int reg = 0; /* 1 => return a REGEXPR now */ 180 181int yylex(void) 182{ 183 int c; 184 static char *buf = 0; 185 static int bufsize = 5; /* BUG: setting this small causes core dump! */ 186 187 if (buf == 0 && (buf = malloc(bufsize)) == NULL) 188 FATAL( "out of space in yylex" ); 189 if (sc) { 190 sc = 0; 191 RET('}'); 192 } 193 if (reg) { 194 reg = 0; 195 return regexpr(); 196 } 197 for (;;) { 198 c = gettok(&buf, &bufsize); 199 if (c == 0) 200 return 0; 201 if (isalpha(c) || c == '_') 202 return word(buf); 203 if (isdigit(c)) { 204 yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab); 205 /* should this also have STR set? */ 206 RET(NUMBER); 207 } 208 209 yylval.i = c; 210 switch (c) { 211 case '\n': /* {EOL} */ 212 RET(NL); 213 case '\r': /* assume \n is coming */ 214 case ' ': /* {WS}+ */ 215 case '\t': 216 break; 217 case '#': /* #.* strip comments */ 218 while ((c = input()) != '\n' && c != 0) 219 ; 220 unput(c); 221 break; 222 case ';': 223 RET(';'); 224 case '\\': 225 if (peek() == '\n') { 226 input(); 227 } else if (peek() == '\r') { 228 input(); input(); /* \n */ 229 lineno++; 230 } else { 231 RET(c); 232 } 233 break; 234 case '&': 235 if (peek() == '&') { 236 input(); RET(AND); 237 } else 238 RET('&'); 239 case '|': 240 if (peek() == '|') { 241 input(); RET(BOR); 242 } else 243 RET('|'); 244 case '!': 245 if (peek() == '=') { 246 input(); yylval.i = NE; RET(NE); 247 } else if (peek() == '~') { 248 input(); yylval.i = NOTMATCH; RET(MATCHOP); 249 } else 250 RET(NOT); 251 case '~': 252 yylval.i = MATCH; 253 RET(MATCHOP); 254 case '<': 255 if (peek() == '=') { 256 input(); yylval.i = LE; RET(LE); 257 } else { 258 yylval.i = LT; RET(LT); 259 } 260 case '=': 261 if (peek() == '=') { 262 input(); yylval.i = EQ; RET(EQ); 263 } else { 264 yylval.i = ASSIGN; RET(ASGNOP); 265 } 266 case '>': 267 if (peek() == '=') { 268 input(); yylval.i = GE; RET(GE); 269 } else if (peek() == '>') { 270 input(); yylval.i = APPEND; RET(APPEND); 271 } else { 272 yylval.i = GT; RET(GT); 273 } 274 case '+': 275 if (peek() == '+') { 276 input(); yylval.i = INCR; RET(INCR); 277 } else if (peek() == '=') { 278 input(); yylval.i = ADDEQ; RET(ASGNOP); 279 } else 280 RET('+'); 281 case '-': 282 if (peek() == '-') { 283 input(); yylval.i = DECR; RET(DECR); 284 } else if (peek() == '=') { 285 input(); yylval.i = SUBEQ; RET(ASGNOP); 286 } else 287 RET('-'); 288 case '*': 289 if (peek() == '=') { /* *= */ 290 input(); yylval.i = MULTEQ; RET(ASGNOP); 291 } else if (peek() == '*') { /* ** or **= */ 292 input(); /* eat 2nd * */ 293 if (peek() == '=') { 294 input(); yylval.i = POWEQ; RET(ASGNOP); 295 } else { 296 RET(POWER); 297 } 298 } else 299 RET('*'); 300 case '/': 301 RET('/'); 302 case '%': 303 if (peek() == '=') { 304 input(); yylval.i = MODEQ; RET(ASGNOP); 305 } else 306 RET('%'); 307 case '^': 308 if (peek() == '=') { 309 input(); yylval.i = POWEQ; RET(ASGNOP); 310 } else 311 RET(POWER); 312 313 case '$': 314 /* BUG: awkward, if not wrong */ 315 c = gettok(&buf, &bufsize); 316 if (isalpha(c)) { 317 if (strcmp(buf, "NF") == 0) { /* very special */ 318 unputstr("(NF)"); 319 RET(INDIRECT); 320 } 321 c = peek(); 322 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) { 323 unputstr(buf); 324 RET(INDIRECT); 325 } 326 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab); 327 RET(IVAR); 328 } else if (c == 0) { /* */ 329 SYNTAX( "unexpected end of input after $" ); 330 RET(';'); 331 } else { 332 unputstr(buf); 333 RET(INDIRECT); 334 } 335 336 case '}': 337 if (--bracecnt < 0) 338 SYNTAX( "extra }" ); 339 sc = 1; 340 RET(';'); 341 case ']': 342 if (--brackcnt < 0) 343 SYNTAX( "extra ]" ); 344 RET(']'); 345 case ')': 346 if (--parencnt < 0) 347 SYNTAX( "extra )" ); 348 RET(')'); 349 case '{': 350 bracecnt++; 351 RET('{'); 352 case '[': 353 brackcnt++; 354 RET('['); 355 case '(': 356 parencnt++; 357 RET('('); 358 359 case '"': 360 return string(); /* BUG: should be like tran.c ? */ 361 362 default: 363 RET(c); 364 } 365 } 366} 367 368int string(void) 369{ 370 int c, n; 371 uschar *s, *bp; 372 static uschar *buf = 0; 373 static int bufsz = 500; 374 375 if (buf == 0 && (buf = malloc(bufsz)) == NULL) 376 FATAL("out of space for strings"); 377 for (bp = buf; (c = input()) != '"'; ) { 378 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) 379 FATAL("out of space for string %.10s...", buf); 380 switch (c) { 381 case '\n': 382 case '\r': 383 case 0: 384 SYNTAX( "non-terminated string %.10s...", buf ); 385 lineno++; 386 if (c == 0) /* hopeless */ 387 FATAL( "giving up" ); 388 break; 389 case '\\': 390 c = input(); 391 switch (c) { 392 case '\n': break; 393 case '"': *bp++ = '"'; break; 394 case 'n': *bp++ = '\n'; break; 395 case 't': *bp++ = '\t'; break; 396 case 'f': *bp++ = '\f'; break; 397 case 'r': *bp++ = '\r'; break; 398 case 'b': *bp++ = '\b'; break; 399 case 'v': *bp++ = '\v'; break; 400 case 'a': *bp++ = '\007'; break; 401 case '\\': *bp++ = '\\'; break; 402 403 case '0': case '1': case '2': /* octal: \d \dd \ddd */ 404 case '3': case '4': case '5': case '6': case '7': 405 n = c - '0'; 406 if ((c = peek()) >= '0' && c < '8') { 407 n = 8 * n + input() - '0'; 408 if ((c = peek()) >= '0' && c < '8') 409 n = 8 * n + input() - '0'; 410 } 411 *bp++ = n; 412 break; 413 414 case 'x': /* hex \x0-9a-fA-F + */ 415 { char xbuf[100], *px; 416 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) { 417 if (isdigit(c) 418 || (c >= 'a' && c <= 'f') 419 || (c >= 'A' && c <= 'F')) 420 *px++ = c; 421 else 422 break; 423 } 424 *px = 0; 425 unput(c); 426 sscanf(xbuf, "%x", &n); 427 *bp++ = n; 428 break; 429 } 430 431 default: 432 WARNING("warning: escape sequence `\\%c' " 433 "treated as plain `%c'", c, c); 434 *bp++ = c; 435 break; 436 } 437 break; 438 default: 439 *bp++ = c; 440 break; 441 } 442 } 443 *bp = 0; 444 s = tostring(buf); 445 *bp++ = ' '; *bp++ = 0; 446 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); 447 RET(STRING); 448} 449 450 451int binsearch(const char *w, const Keyword *kp, int n) 452{ 453 int cond, low, mid, high; 454 455 low = 0; 456 high = n - 1; 457 while (low <= high) { 458 mid = (low + high) / 2; 459 if ((cond = strcmp(w, kp[mid].word)) < 0) 460 high = mid - 1; 461 else if (cond > 0) 462 low = mid + 1; 463 else 464 return mid; 465 } 466 return -1; 467} 468 469int word(char *w) 470{ 471 const Keyword *kp; 472 int c, n; 473 474 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); 475/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */ 476 kp = keywords + n; 477 if (n != -1) { /* found in table */ 478 yylval.i = kp->sub; 479 switch (kp->type) { /* special handling */ 480 case BLTIN: 481 if (kp->sub == FSYSTEM && safe) 482 SYNTAX( "system is unsafe" ); 483 RET(kp->type); 484 case FUNC: 485 if (infunc) 486 SYNTAX( "illegal nested function" ); 487 RET(kp->type); 488 case RETURN: 489 if (!infunc) 490 SYNTAX( "return not in function" ); 491 RET(kp->type); 492 case VARNF: 493 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab); 494 RET(VARNF); 495 default: 496 RET(kp->type); 497 } 498 } 499 c = peek(); /* look for '(' */ 500 if (c != '(' && infunc && (n=isarg(w)) >= 0) { 501 yylval.i = n; 502 RET(ARG); 503 } else { 504 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab); 505 if (c == '(') { 506 RET(CALL); 507 } else { 508 RET(VAR); 509 } 510 } 511} 512 513void startreg(void) /* next call to yylex will return a regular expression */ 514{ 515 reg = 1; 516} 517 518int regexpr(void) 519{ 520 int c; 521 static uschar *buf = 0; 522 static int bufsz = 500; 523 uschar *bp; 524 525 if (buf == 0 && (buf = malloc(bufsz)) == NULL) 526 FATAL("out of space for rex expr"); 527 bp = buf; 528 for ( ; (c = input()) != '/' && c != 0; ) { 529 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) 530 FATAL("out of space for reg expr %.10s...", buf); 531 if (c == '\n') { 532 SYNTAX( "newline in regular expression %.10s...", buf ); 533 unput('\n'); 534 break; 535 } else if (c == '\\') { 536 *bp++ = '\\'; 537 *bp++ = input(); 538 } else { 539 *bp++ = c; 540 } 541 } 542 *bp = 0; 543 if (c == 0) 544 SYNTAX("non-terminated regular expression %.10s...", buf); 545 yylval.s = tostring(buf); 546 unput('/'); 547 RET(REGEXPR); 548} 549 550/* low-level lexical stuff, sort of inherited from lex */ 551 552char ebuf[300]; 553char *ep = ebuf; 554char yysbuf[100]; /* pushback buffer */ 555char *yysptr = yysbuf; 556FILE *yyin = 0; 557 558int input(void) /* get next lexical input character */ 559{ 560 int c; 561 extern char *lexprog; 562 563 if (yysptr > yysbuf) 564 c = (uschar)*--yysptr; 565 else if (lexprog != NULL) { /* awk '...' */ 566 if ((c = (uschar)*lexprog) != 0) 567 lexprog++; 568 } else /* awk -f ... */ 569 c = pgetc(); 570 if (c == '\n') 571 lineno++; 572 else if (c == EOF) 573 c = 0; 574 if (ep >= ebuf + sizeof ebuf) 575 ep = ebuf; 576 return *ep++ = c; 577} 578 579void unput(int c) /* put lexical character back on input */ 580{ 581 if (c == '\n') 582 lineno--; 583 if (yysptr >= yysbuf + sizeof(yysbuf)) 584 FATAL("pushed back too much: %.20s...", yysbuf); 585 *yysptr++ = c; 586 if (--ep < ebuf) 587 ep = ebuf + sizeof(ebuf) - 1; 588} 589 590void unputstr(const char *s) /* put a string back on input */ 591{ 592 int i; 593 594 for (i = strlen(s)-1; i >= 0; i--) 595 unput(s[i]); 596} 597