b.c revision 107806
1/**************************************************************** 2Copyright (C) Lucent Technologies 1997 3All Rights Reserved 4 5Permission to use, copy, modify, and distribute this software and 6its documentation for any purpose and without fee is hereby 7granted, provided that the above copyright notice appear in all 8copies and that both that the copyright notice and this 9permission notice and warranty disclaimer appear in supporting 10documentation, and that the name Lucent Technologies or any of 11its entities not be used in advertising or publicity pertaining 12to distribution of the software without specific, written prior 13permission. 14 15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22THIS SOFTWARE. 23****************************************************************/ 24 25/* lasciate ogne speranza, voi ch'entrate. */ 26 27#define DEBUG 28 29#include <ctype.h> 30#include <stdio.h> 31#include <string.h> 32#include <stdlib.h> 33#include "awk.h" 34#include "ytab.h" 35 36#define HAT (NCHARS-2) /* matches ^ in regular expr */ 37 /* NCHARS is 2**n */ 38#define MAXLIN 22 39 40#define type(v) (v)->nobj /* badly overloaded here */ 41#define info(v) (v)->ntype /* badly overloaded here */ 42#define left(v) (v)->narg[0] 43#define right(v) (v)->narg[1] 44#define parent(v) (v)->nnext 45 46#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 47#define UNARY case STAR: case PLUS: case QUEST: 48 49/* encoding in tree Nodes: 50 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 51 left is index, right contains value or pointer to value 52 unary (STAR, PLUS, QUEST): left is child, right is null 53 binary (CAT, OR): left and right are children 54 parent contains pointer to parent 55*/ 56 57 58int *setvec; 59int *tmpset; 60int maxsetvec = 0; 61 62int rtok; /* next token in current re */ 63int rlxval; 64static uschar *rlxstr; 65static uschar *prestr; /* current position in current re */ 66static uschar *lastre; /* origin of last re */ 67 68static int setcnt; 69static int poscnt; 70 71char *patbeg; 72int patlen; 73 74#define NFA 20 /* cache this many dynamic fa's */ 75fa *fatab[NFA]; 76int nfatab = 0; /* entries in fatab */ 77 78fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ 79{ 80 int i, use, nuse; 81 fa *pfa; 82 static int now = 1; 83 84 if (setvec == 0) { /* first time through any RE */ 85 maxsetvec = MAXLIN; 86 setvec = (int *) malloc(maxsetvec * sizeof(int)); 87 tmpset = (int *) malloc(maxsetvec * sizeof(int)); 88 if (setvec == 0 || tmpset == 0) 89 overflo("out of space initializing makedfa"); 90 } 91 92 if (compile_time) /* a constant for sure */ 93 return mkdfa(s, anchor); 94 for (i = 0; i < nfatab; i++) /* is it there already? */ 95 if (fatab[i]->anchor == anchor 96 && strcmp((const char *) fatab[i]->restr, s) == 0) { 97 fatab[i]->use = now++; 98 return fatab[i]; 99 } 100 pfa = mkdfa(s, anchor); 101 if (nfatab < NFA) { /* room for another */ 102 fatab[nfatab] = pfa; 103 fatab[nfatab]->use = now++; 104 nfatab++; 105 return pfa; 106 } 107 use = fatab[0]->use; /* replace least-recently used */ 108 nuse = 0; 109 for (i = 1; i < nfatab; i++) 110 if (fatab[i]->use < use) { 111 use = fatab[i]->use; 112 nuse = i; 113 } 114 freefa(fatab[nuse]); 115 fatab[nuse] = pfa; 116 pfa->use = now++; 117 return pfa; 118} 119 120fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ 121 /* anchor = 1 for anchored matches, else 0 */ 122{ 123 Node *p, *p1; 124 fa *f; 125 126 p = reparse(s); 127 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 128 /* put ALL STAR in front of reg. exp. */ 129 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 130 /* put FINAL after reg. exp. */ 131 132 poscnt = 0; 133 penter(p1); /* enter parent pointers and leaf indices */ 134 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 135 overflo("out of space for fa"); 136 f->accept = poscnt-1; /* penter has computed number of positions in re */ 137 cfoll(f, p1); /* set up follow sets */ 138 freetr(p1); 139 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) 140 overflo("out of space in makedfa"); 141 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 142 overflo("out of space in makedfa"); 143 *f->posns[1] = 0; 144 f->initstat = makeinit(f, anchor); 145 f->anchor = anchor; 146 f->restr = (uschar *) tostring(s); 147 return f; 148} 149 150int makeinit(fa *f, int anchor) 151{ 152 int i, k; 153 154 f->curstat = 2; 155 f->out[2] = 0; 156 f->reset = 0; 157 k = *(f->re[0].lfollow); 158 xfree(f->posns[2]); 159 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 160 overflo("out of space in makeinit"); 161 for (i=0; i <= k; i++) { 162 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 163 } 164 if ((f->posns[2])[1] == f->accept) 165 f->out[2] = 1; 166 for (i=0; i < NCHARS; i++) 167 f->gototab[2][i] = 0; 168 f->curstat = cgoto(f, 2, HAT); 169 if (anchor) { 170 *f->posns[2] = k-1; /* leave out position 0 */ 171 for (i=0; i < k; i++) { 172 (f->posns[0])[i] = (f->posns[2])[i]; 173 } 174 175 f->out[0] = f->out[2]; 176 if (f->curstat != 2) 177 --(*f->posns[f->curstat]); 178 } 179 return f->curstat; 180} 181 182void penter(Node *p) /* set up parent pointers and leaf indices */ 183{ 184 switch (type(p)) { 185 LEAF 186 info(p) = poscnt; 187 poscnt++; 188 break; 189 UNARY 190 penter(left(p)); 191 parent(left(p)) = p; 192 break; 193 case CAT: 194 case OR: 195 penter(left(p)); 196 penter(right(p)); 197 parent(left(p)) = p; 198 parent(right(p)) = p; 199 break; 200 default: /* can't happen */ 201 FATAL("can't happen: unknown type %d in penter", type(p)); 202 break; 203 } 204} 205 206void freetr(Node *p) /* free parse tree */ 207{ 208 switch (type(p)) { 209 LEAF 210 xfree(p); 211 break; 212 UNARY 213 freetr(left(p)); 214 xfree(p); 215 break; 216 case CAT: 217 case OR: 218 freetr(left(p)); 219 freetr(right(p)); 220 xfree(p); 221 break; 222 default: /* can't happen */ 223 FATAL("can't happen: unknown type %d in freetr", type(p)); 224 break; 225 } 226} 227 228/* in the parsing of regular expressions, metacharacters like . have */ 229/* to be seen literally; \056 is not a metacharacter. */ 230 231int hexstr(char **pp) /* find and eval hex string at pp, return new p */ 232{ /* only pick up one 8-bit byte (2 chars) */ 233 uschar *p; 234 int n = 0; 235 int i; 236 237 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 238 if (isdigit(*p)) 239 n = 16 * n + *p - '0'; 240 else if (*p >= 'a' && *p <= 'f') 241 n = 16 * n + *p - 'a' + 10; 242 else if (*p >= 'A' && *p <= 'F') 243 n = 16 * n + *p - 'A' + 10; 244 } 245 *pp = (char *) p; 246 return n; 247} 248 249#define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 250 251int quoted(char **pp) /* pick up next thing after a \\ */ 252 /* and increment *pp */ 253{ 254 char *p = *pp; 255 int c; 256 257 if ((c = *p++) == 't') 258 c = '\t'; 259 else if (c == 'n') 260 c = '\n'; 261 else if (c == 'f') 262 c = '\f'; 263 else if (c == 'r') 264 c = '\r'; 265 else if (c == 'b') 266 c = '\b'; 267 else if (c == '\\') 268 c = '\\'; 269 else if (c == 'x') { /* hexadecimal goo follows */ 270 c = hexstr(&p); /* this adds a null if number is invalid */ 271 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 272 int n = c - '0'; 273 if (isoctdigit(*p)) { 274 n = 8 * n + *p++ - '0'; 275 if (isoctdigit(*p)) 276 n = 8 * n + *p++ - '0'; 277 } 278 c = n; 279 } /* else */ 280 /* c = c; */ 281 *pp = p; 282 return c; 283} 284 285static int collate_range_cmp(int a, int b) 286{ 287 int r; 288 static char s[2][2]; 289 290 if ((uschar)a == (uschar)b) 291 return 0; 292 s[0][0] = a; 293 s[1][0] = b; 294 if ((r = strcoll(s[0], s[1])) == 0) 295 r = (uschar)a - (uschar)b; 296 return r; 297} 298 299char *cclenter(const char *argp) /* add a character class */ 300{ 301 int i, c, c2; 302 int j; 303 uschar *p = (uschar *) argp; 304 uschar *op, *bp; 305 static uschar *buf = 0; 306 static int bufsz = 100; 307 308 op = p; 309 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 310 FATAL("out of space for character class [%.10s...] 1", p); 311 bp = buf; 312 for (i = 0; (c = *p++) != 0; ) { 313 if (c == '\\') { 314 c = quoted((char **) &p); 315 } else if (c == '-' && i > 0 && bp[-1] != 0) { 316 if (*p != 0) { 317 c = bp[-1]; 318 c2 = *p++; 319 if (c2 == '\\') 320 c2 = quoted((char **) &p); 321 if (collate_range_cmp(c, c2) > 0) { /* empty; ignore */ 322 bp--; 323 i--; 324 continue; 325 } 326 for (j = 0; j < NCHARS; j++) { 327 if ((collate_range_cmp(c, j) > 0) || 328 collate_range_cmp(j, c2) > 0) 329 continue; 330 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 331 FATAL("out of space for character class [%.10s...] 2", p); 332 *bp++ = j; 333 i++; 334 } 335 continue; 336 } 337 } 338 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 339 FATAL("out of space for character class [%.10s...] 3", p); 340 *bp++ = c; 341 i++; 342 } 343 *bp = 0; 344 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 345 xfree(op); 346 return (char *) tostring((char *) buf); 347} 348 349void overflo(const char *s) 350{ 351 FATAL("regular expression too big: %.30s...", s); 352} 353 354void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 355{ 356 int i; 357 int *p; 358 359 switch (type(v)) { 360 LEAF 361 f->re[info(v)].ltype = type(v); 362 f->re[info(v)].lval.np = right(v); 363 while (f->accept >= maxsetvec) { /* guessing here! */ 364 maxsetvec *= 4; 365 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 366 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 367 if (setvec == 0 || tmpset == 0) 368 overflo("out of space in cfoll()"); 369 } 370 for (i = 0; i <= f->accept; i++) 371 setvec[i] = 0; 372 setcnt = 0; 373 follow(v); /* computes setvec and setcnt */ 374 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 375 overflo("out of space building follow set"); 376 f->re[info(v)].lfollow = p; 377 *p = setcnt; 378 for (i = f->accept; i >= 0; i--) 379 if (setvec[i] == 1) 380 *++p = i; 381 break; 382 UNARY 383 cfoll(f,left(v)); 384 break; 385 case CAT: 386 case OR: 387 cfoll(f,left(v)); 388 cfoll(f,right(v)); 389 break; 390 default: /* can't happen */ 391 FATAL("can't happen: unknown type %d in cfoll", type(v)); 392 } 393} 394 395int first(Node *p) /* collects initially active leaves of p into setvec */ 396 /* returns 1 if p matches empty string */ 397{ 398 int b, lp; 399 400 switch (type(p)) { 401 LEAF 402 lp = info(p); /* look for high-water mark of subscripts */ 403 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 404 maxsetvec *= 4; 405 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 406 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 407 if (setvec == 0 || tmpset == 0) 408 overflo("out of space in first()"); 409 } 410 if (setvec[lp] != 1) { 411 setvec[lp] = 1; 412 setcnt++; 413 } 414 if (type(p) == CCL && (*(char *) right(p)) == '\0') 415 return(0); /* empty CCL */ 416 else return(1); 417 case PLUS: 418 if (first(left(p)) == 0) return(0); 419 return(1); 420 case STAR: 421 case QUEST: 422 first(left(p)); 423 return(0); 424 case CAT: 425 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 426 return(1); 427 case OR: 428 b = first(right(p)); 429 if (first(left(p)) == 0 || b == 0) return(0); 430 return(1); 431 } 432 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 433 return(-1); 434} 435 436void follow(Node *v) /* collects leaves that can follow v into setvec */ 437{ 438 Node *p; 439 440 if (type(v) == FINAL) 441 return; 442 p = parent(v); 443 switch (type(p)) { 444 case STAR: 445 case PLUS: 446 first(v); 447 follow(p); 448 return; 449 450 case OR: 451 case QUEST: 452 follow(p); 453 return; 454 455 case CAT: 456 if (v == left(p)) { /* v is left child of p */ 457 if (first(right(p)) == 0) { 458 follow(p); 459 return; 460 } 461 } else /* v is right child */ 462 follow(p); 463 return; 464 } 465} 466 467int member(int c, const char *sarg) /* is c in s? */ 468{ 469 uschar *s = (uschar *) sarg; 470 471 while (*s) 472 if (c == *s++) 473 return(1); 474 return(0); 475} 476 477int match(fa *f, const char *p0) /* shortest match ? */ 478{ 479 int s, ns; 480 uschar *p = (uschar *) p0; 481 482 s = f->reset ? makeinit(f,0) : f->initstat; 483 if (f->out[s]) 484 return(1); 485 do { 486 if ((ns = f->gototab[s][*p]) != 0) 487 s = ns; 488 else 489 s = cgoto(f, s, *p); 490 if (f->out[s]) 491 return(1); 492 } while (*p++ != 0); 493 return(0); 494} 495 496int pmatch(fa *f, const char *p0) /* longest match, for sub */ 497{ 498 int s, ns; 499 uschar *p = (uschar *) p0; 500 uschar *q; 501 int i, k; 502 503 s = f->reset ? makeinit(f,1) : f->initstat; 504 patbeg = (char *) p; 505 patlen = -1; 506 do { 507 q = p; 508 do { 509 if (f->out[s]) /* final state */ 510 patlen = q-p; 511 if ((ns = f->gototab[s][*q]) != 0) 512 s = ns; 513 else 514 s = cgoto(f, s, *q); 515 if (s == 1) { /* no transition */ 516 if (patlen >= 0) { 517 patbeg = (char *) p; 518 return(1); 519 } 520 else 521 goto nextin; /* no match */ 522 } 523 } while (*q++ != 0); 524 if (f->out[s]) 525 patlen = q-p-1; /* don't count $ */ 526 if (patlen >= 0) { 527 patbeg = (char *) p; 528 return(1); 529 } 530 nextin: 531 s = 2; 532 if (f->reset) { 533 for (i = 2; i <= f->curstat; i++) 534 xfree(f->posns[i]); 535 k = *f->posns[0]; 536 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 537 overflo("out of space in pmatch"); 538 for (i = 0; i <= k; i++) 539 (f->posns[2])[i] = (f->posns[0])[i]; 540 f->initstat = f->curstat = 2; 541 f->out[2] = f->out[0]; 542 for (i = 0; i < NCHARS; i++) 543 f->gototab[2][i] = 0; 544 } 545 } while (*p++ != 0); 546 return (0); 547} 548 549int nematch(fa *f, const char *p0) /* non-empty match, for sub */ 550{ 551 int s, ns; 552 uschar *p = (uschar *) p0; 553 uschar *q; 554 int i, k; 555 556 s = f->reset ? makeinit(f,1) : f->initstat; 557 patlen = -1; 558 while (*p) { 559 q = p; 560 do { 561 if (f->out[s]) /* final state */ 562 patlen = q-p; 563 if ((ns = f->gototab[s][*q]) != 0) 564 s = ns; 565 else 566 s = cgoto(f, s, *q); 567 if (s == 1) { /* no transition */ 568 if (patlen > 0) { 569 patbeg = (char *) p; 570 return(1); 571 } else 572 goto nnextin; /* no nonempty match */ 573 } 574 } while (*q++ != 0); 575 if (f->out[s]) 576 patlen = q-p-1; /* don't count $ */ 577 if (patlen > 0 ) { 578 patbeg = (char *) p; 579 return(1); 580 } 581 nnextin: 582 s = 2; 583 if (f->reset) { 584 for (i = 2; i <= f->curstat; i++) 585 xfree(f->posns[i]); 586 k = *f->posns[0]; 587 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 588 overflo("out of state space"); 589 for (i = 0; i <= k; i++) 590 (f->posns[2])[i] = (f->posns[0])[i]; 591 f->initstat = f->curstat = 2; 592 f->out[2] = f->out[0]; 593 for (i = 0; i < NCHARS; i++) 594 f->gototab[2][i] = 0; 595 } 596 p++; 597 } 598 return (0); 599} 600 601Node *reparse(const char *p) /* parses regular expression pointed to by p */ 602{ /* uses relex() to scan regular expression */ 603 Node *np; 604 605 dprintf( ("reparse <%s>\n", p) ); 606 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 607 rtok = relex(); 608 /* GNU compatibility: an empty regexp matches anything */ 609 if (rtok == '\0') 610 /* FATAL("empty regular expression"); previous */ 611 return(op2(ALL, NIL, NIL)); 612 np = regexp(); 613 if (rtok != '\0') 614 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 615 return(np); 616} 617 618Node *regexp(void) /* top-level parse of reg expr */ 619{ 620 return (alt(concat(primary()))); 621} 622 623Node *primary(void) 624{ 625 Node *np; 626 627 switch (rtok) { 628 case CHAR: 629 np = op2(CHAR, NIL, itonp(rlxval)); 630 rtok = relex(); 631 return (unary(np)); 632 case ALL: 633 rtok = relex(); 634 return (unary(op2(ALL, NIL, NIL))); 635 case DOT: 636 rtok = relex(); 637 return (unary(op2(DOT, NIL, NIL))); 638 case CCL: 639 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 640 rtok = relex(); 641 return (unary(np)); 642 case NCCL: 643 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 644 rtok = relex(); 645 return (unary(np)); 646 case '^': 647 rtok = relex(); 648 return (unary(op2(CHAR, NIL, itonp(HAT)))); 649 case '$': 650 rtok = relex(); 651 return (unary(op2(CHAR, NIL, NIL))); 652 case '(': 653 rtok = relex(); 654 if (rtok == ')') { /* special pleading for () */ 655 rtok = relex(); 656 return unary(op2(CCL, NIL, (Node *) tostring(""))); 657 } 658 np = regexp(); 659 if (rtok == ')') { 660 rtok = relex(); 661 return (unary(np)); 662 } 663 else 664 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 665 default: 666 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 667 } 668 return 0; /*NOTREACHED*/ 669} 670 671Node *concat(Node *np) 672{ 673 switch (rtok) { 674 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 675 return (concat(op2(CAT, np, primary()))); 676 } 677 return (np); 678} 679 680Node *alt(Node *np) 681{ 682 if (rtok == OR) { 683 rtok = relex(); 684 return (alt(op2(OR, np, concat(primary())))); 685 } 686 return (np); 687} 688 689Node *unary(Node *np) 690{ 691 switch (rtok) { 692 case STAR: 693 rtok = relex(); 694 return (unary(op2(STAR, np, NIL))); 695 case PLUS: 696 rtok = relex(); 697 return (unary(op2(PLUS, np, NIL))); 698 case QUEST: 699 rtok = relex(); 700 return (unary(op2(QUEST, np, NIL))); 701 default: 702 return (np); 703 } 704} 705 706/* 707 * Character class definitions conformant to the POSIX locale as 708 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source 709 * and operating character sets are both ASCII (ISO646) or supersets 710 * thereof. 711 * 712 * Note that to avoid overflowing the temporary buffer used in 713 * relex(), the expanded character class (prior to range expansion) 714 * must be less than twice the size of their full name. 715 */ 716 717struct charclass { 718 const char *cc_name; 719 int cc_namelen; 720 int (*cc_func)(int); 721} charclasses[] = { 722 { "alnum", 5, isalnum }, 723 { "alpha", 5, isalpha }, 724 { "blank", 5, isblank }, 725 { "cntrl", 5, iscntrl }, 726 { "digit", 5, isdigit }, 727 { "graph", 5, isgraph }, 728 { "lower", 5, islower }, 729 { "print", 5, isprint }, 730 { "punct", 5, ispunct }, 731 { "space", 5, isspace }, 732 { "upper", 5, isupper }, 733 { "xdigit", 6, isxdigit }, 734 { NULL, 0, NULL }, 735}; 736 737 738int relex(void) /* lexical analyzer for reparse */ 739{ 740 int c, n; 741 int cflag; 742 static uschar *buf = 0; 743 static int bufsz = 100; 744 uschar *bp; 745 struct charclass *cc; 746 int i; 747 748 switch (c = *prestr++) { 749 case '|': return OR; 750 case '*': return STAR; 751 case '+': return PLUS; 752 case '?': return QUEST; 753 case '.': return DOT; 754 case '\0': prestr--; return '\0'; 755 case '^': 756 case '$': 757 case '(': 758 case ')': 759 return c; 760 case '\\': 761 rlxval = quoted((char **) &prestr); 762 return CHAR; 763 default: 764 rlxval = c; 765 return CHAR; 766 case '[': 767 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 768 FATAL("out of space in reg expr %.10s..", lastre); 769 bp = buf; 770 if (*prestr == '^') { 771 cflag = 1; 772 prestr++; 773 } 774 else 775 cflag = 0; 776 n = 2 * strlen((const char *) prestr)+1; 777 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) 778 FATAL("out of space for reg expr %.10s...", lastre); 779 for (; ; ) { 780 if ((c = *prestr++) == '\\') { 781 *bp++ = '\\'; 782 if ((c = *prestr++) == '\0') 783 FATAL("nonterminated character class %.20s...", lastre); 784 *bp++ = c; 785 /* } else if (c == '\n') { */ 786 /* FATAL("newline in character class %.20s...", lastre); */ 787 } else if (c == '[' && *prestr == ':') { 788 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ 789 for (cc = charclasses; cc->cc_name; cc++) 790 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) 791 break; 792 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && 793 prestr[2 + cc->cc_namelen] == ']') { 794 prestr += cc->cc_namelen + 3; 795 for (i = 0; i < NCHARS; i++) { 796 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0)) 797 FATAL("out of space for reg expr %.10s...", lastre); 798 if (cc->cc_func(i)) { 799 *bp++ = i; 800 n++; 801 } 802 } 803 } else 804 *bp++ = c; 805 } else if (c == '\0') { 806 FATAL("nonterminated character class %.20s", lastre); 807 } else if (bp == buf) { /* 1st char is special */ 808 *bp++ = c; 809 } else if (c == ']') { 810 *bp++ = 0; 811 rlxstr = (uschar *) tostring((char *) buf); 812 if (cflag == 0) 813 return CCL; 814 else 815 return NCCL; 816 } else 817 *bp++ = c; 818 } 819 } 820} 821 822int cgoto(fa *f, int s, int c) 823{ 824 int i, j, k; 825 int *p, *q; 826 827 if (c < 0 || c > 255) 828 FATAL("can't happen: neg char %d in cgoto", c); 829 while (f->accept >= maxsetvec) { /* guessing here! */ 830 maxsetvec *= 4; 831 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 832 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 833 if (setvec == 0 || tmpset == 0) 834 overflo("out of space in cgoto()"); 835 } 836 for (i = 0; i <= f->accept; i++) 837 setvec[i] = 0; 838 setcnt = 0; 839 /* compute positions of gototab[s,c] into setvec */ 840 p = f->posns[s]; 841 for (i = 1; i <= *p; i++) { 842 if ((k = f->re[p[i]].ltype) != FINAL) { 843 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 844 || (k == DOT && c != 0 && c != HAT) 845 || (k == ALL && c != 0) 846 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 847 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 848 q = f->re[p[i]].lfollow; 849 for (j = 1; j <= *q; j++) { 850 if (q[j] >= maxsetvec) { 851 maxsetvec *= 4; 852 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 853 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); 854 if (setvec == 0 || tmpset == 0) 855 overflo("cgoto overflow"); 856 } 857 if (setvec[q[j]] == 0) { 858 setcnt++; 859 setvec[q[j]] = 1; 860 } 861 } 862 } 863 } 864 } 865 /* determine if setvec is a previous state */ 866 tmpset[0] = setcnt; 867 j = 1; 868 for (i = f->accept; i >= 0; i--) 869 if (setvec[i]) { 870 tmpset[j++] = i; 871 } 872 /* tmpset == previous state? */ 873 for (i = 1; i <= f->curstat; i++) { 874 p = f->posns[i]; 875 if ((k = tmpset[0]) != p[0]) 876 goto different; 877 for (j = 1; j <= k; j++) 878 if (tmpset[j] != p[j]) 879 goto different; 880 /* setvec is state i */ 881 f->gototab[s][c] = i; 882 return i; 883 different:; 884 } 885 886 /* add tmpset to current set of states */ 887 if (f->curstat >= NSTATES-1) { 888 f->curstat = 2; 889 f->reset = 1; 890 for (i = 2; i < NSTATES; i++) 891 xfree(f->posns[i]); 892 } else 893 ++(f->curstat); 894 for (i = 0; i < NCHARS; i++) 895 f->gototab[f->curstat][i] = 0; 896 xfree(f->posns[f->curstat]); 897 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 898 overflo("out of space in cgoto"); 899 900 f->posns[f->curstat] = p; 901 f->gototab[s][c] = f->curstat; 902 for (i = 0; i <= setcnt; i++) 903 p[i] = tmpset[i]; 904 if (setvec[f->accept]) 905 f->out[f->curstat] = 1; 906 else 907 f->out[f->curstat] = 0; 908 return f->curstat; 909} 910 911 912void freefa(fa *f) /* free a finite automaton */ 913{ 914 int i; 915 916 if (f == NULL) 917 return; 918 for (i = 0; i <= f->curstat; i++) 919 xfree(f->posns[i]); 920 for (i = 0; i <= f->accept; i++) { 921 xfree(f->re[i].lfollow); 922 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 923 xfree((f->re[i].lval.np)); 924 } 925 xfree(f->restr); 926 xfree(f); 927} 928