b.c revision 146299
185587Sobrien/**************************************************************** 285587SobrienCopyright (C) Lucent Technologies 1997 385587SobrienAll Rights Reserved 485587Sobrien 585587SobrienPermission to use, copy, modify, and distribute this software and 685587Sobrienits documentation for any purpose and without fee is hereby 785587Sobriengranted, provided that the above copyright notice appear in all 885587Sobriencopies and that both that the copyright notice and this 985587Sobrienpermission notice and warranty disclaimer appear in supporting 1085587Sobriendocumentation, and that the name Lucent Technologies or any of 1185587Sobrienits entities not be used in advertising or publicity pertaining 1285587Sobriento distribution of the software without specific, written prior 1385587Sobrienpermission. 1485587Sobrien 1585587SobrienLUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 1685587SobrienINCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 1785587SobrienIN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 1885587SobrienSPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1985587SobrienWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 2085587SobrienIN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 2185587SobrienARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 2285587SobrienTHIS SOFTWARE. 2385587Sobrien****************************************************************/ 2485587Sobrien 2585587Sobrien/* lasciate ogne speranza, voi ch'entrate. */ 2685587Sobrien 2785587Sobrien#define DEBUG 2885587Sobrien 2985587Sobrien#include <ctype.h> 3085587Sobrien#include <stdio.h> 3185587Sobrien#include <string.h> 3285587Sobrien#include <stdlib.h> 3385587Sobrien#include "awk.h" 3485587Sobrien#include "ytab.h" 3585587Sobrien 36118194Sru#define HAT (NCHARS+2) /* matches ^ in regular expr */ 3785587Sobrien /* NCHARS is 2**n */ 3885587Sobrien#define MAXLIN 22 3985587Sobrien 4085587Sobrien#define type(v) (v)->nobj /* badly overloaded here */ 4185587Sobrien#define info(v) (v)->ntype /* badly overloaded here */ 4285587Sobrien#define left(v) (v)->narg[0] 4385587Sobrien#define right(v) (v)->narg[1] 4485587Sobrien#define parent(v) (v)->nnext 4585587Sobrien 4685587Sobrien#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 4785587Sobrien#define UNARY case STAR: case PLUS: case QUEST: 4885587Sobrien 4985587Sobrien/* encoding in tree Nodes: 5085587Sobrien leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 5185587Sobrien left is index, right contains value or pointer to value 5285587Sobrien unary (STAR, PLUS, QUEST): left is child, right is null 5385587Sobrien binary (CAT, OR): left and right are children 5485587Sobrien parent contains pointer to parent 5585587Sobrien*/ 5685587Sobrien 5785587Sobrien 5885587Sobrienint *setvec; 5985587Sobrienint *tmpset; 6085587Sobrienint maxsetvec = 0; 6185587Sobrien 6285587Sobrienint rtok; /* next token in current re */ 6385587Sobrienint rlxval; 6485587Sobrienstatic uschar *rlxstr; 6585587Sobrienstatic uschar *prestr; /* current position in current re */ 6685587Sobrienstatic uschar *lastre; /* origin of last re */ 6785587Sobrien 6885587Sobrienstatic int setcnt; 6985587Sobrienstatic int poscnt; 7085587Sobrien 7185587Sobrienchar *patbeg; 7285587Sobrienint patlen; 7385587Sobrien 7485587Sobrien#define NFA 20 /* cache this many dynamic fa's */ 7585587Sobrienfa *fatab[NFA]; 7685587Sobrienint nfatab = 0; /* entries in fatab */ 7785587Sobrien 78107806Sobrienfa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ 7985587Sobrien{ 8085587Sobrien int i, use, nuse; 8185587Sobrien fa *pfa; 8285587Sobrien static int now = 1; 8385587Sobrien 8485587Sobrien if (setvec == 0) { /* first time through any RE */ 8585587Sobrien maxsetvec = MAXLIN; 8685587Sobrien setvec = (int *) malloc(maxsetvec * sizeof(int)); 8785587Sobrien tmpset = (int *) malloc(maxsetvec * sizeof(int)); 8885587Sobrien if (setvec == 0 || tmpset == 0) 8985587Sobrien overflo("out of space initializing makedfa"); 9085587Sobrien } 9185587Sobrien 9285587Sobrien if (compile_time) /* a constant for sure */ 9385587Sobrien return mkdfa(s, anchor); 9485587Sobrien for (i = 0; i < nfatab; i++) /* is it there already? */ 9585587Sobrien if (fatab[i]->anchor == anchor 9690902Sdes && strcmp((const char *) fatab[i]->restr, s) == 0) { 9785587Sobrien fatab[i]->use = now++; 9885587Sobrien return fatab[i]; 9985587Sobrien } 10085587Sobrien pfa = mkdfa(s, anchor); 10185587Sobrien if (nfatab < NFA) { /* room for another */ 10285587Sobrien fatab[nfatab] = pfa; 10385587Sobrien fatab[nfatab]->use = now++; 10485587Sobrien nfatab++; 10585587Sobrien return pfa; 10685587Sobrien } 10785587Sobrien use = fatab[0]->use; /* replace least-recently used */ 10885587Sobrien nuse = 0; 10985587Sobrien for (i = 1; i < nfatab; i++) 11085587Sobrien if (fatab[i]->use < use) { 11185587Sobrien use = fatab[i]->use; 11285587Sobrien nuse = i; 11385587Sobrien } 11485587Sobrien freefa(fatab[nuse]); 11585587Sobrien fatab[nuse] = pfa; 11685587Sobrien pfa->use = now++; 11785587Sobrien return pfa; 11885587Sobrien} 11985587Sobrien 120107806Sobrienfa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ 12185587Sobrien /* anchor = 1 for anchored matches, else 0 */ 12285587Sobrien{ 12385587Sobrien Node *p, *p1; 12485587Sobrien fa *f; 12585587Sobrien 12685587Sobrien p = reparse(s); 12785587Sobrien p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 12885587Sobrien /* put ALL STAR in front of reg. exp. */ 12985587Sobrien p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 13085587Sobrien /* put FINAL after reg. exp. */ 13185587Sobrien 13285587Sobrien poscnt = 0; 13385587Sobrien penter(p1); /* enter parent pointers and leaf indices */ 13485587Sobrien if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 13585587Sobrien overflo("out of space for fa"); 13685587Sobrien f->accept = poscnt-1; /* penter has computed number of positions in re */ 13785587Sobrien cfoll(f, p1); /* set up follow sets */ 13885587Sobrien freetr(p1); 13985587Sobrien if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) 14085587Sobrien overflo("out of space in makedfa"); 14185587Sobrien if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 14285587Sobrien overflo("out of space in makedfa"); 14385587Sobrien *f->posns[1] = 0; 14485587Sobrien f->initstat = makeinit(f, anchor); 14585587Sobrien f->anchor = anchor; 14685587Sobrien f->restr = (uschar *) tostring(s); 14785587Sobrien return f; 14885587Sobrien} 14985587Sobrien 15085587Sobrienint makeinit(fa *f, int anchor) 15185587Sobrien{ 15285587Sobrien int i, k; 15385587Sobrien 15485587Sobrien f->curstat = 2; 15585587Sobrien f->out[2] = 0; 15685587Sobrien f->reset = 0; 15785587Sobrien k = *(f->re[0].lfollow); 15885587Sobrien xfree(f->posns[2]); 15985587Sobrien if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 16085587Sobrien overflo("out of space in makeinit"); 16185587Sobrien for (i=0; i <= k; i++) { 16285587Sobrien (f->posns[2])[i] = (f->re[0].lfollow)[i]; 16385587Sobrien } 16485587Sobrien if ((f->posns[2])[1] == f->accept) 16585587Sobrien f->out[2] = 1; 16685587Sobrien for (i=0; i < NCHARS; i++) 16785587Sobrien f->gototab[2][i] = 0; 16885587Sobrien f->curstat = cgoto(f, 2, HAT); 16985587Sobrien if (anchor) { 17085587Sobrien *f->posns[2] = k-1; /* leave out position 0 */ 17185587Sobrien for (i=0; i < k; i++) { 17285587Sobrien (f->posns[0])[i] = (f->posns[2])[i]; 17385587Sobrien } 17485587Sobrien 17585587Sobrien f->out[0] = f->out[2]; 17685587Sobrien if (f->curstat != 2) 17785587Sobrien --(*f->posns[f->curstat]); 17885587Sobrien } 17985587Sobrien return f->curstat; 18085587Sobrien} 18185587Sobrien 18285587Sobrienvoid penter(Node *p) /* set up parent pointers and leaf indices */ 18385587Sobrien{ 18485587Sobrien switch (type(p)) { 18585587Sobrien LEAF 18685587Sobrien info(p) = poscnt; 18785587Sobrien poscnt++; 18885587Sobrien break; 18985587Sobrien UNARY 19085587Sobrien penter(left(p)); 19185587Sobrien parent(left(p)) = p; 19285587Sobrien break; 19385587Sobrien case CAT: 19485587Sobrien case OR: 19585587Sobrien penter(left(p)); 19685587Sobrien penter(right(p)); 19785587Sobrien parent(left(p)) = p; 19885587Sobrien parent(right(p)) = p; 19985587Sobrien break; 20085587Sobrien default: /* can't happen */ 20185587Sobrien FATAL("can't happen: unknown type %d in penter", type(p)); 20285587Sobrien break; 20385587Sobrien } 20485587Sobrien} 20585587Sobrien 20685587Sobrienvoid freetr(Node *p) /* free parse tree */ 20785587Sobrien{ 20885587Sobrien switch (type(p)) { 20985587Sobrien LEAF 21085587Sobrien xfree(p); 21185587Sobrien break; 21285587Sobrien UNARY 21385587Sobrien freetr(left(p)); 21485587Sobrien xfree(p); 21585587Sobrien break; 21685587Sobrien case CAT: 21785587Sobrien case OR: 21885587Sobrien freetr(left(p)); 21985587Sobrien freetr(right(p)); 22085587Sobrien xfree(p); 22185587Sobrien break; 22285587Sobrien default: /* can't happen */ 22385587Sobrien FATAL("can't happen: unknown type %d in freetr", type(p)); 22485587Sobrien break; 22585587Sobrien } 22685587Sobrien} 22785587Sobrien 22885587Sobrien/* in the parsing of regular expressions, metacharacters like . have */ 22985587Sobrien/* to be seen literally; \056 is not a metacharacter. */ 23085587Sobrien 23185587Sobrienint hexstr(char **pp) /* find and eval hex string at pp, return new p */ 23285587Sobrien{ /* only pick up one 8-bit byte (2 chars) */ 23385587Sobrien uschar *p; 23485587Sobrien int n = 0; 23585587Sobrien int i; 23685587Sobrien 23785587Sobrien for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 23885587Sobrien if (isdigit(*p)) 23985587Sobrien n = 16 * n + *p - '0'; 24085587Sobrien else if (*p >= 'a' && *p <= 'f') 24185587Sobrien n = 16 * n + *p - 'a' + 10; 24285587Sobrien else if (*p >= 'A' && *p <= 'F') 24385587Sobrien n = 16 * n + *p - 'A' + 10; 24485587Sobrien } 24585587Sobrien *pp = (char *) p; 24685587Sobrien return n; 24785587Sobrien} 24885587Sobrien 24985587Sobrien#define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 25085587Sobrien 25185587Sobrienint quoted(char **pp) /* pick up next thing after a \\ */ 25285587Sobrien /* and increment *pp */ 25385587Sobrien{ 25485587Sobrien char *p = *pp; 25585587Sobrien int c; 25685587Sobrien 25785587Sobrien if ((c = *p++) == 't') 25885587Sobrien c = '\t'; 25985587Sobrien else if (c == 'n') 26085587Sobrien c = '\n'; 26185587Sobrien else if (c == 'f') 26285587Sobrien c = '\f'; 26385587Sobrien else if (c == 'r') 26485587Sobrien c = '\r'; 26585587Sobrien else if (c == 'b') 26685587Sobrien c = '\b'; 26785587Sobrien else if (c == '\\') 26885587Sobrien c = '\\'; 26985587Sobrien else if (c == 'x') { /* hexadecimal goo follows */ 27085587Sobrien c = hexstr(&p); /* this adds a null if number is invalid */ 27185587Sobrien } else if (isoctdigit(c)) { /* \d \dd \ddd */ 27285587Sobrien int n = c - '0'; 27385587Sobrien if (isoctdigit(*p)) { 27485587Sobrien n = 8 * n + *p++ - '0'; 27585587Sobrien if (isoctdigit(*p)) 27685587Sobrien n = 8 * n + *p++ - '0'; 27785587Sobrien } 27885587Sobrien c = n; 27985587Sobrien } /* else */ 28085587Sobrien /* c = c; */ 28185587Sobrien *pp = p; 28285587Sobrien return c; 28385587Sobrien} 28485587Sobrien 285107806Sobrienchar *cclenter(const char *argp) /* add a character class */ 286107806Sobrien{ 28785587Sobrien int i, c, c2; 28885587Sobrien uschar *p = (uschar *) argp; 28985587Sobrien uschar *op, *bp; 29085587Sobrien static uschar *buf = 0; 29185587Sobrien static int bufsz = 100; 29285587Sobrien 29385587Sobrien op = p; 29485587Sobrien if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 29585587Sobrien FATAL("out of space for character class [%.10s...] 1", p); 29685587Sobrien bp = buf; 29785587Sobrien for (i = 0; (c = *p++) != 0; ) { 29885587Sobrien if (c == '\\') { 29985587Sobrien c = quoted((char **) &p); 30085587Sobrien } else if (c == '-' && i > 0 && bp[-1] != 0) { 30185587Sobrien if (*p != 0) { 30285587Sobrien c = bp[-1]; 30385587Sobrien c2 = *p++; 30485587Sobrien if (c2 == '\\') 30585587Sobrien c2 = quoted((char **) &p); 306118194Sru if (c > c2) { /* empty; ignore */ 30785587Sobrien bp--; 30885587Sobrien i--; 30985587Sobrien continue; 31085587Sobrien } 311118194Sru while (c < c2) { 31285587Sobrien if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 31385587Sobrien FATAL("out of space for character class [%.10s...] 2", p); 314118194Sru *bp++ = ++c; 31585587Sobrien i++; 31685587Sobrien } 31785587Sobrien continue; 31885587Sobrien } 31985587Sobrien } 32085587Sobrien if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 32185587Sobrien FATAL("out of space for character class [%.10s...] 3", p); 32285587Sobrien *bp++ = c; 32385587Sobrien i++; 32485587Sobrien } 32585587Sobrien *bp = 0; 32685587Sobrien dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 32785587Sobrien xfree(op); 32885587Sobrien return (char *) tostring((char *) buf); 32985587Sobrien} 33085587Sobrien 331107806Sobrienvoid overflo(const char *s) 33285587Sobrien{ 33385587Sobrien FATAL("regular expression too big: %.30s...", s); 33485587Sobrien} 33585587Sobrien 33685587Sobrienvoid cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 33785587Sobrien{ 33885587Sobrien int i; 33985587Sobrien int *p; 34085587Sobrien 34185587Sobrien switch (type(v)) { 34285587Sobrien LEAF 34385587Sobrien f->re[info(v)].ltype = type(v); 34485587Sobrien f->re[info(v)].lval.np = right(v); 34585587Sobrien while (f->accept >= maxsetvec) { /* guessing here! */ 34685587Sobrien maxsetvec *= 4; 34785587Sobrien setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 34885587Sobrien tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 34985587Sobrien if (setvec == 0 || tmpset == 0) 35085587Sobrien overflo("out of space in cfoll()"); 35185587Sobrien } 35285587Sobrien for (i = 0; i <= f->accept; i++) 35385587Sobrien setvec[i] = 0; 35485587Sobrien setcnt = 0; 35585587Sobrien follow(v); /* computes setvec and setcnt */ 35685587Sobrien if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 35785587Sobrien overflo("out of space building follow set"); 35885587Sobrien f->re[info(v)].lfollow = p; 35985587Sobrien *p = setcnt; 36085587Sobrien for (i = f->accept; i >= 0; i--) 36185587Sobrien if (setvec[i] == 1) 36285587Sobrien *++p = i; 36385587Sobrien break; 36485587Sobrien UNARY 36585587Sobrien cfoll(f,left(v)); 36685587Sobrien break; 36785587Sobrien case CAT: 36885587Sobrien case OR: 36985587Sobrien cfoll(f,left(v)); 37085587Sobrien cfoll(f,right(v)); 37185587Sobrien break; 37285587Sobrien default: /* can't happen */ 37385587Sobrien FATAL("can't happen: unknown type %d in cfoll", type(v)); 37485587Sobrien } 37585587Sobrien} 37685587Sobrien 37785587Sobrienint first(Node *p) /* collects initially active leaves of p into setvec */ 37885587Sobrien /* returns 1 if p matches empty string */ 37985587Sobrien{ 38085587Sobrien int b, lp; 38185587Sobrien 38285587Sobrien switch (type(p)) { 38385587Sobrien LEAF 38485587Sobrien lp = info(p); /* look for high-water mark of subscripts */ 38585587Sobrien while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 38685587Sobrien maxsetvec *= 4; 38785587Sobrien setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 38885587Sobrien tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 38985587Sobrien if (setvec == 0 || tmpset == 0) 39085587Sobrien overflo("out of space in first()"); 39185587Sobrien } 39285587Sobrien if (setvec[lp] != 1) { 39385587Sobrien setvec[lp] = 1; 39485587Sobrien setcnt++; 39585587Sobrien } 39685587Sobrien if (type(p) == CCL && (*(char *) right(p)) == '\0') 39785587Sobrien return(0); /* empty CCL */ 39885587Sobrien else return(1); 39985587Sobrien case PLUS: 40085587Sobrien if (first(left(p)) == 0) return(0); 40185587Sobrien return(1); 40285587Sobrien case STAR: 40385587Sobrien case QUEST: 40485587Sobrien first(left(p)); 40585587Sobrien return(0); 40685587Sobrien case CAT: 40785587Sobrien if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 40885587Sobrien return(1); 40985587Sobrien case OR: 41085587Sobrien b = first(right(p)); 41185587Sobrien if (first(left(p)) == 0 || b == 0) return(0); 41285587Sobrien return(1); 41385587Sobrien } 41485587Sobrien FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 41585587Sobrien return(-1); 41685587Sobrien} 41785587Sobrien 41885587Sobrienvoid follow(Node *v) /* collects leaves that can follow v into setvec */ 41985587Sobrien{ 42085587Sobrien Node *p; 42185587Sobrien 42285587Sobrien if (type(v) == FINAL) 42385587Sobrien return; 42485587Sobrien p = parent(v); 42585587Sobrien switch (type(p)) { 42685587Sobrien case STAR: 42785587Sobrien case PLUS: 42885587Sobrien first(v); 42985587Sobrien follow(p); 43085587Sobrien return; 43185587Sobrien 43285587Sobrien case OR: 43385587Sobrien case QUEST: 43485587Sobrien follow(p); 43585587Sobrien return; 43685587Sobrien 43785587Sobrien case CAT: 43885587Sobrien if (v == left(p)) { /* v is left child of p */ 43985587Sobrien if (first(right(p)) == 0) { 44085587Sobrien follow(p); 44185587Sobrien return; 44285587Sobrien } 44385587Sobrien } else /* v is right child */ 44485587Sobrien follow(p); 44585587Sobrien return; 44685587Sobrien } 44785587Sobrien} 44885587Sobrien 449107806Sobrienint member(int c, const char *sarg) /* is c in s? */ 45085587Sobrien{ 45185587Sobrien uschar *s = (uschar *) sarg; 45285587Sobrien 45385587Sobrien while (*s) 45485587Sobrien if (c == *s++) 45585587Sobrien return(1); 45685587Sobrien return(0); 45785587Sobrien} 45885587Sobrien 459107806Sobrienint match(fa *f, const char *p0) /* shortest match ? */ 46085587Sobrien{ 46185587Sobrien int s, ns; 46285587Sobrien uschar *p = (uschar *) p0; 46385587Sobrien 46485587Sobrien s = f->reset ? makeinit(f,0) : f->initstat; 46585587Sobrien if (f->out[s]) 46685587Sobrien return(1); 46785587Sobrien do { 468146299Sru assert(*p < NCHARS); 46985587Sobrien if ((ns = f->gototab[s][*p]) != 0) 47085587Sobrien s = ns; 47185587Sobrien else 47285587Sobrien s = cgoto(f, s, *p); 47385587Sobrien if (f->out[s]) 47485587Sobrien return(1); 47585587Sobrien } while (*p++ != 0); 47685587Sobrien return(0); 47785587Sobrien} 47885587Sobrien 479107806Sobrienint pmatch(fa *f, const char *p0) /* longest match, for sub */ 48085587Sobrien{ 48185587Sobrien int s, ns; 48285587Sobrien uschar *p = (uschar *) p0; 48385587Sobrien uschar *q; 48485587Sobrien int i, k; 48585587Sobrien 486125601Sru /* s = f->reset ? makeinit(f,1) : f->initstat; */ 487125601Sru if (f->reset) { 488125601Sru f->initstat = s = makeinit(f,1); 489125601Sru } else { 490125601Sru s = f->initstat; 491125601Sru } 49285587Sobrien patbeg = (char *) p; 49385587Sobrien patlen = -1; 49485587Sobrien do { 49585587Sobrien q = p; 49685587Sobrien do { 49785587Sobrien if (f->out[s]) /* final state */ 49885587Sobrien patlen = q-p; 499146299Sru assert(*q < NCHARS); 50085587Sobrien if ((ns = f->gototab[s][*q]) != 0) 50185587Sobrien s = ns; 50285587Sobrien else 50385587Sobrien s = cgoto(f, s, *q); 50485587Sobrien if (s == 1) { /* no transition */ 50585587Sobrien if (patlen >= 0) { 50685587Sobrien patbeg = (char *) p; 50785587Sobrien return(1); 50885587Sobrien } 50985587Sobrien else 51085587Sobrien goto nextin; /* no match */ 51185587Sobrien } 51285587Sobrien } while (*q++ != 0); 51385587Sobrien if (f->out[s]) 51485587Sobrien patlen = q-p-1; /* don't count $ */ 51585587Sobrien if (patlen >= 0) { 51685587Sobrien patbeg = (char *) p; 51785587Sobrien return(1); 51885587Sobrien } 51985587Sobrien nextin: 52085587Sobrien s = 2; 52185587Sobrien if (f->reset) { 52285587Sobrien for (i = 2; i <= f->curstat; i++) 52385587Sobrien xfree(f->posns[i]); 52485587Sobrien k = *f->posns[0]; 52585587Sobrien if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 52685587Sobrien overflo("out of space in pmatch"); 52785587Sobrien for (i = 0; i <= k; i++) 52885587Sobrien (f->posns[2])[i] = (f->posns[0])[i]; 52985587Sobrien f->initstat = f->curstat = 2; 53085587Sobrien f->out[2] = f->out[0]; 53185587Sobrien for (i = 0; i < NCHARS; i++) 53285587Sobrien f->gototab[2][i] = 0; 53385587Sobrien } 53485587Sobrien } while (*p++ != 0); 53585587Sobrien return (0); 53685587Sobrien} 53785587Sobrien 538107806Sobrienint nematch(fa *f, const char *p0) /* non-empty match, for sub */ 53985587Sobrien{ 54085587Sobrien int s, ns; 54185587Sobrien uschar *p = (uschar *) p0; 54285587Sobrien uschar *q; 54385587Sobrien int i, k; 54485587Sobrien 545125601Sru /* s = f->reset ? makeinit(f,1) : f->initstat; */ 546125601Sru if (f->reset) { 547125601Sru f->initstat = s = makeinit(f,1); 548125601Sru } else { 549125601Sru s = f->initstat; 550125601Sru } 55185587Sobrien patlen = -1; 55285587Sobrien while (*p) { 55385587Sobrien q = p; 55485587Sobrien do { 55585587Sobrien if (f->out[s]) /* final state */ 55685587Sobrien patlen = q-p; 557146299Sru assert(*q < NCHARS); 55885587Sobrien if ((ns = f->gototab[s][*q]) != 0) 55985587Sobrien s = ns; 56085587Sobrien else 56185587Sobrien s = cgoto(f, s, *q); 56285587Sobrien if (s == 1) { /* no transition */ 56385587Sobrien if (patlen > 0) { 56485587Sobrien patbeg = (char *) p; 56585587Sobrien return(1); 56685587Sobrien } else 56785587Sobrien goto nnextin; /* no nonempty match */ 56885587Sobrien } 56985587Sobrien } while (*q++ != 0); 57085587Sobrien if (f->out[s]) 57185587Sobrien patlen = q-p-1; /* don't count $ */ 57285587Sobrien if (patlen > 0 ) { 57385587Sobrien patbeg = (char *) p; 57485587Sobrien return(1); 57585587Sobrien } 57685587Sobrien nnextin: 57785587Sobrien s = 2; 57885587Sobrien if (f->reset) { 57985587Sobrien for (i = 2; i <= f->curstat; i++) 58085587Sobrien xfree(f->posns[i]); 58185587Sobrien k = *f->posns[0]; 58285587Sobrien if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 58385587Sobrien overflo("out of state space"); 58485587Sobrien for (i = 0; i <= k; i++) 58585587Sobrien (f->posns[2])[i] = (f->posns[0])[i]; 58685587Sobrien f->initstat = f->curstat = 2; 58785587Sobrien f->out[2] = f->out[0]; 58885587Sobrien for (i = 0; i < NCHARS; i++) 58985587Sobrien f->gototab[2][i] = 0; 59085587Sobrien } 59185587Sobrien p++; 59285587Sobrien } 59385587Sobrien return (0); 59485587Sobrien} 59585587Sobrien 596107806SobrienNode *reparse(const char *p) /* parses regular expression pointed to by p */ 59785587Sobrien{ /* uses relex() to scan regular expression */ 59885587Sobrien Node *np; 59985587Sobrien 60085587Sobrien dprintf( ("reparse <%s>\n", p) ); 60185587Sobrien lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 60285587Sobrien rtok = relex(); 603107806Sobrien /* GNU compatibility: an empty regexp matches anything */ 60485587Sobrien if (rtok == '\0') 605107806Sobrien /* FATAL("empty regular expression"); previous */ 606107806Sobrien return(op2(ALL, NIL, NIL)); 60785587Sobrien np = regexp(); 60885587Sobrien if (rtok != '\0') 60985587Sobrien FATAL("syntax error in regular expression %s at %s", lastre, prestr); 61085587Sobrien return(np); 61185587Sobrien} 61285587Sobrien 61385587SobrienNode *regexp(void) /* top-level parse of reg expr */ 61485587Sobrien{ 61585587Sobrien return (alt(concat(primary()))); 61685587Sobrien} 61785587Sobrien 61885587SobrienNode *primary(void) 61985587Sobrien{ 62085587Sobrien Node *np; 62185587Sobrien 62285587Sobrien switch (rtok) { 62385587Sobrien case CHAR: 62485587Sobrien np = op2(CHAR, NIL, itonp(rlxval)); 62585587Sobrien rtok = relex(); 62685587Sobrien return (unary(np)); 62785587Sobrien case ALL: 62885587Sobrien rtok = relex(); 62985587Sobrien return (unary(op2(ALL, NIL, NIL))); 63085587Sobrien case DOT: 63185587Sobrien rtok = relex(); 63285587Sobrien return (unary(op2(DOT, NIL, NIL))); 63385587Sobrien case CCL: 63485587Sobrien np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 63585587Sobrien rtok = relex(); 63685587Sobrien return (unary(np)); 63785587Sobrien case NCCL: 63885587Sobrien np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 63985587Sobrien rtok = relex(); 64085587Sobrien return (unary(np)); 64185587Sobrien case '^': 64285587Sobrien rtok = relex(); 64385587Sobrien return (unary(op2(CHAR, NIL, itonp(HAT)))); 64485587Sobrien case '$': 64585587Sobrien rtok = relex(); 64685587Sobrien return (unary(op2(CHAR, NIL, NIL))); 64785587Sobrien case '(': 64885587Sobrien rtok = relex(); 64985587Sobrien if (rtok == ')') { /* special pleading for () */ 65085587Sobrien rtok = relex(); 65185587Sobrien return unary(op2(CCL, NIL, (Node *) tostring(""))); 65285587Sobrien } 65385587Sobrien np = regexp(); 65485587Sobrien if (rtok == ')') { 65585587Sobrien rtok = relex(); 65685587Sobrien return (unary(np)); 65785587Sobrien } 65885587Sobrien else 65985587Sobrien FATAL("syntax error in regular expression %s at %s", lastre, prestr); 66085587Sobrien default: 66185587Sobrien FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 66285587Sobrien } 66385587Sobrien return 0; /*NOTREACHED*/ 66485587Sobrien} 66585587Sobrien 66685587SobrienNode *concat(Node *np) 66785587Sobrien{ 66885587Sobrien switch (rtok) { 66985587Sobrien case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 67085587Sobrien return (concat(op2(CAT, np, primary()))); 67185587Sobrien } 67285587Sobrien return (np); 67385587Sobrien} 67485587Sobrien 67585587SobrienNode *alt(Node *np) 67685587Sobrien{ 67785587Sobrien if (rtok == OR) { 67885587Sobrien rtok = relex(); 67985587Sobrien return (alt(op2(OR, np, concat(primary())))); 68085587Sobrien } 68185587Sobrien return (np); 68285587Sobrien} 68385587Sobrien 68485587SobrienNode *unary(Node *np) 68585587Sobrien{ 68685587Sobrien switch (rtok) { 68785587Sobrien case STAR: 68885587Sobrien rtok = relex(); 68985587Sobrien return (unary(op2(STAR, np, NIL))); 69085587Sobrien case PLUS: 69185587Sobrien rtok = relex(); 69285587Sobrien return (unary(op2(PLUS, np, NIL))); 69385587Sobrien case QUEST: 69485587Sobrien rtok = relex(); 69585587Sobrien return (unary(op2(QUEST, np, NIL))); 69685587Sobrien default: 69785587Sobrien return (np); 69885587Sobrien } 69985587Sobrien} 70085587Sobrien 70190902Sdes/* 70290902Sdes * Character class definitions conformant to the POSIX locale as 70390902Sdes * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source 70490902Sdes * and operating character sets are both ASCII (ISO646) or supersets 70590902Sdes * thereof. 70690902Sdes * 70790902Sdes * Note that to avoid overflowing the temporary buffer used in 70890902Sdes * relex(), the expanded character class (prior to range expansion) 70990902Sdes * must be less than twice the size of their full name. 71090902Sdes */ 711112336Sobrien 712112336Sobrien/* Because isblank doesn't show up in any of the header files on any 713112336Sobrien * system i use, it's defined here. if some other locale has a richer 714112336Sobrien * definition of "blank", define HAS_ISBLANK and provide your own 715112336Sobrien * version. 716118194Sru * the parentheses here are an attempt to find a path through the maze 717118194Sru * of macro definition and/or function and/or version provided. thanks 718118194Sru * to nelson beebe for the suggestion; let's see if it works everywhere. 719112336Sobrien */ 720112336Sobrien 721112336Sobrien#ifndef HAS_ISBLANK 722112336Sobrien 723118194Sruint (isblank)(int c) 724112336Sobrien{ 725112336Sobrien return c==' ' || c=='\t'; 726112336Sobrien} 727112336Sobrien 728112336Sobrien#endif 729112336Sobrien 73090902Sdesstruct charclass { 73190902Sdes const char *cc_name; 73290902Sdes int cc_namelen; 733112336Sobrien int (*cc_func)(int); 73490902Sdes} charclasses[] = { 735112336Sobrien { "alnum", 5, isalnum }, 736112336Sobrien { "alpha", 5, isalpha }, 737112336Sobrien { "blank", 5, isblank }, 738112336Sobrien { "cntrl", 5, iscntrl }, 739112336Sobrien { "digit", 5, isdigit }, 740112336Sobrien { "graph", 5, isgraph }, 741112336Sobrien { "lower", 5, islower }, 742112336Sobrien { "print", 5, isprint }, 743112336Sobrien { "punct", 5, ispunct }, 744112336Sobrien { "space", 5, isspace }, 745112336Sobrien { "upper", 5, isupper }, 746112336Sobrien { "xdigit", 6, isxdigit }, 74790902Sdes { NULL, 0, NULL }, 74890902Sdes}; 74990902Sdes 75090902Sdes 75185587Sobrienint relex(void) /* lexical analyzer for reparse */ 75285587Sobrien{ 75385587Sobrien int c, n; 75485587Sobrien int cflag; 75585587Sobrien static uschar *buf = 0; 75685587Sobrien static int bufsz = 100; 75785587Sobrien uschar *bp; 75890902Sdes struct charclass *cc; 759112336Sobrien int i; 76085587Sobrien 76185587Sobrien switch (c = *prestr++) { 76285587Sobrien case '|': return OR; 76385587Sobrien case '*': return STAR; 76485587Sobrien case '+': return PLUS; 76585587Sobrien case '?': return QUEST; 76685587Sobrien case '.': return DOT; 76785587Sobrien case '\0': prestr--; return '\0'; 76885587Sobrien case '^': 76985587Sobrien case '$': 77085587Sobrien case '(': 77185587Sobrien case ')': 77285587Sobrien return c; 77385587Sobrien case '\\': 77485587Sobrien rlxval = quoted((char **) &prestr); 77585587Sobrien return CHAR; 77685587Sobrien default: 77785587Sobrien rlxval = c; 77885587Sobrien return CHAR; 77985587Sobrien case '[': 78085587Sobrien if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 78185587Sobrien FATAL("out of space in reg expr %.10s..", lastre); 78285587Sobrien bp = buf; 78385587Sobrien if (*prestr == '^') { 78485587Sobrien cflag = 1; 78585587Sobrien prestr++; 78685587Sobrien } 78785587Sobrien else 78885587Sobrien cflag = 0; 78990902Sdes n = 2 * strlen((const char *) prestr)+1; 79085587Sobrien if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) 79185587Sobrien FATAL("out of space for reg expr %.10s...", lastre); 79285587Sobrien for (; ; ) { 79385587Sobrien if ((c = *prestr++) == '\\') { 79485587Sobrien *bp++ = '\\'; 79585587Sobrien if ((c = *prestr++) == '\0') 79685587Sobrien FATAL("nonterminated character class %.20s...", lastre); 79785587Sobrien *bp++ = c; 79885587Sobrien /* } else if (c == '\n') { */ 79985587Sobrien /* FATAL("newline in character class %.20s...", lastre); */ 80090902Sdes } else if (c == '[' && *prestr == ':') { 80190902Sdes /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ 80290902Sdes for (cc = charclasses; cc->cc_name; cc++) 80390902Sdes if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) 80490902Sdes break; 80590902Sdes if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && 80690902Sdes prestr[2 + cc->cc_namelen] == ']') { 80790902Sdes prestr += cc->cc_namelen + 3; 808112336Sobrien for (i = 0; i < NCHARS; i++) { 809112336Sobrien if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0)) 810112336Sobrien FATAL("out of space for reg expr %.10s...", lastre); 811112336Sobrien if (cc->cc_func(i)) { 812112336Sobrien *bp++ = i; 813112336Sobrien n++; 814112336Sobrien } 815112336Sobrien } 81690902Sdes } else 81790902Sdes *bp++ = c; 81885587Sobrien } else if (c == '\0') { 81985587Sobrien FATAL("nonterminated character class %.20s", lastre); 82085587Sobrien } else if (bp == buf) { /* 1st char is special */ 82185587Sobrien *bp++ = c; 82285587Sobrien } else if (c == ']') { 82385587Sobrien *bp++ = 0; 82485587Sobrien rlxstr = (uschar *) tostring((char *) buf); 82585587Sobrien if (cflag == 0) 82685587Sobrien return CCL; 82785587Sobrien else 82885587Sobrien return NCCL; 82985587Sobrien } else 83085587Sobrien *bp++ = c; 83185587Sobrien } 83285587Sobrien } 83385587Sobrien} 83485587Sobrien 83585587Sobrienint cgoto(fa *f, int s, int c) 83685587Sobrien{ 83785587Sobrien int i, j, k; 83885587Sobrien int *p, *q; 83985587Sobrien 840146299Sru assert(c == HAT || c < NCHARS); 84185587Sobrien while (f->accept >= maxsetvec) { /* guessing here! */ 84285587Sobrien maxsetvec *= 4; 84385587Sobrien setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 84485587Sobrien tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 84585587Sobrien if (setvec == 0 || tmpset == 0) 84685587Sobrien overflo("out of space in cgoto()"); 84785587Sobrien } 84885587Sobrien for (i = 0; i <= f->accept; i++) 84985587Sobrien setvec[i] = 0; 85085587Sobrien setcnt = 0; 85185587Sobrien /* compute positions of gototab[s,c] into setvec */ 85285587Sobrien p = f->posns[s]; 85385587Sobrien for (i = 1; i <= *p; i++) { 85485587Sobrien if ((k = f->re[p[i]].ltype) != FINAL) { 85585587Sobrien if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 85685587Sobrien || (k == DOT && c != 0 && c != HAT) 85785587Sobrien || (k == ALL && c != 0) 85885587Sobrien || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 85985587Sobrien || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 86085587Sobrien q = f->re[p[i]].lfollow; 86185587Sobrien for (j = 1; j <= *q; j++) { 86285587Sobrien if (q[j] >= maxsetvec) { 86385587Sobrien maxsetvec *= 4; 86485587Sobrien setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 86585587Sobrien tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); 86685587Sobrien if (setvec == 0 || tmpset == 0) 86785587Sobrien overflo("cgoto overflow"); 86885587Sobrien } 86985587Sobrien if (setvec[q[j]] == 0) { 87085587Sobrien setcnt++; 87185587Sobrien setvec[q[j]] = 1; 87285587Sobrien } 87385587Sobrien } 87485587Sobrien } 87585587Sobrien } 87685587Sobrien } 87785587Sobrien /* determine if setvec is a previous state */ 87885587Sobrien tmpset[0] = setcnt; 87985587Sobrien j = 1; 88085587Sobrien for (i = f->accept; i >= 0; i--) 88185587Sobrien if (setvec[i]) { 88285587Sobrien tmpset[j++] = i; 88385587Sobrien } 88485587Sobrien /* tmpset == previous state? */ 88585587Sobrien for (i = 1; i <= f->curstat; i++) { 88685587Sobrien p = f->posns[i]; 88785587Sobrien if ((k = tmpset[0]) != p[0]) 88885587Sobrien goto different; 88985587Sobrien for (j = 1; j <= k; j++) 89085587Sobrien if (tmpset[j] != p[j]) 89185587Sobrien goto different; 89285587Sobrien /* setvec is state i */ 89385587Sobrien f->gototab[s][c] = i; 89485587Sobrien return i; 89585587Sobrien different:; 89685587Sobrien } 89785587Sobrien 89885587Sobrien /* add tmpset to current set of states */ 89985587Sobrien if (f->curstat >= NSTATES-1) { 90085587Sobrien f->curstat = 2; 90185587Sobrien f->reset = 1; 90285587Sobrien for (i = 2; i < NSTATES; i++) 90385587Sobrien xfree(f->posns[i]); 90485587Sobrien } else 90585587Sobrien ++(f->curstat); 90685587Sobrien for (i = 0; i < NCHARS; i++) 90785587Sobrien f->gototab[f->curstat][i] = 0; 90885587Sobrien xfree(f->posns[f->curstat]); 90985587Sobrien if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 91085587Sobrien overflo("out of space in cgoto"); 91185587Sobrien 91285587Sobrien f->posns[f->curstat] = p; 91385587Sobrien f->gototab[s][c] = f->curstat; 91485587Sobrien for (i = 0; i <= setcnt; i++) 91585587Sobrien p[i] = tmpset[i]; 91685587Sobrien if (setvec[f->accept]) 91785587Sobrien f->out[f->curstat] = 1; 91885587Sobrien else 91985587Sobrien f->out[f->curstat] = 0; 92085587Sobrien return f->curstat; 92185587Sobrien} 92285587Sobrien 92385587Sobrien 92485587Sobrienvoid freefa(fa *f) /* free a finite automaton */ 92585587Sobrien{ 92685587Sobrien int i; 92785587Sobrien 92885587Sobrien if (f == NULL) 92985587Sobrien return; 93085587Sobrien for (i = 0; i <= f->curstat; i++) 93185587Sobrien xfree(f->posns[i]); 93285587Sobrien for (i = 0; i <= f->accept; i++) { 93385587Sobrien xfree(f->re[i].lfollow); 93485587Sobrien if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 93585587Sobrien xfree((f->re[i].lval.np)); 93685587Sobrien } 93785587Sobrien xfree(f->restr); 93885587Sobrien xfree(f); 93985587Sobrien} 940