b.c revision 170331
185587Sobrien/****************************************************************
285587SobrienCopyright (C) Lucent Technologies 1997
385587SobrienAll Rights Reserved
485587Sobrien
585587SobrienPermission to use, copy, modify, and distribute this software and
685587Sobrienits documentation for any purpose and without fee is hereby
785587Sobriengranted, provided that the above copyright notice appear in all
885587Sobriencopies and that both that the copyright notice and this
985587Sobrienpermission notice and warranty disclaimer appear in supporting
1085587Sobriendocumentation, and that the name Lucent Technologies or any of
1185587Sobrienits entities not be used in advertising or publicity pertaining
1285587Sobriento distribution of the software without specific, written prior
1385587Sobrienpermission.
1485587Sobrien
1585587SobrienLUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
1685587SobrienINCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
1785587SobrienIN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
1885587SobrienSPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1985587SobrienWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
2085587SobrienIN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
2185587SobrienARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
2285587SobrienTHIS SOFTWARE.
2385587Sobrien****************************************************************/
2485587Sobrien
25170331Srafan/* lasciate ogne speranza, voi ch'intrate. */
2685587Sobrien
2785587Sobrien#define	DEBUG
2885587Sobrien
2985587Sobrien#include <ctype.h>
3085587Sobrien#include <stdio.h>
3185587Sobrien#include <string.h>
3285587Sobrien#include <stdlib.h>
3385587Sobrien#include "awk.h"
3485587Sobrien#include "ytab.h"
3585587Sobrien
36118194Sru#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
3785587Sobrien				/* NCHARS is 2**n */
3885587Sobrien#define MAXLIN 22
3985587Sobrien
4085587Sobrien#define type(v)		(v)->nobj	/* badly overloaded here */
4185587Sobrien#define info(v)		(v)->ntype	/* badly overloaded here */
4285587Sobrien#define left(v)		(v)->narg[0]
4385587Sobrien#define right(v)	(v)->narg[1]
4485587Sobrien#define parent(v)	(v)->nnext
4585587Sobrien
4685587Sobrien#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47170331Srafan#define ELEAF	case EMPTYRE:		/* empty string in regexp */
4885587Sobrien#define UNARY	case STAR: case PLUS: case QUEST:
4985587Sobrien
5085587Sobrien/* encoding in tree Nodes:
51170331Srafan	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
5285587Sobrien		left is index, right contains value or pointer to value
5385587Sobrien	unary (STAR, PLUS, QUEST): left is child, right is null
5485587Sobrien	binary (CAT, OR): left and right are children
5585587Sobrien	parent contains pointer to parent
5685587Sobrien*/
5785587Sobrien
5885587Sobrien
5985587Sobrienint	*setvec;
6085587Sobrienint	*tmpset;
6185587Sobrienint	maxsetvec = 0;
6285587Sobrien
6385587Sobrienint	rtok;		/* next token in current re */
6485587Sobrienint	rlxval;
6585587Sobrienstatic uschar	*rlxstr;
6685587Sobrienstatic uschar	*prestr;	/* current position in current re */
6785587Sobrienstatic uschar	*lastre;	/* origin of last re */
6885587Sobrien
6985587Sobrienstatic	int setcnt;
7085587Sobrienstatic	int poscnt;
7185587Sobrien
7285587Sobrienchar	*patbeg;
7385587Sobrienint	patlen;
7485587Sobrien
7585587Sobrien#define	NFA	20	/* cache this many dynamic fa's */
7685587Sobrienfa	*fatab[NFA];
7785587Sobrienint	nfatab	= 0;	/* entries in fatab */
7885587Sobrien
79107806Sobrienfa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
8085587Sobrien{
8185587Sobrien	int i, use, nuse;
8285587Sobrien	fa *pfa;
8385587Sobrien	static int now = 1;
8485587Sobrien
8585587Sobrien	if (setvec == 0) {	/* first time through any RE */
8685587Sobrien		maxsetvec = MAXLIN;
8785587Sobrien		setvec = (int *) malloc(maxsetvec * sizeof(int));
8885587Sobrien		tmpset = (int *) malloc(maxsetvec * sizeof(int));
8985587Sobrien		if (setvec == 0 || tmpset == 0)
9085587Sobrien			overflo("out of space initializing makedfa");
9185587Sobrien	}
9285587Sobrien
9385587Sobrien	if (compile_time)	/* a constant for sure */
9485587Sobrien		return mkdfa(s, anchor);
9585587Sobrien	for (i = 0; i < nfatab; i++)	/* is it there already? */
9685587Sobrien		if (fatab[i]->anchor == anchor
9790902Sdes		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
9885587Sobrien			fatab[i]->use = now++;
9985587Sobrien			return fatab[i];
10085587Sobrien		}
10185587Sobrien	pfa = mkdfa(s, anchor);
10285587Sobrien	if (nfatab < NFA) {	/* room for another */
10385587Sobrien		fatab[nfatab] = pfa;
10485587Sobrien		fatab[nfatab]->use = now++;
10585587Sobrien		nfatab++;
10685587Sobrien		return pfa;
10785587Sobrien	}
10885587Sobrien	use = fatab[0]->use;	/* replace least-recently used */
10985587Sobrien	nuse = 0;
11085587Sobrien	for (i = 1; i < nfatab; i++)
11185587Sobrien		if (fatab[i]->use < use) {
11285587Sobrien			use = fatab[i]->use;
11385587Sobrien			nuse = i;
11485587Sobrien		}
11585587Sobrien	freefa(fatab[nuse]);
11685587Sobrien	fatab[nuse] = pfa;
11785587Sobrien	pfa->use = now++;
11885587Sobrien	return pfa;
11985587Sobrien}
12085587Sobrien
121107806Sobrienfa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
12285587Sobrien				/* anchor = 1 for anchored matches, else 0 */
12385587Sobrien{
12485587Sobrien	Node *p, *p1;
12585587Sobrien	fa *f;
12685587Sobrien
12785587Sobrien	p = reparse(s);
12885587Sobrien	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
12985587Sobrien		/* put ALL STAR in front of reg.  exp. */
13085587Sobrien	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
13185587Sobrien		/* put FINAL after reg.  exp. */
13285587Sobrien
13385587Sobrien	poscnt = 0;
13485587Sobrien	penter(p1);	/* enter parent pointers and leaf indices */
13585587Sobrien	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
13685587Sobrien		overflo("out of space for fa");
13785587Sobrien	f->accept = poscnt-1;	/* penter has computed number of positions in re */
13885587Sobrien	cfoll(f, p1);	/* set up follow sets */
13985587Sobrien	freetr(p1);
14085587Sobrien	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
14185587Sobrien			overflo("out of space in makedfa");
14285587Sobrien	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
14385587Sobrien		overflo("out of space in makedfa");
14485587Sobrien	*f->posns[1] = 0;
14585587Sobrien	f->initstat = makeinit(f, anchor);
14685587Sobrien	f->anchor = anchor;
14785587Sobrien	f->restr = (uschar *) tostring(s);
14885587Sobrien	return f;
14985587Sobrien}
15085587Sobrien
15185587Sobrienint makeinit(fa *f, int anchor)
15285587Sobrien{
15385587Sobrien	int i, k;
15485587Sobrien
15585587Sobrien	f->curstat = 2;
15685587Sobrien	f->out[2] = 0;
15785587Sobrien	f->reset = 0;
15885587Sobrien	k = *(f->re[0].lfollow);
15985587Sobrien	xfree(f->posns[2]);
16085587Sobrien	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
16185587Sobrien		overflo("out of space in makeinit");
16285587Sobrien	for (i=0; i <= k; i++) {
16385587Sobrien		(f->posns[2])[i] = (f->re[0].lfollow)[i];
16485587Sobrien	}
16585587Sobrien	if ((f->posns[2])[1] == f->accept)
16685587Sobrien		f->out[2] = 1;
16785587Sobrien	for (i=0; i < NCHARS; i++)
16885587Sobrien		f->gototab[2][i] = 0;
16985587Sobrien	f->curstat = cgoto(f, 2, HAT);
17085587Sobrien	if (anchor) {
17185587Sobrien		*f->posns[2] = k-1;	/* leave out position 0 */
17285587Sobrien		for (i=0; i < k; i++) {
17385587Sobrien			(f->posns[0])[i] = (f->posns[2])[i];
17485587Sobrien		}
17585587Sobrien
17685587Sobrien		f->out[0] = f->out[2];
17785587Sobrien		if (f->curstat != 2)
17885587Sobrien			--(*f->posns[f->curstat]);
17985587Sobrien	}
18085587Sobrien	return f->curstat;
18185587Sobrien}
18285587Sobrien
18385587Sobrienvoid penter(Node *p)	/* set up parent pointers and leaf indices */
18485587Sobrien{
18585587Sobrien	switch (type(p)) {
186170331Srafan	ELEAF
18785587Sobrien	LEAF
18885587Sobrien		info(p) = poscnt;
18985587Sobrien		poscnt++;
19085587Sobrien		break;
19185587Sobrien	UNARY
19285587Sobrien		penter(left(p));
19385587Sobrien		parent(left(p)) = p;
19485587Sobrien		break;
19585587Sobrien	case CAT:
19685587Sobrien	case OR:
19785587Sobrien		penter(left(p));
19885587Sobrien		penter(right(p));
19985587Sobrien		parent(left(p)) = p;
20085587Sobrien		parent(right(p)) = p;
20185587Sobrien		break;
20285587Sobrien	default:	/* can't happen */
20385587Sobrien		FATAL("can't happen: unknown type %d in penter", type(p));
20485587Sobrien		break;
20585587Sobrien	}
20685587Sobrien}
20785587Sobrien
20885587Sobrienvoid freetr(Node *p)	/* free parse tree */
20985587Sobrien{
21085587Sobrien	switch (type(p)) {
211170331Srafan	ELEAF
21285587Sobrien	LEAF
21385587Sobrien		xfree(p);
21485587Sobrien		break;
21585587Sobrien	UNARY
21685587Sobrien		freetr(left(p));
21785587Sobrien		xfree(p);
21885587Sobrien		break;
21985587Sobrien	case CAT:
22085587Sobrien	case OR:
22185587Sobrien		freetr(left(p));
22285587Sobrien		freetr(right(p));
22385587Sobrien		xfree(p);
22485587Sobrien		break;
22585587Sobrien	default:	/* can't happen */
22685587Sobrien		FATAL("can't happen: unknown type %d in freetr", type(p));
22785587Sobrien		break;
22885587Sobrien	}
22985587Sobrien}
23085587Sobrien
23185587Sobrien/* in the parsing of regular expressions, metacharacters like . have */
23285587Sobrien/* to be seen literally;  \056 is not a metacharacter. */
23385587Sobrien
23485587Sobrienint hexstr(char **pp)	/* find and eval hex string at pp, return new p */
23585587Sobrien{			/* only pick up one 8-bit byte (2 chars) */
23685587Sobrien	uschar *p;
23785587Sobrien	int n = 0;
23885587Sobrien	int i;
23985587Sobrien
24085587Sobrien	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
24185587Sobrien		if (isdigit(*p))
24285587Sobrien			n = 16 * n + *p - '0';
24385587Sobrien		else if (*p >= 'a' && *p <= 'f')
24485587Sobrien			n = 16 * n + *p - 'a' + 10;
24585587Sobrien		else if (*p >= 'A' && *p <= 'F')
24685587Sobrien			n = 16 * n + *p - 'A' + 10;
24785587Sobrien	}
24885587Sobrien	*pp = (char *) p;
24985587Sobrien	return n;
25085587Sobrien}
25185587Sobrien
25285587Sobrien#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
25385587Sobrien
25485587Sobrienint quoted(char **pp)	/* pick up next thing after a \\ */
25585587Sobrien			/* and increment *pp */
25685587Sobrien{
25785587Sobrien	char *p = *pp;
25885587Sobrien	int c;
25985587Sobrien
26085587Sobrien	if ((c = *p++) == 't')
26185587Sobrien		c = '\t';
26285587Sobrien	else if (c == 'n')
26385587Sobrien		c = '\n';
26485587Sobrien	else if (c == 'f')
26585587Sobrien		c = '\f';
26685587Sobrien	else if (c == 'r')
26785587Sobrien		c = '\r';
26885587Sobrien	else if (c == 'b')
26985587Sobrien		c = '\b';
27085587Sobrien	else if (c == '\\')
27185587Sobrien		c = '\\';
27285587Sobrien	else if (c == 'x') {	/* hexadecimal goo follows */
27385587Sobrien		c = hexstr(&p);	/* this adds a null if number is invalid */
27485587Sobrien	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
27585587Sobrien		int n = c - '0';
27685587Sobrien		if (isoctdigit(*p)) {
27785587Sobrien			n = 8 * n + *p++ - '0';
27885587Sobrien			if (isoctdigit(*p))
27985587Sobrien				n = 8 * n + *p++ - '0';
28085587Sobrien		}
28185587Sobrien		c = n;
28285587Sobrien	} /* else */
28385587Sobrien		/* c = c; */
28485587Sobrien	*pp = p;
28585587Sobrien	return c;
28685587Sobrien}
28785587Sobrien
288107806Sobrienchar *cclenter(const char *argp)	/* add a character class */
289107806Sobrien{
29085587Sobrien	int i, c, c2;
29185587Sobrien	uschar *p = (uschar *) argp;
29285587Sobrien	uschar *op, *bp;
29385587Sobrien	static uschar *buf = 0;
29485587Sobrien	static int bufsz = 100;
29585587Sobrien
29685587Sobrien	op = p;
29785587Sobrien	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
29885587Sobrien		FATAL("out of space for character class [%.10s...] 1", p);
29985587Sobrien	bp = buf;
30085587Sobrien	for (i = 0; (c = *p++) != 0; ) {
30185587Sobrien		if (c == '\\') {
30285587Sobrien			c = quoted((char **) &p);
30385587Sobrien		} else if (c == '-' && i > 0 && bp[-1] != 0) {
30485587Sobrien			if (*p != 0) {
30585587Sobrien				c = bp[-1];
30685587Sobrien				c2 = *p++;
30785587Sobrien				if (c2 == '\\')
30885587Sobrien					c2 = quoted((char **) &p);
309118194Sru				if (c > c2) {	/* empty; ignore */
31085587Sobrien					bp--;
31185587Sobrien					i--;
31285587Sobrien					continue;
31385587Sobrien				}
314118194Sru				while (c < c2) {
315170331Srafan					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
31685587Sobrien						FATAL("out of space for character class [%.10s...] 2", p);
317118194Sru					*bp++ = ++c;
31885587Sobrien					i++;
31985587Sobrien				}
32085587Sobrien				continue;
32185587Sobrien			}
32285587Sobrien		}
323170331Srafan		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
32485587Sobrien			FATAL("out of space for character class [%.10s...] 3", p);
32585587Sobrien		*bp++ = c;
32685587Sobrien		i++;
32785587Sobrien	}
32885587Sobrien	*bp = 0;
32985587Sobrien	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
33085587Sobrien	xfree(op);
33185587Sobrien	return (char *) tostring((char *) buf);
33285587Sobrien}
33385587Sobrien
334107806Sobrienvoid overflo(const char *s)
33585587Sobrien{
33685587Sobrien	FATAL("regular expression too big: %.30s...", s);
33785587Sobrien}
33885587Sobrien
33985587Sobrienvoid cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
34085587Sobrien{
34185587Sobrien	int i;
34285587Sobrien	int *p;
34385587Sobrien
34485587Sobrien	switch (type(v)) {
345170331Srafan	ELEAF
34685587Sobrien	LEAF
34785587Sobrien		f->re[info(v)].ltype = type(v);
34885587Sobrien		f->re[info(v)].lval.np = right(v);
34985587Sobrien		while (f->accept >= maxsetvec) {	/* guessing here! */
35085587Sobrien			maxsetvec *= 4;
35185587Sobrien			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
35285587Sobrien			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
35385587Sobrien			if (setvec == 0 || tmpset == 0)
35485587Sobrien				overflo("out of space in cfoll()");
35585587Sobrien		}
35685587Sobrien		for (i = 0; i <= f->accept; i++)
35785587Sobrien			setvec[i] = 0;
35885587Sobrien		setcnt = 0;
35985587Sobrien		follow(v);	/* computes setvec and setcnt */
36085587Sobrien		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
36185587Sobrien			overflo("out of space building follow set");
36285587Sobrien		f->re[info(v)].lfollow = p;
36385587Sobrien		*p = setcnt;
36485587Sobrien		for (i = f->accept; i >= 0; i--)
36585587Sobrien			if (setvec[i] == 1)
36685587Sobrien				*++p = i;
36785587Sobrien		break;
36885587Sobrien	UNARY
36985587Sobrien		cfoll(f,left(v));
37085587Sobrien		break;
37185587Sobrien	case CAT:
37285587Sobrien	case OR:
37385587Sobrien		cfoll(f,left(v));
37485587Sobrien		cfoll(f,right(v));
37585587Sobrien		break;
37685587Sobrien	default:	/* can't happen */
37785587Sobrien		FATAL("can't happen: unknown type %d in cfoll", type(v));
37885587Sobrien	}
37985587Sobrien}
38085587Sobrien
38185587Sobrienint first(Node *p)	/* collects initially active leaves of p into setvec */
382170331Srafan			/* returns 0 if p matches empty string */
38385587Sobrien{
38485587Sobrien	int b, lp;
38585587Sobrien
38685587Sobrien	switch (type(p)) {
387170331Srafan	ELEAF
38885587Sobrien	LEAF
38985587Sobrien		lp = info(p);	/* look for high-water mark of subscripts */
39085587Sobrien		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
39185587Sobrien			maxsetvec *= 4;
39285587Sobrien			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
39385587Sobrien			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
39485587Sobrien			if (setvec == 0 || tmpset == 0)
39585587Sobrien				overflo("out of space in first()");
39685587Sobrien		}
397170331Srafan		if (type(p) == EMPTYRE) {
398170331Srafan			setvec[lp] = 0;
399170331Srafan			return(0);
400170331Srafan		}
40185587Sobrien		if (setvec[lp] != 1) {
40285587Sobrien			setvec[lp] = 1;
40385587Sobrien			setcnt++;
40485587Sobrien		}
40585587Sobrien		if (type(p) == CCL && (*(char *) right(p)) == '\0')
40685587Sobrien			return(0);		/* empty CCL */
40785587Sobrien		else return(1);
40885587Sobrien	case PLUS:
40985587Sobrien		if (first(left(p)) == 0) return(0);
41085587Sobrien		return(1);
41185587Sobrien	case STAR:
41285587Sobrien	case QUEST:
41385587Sobrien		first(left(p));
41485587Sobrien		return(0);
41585587Sobrien	case CAT:
41685587Sobrien		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
41785587Sobrien		return(1);
41885587Sobrien	case OR:
41985587Sobrien		b = first(right(p));
42085587Sobrien		if (first(left(p)) == 0 || b == 0) return(0);
42185587Sobrien		return(1);
42285587Sobrien	}
42385587Sobrien	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
42485587Sobrien	return(-1);
42585587Sobrien}
42685587Sobrien
42785587Sobrienvoid follow(Node *v)	/* collects leaves that can follow v into setvec */
42885587Sobrien{
42985587Sobrien	Node *p;
43085587Sobrien
43185587Sobrien	if (type(v) == FINAL)
43285587Sobrien		return;
43385587Sobrien	p = parent(v);
43485587Sobrien	switch (type(p)) {
43585587Sobrien	case STAR:
43685587Sobrien	case PLUS:
43785587Sobrien		first(v);
43885587Sobrien		follow(p);
43985587Sobrien		return;
44085587Sobrien
44185587Sobrien	case OR:
44285587Sobrien	case QUEST:
44385587Sobrien		follow(p);
44485587Sobrien		return;
44585587Sobrien
44685587Sobrien	case CAT:
44785587Sobrien		if (v == left(p)) {	/* v is left child of p */
44885587Sobrien			if (first(right(p)) == 0) {
44985587Sobrien				follow(p);
45085587Sobrien				return;
45185587Sobrien			}
45285587Sobrien		} else		/* v is right child */
45385587Sobrien			follow(p);
45485587Sobrien		return;
45585587Sobrien	}
45685587Sobrien}
45785587Sobrien
458107806Sobrienint member(int c, const char *sarg)	/* is c in s? */
45985587Sobrien{
46085587Sobrien	uschar *s = (uschar *) sarg;
46185587Sobrien
46285587Sobrien	while (*s)
46385587Sobrien		if (c == *s++)
46485587Sobrien			return(1);
46585587Sobrien	return(0);
46685587Sobrien}
46785587Sobrien
468107806Sobrienint match(fa *f, const char *p0)	/* shortest match ? */
46985587Sobrien{
47085587Sobrien	int s, ns;
47185587Sobrien	uschar *p = (uschar *) p0;
47285587Sobrien
47385587Sobrien	s = f->reset ? makeinit(f,0) : f->initstat;
47485587Sobrien	if (f->out[s])
47585587Sobrien		return(1);
47685587Sobrien	do {
477170331Srafan		/* assert(*p < NCHARS); */
47885587Sobrien		if ((ns = f->gototab[s][*p]) != 0)
47985587Sobrien			s = ns;
48085587Sobrien		else
48185587Sobrien			s = cgoto(f, s, *p);
48285587Sobrien		if (f->out[s])
48385587Sobrien			return(1);
48485587Sobrien	} while (*p++ != 0);
48585587Sobrien	return(0);
48685587Sobrien}
48785587Sobrien
488107806Sobrienint pmatch(fa *f, const char *p0)	/* longest match, for sub */
48985587Sobrien{
49085587Sobrien	int s, ns;
49185587Sobrien	uschar *p = (uschar *) p0;
49285587Sobrien	uschar *q;
49385587Sobrien	int i, k;
49485587Sobrien
495125601Sru	/* s = f->reset ? makeinit(f,1) : f->initstat; */
496125601Sru	if (f->reset) {
497125601Sru		f->initstat = s = makeinit(f,1);
498125601Sru	} else {
499125601Sru		s = f->initstat;
500125601Sru	}
50185587Sobrien	patbeg = (char *) p;
50285587Sobrien	patlen = -1;
50385587Sobrien	do {
50485587Sobrien		q = p;
50585587Sobrien		do {
50685587Sobrien			if (f->out[s])		/* final state */
50785587Sobrien				patlen = q-p;
508170331Srafan			/* assert(*q < NCHARS); */
50985587Sobrien			if ((ns = f->gototab[s][*q]) != 0)
51085587Sobrien				s = ns;
51185587Sobrien			else
51285587Sobrien				s = cgoto(f, s, *q);
51385587Sobrien			if (s == 1) {	/* no transition */
51485587Sobrien				if (patlen >= 0) {
51585587Sobrien					patbeg = (char *) p;
51685587Sobrien					return(1);
51785587Sobrien				}
51885587Sobrien				else
51985587Sobrien					goto nextin;	/* no match */
52085587Sobrien			}
52185587Sobrien		} while (*q++ != 0);
52285587Sobrien		if (f->out[s])
52385587Sobrien			patlen = q-p-1;	/* don't count $ */
52485587Sobrien		if (patlen >= 0) {
52585587Sobrien			patbeg = (char *) p;
52685587Sobrien			return(1);
52785587Sobrien		}
52885587Sobrien	nextin:
52985587Sobrien		s = 2;
53085587Sobrien		if (f->reset) {
53185587Sobrien			for (i = 2; i <= f->curstat; i++)
53285587Sobrien				xfree(f->posns[i]);
53385587Sobrien			k = *f->posns[0];
53485587Sobrien			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
53585587Sobrien				overflo("out of space in pmatch");
53685587Sobrien			for (i = 0; i <= k; i++)
53785587Sobrien				(f->posns[2])[i] = (f->posns[0])[i];
53885587Sobrien			f->initstat = f->curstat = 2;
53985587Sobrien			f->out[2] = f->out[0];
54085587Sobrien			for (i = 0; i < NCHARS; i++)
54185587Sobrien				f->gototab[2][i] = 0;
54285587Sobrien		}
54385587Sobrien	} while (*p++ != 0);
54485587Sobrien	return (0);
54585587Sobrien}
54685587Sobrien
547107806Sobrienint nematch(fa *f, const char *p0)	/* non-empty match, for sub */
54885587Sobrien{
54985587Sobrien	int s, ns;
55085587Sobrien	uschar *p = (uschar *) p0;
55185587Sobrien	uschar *q;
55285587Sobrien	int i, k;
55385587Sobrien
554125601Sru	/* s = f->reset ? makeinit(f,1) : f->initstat; */
555125601Sru	if (f->reset) {
556125601Sru		f->initstat = s = makeinit(f,1);
557125601Sru	} else {
558125601Sru		s = f->initstat;
559125601Sru	}
56085587Sobrien	patlen = -1;
56185587Sobrien	while (*p) {
56285587Sobrien		q = p;
56385587Sobrien		do {
56485587Sobrien			if (f->out[s])		/* final state */
56585587Sobrien				patlen = q-p;
566170331Srafan			/* assert(*q < NCHARS); */
56785587Sobrien			if ((ns = f->gototab[s][*q]) != 0)
56885587Sobrien				s = ns;
56985587Sobrien			else
57085587Sobrien				s = cgoto(f, s, *q);
57185587Sobrien			if (s == 1) {	/* no transition */
57285587Sobrien				if (patlen > 0) {
57385587Sobrien					patbeg = (char *) p;
57485587Sobrien					return(1);
57585587Sobrien				} else
57685587Sobrien					goto nnextin;	/* no nonempty match */
57785587Sobrien			}
57885587Sobrien		} while (*q++ != 0);
57985587Sobrien		if (f->out[s])
58085587Sobrien			patlen = q-p-1;	/* don't count $ */
58185587Sobrien		if (patlen > 0 ) {
58285587Sobrien			patbeg = (char *) p;
58385587Sobrien			return(1);
58485587Sobrien		}
58585587Sobrien	nnextin:
58685587Sobrien		s = 2;
58785587Sobrien		if (f->reset) {
58885587Sobrien			for (i = 2; i <= f->curstat; i++)
58985587Sobrien				xfree(f->posns[i]);
59085587Sobrien			k = *f->posns[0];
59185587Sobrien			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
59285587Sobrien				overflo("out of state space");
59385587Sobrien			for (i = 0; i <= k; i++)
59485587Sobrien				(f->posns[2])[i] = (f->posns[0])[i];
59585587Sobrien			f->initstat = f->curstat = 2;
59685587Sobrien			f->out[2] = f->out[0];
59785587Sobrien			for (i = 0; i < NCHARS; i++)
59885587Sobrien				f->gototab[2][i] = 0;
59985587Sobrien		}
60085587Sobrien		p++;
60185587Sobrien	}
60285587Sobrien	return (0);
60385587Sobrien}
60485587Sobrien
605107806SobrienNode *reparse(const char *p)	/* parses regular expression pointed to by p */
60685587Sobrien{			/* uses relex() to scan regular expression */
60785587Sobrien	Node *np;
60885587Sobrien
60985587Sobrien	dprintf( ("reparse <%s>\n", p) );
61085587Sobrien	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
61185587Sobrien	rtok = relex();
612107806Sobrien	/* GNU compatibility: an empty regexp matches anything */
613170331Srafan	if (rtok == '\0') {
614107806Sobrien		/* FATAL("empty regular expression"); previous */
615170331Srafan		return(op2(EMPTYRE, NIL, NIL));
616170331Srafan	}
61785587Sobrien	np = regexp();
61885587Sobrien	if (rtok != '\0')
61985587Sobrien		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
62085587Sobrien	return(np);
62185587Sobrien}
62285587Sobrien
62385587SobrienNode *regexp(void)	/* top-level parse of reg expr */
62485587Sobrien{
62585587Sobrien	return (alt(concat(primary())));
62685587Sobrien}
62785587Sobrien
62885587SobrienNode *primary(void)
62985587Sobrien{
63085587Sobrien	Node *np;
63185587Sobrien
63285587Sobrien	switch (rtok) {
63385587Sobrien	case CHAR:
63485587Sobrien		np = op2(CHAR, NIL, itonp(rlxval));
63585587Sobrien		rtok = relex();
63685587Sobrien		return (unary(np));
63785587Sobrien	case ALL:
63885587Sobrien		rtok = relex();
63985587Sobrien		return (unary(op2(ALL, NIL, NIL)));
640170331Srafan	case EMPTYRE:
641170331Srafan		rtok = relex();
642170331Srafan		return (unary(op2(ALL, NIL, NIL)));
64385587Sobrien	case DOT:
64485587Sobrien		rtok = relex();
64585587Sobrien		return (unary(op2(DOT, NIL, NIL)));
64685587Sobrien	case CCL:
64785587Sobrien		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
64885587Sobrien		rtok = relex();
64985587Sobrien		return (unary(np));
65085587Sobrien	case NCCL:
65185587Sobrien		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
65285587Sobrien		rtok = relex();
65385587Sobrien		return (unary(np));
65485587Sobrien	case '^':
65585587Sobrien		rtok = relex();
65685587Sobrien		return (unary(op2(CHAR, NIL, itonp(HAT))));
65785587Sobrien	case '$':
65885587Sobrien		rtok = relex();
65985587Sobrien		return (unary(op2(CHAR, NIL, NIL)));
66085587Sobrien	case '(':
66185587Sobrien		rtok = relex();
66285587Sobrien		if (rtok == ')') {	/* special pleading for () */
66385587Sobrien			rtok = relex();
66485587Sobrien			return unary(op2(CCL, NIL, (Node *) tostring("")));
66585587Sobrien		}
66685587Sobrien		np = regexp();
66785587Sobrien		if (rtok == ')') {
66885587Sobrien			rtok = relex();
66985587Sobrien			return (unary(np));
67085587Sobrien		}
67185587Sobrien		else
67285587Sobrien			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
67385587Sobrien	default:
67485587Sobrien		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
67585587Sobrien	}
67685587Sobrien	return 0;	/*NOTREACHED*/
67785587Sobrien}
67885587Sobrien
67985587SobrienNode *concat(Node *np)
68085587Sobrien{
68185587Sobrien	switch (rtok) {
682170331Srafan	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
68385587Sobrien		return (concat(op2(CAT, np, primary())));
68485587Sobrien	}
68585587Sobrien	return (np);
68685587Sobrien}
68785587Sobrien
68885587SobrienNode *alt(Node *np)
68985587Sobrien{
69085587Sobrien	if (rtok == OR) {
69185587Sobrien		rtok = relex();
69285587Sobrien		return (alt(op2(OR, np, concat(primary()))));
69385587Sobrien	}
69485587Sobrien	return (np);
69585587Sobrien}
69685587Sobrien
69785587SobrienNode *unary(Node *np)
69885587Sobrien{
69985587Sobrien	switch (rtok) {
70085587Sobrien	case STAR:
70185587Sobrien		rtok = relex();
70285587Sobrien		return (unary(op2(STAR, np, NIL)));
70385587Sobrien	case PLUS:
70485587Sobrien		rtok = relex();
70585587Sobrien		return (unary(op2(PLUS, np, NIL)));
70685587Sobrien	case QUEST:
70785587Sobrien		rtok = relex();
70885587Sobrien		return (unary(op2(QUEST, np, NIL)));
70985587Sobrien	default:
71085587Sobrien		return (np);
71185587Sobrien	}
71285587Sobrien}
71385587Sobrien
71490902Sdes/*
71590902Sdes * Character class definitions conformant to the POSIX locale as
71690902Sdes * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
71790902Sdes * and operating character sets are both ASCII (ISO646) or supersets
71890902Sdes * thereof.
71990902Sdes *
72090902Sdes * Note that to avoid overflowing the temporary buffer used in
72190902Sdes * relex(), the expanded character class (prior to range expansion)
72290902Sdes * must be less than twice the size of their full name.
72390902Sdes */
724112336Sobrien
725112336Sobrien/* Because isblank doesn't show up in any of the header files on any
726112336Sobrien * system i use, it's defined here.  if some other locale has a richer
727112336Sobrien * definition of "blank", define HAS_ISBLANK and provide your own
728112336Sobrien * version.
729118194Sru * the parentheses here are an attempt to find a path through the maze
730118194Sru * of macro definition and/or function and/or version provided.  thanks
731118194Sru * to nelson beebe for the suggestion; let's see if it works everywhere.
732112336Sobrien */
733112336Sobrien
734112336Sobrien#ifndef HAS_ISBLANK
735112336Sobrien
736118194Sruint (isblank)(int c)
737112336Sobrien{
738112336Sobrien	return c==' ' || c=='\t';
739112336Sobrien}
740112336Sobrien
741112336Sobrien#endif
742112336Sobrien
74390902Sdesstruct charclass {
74490902Sdes	const char *cc_name;
74590902Sdes	int cc_namelen;
746112336Sobrien	int (*cc_func)(int);
74790902Sdes} charclasses[] = {
748112336Sobrien	{ "alnum",	5,	isalnum },
749112336Sobrien	{ "alpha",	5,	isalpha },
750112336Sobrien	{ "blank",	5,	isblank },
751112336Sobrien	{ "cntrl",	5,	iscntrl },
752112336Sobrien	{ "digit",	5,	isdigit },
753112336Sobrien	{ "graph",	5,	isgraph },
754112336Sobrien	{ "lower",	5,	islower },
755112336Sobrien	{ "print",	5,	isprint },
756112336Sobrien	{ "punct",	5,	ispunct },
757112336Sobrien	{ "space",	5,	isspace },
758112336Sobrien	{ "upper",	5,	isupper },
759112336Sobrien	{ "xdigit",	6,	isxdigit },
76090902Sdes	{ NULL,		0,	NULL },
76190902Sdes};
76290902Sdes
76390902Sdes
76485587Sobrienint relex(void)		/* lexical analyzer for reparse */
76585587Sobrien{
76685587Sobrien	int c, n;
76785587Sobrien	int cflag;
76885587Sobrien	static uschar *buf = 0;
76985587Sobrien	static int bufsz = 100;
77085587Sobrien	uschar *bp;
77190902Sdes	struct charclass *cc;
772112336Sobrien	int i;
77385587Sobrien
77485587Sobrien	switch (c = *prestr++) {
77585587Sobrien	case '|': return OR;
77685587Sobrien	case '*': return STAR;
77785587Sobrien	case '+': return PLUS;
77885587Sobrien	case '?': return QUEST;
77985587Sobrien	case '.': return DOT;
78085587Sobrien	case '\0': prestr--; return '\0';
78185587Sobrien	case '^':
78285587Sobrien	case '$':
78385587Sobrien	case '(':
78485587Sobrien	case ')':
78585587Sobrien		return c;
78685587Sobrien	case '\\':
78785587Sobrien		rlxval = quoted((char **) &prestr);
78885587Sobrien		return CHAR;
78985587Sobrien	default:
79085587Sobrien		rlxval = c;
79185587Sobrien		return CHAR;
79285587Sobrien	case '[':
79385587Sobrien		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
79485587Sobrien			FATAL("out of space in reg expr %.10s..", lastre);
79585587Sobrien		bp = buf;
79685587Sobrien		if (*prestr == '^') {
79785587Sobrien			cflag = 1;
79885587Sobrien			prestr++;
79985587Sobrien		}
80085587Sobrien		else
80185587Sobrien			cflag = 0;
80290902Sdes		n = 2 * strlen((const char *) prestr)+1;
803170331Srafan		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
80485587Sobrien			FATAL("out of space for reg expr %.10s...", lastre);
80585587Sobrien		for (; ; ) {
80685587Sobrien			if ((c = *prestr++) == '\\') {
80785587Sobrien				*bp++ = '\\';
80885587Sobrien				if ((c = *prestr++) == '\0')
80985587Sobrien					FATAL("nonterminated character class %.20s...", lastre);
81085587Sobrien				*bp++ = c;
81185587Sobrien			/* } else if (c == '\n') { */
81285587Sobrien			/* 	FATAL("newline in character class %.20s...", lastre); */
81390902Sdes			} else if (c == '[' && *prestr == ':') {
81490902Sdes				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
81590902Sdes				for (cc = charclasses; cc->cc_name; cc++)
81690902Sdes					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
81790902Sdes						break;
81890902Sdes				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
81990902Sdes				    prestr[2 + cc->cc_namelen] == ']') {
82090902Sdes					prestr += cc->cc_namelen + 3;
821112336Sobrien					for (i = 0; i < NCHARS; i++) {
822170331Srafan						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
823112336Sobrien						    FATAL("out of space for reg expr %.10s...", lastre);
824112336Sobrien						if (cc->cc_func(i)) {
825112336Sobrien							*bp++ = i;
826112336Sobrien							n++;
827112336Sobrien						}
828112336Sobrien					}
82990902Sdes				} else
83090902Sdes					*bp++ = c;
83185587Sobrien			} else if (c == '\0') {
83285587Sobrien				FATAL("nonterminated character class %.20s", lastre);
83385587Sobrien			} else if (bp == buf) {	/* 1st char is special */
83485587Sobrien				*bp++ = c;
83585587Sobrien			} else if (c == ']') {
83685587Sobrien				*bp++ = 0;
83785587Sobrien				rlxstr = (uschar *) tostring((char *) buf);
83885587Sobrien				if (cflag == 0)
83985587Sobrien					return CCL;
84085587Sobrien				else
84185587Sobrien					return NCCL;
84285587Sobrien			} else
84385587Sobrien				*bp++ = c;
84485587Sobrien		}
84585587Sobrien	}
84685587Sobrien}
84785587Sobrien
84885587Sobrienint cgoto(fa *f, int s, int c)
84985587Sobrien{
85085587Sobrien	int i, j, k;
85185587Sobrien	int *p, *q;
85285587Sobrien
853146299Sru	assert(c == HAT || c < NCHARS);
85485587Sobrien	while (f->accept >= maxsetvec) {	/* guessing here! */
85585587Sobrien		maxsetvec *= 4;
85685587Sobrien		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
85785587Sobrien		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
85885587Sobrien		if (setvec == 0 || tmpset == 0)
85985587Sobrien			overflo("out of space in cgoto()");
86085587Sobrien	}
86185587Sobrien	for (i = 0; i <= f->accept; i++)
86285587Sobrien		setvec[i] = 0;
86385587Sobrien	setcnt = 0;
86485587Sobrien	/* compute positions of gototab[s,c] into setvec */
86585587Sobrien	p = f->posns[s];
86685587Sobrien	for (i = 1; i <= *p; i++) {
86785587Sobrien		if ((k = f->re[p[i]].ltype) != FINAL) {
86885587Sobrien			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
86985587Sobrien			 || (k == DOT && c != 0 && c != HAT)
87085587Sobrien			 || (k == ALL && c != 0)
871170331Srafan			 || (k == EMPTYRE && c != 0)
87285587Sobrien			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
87385587Sobrien			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
87485587Sobrien				q = f->re[p[i]].lfollow;
87585587Sobrien				for (j = 1; j <= *q; j++) {
87685587Sobrien					if (q[j] >= maxsetvec) {
87785587Sobrien						maxsetvec *= 4;
87885587Sobrien						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
87985587Sobrien						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
88085587Sobrien						if (setvec == 0 || tmpset == 0)
88185587Sobrien							overflo("cgoto overflow");
88285587Sobrien					}
88385587Sobrien					if (setvec[q[j]] == 0) {
88485587Sobrien						setcnt++;
88585587Sobrien						setvec[q[j]] = 1;
88685587Sobrien					}
88785587Sobrien				}
88885587Sobrien			}
88985587Sobrien		}
89085587Sobrien	}
89185587Sobrien	/* determine if setvec is a previous state */
89285587Sobrien	tmpset[0] = setcnt;
89385587Sobrien	j = 1;
89485587Sobrien	for (i = f->accept; i >= 0; i--)
89585587Sobrien		if (setvec[i]) {
89685587Sobrien			tmpset[j++] = i;
89785587Sobrien		}
89885587Sobrien	/* tmpset == previous state? */
89985587Sobrien	for (i = 1; i <= f->curstat; i++) {
90085587Sobrien		p = f->posns[i];
90185587Sobrien		if ((k = tmpset[0]) != p[0])
90285587Sobrien			goto different;
90385587Sobrien		for (j = 1; j <= k; j++)
90485587Sobrien			if (tmpset[j] != p[j])
90585587Sobrien				goto different;
90685587Sobrien		/* setvec is state i */
90785587Sobrien		f->gototab[s][c] = i;
90885587Sobrien		return i;
90985587Sobrien	  different:;
91085587Sobrien	}
91185587Sobrien
91285587Sobrien	/* add tmpset to current set of states */
91385587Sobrien	if (f->curstat >= NSTATES-1) {
91485587Sobrien		f->curstat = 2;
91585587Sobrien		f->reset = 1;
91685587Sobrien		for (i = 2; i < NSTATES; i++)
91785587Sobrien			xfree(f->posns[i]);
91885587Sobrien	} else
91985587Sobrien		++(f->curstat);
92085587Sobrien	for (i = 0; i < NCHARS; i++)
92185587Sobrien		f->gototab[f->curstat][i] = 0;
92285587Sobrien	xfree(f->posns[f->curstat]);
92385587Sobrien	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
92485587Sobrien		overflo("out of space in cgoto");
92585587Sobrien
92685587Sobrien	f->posns[f->curstat] = p;
92785587Sobrien	f->gototab[s][c] = f->curstat;
92885587Sobrien	for (i = 0; i <= setcnt; i++)
92985587Sobrien		p[i] = tmpset[i];
93085587Sobrien	if (setvec[f->accept])
93185587Sobrien		f->out[f->curstat] = 1;
93285587Sobrien	else
93385587Sobrien		f->out[f->curstat] = 0;
93485587Sobrien	return f->curstat;
93585587Sobrien}
93685587Sobrien
93785587Sobrien
93885587Sobrienvoid freefa(fa *f)	/* free a finite automaton */
93985587Sobrien{
94085587Sobrien	int i;
94185587Sobrien
94285587Sobrien	if (f == NULL)
94385587Sobrien		return;
94485587Sobrien	for (i = 0; i <= f->curstat; i++)
94585587Sobrien		xfree(f->posns[i]);
94685587Sobrien	for (i = 0; i <= f->accept; i++) {
94785587Sobrien		xfree(f->re[i].lfollow);
94885587Sobrien		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
94985587Sobrien			xfree((f->re[i].lval.np));
95085587Sobrien	}
95185587Sobrien	xfree(f->restr);
95285587Sobrien	xfree(f);
95385587Sobrien}
954