185587Sobrien/****************************************************************
285587SobrienCopyright (C) Lucent Technologies 1997
385587SobrienAll Rights Reserved
485587Sobrien
585587SobrienPermission to use, copy, modify, and distribute this software and
685587Sobrienits documentation for any purpose and without fee is hereby
785587Sobriengranted, provided that the above copyright notice appear in all
885587Sobriencopies and that both that the copyright notice and this
985587Sobrienpermission notice and warranty disclaimer appear in supporting
1085587Sobriendocumentation, and that the name Lucent Technologies or any of
1185587Sobrienits entities not be used in advertising or publicity pertaining
1285587Sobriento distribution of the software without specific, written prior
1385587Sobrienpermission.
1485587Sobrien
1585587SobrienLUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
1685587SobrienINCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
1785587SobrienIN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
1885587SobrienSPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1985587SobrienWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
2085587SobrienIN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
2185587SobrienARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
2285587SobrienTHIS SOFTWARE.
2385587Sobrien****************************************************************/
2485587Sobrien
25170331Srafan/* lasciate ogne speranza, voi ch'intrate. */
2685587Sobrien
27201989Sru#include <sys/cdefs.h>
28201989Sru__FBSDID("$FreeBSD: stable/11/contrib/one-true-awk/b.c 315581 2017-03-19 20:04:23Z pfg $");
29201989Sru
3085587Sobrien#define	DEBUG
3185587Sobrien
3285587Sobrien#include <ctype.h>
3385587Sobrien#include <stdio.h>
3485587Sobrien#include <string.h>
3585587Sobrien#include <stdlib.h>
3685587Sobrien#include "awk.h"
3785587Sobrien#include "ytab.h"
3885587Sobrien
39118194Sru#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
4085587Sobrien				/* NCHARS is 2**n */
4185587Sobrien#define MAXLIN 22
4285587Sobrien
4385587Sobrien#define type(v)		(v)->nobj	/* badly overloaded here */
4485587Sobrien#define info(v)		(v)->ntype	/* badly overloaded here */
4585587Sobrien#define left(v)		(v)->narg[0]
4685587Sobrien#define right(v)	(v)->narg[1]
4785587Sobrien#define parent(v)	(v)->nnext
4885587Sobrien
4985587Sobrien#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
50170331Srafan#define ELEAF	case EMPTYRE:		/* empty string in regexp */
5185587Sobrien#define UNARY	case STAR: case PLUS: case QUEST:
5285587Sobrien
5385587Sobrien/* encoding in tree Nodes:
54170331Srafan	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
5585587Sobrien		left is index, right contains value or pointer to value
5685587Sobrien	unary (STAR, PLUS, QUEST): left is child, right is null
5785587Sobrien	binary (CAT, OR): left and right are children
5885587Sobrien	parent contains pointer to parent
5985587Sobrien*/
6085587Sobrien
6185587Sobrien
6285587Sobrienint	*setvec;
6385587Sobrienint	*tmpset;
6485587Sobrienint	maxsetvec = 0;
6585587Sobrien
6685587Sobrienint	rtok;		/* next token in current re */
6785587Sobrienint	rlxval;
6885587Sobrienstatic uschar	*rlxstr;
6985587Sobrienstatic uschar	*prestr;	/* current position in current re */
7085587Sobrienstatic uschar	*lastre;	/* origin of last re */
7185587Sobrien
7285587Sobrienstatic	int setcnt;
7385587Sobrienstatic	int poscnt;
7485587Sobrien
7585587Sobrienchar	*patbeg;
7685587Sobrienint	patlen;
7785587Sobrien
7885587Sobrien#define	NFA	20	/* cache this many dynamic fa's */
7985587Sobrienfa	*fatab[NFA];
8085587Sobrienint	nfatab	= 0;	/* entries in fatab */
8185587Sobrien
82107806Sobrienfa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
8385587Sobrien{
8485587Sobrien	int i, use, nuse;
8585587Sobrien	fa *pfa;
8685587Sobrien	static int now = 1;
8785587Sobrien
88301289Spfg	if (setvec == NULL) {	/* first time through any RE */
8985587Sobrien		maxsetvec = MAXLIN;
9085587Sobrien		setvec = (int *) malloc(maxsetvec * sizeof(int));
9185587Sobrien		tmpset = (int *) malloc(maxsetvec * sizeof(int));
92301289Spfg		if (setvec == NULL || tmpset == NULL)
9385587Sobrien			overflo("out of space initializing makedfa");
9485587Sobrien	}
9585587Sobrien
9685587Sobrien	if (compile_time)	/* a constant for sure */
9785587Sobrien		return mkdfa(s, anchor);
9885587Sobrien	for (i = 0; i < nfatab; i++)	/* is it there already? */
9985587Sobrien		if (fatab[i]->anchor == anchor
10090902Sdes		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
10185587Sobrien			fatab[i]->use = now++;
10285587Sobrien			return fatab[i];
10385587Sobrien		}
10485587Sobrien	pfa = mkdfa(s, anchor);
10585587Sobrien	if (nfatab < NFA) {	/* room for another */
10685587Sobrien		fatab[nfatab] = pfa;
10785587Sobrien		fatab[nfatab]->use = now++;
10885587Sobrien		nfatab++;
10985587Sobrien		return pfa;
11085587Sobrien	}
11185587Sobrien	use = fatab[0]->use;	/* replace least-recently used */
11285587Sobrien	nuse = 0;
11385587Sobrien	for (i = 1; i < nfatab; i++)
11485587Sobrien		if (fatab[i]->use < use) {
11585587Sobrien			use = fatab[i]->use;
11685587Sobrien			nuse = i;
11785587Sobrien		}
11885587Sobrien	freefa(fatab[nuse]);
11985587Sobrien	fatab[nuse] = pfa;
12085587Sobrien	pfa->use = now++;
12185587Sobrien	return pfa;
12285587Sobrien}
12385587Sobrien
124107806Sobrienfa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
12585587Sobrien				/* anchor = 1 for anchored matches, else 0 */
12685587Sobrien{
12785587Sobrien	Node *p, *p1;
12885587Sobrien	fa *f;
12985587Sobrien
13085587Sobrien	p = reparse(s);
13185587Sobrien	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
13285587Sobrien		/* put ALL STAR in front of reg.  exp. */
13385587Sobrien	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
13485587Sobrien		/* put FINAL after reg.  exp. */
13585587Sobrien
13685587Sobrien	poscnt = 0;
13785587Sobrien	penter(p1);	/* enter parent pointers and leaf indices */
13885587Sobrien	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
13985587Sobrien		overflo("out of space for fa");
14085587Sobrien	f->accept = poscnt-1;	/* penter has computed number of positions in re */
14185587Sobrien	cfoll(f, p1);	/* set up follow sets */
14285587Sobrien	freetr(p1);
143315581Spfg	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
14485587Sobrien			overflo("out of space in makedfa");
14585587Sobrien	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
14685587Sobrien		overflo("out of space in makedfa");
14785587Sobrien	*f->posns[1] = 0;
14885587Sobrien	f->initstat = makeinit(f, anchor);
14985587Sobrien	f->anchor = anchor;
15085587Sobrien	f->restr = (uschar *) tostring(s);
15185587Sobrien	return f;
15285587Sobrien}
15385587Sobrien
15485587Sobrienint makeinit(fa *f, int anchor)
15585587Sobrien{
15685587Sobrien	int i, k;
15785587Sobrien
15885587Sobrien	f->curstat = 2;
15985587Sobrien	f->out[2] = 0;
16085587Sobrien	f->reset = 0;
16185587Sobrien	k = *(f->re[0].lfollow);
16285587Sobrien	xfree(f->posns[2]);
163315581Spfg	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
16485587Sobrien		overflo("out of space in makeinit");
16585587Sobrien	for (i=0; i <= k; i++) {
16685587Sobrien		(f->posns[2])[i] = (f->re[0].lfollow)[i];
16785587Sobrien	}
16885587Sobrien	if ((f->posns[2])[1] == f->accept)
16985587Sobrien		f->out[2] = 1;
17085587Sobrien	for (i=0; i < NCHARS; i++)
17185587Sobrien		f->gototab[2][i] = 0;
17285587Sobrien	f->curstat = cgoto(f, 2, HAT);
17385587Sobrien	if (anchor) {
17485587Sobrien		*f->posns[2] = k-1;	/* leave out position 0 */
17585587Sobrien		for (i=0; i < k; i++) {
17685587Sobrien			(f->posns[0])[i] = (f->posns[2])[i];
17785587Sobrien		}
17885587Sobrien
17985587Sobrien		f->out[0] = f->out[2];
18085587Sobrien		if (f->curstat != 2)
18185587Sobrien			--(*f->posns[f->curstat]);
18285587Sobrien	}
18385587Sobrien	return f->curstat;
18485587Sobrien}
18585587Sobrien
18685587Sobrienvoid penter(Node *p)	/* set up parent pointers and leaf indices */
18785587Sobrien{
18885587Sobrien	switch (type(p)) {
189170331Srafan	ELEAF
19085587Sobrien	LEAF
19185587Sobrien		info(p) = poscnt;
19285587Sobrien		poscnt++;
19385587Sobrien		break;
19485587Sobrien	UNARY
19585587Sobrien		penter(left(p));
19685587Sobrien		parent(left(p)) = p;
19785587Sobrien		break;
19885587Sobrien	case CAT:
19985587Sobrien	case OR:
20085587Sobrien		penter(left(p));
20185587Sobrien		penter(right(p));
20285587Sobrien		parent(left(p)) = p;
20385587Sobrien		parent(right(p)) = p;
20485587Sobrien		break;
20585587Sobrien	default:	/* can't happen */
20685587Sobrien		FATAL("can't happen: unknown type %d in penter", type(p));
20785587Sobrien		break;
20885587Sobrien	}
20985587Sobrien}
21085587Sobrien
21185587Sobrienvoid freetr(Node *p)	/* free parse tree */
21285587Sobrien{
21385587Sobrien	switch (type(p)) {
214170331Srafan	ELEAF
21585587Sobrien	LEAF
21685587Sobrien		xfree(p);
21785587Sobrien		break;
21885587Sobrien	UNARY
21985587Sobrien		freetr(left(p));
22085587Sobrien		xfree(p);
22185587Sobrien		break;
22285587Sobrien	case CAT:
22385587Sobrien	case OR:
22485587Sobrien		freetr(left(p));
22585587Sobrien		freetr(right(p));
22685587Sobrien		xfree(p);
22785587Sobrien		break;
22885587Sobrien	default:	/* can't happen */
22985587Sobrien		FATAL("can't happen: unknown type %d in freetr", type(p));
23085587Sobrien		break;
23185587Sobrien	}
23285587Sobrien}
23385587Sobrien
23485587Sobrien/* in the parsing of regular expressions, metacharacters like . have */
23585587Sobrien/* to be seen literally;  \056 is not a metacharacter. */
23685587Sobrien
237224731Sruint hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
23885587Sobrien{			/* only pick up one 8-bit byte (2 chars) */
23985587Sobrien	uschar *p;
24085587Sobrien	int n = 0;
24185587Sobrien	int i;
24285587Sobrien
24385587Sobrien	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
24485587Sobrien		if (isdigit(*p))
24585587Sobrien			n = 16 * n + *p - '0';
24685587Sobrien		else if (*p >= 'a' && *p <= 'f')
24785587Sobrien			n = 16 * n + *p - 'a' + 10;
24885587Sobrien		else if (*p >= 'A' && *p <= 'F')
24985587Sobrien			n = 16 * n + *p - 'A' + 10;
25085587Sobrien	}
251224731Sru	*pp = (uschar *) p;
25285587Sobrien	return n;
25385587Sobrien}
25485587Sobrien
25585587Sobrien#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
25685587Sobrien
257224731Sruint quoted(uschar **pp)	/* pick up next thing after a \\ */
25885587Sobrien			/* and increment *pp */
25985587Sobrien{
260224731Sru	uschar *p = *pp;
26185587Sobrien	int c;
26285587Sobrien
26385587Sobrien	if ((c = *p++) == 't')
26485587Sobrien		c = '\t';
26585587Sobrien	else if (c == 'n')
26685587Sobrien		c = '\n';
26785587Sobrien	else if (c == 'f')
26885587Sobrien		c = '\f';
26985587Sobrien	else if (c == 'r')
27085587Sobrien		c = '\r';
27185587Sobrien	else if (c == 'b')
27285587Sobrien		c = '\b';
27385587Sobrien	else if (c == '\\')
27485587Sobrien		c = '\\';
27585587Sobrien	else if (c == 'x') {	/* hexadecimal goo follows */
27685587Sobrien		c = hexstr(&p);	/* this adds a null if number is invalid */
27785587Sobrien	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
27885587Sobrien		int n = c - '0';
27985587Sobrien		if (isoctdigit(*p)) {
28085587Sobrien			n = 8 * n + *p++ - '0';
28185587Sobrien			if (isoctdigit(*p))
28285587Sobrien				n = 8 * n + *p++ - '0';
28385587Sobrien		}
28485587Sobrien		c = n;
28585587Sobrien	} /* else */
28685587Sobrien		/* c = c; */
28785587Sobrien	*pp = p;
28885587Sobrien	return c;
28985587Sobrien}
29085587Sobrien
291201989Srustatic int collate_range_cmp(int a, int b)
292201989Sru{
293201989Sru	static char s[2][2];
294201989Sru
295201989Sru	if ((uschar)a == (uschar)b)
296201989Sru		return 0;
297201989Sru	s[0][0] = a;
298201989Sru	s[1][0] = b;
299201989Sru	return (strcoll(s[0], s[1]));
300201989Sru}
301201989Sru
302107806Sobrienchar *cclenter(const char *argp)	/* add a character class */
303107806Sobrien{
30485587Sobrien	int i, c, c2;
305201989Sru	int j;
30685587Sobrien	uschar *p = (uschar *) argp;
30785587Sobrien	uschar *op, *bp;
308301289Spfg	static uschar *buf = NULL;
30985587Sobrien	static int bufsz = 100;
31085587Sobrien
31185587Sobrien	op = p;
312301289Spfg	if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
31385587Sobrien		FATAL("out of space for character class [%.10s...] 1", p);
31485587Sobrien	bp = buf;
31585587Sobrien	for (i = 0; (c = *p++) != 0; ) {
31685587Sobrien		if (c == '\\') {
317224731Sru			c = quoted(&p);
31885587Sobrien		} else if (c == '-' && i > 0 && bp[-1] != 0) {
31985587Sobrien			if (*p != 0) {
32085587Sobrien				c = bp[-1];
32185587Sobrien				c2 = *p++;
32285587Sobrien				if (c2 == '\\')
323224731Sru					c2 = quoted(&p);
324201989Sru				if (collate_range_cmp(c, c2) > 0) {
32585587Sobrien					bp--;
32685587Sobrien					i--;
32785587Sobrien					continue;
32885587Sobrien				}
329201989Sru				for (j = 0; j < NCHARS; j++) {
330201989Sru					if ((collate_range_cmp(c, j) > 0) ||
331201989Sru					    collate_range_cmp(j, c2) > 0)
332201989Sru						continue;
333170331Srafan					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
33485587Sobrien						FATAL("out of space for character class [%.10s...] 2", p);
335201989Sru					*bp++ = j;
33685587Sobrien					i++;
33785587Sobrien				}
33885587Sobrien				continue;
33985587Sobrien			}
34085587Sobrien		}
341170331Srafan		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
34285587Sobrien			FATAL("out of space for character class [%.10s...] 3", p);
34385587Sobrien		*bp++ = c;
34485587Sobrien		i++;
34585587Sobrien	}
34685587Sobrien	*bp = 0;
34785587Sobrien	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
34885587Sobrien	xfree(op);
34985587Sobrien	return (char *) tostring((char *) buf);
35085587Sobrien}
35185587Sobrien
352107806Sobrienvoid overflo(const char *s)
35385587Sobrien{
35485587Sobrien	FATAL("regular expression too big: %.30s...", s);
35585587Sobrien}
35685587Sobrien
35785587Sobrienvoid cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
35885587Sobrien{
35985587Sobrien	int i;
36085587Sobrien	int *p;
36185587Sobrien
36285587Sobrien	switch (type(v)) {
363170331Srafan	ELEAF
36485587Sobrien	LEAF
36585587Sobrien		f->re[info(v)].ltype = type(v);
36685587Sobrien		f->re[info(v)].lval.np = right(v);
36785587Sobrien		while (f->accept >= maxsetvec) {	/* guessing here! */
36885587Sobrien			maxsetvec *= 4;
36985587Sobrien			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
37085587Sobrien			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
371301289Spfg			if (setvec == NULL || tmpset == NULL)
37285587Sobrien				overflo("out of space in cfoll()");
37385587Sobrien		}
37485587Sobrien		for (i = 0; i <= f->accept; i++)
37585587Sobrien			setvec[i] = 0;
37685587Sobrien		setcnt = 0;
37785587Sobrien		follow(v);	/* computes setvec and setcnt */
378315581Spfg		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
37985587Sobrien			overflo("out of space building follow set");
38085587Sobrien		f->re[info(v)].lfollow = p;
38185587Sobrien		*p = setcnt;
38285587Sobrien		for (i = f->accept; i >= 0; i--)
38385587Sobrien			if (setvec[i] == 1)
38485587Sobrien				*++p = i;
38585587Sobrien		break;
38685587Sobrien	UNARY
38785587Sobrien		cfoll(f,left(v));
38885587Sobrien		break;
38985587Sobrien	case CAT:
39085587Sobrien	case OR:
39185587Sobrien		cfoll(f,left(v));
39285587Sobrien		cfoll(f,right(v));
39385587Sobrien		break;
39485587Sobrien	default:	/* can't happen */
39585587Sobrien		FATAL("can't happen: unknown type %d in cfoll", type(v));
39685587Sobrien	}
39785587Sobrien}
39885587Sobrien
39985587Sobrienint first(Node *p)	/* collects initially active leaves of p into setvec */
400170331Srafan			/* returns 0 if p matches empty string */
40185587Sobrien{
40285587Sobrien	int b, lp;
40385587Sobrien
40485587Sobrien	switch (type(p)) {
405170331Srafan	ELEAF
40685587Sobrien	LEAF
40785587Sobrien		lp = info(p);	/* look for high-water mark of subscripts */
40885587Sobrien		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
40985587Sobrien			maxsetvec *= 4;
41085587Sobrien			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
41185587Sobrien			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
412301289Spfg			if (setvec == NULL || tmpset == NULL)
41385587Sobrien				overflo("out of space in first()");
41485587Sobrien		}
415170331Srafan		if (type(p) == EMPTYRE) {
416170331Srafan			setvec[lp] = 0;
417170331Srafan			return(0);
418170331Srafan		}
41985587Sobrien		if (setvec[lp] != 1) {
42085587Sobrien			setvec[lp] = 1;
42185587Sobrien			setcnt++;
42285587Sobrien		}
42385587Sobrien		if (type(p) == CCL && (*(char *) right(p)) == '\0')
42485587Sobrien			return(0);		/* empty CCL */
42585587Sobrien		else return(1);
42685587Sobrien	case PLUS:
42785587Sobrien		if (first(left(p)) == 0) return(0);
42885587Sobrien		return(1);
42985587Sobrien	case STAR:
43085587Sobrien	case QUEST:
43185587Sobrien		first(left(p));
43285587Sobrien		return(0);
43385587Sobrien	case CAT:
43485587Sobrien		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
43585587Sobrien		return(1);
43685587Sobrien	case OR:
43785587Sobrien		b = first(right(p));
43885587Sobrien		if (first(left(p)) == 0 || b == 0) return(0);
43985587Sobrien		return(1);
44085587Sobrien	}
44185587Sobrien	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
44285587Sobrien	return(-1);
44385587Sobrien}
44485587Sobrien
44585587Sobrienvoid follow(Node *v)	/* collects leaves that can follow v into setvec */
44685587Sobrien{
44785587Sobrien	Node *p;
44885587Sobrien
44985587Sobrien	if (type(v) == FINAL)
45085587Sobrien		return;
45185587Sobrien	p = parent(v);
45285587Sobrien	switch (type(p)) {
45385587Sobrien	case STAR:
45485587Sobrien	case PLUS:
45585587Sobrien		first(v);
45685587Sobrien		follow(p);
45785587Sobrien		return;
45885587Sobrien
45985587Sobrien	case OR:
46085587Sobrien	case QUEST:
46185587Sobrien		follow(p);
46285587Sobrien		return;
46385587Sobrien
46485587Sobrien	case CAT:
46585587Sobrien		if (v == left(p)) {	/* v is left child of p */
46685587Sobrien			if (first(right(p)) == 0) {
46785587Sobrien				follow(p);
46885587Sobrien				return;
46985587Sobrien			}
47085587Sobrien		} else		/* v is right child */
47185587Sobrien			follow(p);
47285587Sobrien		return;
47385587Sobrien	}
47485587Sobrien}
47585587Sobrien
476107806Sobrienint member(int c, const char *sarg)	/* is c in s? */
47785587Sobrien{
47885587Sobrien	uschar *s = (uschar *) sarg;
47985587Sobrien
48085587Sobrien	while (*s)
48185587Sobrien		if (c == *s++)
48285587Sobrien			return(1);
48385587Sobrien	return(0);
48485587Sobrien}
48585587Sobrien
486107806Sobrienint match(fa *f, const char *p0)	/* shortest match ? */
48785587Sobrien{
48885587Sobrien	int s, ns;
48985587Sobrien	uschar *p = (uschar *) p0;
49085587Sobrien
49185587Sobrien	s = f->reset ? makeinit(f,0) : f->initstat;
49285587Sobrien	if (f->out[s])
49385587Sobrien		return(1);
49485587Sobrien	do {
495170331Srafan		/* assert(*p < NCHARS); */
49685587Sobrien		if ((ns = f->gototab[s][*p]) != 0)
49785587Sobrien			s = ns;
49885587Sobrien		else
49985587Sobrien			s = cgoto(f, s, *p);
50085587Sobrien		if (f->out[s])
50185587Sobrien			return(1);
50285587Sobrien	} while (*p++ != 0);
50385587Sobrien	return(0);
50485587Sobrien}
50585587Sobrien
506107806Sobrienint pmatch(fa *f, const char *p0)	/* longest match, for sub */
50785587Sobrien{
50885587Sobrien	int s, ns;
50985587Sobrien	uschar *p = (uschar *) p0;
51085587Sobrien	uschar *q;
51185587Sobrien	int i, k;
51285587Sobrien
513125601Sru	/* s = f->reset ? makeinit(f,1) : f->initstat; */
514125601Sru	if (f->reset) {
515125601Sru		f->initstat = s = makeinit(f,1);
516125601Sru	} else {
517125601Sru		s = f->initstat;
518125601Sru	}
51985587Sobrien	patbeg = (char *) p;
52085587Sobrien	patlen = -1;
52185587Sobrien	do {
52285587Sobrien		q = p;
52385587Sobrien		do {
52485587Sobrien			if (f->out[s])		/* final state */
52585587Sobrien				patlen = q-p;
526170331Srafan			/* assert(*q < NCHARS); */
52785587Sobrien			if ((ns = f->gototab[s][*q]) != 0)
52885587Sobrien				s = ns;
52985587Sobrien			else
53085587Sobrien				s = cgoto(f, s, *q);
53185587Sobrien			if (s == 1) {	/* no transition */
53285587Sobrien				if (patlen >= 0) {
53385587Sobrien					patbeg = (char *) p;
53485587Sobrien					return(1);
53585587Sobrien				}
53685587Sobrien				else
53785587Sobrien					goto nextin;	/* no match */
53885587Sobrien			}
53985587Sobrien		} while (*q++ != 0);
54085587Sobrien		if (f->out[s])
54185587Sobrien			patlen = q-p-1;	/* don't count $ */
54285587Sobrien		if (patlen >= 0) {
54385587Sobrien			patbeg = (char *) p;
54485587Sobrien			return(1);
54585587Sobrien		}
54685587Sobrien	nextin:
54785587Sobrien		s = 2;
54885587Sobrien		if (f->reset) {
54985587Sobrien			for (i = 2; i <= f->curstat; i++)
55085587Sobrien				xfree(f->posns[i]);
55185587Sobrien			k = *f->posns[0];
552315581Spfg			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
55385587Sobrien				overflo("out of space in pmatch");
55485587Sobrien			for (i = 0; i <= k; i++)
55585587Sobrien				(f->posns[2])[i] = (f->posns[0])[i];
55685587Sobrien			f->initstat = f->curstat = 2;
55785587Sobrien			f->out[2] = f->out[0];
55885587Sobrien			for (i = 0; i < NCHARS; i++)
55985587Sobrien				f->gototab[2][i] = 0;
56085587Sobrien		}
56185587Sobrien	} while (*p++ != 0);
56285587Sobrien	return (0);
56385587Sobrien}
56485587Sobrien
565107806Sobrienint nematch(fa *f, const char *p0)	/* non-empty match, for sub */
56685587Sobrien{
56785587Sobrien	int s, ns;
56885587Sobrien	uschar *p = (uschar *) p0;
56985587Sobrien	uschar *q;
57085587Sobrien	int i, k;
57185587Sobrien
572125601Sru	/* s = f->reset ? makeinit(f,1) : f->initstat; */
573125601Sru	if (f->reset) {
574125601Sru		f->initstat = s = makeinit(f,1);
575125601Sru	} else {
576125601Sru		s = f->initstat;
577125601Sru	}
57885587Sobrien	patlen = -1;
57985587Sobrien	while (*p) {
58085587Sobrien		q = p;
58185587Sobrien		do {
58285587Sobrien			if (f->out[s])		/* final state */
58385587Sobrien				patlen = q-p;
584170331Srafan			/* assert(*q < NCHARS); */
58585587Sobrien			if ((ns = f->gototab[s][*q]) != 0)
58685587Sobrien				s = ns;
58785587Sobrien			else
58885587Sobrien				s = cgoto(f, s, *q);
58985587Sobrien			if (s == 1) {	/* no transition */
59085587Sobrien				if (patlen > 0) {
59185587Sobrien					patbeg = (char *) p;
59285587Sobrien					return(1);
59385587Sobrien				} else
59485587Sobrien					goto nnextin;	/* no nonempty match */
59585587Sobrien			}
59685587Sobrien		} while (*q++ != 0);
59785587Sobrien		if (f->out[s])
59885587Sobrien			patlen = q-p-1;	/* don't count $ */
59985587Sobrien		if (patlen > 0 ) {
60085587Sobrien			patbeg = (char *) p;
60185587Sobrien			return(1);
60285587Sobrien		}
60385587Sobrien	nnextin:
60485587Sobrien		s = 2;
60585587Sobrien		if (f->reset) {
60685587Sobrien			for (i = 2; i <= f->curstat; i++)
60785587Sobrien				xfree(f->posns[i]);
60885587Sobrien			k = *f->posns[0];
609315581Spfg			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
61085587Sobrien				overflo("out of state space");
61185587Sobrien			for (i = 0; i <= k; i++)
61285587Sobrien				(f->posns[2])[i] = (f->posns[0])[i];
61385587Sobrien			f->initstat = f->curstat = 2;
61485587Sobrien			f->out[2] = f->out[0];
61585587Sobrien			for (i = 0; i < NCHARS; i++)
61685587Sobrien				f->gototab[2][i] = 0;
61785587Sobrien		}
61885587Sobrien		p++;
61985587Sobrien	}
62085587Sobrien	return (0);
62185587Sobrien}
62285587Sobrien
623107806SobrienNode *reparse(const char *p)	/* parses regular expression pointed to by p */
62485587Sobrien{			/* uses relex() to scan regular expression */
62585587Sobrien	Node *np;
62685587Sobrien
62785587Sobrien	dprintf( ("reparse <%s>\n", p) );
62885587Sobrien	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
62985587Sobrien	rtok = relex();
630107806Sobrien	/* GNU compatibility: an empty regexp matches anything */
631170331Srafan	if (rtok == '\0') {
632107806Sobrien		/* FATAL("empty regular expression"); previous */
633170331Srafan		return(op2(EMPTYRE, NIL, NIL));
634170331Srafan	}
63585587Sobrien	np = regexp();
63685587Sobrien	if (rtok != '\0')
63785587Sobrien		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
63885587Sobrien	return(np);
63985587Sobrien}
64085587Sobrien
64185587SobrienNode *regexp(void)	/* top-level parse of reg expr */
64285587Sobrien{
64385587Sobrien	return (alt(concat(primary())));
64485587Sobrien}
64585587Sobrien
64685587SobrienNode *primary(void)
64785587Sobrien{
64885587Sobrien	Node *np;
64985587Sobrien
65085587Sobrien	switch (rtok) {
65185587Sobrien	case CHAR:
65285587Sobrien		np = op2(CHAR, NIL, itonp(rlxval));
65385587Sobrien		rtok = relex();
65485587Sobrien		return (unary(np));
65585587Sobrien	case ALL:
65685587Sobrien		rtok = relex();
65785587Sobrien		return (unary(op2(ALL, NIL, NIL)));
658170331Srafan	case EMPTYRE:
659170331Srafan		rtok = relex();
660170331Srafan		return (unary(op2(ALL, NIL, NIL)));
66185587Sobrien	case DOT:
66285587Sobrien		rtok = relex();
66385587Sobrien		return (unary(op2(DOT, NIL, NIL)));
66485587Sobrien	case CCL:
66585587Sobrien		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
66685587Sobrien		rtok = relex();
66785587Sobrien		return (unary(np));
66885587Sobrien	case NCCL:
66985587Sobrien		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
67085587Sobrien		rtok = relex();
67185587Sobrien		return (unary(np));
67285587Sobrien	case '^':
67385587Sobrien		rtok = relex();
67485587Sobrien		return (unary(op2(CHAR, NIL, itonp(HAT))));
67585587Sobrien	case '$':
67685587Sobrien		rtok = relex();
67785587Sobrien		return (unary(op2(CHAR, NIL, NIL)));
67885587Sobrien	case '(':
67985587Sobrien		rtok = relex();
68085587Sobrien		if (rtok == ')') {	/* special pleading for () */
68185587Sobrien			rtok = relex();
68285587Sobrien			return unary(op2(CCL, NIL, (Node *) tostring("")));
68385587Sobrien		}
68485587Sobrien		np = regexp();
68585587Sobrien		if (rtok == ')') {
68685587Sobrien			rtok = relex();
68785587Sobrien			return (unary(np));
68885587Sobrien		}
68985587Sobrien		else
69085587Sobrien			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
69185587Sobrien	default:
69285587Sobrien		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
69385587Sobrien	}
69485587Sobrien	return 0;	/*NOTREACHED*/
69585587Sobrien}
69685587Sobrien
69785587SobrienNode *concat(Node *np)
69885587Sobrien{
69985587Sobrien	switch (rtok) {
700170331Srafan	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
70185587Sobrien		return (concat(op2(CAT, np, primary())));
70285587Sobrien	}
70385587Sobrien	return (np);
70485587Sobrien}
70585587Sobrien
70685587SobrienNode *alt(Node *np)
70785587Sobrien{
70885587Sobrien	if (rtok == OR) {
70985587Sobrien		rtok = relex();
71085587Sobrien		return (alt(op2(OR, np, concat(primary()))));
71185587Sobrien	}
71285587Sobrien	return (np);
71385587Sobrien}
71485587Sobrien
71585587SobrienNode *unary(Node *np)
71685587Sobrien{
71785587Sobrien	switch (rtok) {
71885587Sobrien	case STAR:
71985587Sobrien		rtok = relex();
72085587Sobrien		return (unary(op2(STAR, np, NIL)));
72185587Sobrien	case PLUS:
72285587Sobrien		rtok = relex();
72385587Sobrien		return (unary(op2(PLUS, np, NIL)));
72485587Sobrien	case QUEST:
72585587Sobrien		rtok = relex();
72685587Sobrien		return (unary(op2(QUEST, np, NIL)));
72785587Sobrien	default:
72885587Sobrien		return (np);
72985587Sobrien	}
73085587Sobrien}
73185587Sobrien
73290902Sdes/*
73390902Sdes * Character class definitions conformant to the POSIX locale as
73490902Sdes * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
73590902Sdes * and operating character sets are both ASCII (ISO646) or supersets
73690902Sdes * thereof.
73790902Sdes *
73890902Sdes * Note that to avoid overflowing the temporary buffer used in
73990902Sdes * relex(), the expanded character class (prior to range expansion)
74090902Sdes * must be less than twice the size of their full name.
74190902Sdes */
742112336Sobrien
743112336Sobrien/* Because isblank doesn't show up in any of the header files on any
744112336Sobrien * system i use, it's defined here.  if some other locale has a richer
745112336Sobrien * definition of "blank", define HAS_ISBLANK and provide your own
746112336Sobrien * version.
747118194Sru * the parentheses here are an attempt to find a path through the maze
748118194Sru * of macro definition and/or function and/or version provided.  thanks
749118194Sru * to nelson beebe for the suggestion; let's see if it works everywhere.
750112336Sobrien */
751112336Sobrien
752201951Sru/* #define HAS_ISBLANK */
753112336Sobrien#ifndef HAS_ISBLANK
754112336Sobrien
755221381Sruint (xisblank)(int c)
756112336Sobrien{
757112336Sobrien	return c==' ' || c=='\t';
758112336Sobrien}
759112336Sobrien
760112336Sobrien#endif
761112336Sobrien
76290902Sdesstruct charclass {
76390902Sdes	const char *cc_name;
76490902Sdes	int cc_namelen;
765112336Sobrien	int (*cc_func)(int);
76690902Sdes} charclasses[] = {
767112336Sobrien	{ "alnum",	5,	isalnum },
768112336Sobrien	{ "alpha",	5,	isalpha },
769221381Sru#ifndef HAS_ISBLANK
770221381Sru	{ "blank",	5,	isspace }, /* was isblank */
771221381Sru#else
772112336Sobrien	{ "blank",	5,	isblank },
773221381Sru#endif
774112336Sobrien	{ "cntrl",	5,	iscntrl },
775112336Sobrien	{ "digit",	5,	isdigit },
776112336Sobrien	{ "graph",	5,	isgraph },
777112336Sobrien	{ "lower",	5,	islower },
778112336Sobrien	{ "print",	5,	isprint },
779112336Sobrien	{ "punct",	5,	ispunct },
780112336Sobrien	{ "space",	5,	isspace },
781112336Sobrien	{ "upper",	5,	isupper },
782112336Sobrien	{ "xdigit",	6,	isxdigit },
78390902Sdes	{ NULL,		0,	NULL },
78490902Sdes};
78590902Sdes
78690902Sdes
78785587Sobrienint relex(void)		/* lexical analyzer for reparse */
78885587Sobrien{
78985587Sobrien	int c, n;
79085587Sobrien	int cflag;
791301289Spfg	static uschar *buf = NULL;
79285587Sobrien	static int bufsz = 100;
79385587Sobrien	uschar *bp;
79490902Sdes	struct charclass *cc;
795112336Sobrien	int i;
79685587Sobrien
79785587Sobrien	switch (c = *prestr++) {
79885587Sobrien	case '|': return OR;
79985587Sobrien	case '*': return STAR;
80085587Sobrien	case '+': return PLUS;
80185587Sobrien	case '?': return QUEST;
80285587Sobrien	case '.': return DOT;
80385587Sobrien	case '\0': prestr--; return '\0';
80485587Sobrien	case '^':
80585587Sobrien	case '$':
80685587Sobrien	case '(':
80785587Sobrien	case ')':
80885587Sobrien		return c;
80985587Sobrien	case '\\':
810224731Sru		rlxval = quoted(&prestr);
81185587Sobrien		return CHAR;
81285587Sobrien	default:
81385587Sobrien		rlxval = c;
81485587Sobrien		return CHAR;
81585587Sobrien	case '[':
816301289Spfg		if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
81785587Sobrien			FATAL("out of space in reg expr %.10s..", lastre);
81885587Sobrien		bp = buf;
81985587Sobrien		if (*prestr == '^') {
82085587Sobrien			cflag = 1;
82185587Sobrien			prestr++;
82285587Sobrien		}
82385587Sobrien		else
82485587Sobrien			cflag = 0;
82590902Sdes		n = 2 * strlen((const char *) prestr)+1;
826170331Srafan		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
82785587Sobrien			FATAL("out of space for reg expr %.10s...", lastre);
82885587Sobrien		for (; ; ) {
82985587Sobrien			if ((c = *prestr++) == '\\') {
83085587Sobrien				*bp++ = '\\';
83185587Sobrien				if ((c = *prestr++) == '\0')
83285587Sobrien					FATAL("nonterminated character class %.20s...", lastre);
83385587Sobrien				*bp++ = c;
83485587Sobrien			/* } else if (c == '\n') { */
83585587Sobrien			/* 	FATAL("newline in character class %.20s...", lastre); */
83690902Sdes			} else if (c == '[' && *prestr == ':') {
83790902Sdes				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
83890902Sdes				for (cc = charclasses; cc->cc_name; cc++)
83990902Sdes					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
84090902Sdes						break;
84190902Sdes				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
84290902Sdes				    prestr[2 + cc->cc_namelen] == ']') {
84390902Sdes					prestr += cc->cc_namelen + 3;
844305448Sache					for (i = 1; i < NCHARS; i++) {
845170331Srafan						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
846112336Sobrien						    FATAL("out of space for reg expr %.10s...", lastre);
847112336Sobrien						if (cc->cc_func(i)) {
848112336Sobrien							*bp++ = i;
849112336Sobrien							n++;
850112336Sobrien						}
851112336Sobrien					}
85290902Sdes				} else
85390902Sdes					*bp++ = c;
85485587Sobrien			} else if (c == '\0') {
85585587Sobrien				FATAL("nonterminated character class %.20s", lastre);
85685587Sobrien			} else if (bp == buf) {	/* 1st char is special */
85785587Sobrien				*bp++ = c;
85885587Sobrien			} else if (c == ']') {
85985587Sobrien				*bp++ = 0;
86085587Sobrien				rlxstr = (uschar *) tostring((char *) buf);
86185587Sobrien				if (cflag == 0)
86285587Sobrien					return CCL;
86385587Sobrien				else
86485587Sobrien					return NCCL;
86585587Sobrien			} else
86685587Sobrien				*bp++ = c;
86785587Sobrien		}
86885587Sobrien	}
86985587Sobrien}
87085587Sobrien
87185587Sobrienint cgoto(fa *f, int s, int c)
87285587Sobrien{
87385587Sobrien	int i, j, k;
87485587Sobrien	int *p, *q;
87585587Sobrien
876146299Sru	assert(c == HAT || c < NCHARS);
87785587Sobrien	while (f->accept >= maxsetvec) {	/* guessing here! */
87885587Sobrien		maxsetvec *= 4;
87985587Sobrien		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
88085587Sobrien		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
881301289Spfg		if (setvec == NULL || tmpset == NULL)
88285587Sobrien			overflo("out of space in cgoto()");
88385587Sobrien	}
88485587Sobrien	for (i = 0; i <= f->accept; i++)
88585587Sobrien		setvec[i] = 0;
88685587Sobrien	setcnt = 0;
88785587Sobrien	/* compute positions of gototab[s,c] into setvec */
88885587Sobrien	p = f->posns[s];
88985587Sobrien	for (i = 1; i <= *p; i++) {
89085587Sobrien		if ((k = f->re[p[i]].ltype) != FINAL) {
89185587Sobrien			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
89285587Sobrien			 || (k == DOT && c != 0 && c != HAT)
89385587Sobrien			 || (k == ALL && c != 0)
894170331Srafan			 || (k == EMPTYRE && c != 0)
89585587Sobrien			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
89685587Sobrien			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
89785587Sobrien				q = f->re[p[i]].lfollow;
89885587Sobrien				for (j = 1; j <= *q; j++) {
89985587Sobrien					if (q[j] >= maxsetvec) {
90085587Sobrien						maxsetvec *= 4;
90185587Sobrien						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
902201951Sru						tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
903301289Spfg						if (setvec == NULL || tmpset == NULL)
90485587Sobrien							overflo("cgoto overflow");
90585587Sobrien					}
90685587Sobrien					if (setvec[q[j]] == 0) {
90785587Sobrien						setcnt++;
90885587Sobrien						setvec[q[j]] = 1;
90985587Sobrien					}
91085587Sobrien				}
91185587Sobrien			}
91285587Sobrien		}
91385587Sobrien	}
91485587Sobrien	/* determine if setvec is a previous state */
91585587Sobrien	tmpset[0] = setcnt;
91685587Sobrien	j = 1;
91785587Sobrien	for (i = f->accept; i >= 0; i--)
91885587Sobrien		if (setvec[i]) {
91985587Sobrien			tmpset[j++] = i;
92085587Sobrien		}
92185587Sobrien	/* tmpset == previous state? */
92285587Sobrien	for (i = 1; i <= f->curstat; i++) {
92385587Sobrien		p = f->posns[i];
92485587Sobrien		if ((k = tmpset[0]) != p[0])
92585587Sobrien			goto different;
92685587Sobrien		for (j = 1; j <= k; j++)
92785587Sobrien			if (tmpset[j] != p[j])
92885587Sobrien				goto different;
92985587Sobrien		/* setvec is state i */
93085587Sobrien		f->gototab[s][c] = i;
93185587Sobrien		return i;
93285587Sobrien	  different:;
93385587Sobrien	}
93485587Sobrien
93585587Sobrien	/* add tmpset to current set of states */
93685587Sobrien	if (f->curstat >= NSTATES-1) {
93785587Sobrien		f->curstat = 2;
93885587Sobrien		f->reset = 1;
93985587Sobrien		for (i = 2; i < NSTATES; i++)
94085587Sobrien			xfree(f->posns[i]);
94185587Sobrien	} else
94285587Sobrien		++(f->curstat);
94385587Sobrien	for (i = 0; i < NCHARS; i++)
94485587Sobrien		f->gototab[f->curstat][i] = 0;
94585587Sobrien	xfree(f->posns[f->curstat]);
946315581Spfg	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
94785587Sobrien		overflo("out of space in cgoto");
94885587Sobrien
94985587Sobrien	f->posns[f->curstat] = p;
95085587Sobrien	f->gototab[s][c] = f->curstat;
95185587Sobrien	for (i = 0; i <= setcnt; i++)
95285587Sobrien		p[i] = tmpset[i];
95385587Sobrien	if (setvec[f->accept])
95485587Sobrien		f->out[f->curstat] = 1;
95585587Sobrien	else
95685587Sobrien		f->out[f->curstat] = 0;
95785587Sobrien	return f->curstat;
95885587Sobrien}
95985587Sobrien
96085587Sobrien
96185587Sobrienvoid freefa(fa *f)	/* free a finite automaton */
96285587Sobrien{
96385587Sobrien	int i;
96485587Sobrien
96585587Sobrien	if (f == NULL)
96685587Sobrien		return;
96785587Sobrien	for (i = 0; i <= f->curstat; i++)
96885587Sobrien		xfree(f->posns[i]);
96985587Sobrien	for (i = 0; i <= f->accept; i++) {
97085587Sobrien		xfree(f->re[i].lfollow);
97185587Sobrien		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
97285587Sobrien			xfree((f->re[i].lval.np));
97385587Sobrien	}
97485587Sobrien	xfree(f->restr);
97585587Sobrien	xfree(f);
97685587Sobrien}
977