1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25/* lasciate ogne speranza, voi ch'intrate. */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: stable/11/contrib/one-true-awk/b.c 315581 2017-03-19 20:04:23Z pfg $");
29
30#define	DEBUG
31
32#include <ctype.h>
33#include <stdio.h>
34#include <string.h>
35#include <stdlib.h>
36#include "awk.h"
37#include "ytab.h"
38
39#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
40				/* NCHARS is 2**n */
41#define MAXLIN 22
42
43#define type(v)		(v)->nobj	/* badly overloaded here */
44#define info(v)		(v)->ntype	/* badly overloaded here */
45#define left(v)		(v)->narg[0]
46#define right(v)	(v)->narg[1]
47#define parent(v)	(v)->nnext
48
49#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
50#define ELEAF	case EMPTYRE:		/* empty string in regexp */
51#define UNARY	case STAR: case PLUS: case QUEST:
52
53/* encoding in tree Nodes:
54	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
55		left is index, right contains value or pointer to value
56	unary (STAR, PLUS, QUEST): left is child, right is null
57	binary (CAT, OR): left and right are children
58	parent contains pointer to parent
59*/
60
61
62int	*setvec;
63int	*tmpset;
64int	maxsetvec = 0;
65
66int	rtok;		/* next token in current re */
67int	rlxval;
68static uschar	*rlxstr;
69static uschar	*prestr;	/* current position in current re */
70static uschar	*lastre;	/* origin of last re */
71
72static	int setcnt;
73static	int poscnt;
74
75char	*patbeg;
76int	patlen;
77
78#define	NFA	20	/* cache this many dynamic fa's */
79fa	*fatab[NFA];
80int	nfatab	= 0;	/* entries in fatab */
81
82fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
83{
84	int i, use, nuse;
85	fa *pfa;
86	static int now = 1;
87
88	if (setvec == NULL) {	/* first time through any RE */
89		maxsetvec = MAXLIN;
90		setvec = (int *) malloc(maxsetvec * sizeof(int));
91		tmpset = (int *) malloc(maxsetvec * sizeof(int));
92		if (setvec == NULL || tmpset == NULL)
93			overflo("out of space initializing makedfa");
94	}
95
96	if (compile_time)	/* a constant for sure */
97		return mkdfa(s, anchor);
98	for (i = 0; i < nfatab; i++)	/* is it there already? */
99		if (fatab[i]->anchor == anchor
100		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
101			fatab[i]->use = now++;
102			return fatab[i];
103		}
104	pfa = mkdfa(s, anchor);
105	if (nfatab < NFA) {	/* room for another */
106		fatab[nfatab] = pfa;
107		fatab[nfatab]->use = now++;
108		nfatab++;
109		return pfa;
110	}
111	use = fatab[0]->use;	/* replace least-recently used */
112	nuse = 0;
113	for (i = 1; i < nfatab; i++)
114		if (fatab[i]->use < use) {
115			use = fatab[i]->use;
116			nuse = i;
117		}
118	freefa(fatab[nuse]);
119	fatab[nuse] = pfa;
120	pfa->use = now++;
121	return pfa;
122}
123
124fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
125				/* anchor = 1 for anchored matches, else 0 */
126{
127	Node *p, *p1;
128	fa *f;
129
130	p = reparse(s);
131	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
132		/* put ALL STAR in front of reg.  exp. */
133	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
134		/* put FINAL after reg.  exp. */
135
136	poscnt = 0;
137	penter(p1);	/* enter parent pointers and leaf indices */
138	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
139		overflo("out of space for fa");
140	f->accept = poscnt-1;	/* penter has computed number of positions in re */
141	cfoll(f, p1);	/* set up follow sets */
142	freetr(p1);
143	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
144			overflo("out of space in makedfa");
145	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
146		overflo("out of space in makedfa");
147	*f->posns[1] = 0;
148	f->initstat = makeinit(f, anchor);
149	f->anchor = anchor;
150	f->restr = (uschar *) tostring(s);
151	return f;
152}
153
154int makeinit(fa *f, int anchor)
155{
156	int i, k;
157
158	f->curstat = 2;
159	f->out[2] = 0;
160	f->reset = 0;
161	k = *(f->re[0].lfollow);
162	xfree(f->posns[2]);
163	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
164		overflo("out of space in makeinit");
165	for (i=0; i <= k; i++) {
166		(f->posns[2])[i] = (f->re[0].lfollow)[i];
167	}
168	if ((f->posns[2])[1] == f->accept)
169		f->out[2] = 1;
170	for (i=0; i < NCHARS; i++)
171		f->gototab[2][i] = 0;
172	f->curstat = cgoto(f, 2, HAT);
173	if (anchor) {
174		*f->posns[2] = k-1;	/* leave out position 0 */
175		for (i=0; i < k; i++) {
176			(f->posns[0])[i] = (f->posns[2])[i];
177		}
178
179		f->out[0] = f->out[2];
180		if (f->curstat != 2)
181			--(*f->posns[f->curstat]);
182	}
183	return f->curstat;
184}
185
186void penter(Node *p)	/* set up parent pointers and leaf indices */
187{
188	switch (type(p)) {
189	ELEAF
190	LEAF
191		info(p) = poscnt;
192		poscnt++;
193		break;
194	UNARY
195		penter(left(p));
196		parent(left(p)) = p;
197		break;
198	case CAT:
199	case OR:
200		penter(left(p));
201		penter(right(p));
202		parent(left(p)) = p;
203		parent(right(p)) = p;
204		break;
205	default:	/* can't happen */
206		FATAL("can't happen: unknown type %d in penter", type(p));
207		break;
208	}
209}
210
211void freetr(Node *p)	/* free parse tree */
212{
213	switch (type(p)) {
214	ELEAF
215	LEAF
216		xfree(p);
217		break;
218	UNARY
219		freetr(left(p));
220		xfree(p);
221		break;
222	case CAT:
223	case OR:
224		freetr(left(p));
225		freetr(right(p));
226		xfree(p);
227		break;
228	default:	/* can't happen */
229		FATAL("can't happen: unknown type %d in freetr", type(p));
230		break;
231	}
232}
233
234/* in the parsing of regular expressions, metacharacters like . have */
235/* to be seen literally;  \056 is not a metacharacter. */
236
237int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
238{			/* only pick up one 8-bit byte (2 chars) */
239	uschar *p;
240	int n = 0;
241	int i;
242
243	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
244		if (isdigit(*p))
245			n = 16 * n + *p - '0';
246		else if (*p >= 'a' && *p <= 'f')
247			n = 16 * n + *p - 'a' + 10;
248		else if (*p >= 'A' && *p <= 'F')
249			n = 16 * n + *p - 'A' + 10;
250	}
251	*pp = (uschar *) p;
252	return n;
253}
254
255#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
256
257int quoted(uschar **pp)	/* pick up next thing after a \\ */
258			/* and increment *pp */
259{
260	uschar *p = *pp;
261	int c;
262
263	if ((c = *p++) == 't')
264		c = '\t';
265	else if (c == 'n')
266		c = '\n';
267	else if (c == 'f')
268		c = '\f';
269	else if (c == 'r')
270		c = '\r';
271	else if (c == 'b')
272		c = '\b';
273	else if (c == '\\')
274		c = '\\';
275	else if (c == 'x') {	/* hexadecimal goo follows */
276		c = hexstr(&p);	/* this adds a null if number is invalid */
277	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
278		int n = c - '0';
279		if (isoctdigit(*p)) {
280			n = 8 * n + *p++ - '0';
281			if (isoctdigit(*p))
282				n = 8 * n + *p++ - '0';
283		}
284		c = n;
285	} /* else */
286		/* c = c; */
287	*pp = p;
288	return c;
289}
290
291static int collate_range_cmp(int a, int b)
292{
293	static char s[2][2];
294
295	if ((uschar)a == (uschar)b)
296		return 0;
297	s[0][0] = a;
298	s[1][0] = b;
299	return (strcoll(s[0], s[1]));
300}
301
302char *cclenter(const char *argp)	/* add a character class */
303{
304	int i, c, c2;
305	int j;
306	uschar *p = (uschar *) argp;
307	uschar *op, *bp;
308	static uschar *buf = NULL;
309	static int bufsz = 100;
310
311	op = p;
312	if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
313		FATAL("out of space for character class [%.10s...] 1", p);
314	bp = buf;
315	for (i = 0; (c = *p++) != 0; ) {
316		if (c == '\\') {
317			c = quoted(&p);
318		} else if (c == '-' && i > 0 && bp[-1] != 0) {
319			if (*p != 0) {
320				c = bp[-1];
321				c2 = *p++;
322				if (c2 == '\\')
323					c2 = quoted(&p);
324				if (collate_range_cmp(c, c2) > 0) {
325					bp--;
326					i--;
327					continue;
328				}
329				for (j = 0; j < NCHARS; j++) {
330					if ((collate_range_cmp(c, j) > 0) ||
331					    collate_range_cmp(j, c2) > 0)
332						continue;
333					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
334						FATAL("out of space for character class [%.10s...] 2", p);
335					*bp++ = j;
336					i++;
337				}
338				continue;
339			}
340		}
341		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
342			FATAL("out of space for character class [%.10s...] 3", p);
343		*bp++ = c;
344		i++;
345	}
346	*bp = 0;
347	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
348	xfree(op);
349	return (char *) tostring((char *) buf);
350}
351
352void overflo(const char *s)
353{
354	FATAL("regular expression too big: %.30s...", s);
355}
356
357void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
358{
359	int i;
360	int *p;
361
362	switch (type(v)) {
363	ELEAF
364	LEAF
365		f->re[info(v)].ltype = type(v);
366		f->re[info(v)].lval.np = right(v);
367		while (f->accept >= maxsetvec) {	/* guessing here! */
368			maxsetvec *= 4;
369			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
370			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
371			if (setvec == NULL || tmpset == NULL)
372				overflo("out of space in cfoll()");
373		}
374		for (i = 0; i <= f->accept; i++)
375			setvec[i] = 0;
376		setcnt = 0;
377		follow(v);	/* computes setvec and setcnt */
378		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
379			overflo("out of space building follow set");
380		f->re[info(v)].lfollow = p;
381		*p = setcnt;
382		for (i = f->accept; i >= 0; i--)
383			if (setvec[i] == 1)
384				*++p = i;
385		break;
386	UNARY
387		cfoll(f,left(v));
388		break;
389	case CAT:
390	case OR:
391		cfoll(f,left(v));
392		cfoll(f,right(v));
393		break;
394	default:	/* can't happen */
395		FATAL("can't happen: unknown type %d in cfoll", type(v));
396	}
397}
398
399int first(Node *p)	/* collects initially active leaves of p into setvec */
400			/* returns 0 if p matches empty string */
401{
402	int b, lp;
403
404	switch (type(p)) {
405	ELEAF
406	LEAF
407		lp = info(p);	/* look for high-water mark of subscripts */
408		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
409			maxsetvec *= 4;
410			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
411			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
412			if (setvec == NULL || tmpset == NULL)
413				overflo("out of space in first()");
414		}
415		if (type(p) == EMPTYRE) {
416			setvec[lp] = 0;
417			return(0);
418		}
419		if (setvec[lp] != 1) {
420			setvec[lp] = 1;
421			setcnt++;
422		}
423		if (type(p) == CCL && (*(char *) right(p)) == '\0')
424			return(0);		/* empty CCL */
425		else return(1);
426	case PLUS:
427		if (first(left(p)) == 0) return(0);
428		return(1);
429	case STAR:
430	case QUEST:
431		first(left(p));
432		return(0);
433	case CAT:
434		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
435		return(1);
436	case OR:
437		b = first(right(p));
438		if (first(left(p)) == 0 || b == 0) return(0);
439		return(1);
440	}
441	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
442	return(-1);
443}
444
445void follow(Node *v)	/* collects leaves that can follow v into setvec */
446{
447	Node *p;
448
449	if (type(v) == FINAL)
450		return;
451	p = parent(v);
452	switch (type(p)) {
453	case STAR:
454	case PLUS:
455		first(v);
456		follow(p);
457		return;
458
459	case OR:
460	case QUEST:
461		follow(p);
462		return;
463
464	case CAT:
465		if (v == left(p)) {	/* v is left child of p */
466			if (first(right(p)) == 0) {
467				follow(p);
468				return;
469			}
470		} else		/* v is right child */
471			follow(p);
472		return;
473	}
474}
475
476int member(int c, const char *sarg)	/* is c in s? */
477{
478	uschar *s = (uschar *) sarg;
479
480	while (*s)
481		if (c == *s++)
482			return(1);
483	return(0);
484}
485
486int match(fa *f, const char *p0)	/* shortest match ? */
487{
488	int s, ns;
489	uschar *p = (uschar *) p0;
490
491	s = f->reset ? makeinit(f,0) : f->initstat;
492	if (f->out[s])
493		return(1);
494	do {
495		/* assert(*p < NCHARS); */
496		if ((ns = f->gototab[s][*p]) != 0)
497			s = ns;
498		else
499			s = cgoto(f, s, *p);
500		if (f->out[s])
501			return(1);
502	} while (*p++ != 0);
503	return(0);
504}
505
506int pmatch(fa *f, const char *p0)	/* longest match, for sub */
507{
508	int s, ns;
509	uschar *p = (uschar *) p0;
510	uschar *q;
511	int i, k;
512
513	/* s = f->reset ? makeinit(f,1) : f->initstat; */
514	if (f->reset) {
515		f->initstat = s = makeinit(f,1);
516	} else {
517		s = f->initstat;
518	}
519	patbeg = (char *) p;
520	patlen = -1;
521	do {
522		q = p;
523		do {
524			if (f->out[s])		/* final state */
525				patlen = q-p;
526			/* assert(*q < NCHARS); */
527			if ((ns = f->gototab[s][*q]) != 0)
528				s = ns;
529			else
530				s = cgoto(f, s, *q);
531			if (s == 1) {	/* no transition */
532				if (patlen >= 0) {
533					patbeg = (char *) p;
534					return(1);
535				}
536				else
537					goto nextin;	/* no match */
538			}
539		} while (*q++ != 0);
540		if (f->out[s])
541			patlen = q-p-1;	/* don't count $ */
542		if (patlen >= 0) {
543			patbeg = (char *) p;
544			return(1);
545		}
546	nextin:
547		s = 2;
548		if (f->reset) {
549			for (i = 2; i <= f->curstat; i++)
550				xfree(f->posns[i]);
551			k = *f->posns[0];
552			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
553				overflo("out of space in pmatch");
554			for (i = 0; i <= k; i++)
555				(f->posns[2])[i] = (f->posns[0])[i];
556			f->initstat = f->curstat = 2;
557			f->out[2] = f->out[0];
558			for (i = 0; i < NCHARS; i++)
559				f->gototab[2][i] = 0;
560		}
561	} while (*p++ != 0);
562	return (0);
563}
564
565int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
566{
567	int s, ns;
568	uschar *p = (uschar *) p0;
569	uschar *q;
570	int i, k;
571
572	/* s = f->reset ? makeinit(f,1) : f->initstat; */
573	if (f->reset) {
574		f->initstat = s = makeinit(f,1);
575	} else {
576		s = f->initstat;
577	}
578	patlen = -1;
579	while (*p) {
580		q = p;
581		do {
582			if (f->out[s])		/* final state */
583				patlen = q-p;
584			/* assert(*q < NCHARS); */
585			if ((ns = f->gototab[s][*q]) != 0)
586				s = ns;
587			else
588				s = cgoto(f, s, *q);
589			if (s == 1) {	/* no transition */
590				if (patlen > 0) {
591					patbeg = (char *) p;
592					return(1);
593				} else
594					goto nnextin;	/* no nonempty match */
595			}
596		} while (*q++ != 0);
597		if (f->out[s])
598			patlen = q-p-1;	/* don't count $ */
599		if (patlen > 0 ) {
600			patbeg = (char *) p;
601			return(1);
602		}
603	nnextin:
604		s = 2;
605		if (f->reset) {
606			for (i = 2; i <= f->curstat; i++)
607				xfree(f->posns[i]);
608			k = *f->posns[0];
609			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
610				overflo("out of state space");
611			for (i = 0; i <= k; i++)
612				(f->posns[2])[i] = (f->posns[0])[i];
613			f->initstat = f->curstat = 2;
614			f->out[2] = f->out[0];
615			for (i = 0; i < NCHARS; i++)
616				f->gototab[2][i] = 0;
617		}
618		p++;
619	}
620	return (0);
621}
622
623Node *reparse(const char *p)	/* parses regular expression pointed to by p */
624{			/* uses relex() to scan regular expression */
625	Node *np;
626
627	dprintf( ("reparse <%s>\n", p) );
628	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
629	rtok = relex();
630	/* GNU compatibility: an empty regexp matches anything */
631	if (rtok == '\0') {
632		/* FATAL("empty regular expression"); previous */
633		return(op2(EMPTYRE, NIL, NIL));
634	}
635	np = regexp();
636	if (rtok != '\0')
637		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
638	return(np);
639}
640
641Node *regexp(void)	/* top-level parse of reg expr */
642{
643	return (alt(concat(primary())));
644}
645
646Node *primary(void)
647{
648	Node *np;
649
650	switch (rtok) {
651	case CHAR:
652		np = op2(CHAR, NIL, itonp(rlxval));
653		rtok = relex();
654		return (unary(np));
655	case ALL:
656		rtok = relex();
657		return (unary(op2(ALL, NIL, NIL)));
658	case EMPTYRE:
659		rtok = relex();
660		return (unary(op2(ALL, NIL, NIL)));
661	case DOT:
662		rtok = relex();
663		return (unary(op2(DOT, NIL, NIL)));
664	case CCL:
665		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
666		rtok = relex();
667		return (unary(np));
668	case NCCL:
669		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
670		rtok = relex();
671		return (unary(np));
672	case '^':
673		rtok = relex();
674		return (unary(op2(CHAR, NIL, itonp(HAT))));
675	case '$':
676		rtok = relex();
677		return (unary(op2(CHAR, NIL, NIL)));
678	case '(':
679		rtok = relex();
680		if (rtok == ')') {	/* special pleading for () */
681			rtok = relex();
682			return unary(op2(CCL, NIL, (Node *) tostring("")));
683		}
684		np = regexp();
685		if (rtok == ')') {
686			rtok = relex();
687			return (unary(np));
688		}
689		else
690			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
691	default:
692		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
693	}
694	return 0;	/*NOTREACHED*/
695}
696
697Node *concat(Node *np)
698{
699	switch (rtok) {
700	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
701		return (concat(op2(CAT, np, primary())));
702	}
703	return (np);
704}
705
706Node *alt(Node *np)
707{
708	if (rtok == OR) {
709		rtok = relex();
710		return (alt(op2(OR, np, concat(primary()))));
711	}
712	return (np);
713}
714
715Node *unary(Node *np)
716{
717	switch (rtok) {
718	case STAR:
719		rtok = relex();
720		return (unary(op2(STAR, np, NIL)));
721	case PLUS:
722		rtok = relex();
723		return (unary(op2(PLUS, np, NIL)));
724	case QUEST:
725		rtok = relex();
726		return (unary(op2(QUEST, np, NIL)));
727	default:
728		return (np);
729	}
730}
731
732/*
733 * Character class definitions conformant to the POSIX locale as
734 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
735 * and operating character sets are both ASCII (ISO646) or supersets
736 * thereof.
737 *
738 * Note that to avoid overflowing the temporary buffer used in
739 * relex(), the expanded character class (prior to range expansion)
740 * must be less than twice the size of their full name.
741 */
742
743/* Because isblank doesn't show up in any of the header files on any
744 * system i use, it's defined here.  if some other locale has a richer
745 * definition of "blank", define HAS_ISBLANK and provide your own
746 * version.
747 * the parentheses here are an attempt to find a path through the maze
748 * of macro definition and/or function and/or version provided.  thanks
749 * to nelson beebe for the suggestion; let's see if it works everywhere.
750 */
751
752/* #define HAS_ISBLANK */
753#ifndef HAS_ISBLANK
754
755int (xisblank)(int c)
756{
757	return c==' ' || c=='\t';
758}
759
760#endif
761
762struct charclass {
763	const char *cc_name;
764	int cc_namelen;
765	int (*cc_func)(int);
766} charclasses[] = {
767	{ "alnum",	5,	isalnum },
768	{ "alpha",	5,	isalpha },
769#ifndef HAS_ISBLANK
770	{ "blank",	5,	isspace }, /* was isblank */
771#else
772	{ "blank",	5,	isblank },
773#endif
774	{ "cntrl",	5,	iscntrl },
775	{ "digit",	5,	isdigit },
776	{ "graph",	5,	isgraph },
777	{ "lower",	5,	islower },
778	{ "print",	5,	isprint },
779	{ "punct",	5,	ispunct },
780	{ "space",	5,	isspace },
781	{ "upper",	5,	isupper },
782	{ "xdigit",	6,	isxdigit },
783	{ NULL,		0,	NULL },
784};
785
786
787int relex(void)		/* lexical analyzer for reparse */
788{
789	int c, n;
790	int cflag;
791	static uschar *buf = NULL;
792	static int bufsz = 100;
793	uschar *bp;
794	struct charclass *cc;
795	int i;
796
797	switch (c = *prestr++) {
798	case '|': return OR;
799	case '*': return STAR;
800	case '+': return PLUS;
801	case '?': return QUEST;
802	case '.': return DOT;
803	case '\0': prestr--; return '\0';
804	case '^':
805	case '$':
806	case '(':
807	case ')':
808		return c;
809	case '\\':
810		rlxval = quoted(&prestr);
811		return CHAR;
812	default:
813		rlxval = c;
814		return CHAR;
815	case '[':
816		if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
817			FATAL("out of space in reg expr %.10s..", lastre);
818		bp = buf;
819		if (*prestr == '^') {
820			cflag = 1;
821			prestr++;
822		}
823		else
824			cflag = 0;
825		n = 2 * strlen((const char *) prestr)+1;
826		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
827			FATAL("out of space for reg expr %.10s...", lastre);
828		for (; ; ) {
829			if ((c = *prestr++) == '\\') {
830				*bp++ = '\\';
831				if ((c = *prestr++) == '\0')
832					FATAL("nonterminated character class %.20s...", lastre);
833				*bp++ = c;
834			/* } else if (c == '\n') { */
835			/* 	FATAL("newline in character class %.20s...", lastre); */
836			} else if (c == '[' && *prestr == ':') {
837				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
838				for (cc = charclasses; cc->cc_name; cc++)
839					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
840						break;
841				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
842				    prestr[2 + cc->cc_namelen] == ']') {
843					prestr += cc->cc_namelen + 3;
844					for (i = 1; i < NCHARS; i++) {
845						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
846						    FATAL("out of space for reg expr %.10s...", lastre);
847						if (cc->cc_func(i)) {
848							*bp++ = i;
849							n++;
850						}
851					}
852				} else
853					*bp++ = c;
854			} else if (c == '\0') {
855				FATAL("nonterminated character class %.20s", lastre);
856			} else if (bp == buf) {	/* 1st char is special */
857				*bp++ = c;
858			} else if (c == ']') {
859				*bp++ = 0;
860				rlxstr = (uschar *) tostring((char *) buf);
861				if (cflag == 0)
862					return CCL;
863				else
864					return NCCL;
865			} else
866				*bp++ = c;
867		}
868	}
869}
870
871int cgoto(fa *f, int s, int c)
872{
873	int i, j, k;
874	int *p, *q;
875
876	assert(c == HAT || c < NCHARS);
877	while (f->accept >= maxsetvec) {	/* guessing here! */
878		maxsetvec *= 4;
879		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
880		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
881		if (setvec == NULL || tmpset == NULL)
882			overflo("out of space in cgoto()");
883	}
884	for (i = 0; i <= f->accept; i++)
885		setvec[i] = 0;
886	setcnt = 0;
887	/* compute positions of gototab[s,c] into setvec */
888	p = f->posns[s];
889	for (i = 1; i <= *p; i++) {
890		if ((k = f->re[p[i]].ltype) != FINAL) {
891			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
892			 || (k == DOT && c != 0 && c != HAT)
893			 || (k == ALL && c != 0)
894			 || (k == EMPTYRE && c != 0)
895			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
896			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
897				q = f->re[p[i]].lfollow;
898				for (j = 1; j <= *q; j++) {
899					if (q[j] >= maxsetvec) {
900						maxsetvec *= 4;
901						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
902						tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
903						if (setvec == NULL || tmpset == NULL)
904							overflo("cgoto overflow");
905					}
906					if (setvec[q[j]] == 0) {
907						setcnt++;
908						setvec[q[j]] = 1;
909					}
910				}
911			}
912		}
913	}
914	/* determine if setvec is a previous state */
915	tmpset[0] = setcnt;
916	j = 1;
917	for (i = f->accept; i >= 0; i--)
918		if (setvec[i]) {
919			tmpset[j++] = i;
920		}
921	/* tmpset == previous state? */
922	for (i = 1; i <= f->curstat; i++) {
923		p = f->posns[i];
924		if ((k = tmpset[0]) != p[0])
925			goto different;
926		for (j = 1; j <= k; j++)
927			if (tmpset[j] != p[j])
928				goto different;
929		/* setvec is state i */
930		f->gototab[s][c] = i;
931		return i;
932	  different:;
933	}
934
935	/* add tmpset to current set of states */
936	if (f->curstat >= NSTATES-1) {
937		f->curstat = 2;
938		f->reset = 1;
939		for (i = 2; i < NSTATES; i++)
940			xfree(f->posns[i]);
941	} else
942		++(f->curstat);
943	for (i = 0; i < NCHARS; i++)
944		f->gototab[f->curstat][i] = 0;
945	xfree(f->posns[f->curstat]);
946	if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
947		overflo("out of space in cgoto");
948
949	f->posns[f->curstat] = p;
950	f->gototab[s][c] = f->curstat;
951	for (i = 0; i <= setcnt; i++)
952		p[i] = tmpset[i];
953	if (setvec[f->accept])
954		f->out[f->curstat] = 1;
955	else
956		f->out[f->curstat] = 0;
957	return f->curstat;
958}
959
960
961void freefa(fa *f)	/* free a finite automaton */
962{
963	int i;
964
965	if (f == NULL)
966		return;
967	for (i = 0; i <= f->curstat; i++)
968		xfree(f->posns[i]);
969	for (i = 0; i <= f->accept; i++) {
970		xfree(f->re[i].lfollow);
971		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
972			xfree((f->re[i].lval.np));
973	}
974	xfree(f->restr);
975	xfree(f);
976}
977