b.c revision 107806
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25/* lasciate ogne speranza, voi ch'entrate. */
26
27#define	DEBUG
28
29#include <ctype.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include "awk.h"
34#include "ytab.h"
35
36#define	HAT	(NCHARS-2)	/* matches ^ in regular expr */
37				/* NCHARS is 2**n */
38#define MAXLIN 22
39
40#define type(v)		(v)->nobj	/* badly overloaded here */
41#define info(v)		(v)->ntype	/* badly overloaded here */
42#define left(v)		(v)->narg[0]
43#define right(v)	(v)->narg[1]
44#define parent(v)	(v)->nnext
45
46#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47#define UNARY	case STAR: case PLUS: case QUEST:
48
49/* encoding in tree Nodes:
50	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51		left is index, right contains value or pointer to value
52	unary (STAR, PLUS, QUEST): left is child, right is null
53	binary (CAT, OR): left and right are children
54	parent contains pointer to parent
55*/
56
57
58int	*setvec;
59int	*tmpset;
60int	maxsetvec = 0;
61
62int	rtok;		/* next token in current re */
63int	rlxval;
64static uschar	*rlxstr;
65static uschar	*prestr;	/* current position in current re */
66static uschar	*lastre;	/* origin of last re */
67
68static	int setcnt;
69static	int poscnt;
70
71char	*patbeg;
72int	patlen;
73
74#define	NFA	20	/* cache this many dynamic fa's */
75fa	*fatab[NFA];
76int	nfatab	= 0;	/* entries in fatab */
77
78fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
79{
80	int i, use, nuse;
81	fa *pfa;
82	static int now = 1;
83
84	if (setvec == 0) {	/* first time through any RE */
85		maxsetvec = MAXLIN;
86		setvec = (int *) malloc(maxsetvec * sizeof(int));
87		tmpset = (int *) malloc(maxsetvec * sizeof(int));
88		if (setvec == 0 || tmpset == 0)
89			overflo("out of space initializing makedfa");
90	}
91
92	if (compile_time)	/* a constant for sure */
93		return mkdfa(s, anchor);
94	for (i = 0; i < nfatab; i++)	/* is it there already? */
95		if (fatab[i]->anchor == anchor
96		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
97			fatab[i]->use = now++;
98			return fatab[i];
99		}
100	pfa = mkdfa(s, anchor);
101	if (nfatab < NFA) {	/* room for another */
102		fatab[nfatab] = pfa;
103		fatab[nfatab]->use = now++;
104		nfatab++;
105		return pfa;
106	}
107	use = fatab[0]->use;	/* replace least-recently used */
108	nuse = 0;
109	for (i = 1; i < nfatab; i++)
110		if (fatab[i]->use < use) {
111			use = fatab[i]->use;
112			nuse = i;
113		}
114	freefa(fatab[nuse]);
115	fatab[nuse] = pfa;
116	pfa->use = now++;
117	return pfa;
118}
119
120fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
121				/* anchor = 1 for anchored matches, else 0 */
122{
123	Node *p, *p1;
124	fa *f;
125
126	p = reparse(s);
127	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128		/* put ALL STAR in front of reg.  exp. */
129	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130		/* put FINAL after reg.  exp. */
131
132	poscnt = 0;
133	penter(p1);	/* enter parent pointers and leaf indices */
134	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135		overflo("out of space for fa");
136	f->accept = poscnt-1;	/* penter has computed number of positions in re */
137	cfoll(f, p1);	/* set up follow sets */
138	freetr(p1);
139	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140			overflo("out of space in makedfa");
141	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142		overflo("out of space in makedfa");
143	*f->posns[1] = 0;
144	f->initstat = makeinit(f, anchor);
145	f->anchor = anchor;
146	f->restr = (uschar *) tostring(s);
147	return f;
148}
149
150int makeinit(fa *f, int anchor)
151{
152	int i, k;
153
154	f->curstat = 2;
155	f->out[2] = 0;
156	f->reset = 0;
157	k = *(f->re[0].lfollow);
158	xfree(f->posns[2]);
159	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160		overflo("out of space in makeinit");
161	for (i=0; i <= k; i++) {
162		(f->posns[2])[i] = (f->re[0].lfollow)[i];
163	}
164	if ((f->posns[2])[1] == f->accept)
165		f->out[2] = 1;
166	for (i=0; i < NCHARS; i++)
167		f->gototab[2][i] = 0;
168	f->curstat = cgoto(f, 2, HAT);
169	if (anchor) {
170		*f->posns[2] = k-1;	/* leave out position 0 */
171		for (i=0; i < k; i++) {
172			(f->posns[0])[i] = (f->posns[2])[i];
173		}
174
175		f->out[0] = f->out[2];
176		if (f->curstat != 2)
177			--(*f->posns[f->curstat]);
178	}
179	return f->curstat;
180}
181
182void penter(Node *p)	/* set up parent pointers and leaf indices */
183{
184	switch (type(p)) {
185	LEAF
186		info(p) = poscnt;
187		poscnt++;
188		break;
189	UNARY
190		penter(left(p));
191		parent(left(p)) = p;
192		break;
193	case CAT:
194	case OR:
195		penter(left(p));
196		penter(right(p));
197		parent(left(p)) = p;
198		parent(right(p)) = p;
199		break;
200	default:	/* can't happen */
201		FATAL("can't happen: unknown type %d in penter", type(p));
202		break;
203	}
204}
205
206void freetr(Node *p)	/* free parse tree */
207{
208	switch (type(p)) {
209	LEAF
210		xfree(p);
211		break;
212	UNARY
213		freetr(left(p));
214		xfree(p);
215		break;
216	case CAT:
217	case OR:
218		freetr(left(p));
219		freetr(right(p));
220		xfree(p);
221		break;
222	default:	/* can't happen */
223		FATAL("can't happen: unknown type %d in freetr", type(p));
224		break;
225	}
226}
227
228/* in the parsing of regular expressions, metacharacters like . have */
229/* to be seen literally;  \056 is not a metacharacter. */
230
231int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
232{			/* only pick up one 8-bit byte (2 chars) */
233	uschar *p;
234	int n = 0;
235	int i;
236
237	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238		if (isdigit(*p))
239			n = 16 * n + *p - '0';
240		else if (*p >= 'a' && *p <= 'f')
241			n = 16 * n + *p - 'a' + 10;
242		else if (*p >= 'A' && *p <= 'F')
243			n = 16 * n + *p - 'A' + 10;
244	}
245	*pp = (char *) p;
246	return n;
247}
248
249#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
250
251int quoted(char **pp)	/* pick up next thing after a \\ */
252			/* and increment *pp */
253{
254	char *p = *pp;
255	int c;
256
257	if ((c = *p++) == 't')
258		c = '\t';
259	else if (c == 'n')
260		c = '\n';
261	else if (c == 'f')
262		c = '\f';
263	else if (c == 'r')
264		c = '\r';
265	else if (c == 'b')
266		c = '\b';
267	else if (c == '\\')
268		c = '\\';
269	else if (c == 'x') {	/* hexadecimal goo follows */
270		c = hexstr(&p);	/* this adds a null if number is invalid */
271	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
272		int n = c - '0';
273		if (isoctdigit(*p)) {
274			n = 8 * n + *p++ - '0';
275			if (isoctdigit(*p))
276				n = 8 * n + *p++ - '0';
277		}
278		c = n;
279	} /* else */
280		/* c = c; */
281	*pp = p;
282	return c;
283}
284
285static int collate_range_cmp(int a, int b)
286{
287	int r;
288	static char s[2][2];
289
290	if ((uschar)a == (uschar)b)
291		return 0;
292	s[0][0] = a;
293	s[1][0] = b;
294	if ((r = strcoll(s[0], s[1])) == 0)
295		r = (uschar)a - (uschar)b;
296	return r;
297}
298
299char *cclenter(const char *argp)	/* add a character class */
300{
301	int i, c, c2;
302	int j;
303	uschar *p = (uschar *) argp;
304	uschar *op, *bp;
305	static uschar *buf = 0;
306	static int bufsz = 100;
307
308	op = p;
309	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
310		FATAL("out of space for character class [%.10s...] 1", p);
311	bp = buf;
312	for (i = 0; (c = *p++) != 0; ) {
313		if (c == '\\') {
314			c = quoted((char **) &p);
315		} else if (c == '-' && i > 0 && bp[-1] != 0) {
316			if (*p != 0) {
317				c = bp[-1];
318				c2 = *p++;
319				if (c2 == '\\')
320					c2 = quoted((char **) &p);
321				if (collate_range_cmp(c, c2) > 0) {	/* empty; ignore */
322					bp--;
323					i--;
324					continue;
325				}
326				for (j = 0; j < NCHARS; j++) {
327					if ((collate_range_cmp(c, j) > 0) ||
328					    collate_range_cmp(j, c2) > 0)
329						continue;
330					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
331						FATAL("out of space for character class [%.10s...] 2", p);
332					*bp++ = j;
333					i++;
334				}
335				continue;
336			}
337		}
338		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
339			FATAL("out of space for character class [%.10s...] 3", p);
340		*bp++ = c;
341		i++;
342	}
343	*bp = 0;
344	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
345	xfree(op);
346	return (char *) tostring((char *) buf);
347}
348
349void overflo(const char *s)
350{
351	FATAL("regular expression too big: %.30s...", s);
352}
353
354void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
355{
356	int i;
357	int *p;
358
359	switch (type(v)) {
360	LEAF
361		f->re[info(v)].ltype = type(v);
362		f->re[info(v)].lval.np = right(v);
363		while (f->accept >= maxsetvec) {	/* guessing here! */
364			maxsetvec *= 4;
365			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
366			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
367			if (setvec == 0 || tmpset == 0)
368				overflo("out of space in cfoll()");
369		}
370		for (i = 0; i <= f->accept; i++)
371			setvec[i] = 0;
372		setcnt = 0;
373		follow(v);	/* computes setvec and setcnt */
374		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
375			overflo("out of space building follow set");
376		f->re[info(v)].lfollow = p;
377		*p = setcnt;
378		for (i = f->accept; i >= 0; i--)
379			if (setvec[i] == 1)
380				*++p = i;
381		break;
382	UNARY
383		cfoll(f,left(v));
384		break;
385	case CAT:
386	case OR:
387		cfoll(f,left(v));
388		cfoll(f,right(v));
389		break;
390	default:	/* can't happen */
391		FATAL("can't happen: unknown type %d in cfoll", type(v));
392	}
393}
394
395int first(Node *p)	/* collects initially active leaves of p into setvec */
396			/* returns 1 if p matches empty string */
397{
398	int b, lp;
399
400	switch (type(p)) {
401	LEAF
402		lp = info(p);	/* look for high-water mark of subscripts */
403		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
404			maxsetvec *= 4;
405			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
406			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
407			if (setvec == 0 || tmpset == 0)
408				overflo("out of space in first()");
409		}
410		if (setvec[lp] != 1) {
411			setvec[lp] = 1;
412			setcnt++;
413		}
414		if (type(p) == CCL && (*(char *) right(p)) == '\0')
415			return(0);		/* empty CCL */
416		else return(1);
417	case PLUS:
418		if (first(left(p)) == 0) return(0);
419		return(1);
420	case STAR:
421	case QUEST:
422		first(left(p));
423		return(0);
424	case CAT:
425		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
426		return(1);
427	case OR:
428		b = first(right(p));
429		if (first(left(p)) == 0 || b == 0) return(0);
430		return(1);
431	}
432	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
433	return(-1);
434}
435
436void follow(Node *v)	/* collects leaves that can follow v into setvec */
437{
438	Node *p;
439
440	if (type(v) == FINAL)
441		return;
442	p = parent(v);
443	switch (type(p)) {
444	case STAR:
445	case PLUS:
446		first(v);
447		follow(p);
448		return;
449
450	case OR:
451	case QUEST:
452		follow(p);
453		return;
454
455	case CAT:
456		if (v == left(p)) {	/* v is left child of p */
457			if (first(right(p)) == 0) {
458				follow(p);
459				return;
460			}
461		} else		/* v is right child */
462			follow(p);
463		return;
464	}
465}
466
467int member(int c, const char *sarg)	/* is c in s? */
468{
469	uschar *s = (uschar *) sarg;
470
471	while (*s)
472		if (c == *s++)
473			return(1);
474	return(0);
475}
476
477int match(fa *f, const char *p0)	/* shortest match ? */
478{
479	int s, ns;
480	uschar *p = (uschar *) p0;
481
482	s = f->reset ? makeinit(f,0) : f->initstat;
483	if (f->out[s])
484		return(1);
485	do {
486		if ((ns = f->gototab[s][*p]) != 0)
487			s = ns;
488		else
489			s = cgoto(f, s, *p);
490		if (f->out[s])
491			return(1);
492	} while (*p++ != 0);
493	return(0);
494}
495
496int pmatch(fa *f, const char *p0)	/* longest match, for sub */
497{
498	int s, ns;
499	uschar *p = (uschar *) p0;
500	uschar *q;
501	int i, k;
502
503	s = f->reset ? makeinit(f,1) : f->initstat;
504	patbeg = (char *) p;
505	patlen = -1;
506	do {
507		q = p;
508		do {
509			if (f->out[s])		/* final state */
510				patlen = q-p;
511			if ((ns = f->gototab[s][*q]) != 0)
512				s = ns;
513			else
514				s = cgoto(f, s, *q);
515			if (s == 1) {	/* no transition */
516				if (patlen >= 0) {
517					patbeg = (char *) p;
518					return(1);
519				}
520				else
521					goto nextin;	/* no match */
522			}
523		} while (*q++ != 0);
524		if (f->out[s])
525			patlen = q-p-1;	/* don't count $ */
526		if (patlen >= 0) {
527			patbeg = (char *) p;
528			return(1);
529		}
530	nextin:
531		s = 2;
532		if (f->reset) {
533			for (i = 2; i <= f->curstat; i++)
534				xfree(f->posns[i]);
535			k = *f->posns[0];
536			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
537				overflo("out of space in pmatch");
538			for (i = 0; i <= k; i++)
539				(f->posns[2])[i] = (f->posns[0])[i];
540			f->initstat = f->curstat = 2;
541			f->out[2] = f->out[0];
542			for (i = 0; i < NCHARS; i++)
543				f->gototab[2][i] = 0;
544		}
545	} while (*p++ != 0);
546	return (0);
547}
548
549int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
550{
551	int s, ns;
552	uschar *p = (uschar *) p0;
553	uschar *q;
554	int i, k;
555
556	s = f->reset ? makeinit(f,1) : f->initstat;
557	patlen = -1;
558	while (*p) {
559		q = p;
560		do {
561			if (f->out[s])		/* final state */
562				patlen = q-p;
563			if ((ns = f->gototab[s][*q]) != 0)
564				s = ns;
565			else
566				s = cgoto(f, s, *q);
567			if (s == 1) {	/* no transition */
568				if (patlen > 0) {
569					patbeg = (char *) p;
570					return(1);
571				} else
572					goto nnextin;	/* no nonempty match */
573			}
574		} while (*q++ != 0);
575		if (f->out[s])
576			patlen = q-p-1;	/* don't count $ */
577		if (patlen > 0 ) {
578			patbeg = (char *) p;
579			return(1);
580		}
581	nnextin:
582		s = 2;
583		if (f->reset) {
584			for (i = 2; i <= f->curstat; i++)
585				xfree(f->posns[i]);
586			k = *f->posns[0];
587			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
588				overflo("out of state space");
589			for (i = 0; i <= k; i++)
590				(f->posns[2])[i] = (f->posns[0])[i];
591			f->initstat = f->curstat = 2;
592			f->out[2] = f->out[0];
593			for (i = 0; i < NCHARS; i++)
594				f->gototab[2][i] = 0;
595		}
596		p++;
597	}
598	return (0);
599}
600
601Node *reparse(const char *p)	/* parses regular expression pointed to by p */
602{			/* uses relex() to scan regular expression */
603	Node *np;
604
605	dprintf( ("reparse <%s>\n", p) );
606	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
607	rtok = relex();
608	/* GNU compatibility: an empty regexp matches anything */
609	if (rtok == '\0')
610		/* FATAL("empty regular expression"); previous */
611		return(op2(ALL, NIL, NIL));
612	np = regexp();
613	if (rtok != '\0')
614		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
615	return(np);
616}
617
618Node *regexp(void)	/* top-level parse of reg expr */
619{
620	return (alt(concat(primary())));
621}
622
623Node *primary(void)
624{
625	Node *np;
626
627	switch (rtok) {
628	case CHAR:
629		np = op2(CHAR, NIL, itonp(rlxval));
630		rtok = relex();
631		return (unary(np));
632	case ALL:
633		rtok = relex();
634		return (unary(op2(ALL, NIL, NIL)));
635	case DOT:
636		rtok = relex();
637		return (unary(op2(DOT, NIL, NIL)));
638	case CCL:
639		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
640		rtok = relex();
641		return (unary(np));
642	case NCCL:
643		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
644		rtok = relex();
645		return (unary(np));
646	case '^':
647		rtok = relex();
648		return (unary(op2(CHAR, NIL, itonp(HAT))));
649	case '$':
650		rtok = relex();
651		return (unary(op2(CHAR, NIL, NIL)));
652	case '(':
653		rtok = relex();
654		if (rtok == ')') {	/* special pleading for () */
655			rtok = relex();
656			return unary(op2(CCL, NIL, (Node *) tostring("")));
657		}
658		np = regexp();
659		if (rtok == ')') {
660			rtok = relex();
661			return (unary(np));
662		}
663		else
664			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
665	default:
666		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
667	}
668	return 0;	/*NOTREACHED*/
669}
670
671Node *concat(Node *np)
672{
673	switch (rtok) {
674	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
675		return (concat(op2(CAT, np, primary())));
676	}
677	return (np);
678}
679
680Node *alt(Node *np)
681{
682	if (rtok == OR) {
683		rtok = relex();
684		return (alt(op2(OR, np, concat(primary()))));
685	}
686	return (np);
687}
688
689Node *unary(Node *np)
690{
691	switch (rtok) {
692	case STAR:
693		rtok = relex();
694		return (unary(op2(STAR, np, NIL)));
695	case PLUS:
696		rtok = relex();
697		return (unary(op2(PLUS, np, NIL)));
698	case QUEST:
699		rtok = relex();
700		return (unary(op2(QUEST, np, NIL)));
701	default:
702		return (np);
703	}
704}
705
706/*
707 * Character class definitions conformant to the POSIX locale as
708 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
709 * and operating character sets are both ASCII (ISO646) or supersets
710 * thereof.
711 *
712 * Note that to avoid overflowing the temporary buffer used in
713 * relex(), the expanded character class (prior to range expansion)
714 * must be less than twice the size of their full name.
715 */
716
717struct charclass {
718	const char *cc_name;
719	int cc_namelen;
720	int (*cc_func)(int);
721} charclasses[] = {
722	{ "alnum",	5,	isalnum },
723	{ "alpha",	5,	isalpha },
724	{ "blank",	5,	isblank },
725	{ "cntrl",	5,	iscntrl },
726	{ "digit",	5,	isdigit },
727	{ "graph",	5,	isgraph },
728	{ "lower",	5,	islower },
729	{ "print",	5,	isprint },
730	{ "punct",	5,	ispunct },
731	{ "space",	5,	isspace },
732	{ "upper",	5,	isupper },
733	{ "xdigit",	6,	isxdigit },
734	{ NULL,		0,	NULL },
735};
736
737
738int relex(void)		/* lexical analyzer for reparse */
739{
740	int c, n;
741	int cflag;
742	static uschar *buf = 0;
743	static int bufsz = 100;
744	uschar *bp;
745	struct charclass *cc;
746	int i;
747
748	switch (c = *prestr++) {
749	case '|': return OR;
750	case '*': return STAR;
751	case '+': return PLUS;
752	case '?': return QUEST;
753	case '.': return DOT;
754	case '\0': prestr--; return '\0';
755	case '^':
756	case '$':
757	case '(':
758	case ')':
759		return c;
760	case '\\':
761		rlxval = quoted((char **) &prestr);
762		return CHAR;
763	default:
764		rlxval = c;
765		return CHAR;
766	case '[':
767		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
768			FATAL("out of space in reg expr %.10s..", lastre);
769		bp = buf;
770		if (*prestr == '^') {
771			cflag = 1;
772			prestr++;
773		}
774		else
775			cflag = 0;
776		n = 2 * strlen((const char *) prestr)+1;
777		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
778			FATAL("out of space for reg expr %.10s...", lastre);
779		for (; ; ) {
780			if ((c = *prestr++) == '\\') {
781				*bp++ = '\\';
782				if ((c = *prestr++) == '\0')
783					FATAL("nonterminated character class %.20s...", lastre);
784				*bp++ = c;
785			/* } else if (c == '\n') { */
786			/* 	FATAL("newline in character class %.20s...", lastre); */
787			} else if (c == '[' && *prestr == ':') {
788				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
789				for (cc = charclasses; cc->cc_name; cc++)
790					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
791						break;
792				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
793				    prestr[2 + cc->cc_namelen] == ']') {
794					prestr += cc->cc_namelen + 3;
795					for (i = 0; i < NCHARS; i++) {
796						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
797						    FATAL("out of space for reg expr %.10s...", lastre);
798						if (cc->cc_func(i)) {
799							*bp++ = i;
800							n++;
801						}
802					}
803				} else
804					*bp++ = c;
805			} else if (c == '\0') {
806				FATAL("nonterminated character class %.20s", lastre);
807			} else if (bp == buf) {	/* 1st char is special */
808				*bp++ = c;
809			} else if (c == ']') {
810				*bp++ = 0;
811				rlxstr = (uschar *) tostring((char *) buf);
812				if (cflag == 0)
813					return CCL;
814				else
815					return NCCL;
816			} else
817				*bp++ = c;
818		}
819	}
820}
821
822int cgoto(fa *f, int s, int c)
823{
824	int i, j, k;
825	int *p, *q;
826
827	if (c < 0 || c > 255)
828		FATAL("can't happen: neg char %d in cgoto", c);
829	while (f->accept >= maxsetvec) {	/* guessing here! */
830		maxsetvec *= 4;
831		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
832		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
833		if (setvec == 0 || tmpset == 0)
834			overflo("out of space in cgoto()");
835	}
836	for (i = 0; i <= f->accept; i++)
837		setvec[i] = 0;
838	setcnt = 0;
839	/* compute positions of gototab[s,c] into setvec */
840	p = f->posns[s];
841	for (i = 1; i <= *p; i++) {
842		if ((k = f->re[p[i]].ltype) != FINAL) {
843			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
844			 || (k == DOT && c != 0 && c != HAT)
845			 || (k == ALL && c != 0)
846			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
847			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
848				q = f->re[p[i]].lfollow;
849				for (j = 1; j <= *q; j++) {
850					if (q[j] >= maxsetvec) {
851						maxsetvec *= 4;
852						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
853						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
854						if (setvec == 0 || tmpset == 0)
855							overflo("cgoto overflow");
856					}
857					if (setvec[q[j]] == 0) {
858						setcnt++;
859						setvec[q[j]] = 1;
860					}
861				}
862			}
863		}
864	}
865	/* determine if setvec is a previous state */
866	tmpset[0] = setcnt;
867	j = 1;
868	for (i = f->accept; i >= 0; i--)
869		if (setvec[i]) {
870			tmpset[j++] = i;
871		}
872	/* tmpset == previous state? */
873	for (i = 1; i <= f->curstat; i++) {
874		p = f->posns[i];
875		if ((k = tmpset[0]) != p[0])
876			goto different;
877		for (j = 1; j <= k; j++)
878			if (tmpset[j] != p[j])
879				goto different;
880		/* setvec is state i */
881		f->gototab[s][c] = i;
882		return i;
883	  different:;
884	}
885
886	/* add tmpset to current set of states */
887	if (f->curstat >= NSTATES-1) {
888		f->curstat = 2;
889		f->reset = 1;
890		for (i = 2; i < NSTATES; i++)
891			xfree(f->posns[i]);
892	} else
893		++(f->curstat);
894	for (i = 0; i < NCHARS; i++)
895		f->gototab[f->curstat][i] = 0;
896	xfree(f->posns[f->curstat]);
897	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
898		overflo("out of space in cgoto");
899
900	f->posns[f->curstat] = p;
901	f->gototab[s][c] = f->curstat;
902	for (i = 0; i <= setcnt; i++)
903		p[i] = tmpset[i];
904	if (setvec[f->accept])
905		f->out[f->curstat] = 1;
906	else
907		f->out[f->curstat] = 0;
908	return f->curstat;
909}
910
911
912void freefa(fa *f)	/* free a finite automaton */
913{
914	int i;
915
916	if (f == NULL)
917		return;
918	for (i = 0; i <= f->curstat; i++)
919		xfree(f->posns[i]);
920	for (i = 0; i <= f->accept; i++) {
921		xfree(f->re[i].lfollow);
922		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
923			xfree((f->re[i].lval.np));
924	}
925	xfree(f->restr);
926	xfree(f);
927}
928