b.c revision 85587
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25/* lasciate ogne speranza, voi ch'entrate. */
26
27#define	DEBUG
28
29#include <ctype.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include "awk.h"
34#include "ytab.h"
35
36#define	HAT	(NCHARS-2)	/* matches ^ in regular expr */
37				/* NCHARS is 2**n */
38#define MAXLIN 22
39
40#define type(v)		(v)->nobj	/* badly overloaded here */
41#define info(v)		(v)->ntype	/* badly overloaded here */
42#define left(v)		(v)->narg[0]
43#define right(v)	(v)->narg[1]
44#define parent(v)	(v)->nnext
45
46#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47#define UNARY	case STAR: case PLUS: case QUEST:
48
49/* encoding in tree Nodes:
50	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51		left is index, right contains value or pointer to value
52	unary (STAR, PLUS, QUEST): left is child, right is null
53	binary (CAT, OR): left and right are children
54	parent contains pointer to parent
55*/
56
57
58int	*setvec;
59int	*tmpset;
60int	maxsetvec = 0;
61
62int	rtok;		/* next token in current re */
63int	rlxval;
64static uschar	*rlxstr;
65static uschar	*prestr;	/* current position in current re */
66static uschar	*lastre;	/* origin of last re */
67
68static	int setcnt;
69static	int poscnt;
70
71char	*patbeg;
72int	patlen;
73
74#define	NFA	20	/* cache this many dynamic fa's */
75fa	*fatab[NFA];
76int	nfatab	= 0;	/* entries in fatab */
77
78fa *makedfa(char *s, int anchor)	/* returns dfa for reg expr s */
79{
80	int i, use, nuse;
81	fa *pfa;
82	static int now = 1;
83
84	if (setvec == 0) {	/* first time through any RE */
85		maxsetvec = MAXLIN;
86		setvec = (int *) malloc(maxsetvec * sizeof(int));
87		tmpset = (int *) malloc(maxsetvec * sizeof(int));
88		if (setvec == 0 || tmpset == 0)
89			overflo("out of space initializing makedfa");
90	}
91
92	if (compile_time)	/* a constant for sure */
93		return mkdfa(s, anchor);
94	for (i = 0; i < nfatab; i++)	/* is it there already? */
95		if (fatab[i]->anchor == anchor
96		  && strcmp(fatab[i]->restr, s) == 0) {
97			fatab[i]->use = now++;
98			return fatab[i];
99		}
100	pfa = mkdfa(s, anchor);
101	if (nfatab < NFA) {	/* room for another */
102		fatab[nfatab] = pfa;
103		fatab[nfatab]->use = now++;
104		nfatab++;
105		return pfa;
106	}
107	use = fatab[0]->use;	/* replace least-recently used */
108	nuse = 0;
109	for (i = 1; i < nfatab; i++)
110		if (fatab[i]->use < use) {
111			use = fatab[i]->use;
112			nuse = i;
113		}
114	freefa(fatab[nuse]);
115	fatab[nuse] = pfa;
116	pfa->use = now++;
117	return pfa;
118}
119
120fa *mkdfa(char *s, int anchor)	/* does the real work of making a dfa */
121				/* anchor = 1 for anchored matches, else 0 */
122{
123	Node *p, *p1;
124	fa *f;
125
126	p = reparse(s);
127	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128		/* put ALL STAR in front of reg.  exp. */
129	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130		/* put FINAL after reg.  exp. */
131
132	poscnt = 0;
133	penter(p1);	/* enter parent pointers and leaf indices */
134	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135		overflo("out of space for fa");
136	f->accept = poscnt-1;	/* penter has computed number of positions in re */
137	cfoll(f, p1);	/* set up follow sets */
138	freetr(p1);
139	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140			overflo("out of space in makedfa");
141	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142		overflo("out of space in makedfa");
143	*f->posns[1] = 0;
144	f->initstat = makeinit(f, anchor);
145	f->anchor = anchor;
146	f->restr = (uschar *) tostring(s);
147	return f;
148}
149
150int makeinit(fa *f, int anchor)
151{
152	int i, k;
153
154	f->curstat = 2;
155	f->out[2] = 0;
156	f->reset = 0;
157	k = *(f->re[0].lfollow);
158	xfree(f->posns[2]);
159	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160		overflo("out of space in makeinit");
161	for (i=0; i <= k; i++) {
162		(f->posns[2])[i] = (f->re[0].lfollow)[i];
163	}
164	if ((f->posns[2])[1] == f->accept)
165		f->out[2] = 1;
166	for (i=0; i < NCHARS; i++)
167		f->gototab[2][i] = 0;
168	f->curstat = cgoto(f, 2, HAT);
169	if (anchor) {
170		*f->posns[2] = k-1;	/* leave out position 0 */
171		for (i=0; i < k; i++) {
172			(f->posns[0])[i] = (f->posns[2])[i];
173		}
174
175		f->out[0] = f->out[2];
176		if (f->curstat != 2)
177			--(*f->posns[f->curstat]);
178	}
179	return f->curstat;
180}
181
182void penter(Node *p)	/* set up parent pointers and leaf indices */
183{
184	switch (type(p)) {
185	LEAF
186		info(p) = poscnt;
187		poscnt++;
188		break;
189	UNARY
190		penter(left(p));
191		parent(left(p)) = p;
192		break;
193	case CAT:
194	case OR:
195		penter(left(p));
196		penter(right(p));
197		parent(left(p)) = p;
198		parent(right(p)) = p;
199		break;
200	default:	/* can't happen */
201		FATAL("can't happen: unknown type %d in penter", type(p));
202		break;
203	}
204}
205
206void freetr(Node *p)	/* free parse tree */
207{
208	switch (type(p)) {
209	LEAF
210		xfree(p);
211		break;
212	UNARY
213		freetr(left(p));
214		xfree(p);
215		break;
216	case CAT:
217	case OR:
218		freetr(left(p));
219		freetr(right(p));
220		xfree(p);
221		break;
222	default:	/* can't happen */
223		FATAL("can't happen: unknown type %d in freetr", type(p));
224		break;
225	}
226}
227
228/* in the parsing of regular expressions, metacharacters like . have */
229/* to be seen literally;  \056 is not a metacharacter. */
230
231int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
232{			/* only pick up one 8-bit byte (2 chars) */
233	uschar *p;
234	int n = 0;
235	int i;
236
237	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238		if (isdigit(*p))
239			n = 16 * n + *p - '0';
240		else if (*p >= 'a' && *p <= 'f')
241			n = 16 * n + *p - 'a' + 10;
242		else if (*p >= 'A' && *p <= 'F')
243			n = 16 * n + *p - 'A' + 10;
244	}
245	*pp = (char *) p;
246	return n;
247}
248
249#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
250
251int quoted(char **pp)	/* pick up next thing after a \\ */
252			/* and increment *pp */
253{
254	char *p = *pp;
255	int c;
256
257	if ((c = *p++) == 't')
258		c = '\t';
259	else if (c == 'n')
260		c = '\n';
261	else if (c == 'f')
262		c = '\f';
263	else if (c == 'r')
264		c = '\r';
265	else if (c == 'b')
266		c = '\b';
267	else if (c == '\\')
268		c = '\\';
269	else if (c == 'x') {	/* hexadecimal goo follows */
270		c = hexstr(&p);	/* this adds a null if number is invalid */
271	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
272		int n = c - '0';
273		if (isoctdigit(*p)) {
274			n = 8 * n + *p++ - '0';
275			if (isoctdigit(*p))
276				n = 8 * n + *p++ - '0';
277		}
278		c = n;
279	} /* else */
280		/* c = c; */
281	*pp = p;
282	return c;
283}
284
285char *cclenter(char *argp)	/* add a character class */
286{
287	int i, c, c2;
288	uschar *p = (uschar *) argp;
289	uschar *op, *bp;
290	static uschar *buf = 0;
291	static int bufsz = 100;
292
293	op = p;
294	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
295		FATAL("out of space for character class [%.10s...] 1", p);
296	bp = buf;
297	for (i = 0; (c = *p++) != 0; ) {
298		if (c == '\\') {
299			c = quoted((char **) &p);
300		} else if (c == '-' && i > 0 && bp[-1] != 0) {
301			if (*p != 0) {
302				c = bp[-1];
303				c2 = *p++;
304				if (c2 == '\\')
305					c2 = quoted((char **) &p);
306				if (c > c2) {	/* empty; ignore */
307					bp--;
308					i--;
309					continue;
310				}
311				while (c < c2) {
312					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
313						FATAL("out of space for character class [%.10s...] 2", p);
314					*bp++ = ++c;
315					i++;
316				}
317				continue;
318			}
319		}
320		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
321			FATAL("out of space for character class [%.10s...] 3", p);
322		*bp++ = c;
323		i++;
324	}
325	*bp = 0;
326	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327	xfree(op);
328	return (char *) tostring((char *) buf);
329}
330
331void overflo(char *s)
332{
333	FATAL("regular expression too big: %.30s...", s);
334}
335
336void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
337{
338	int i;
339	int *p;
340
341	switch (type(v)) {
342	LEAF
343		f->re[info(v)].ltype = type(v);
344		f->re[info(v)].lval.np = right(v);
345		while (f->accept >= maxsetvec) {	/* guessing here! */
346			maxsetvec *= 4;
347			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349			if (setvec == 0 || tmpset == 0)
350				overflo("out of space in cfoll()");
351		}
352		for (i = 0; i <= f->accept; i++)
353			setvec[i] = 0;
354		setcnt = 0;
355		follow(v);	/* computes setvec and setcnt */
356		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
357			overflo("out of space building follow set");
358		f->re[info(v)].lfollow = p;
359		*p = setcnt;
360		for (i = f->accept; i >= 0; i--)
361			if (setvec[i] == 1)
362				*++p = i;
363		break;
364	UNARY
365		cfoll(f,left(v));
366		break;
367	case CAT:
368	case OR:
369		cfoll(f,left(v));
370		cfoll(f,right(v));
371		break;
372	default:	/* can't happen */
373		FATAL("can't happen: unknown type %d in cfoll", type(v));
374	}
375}
376
377int first(Node *p)	/* collects initially active leaves of p into setvec */
378			/* returns 1 if p matches empty string */
379{
380	int b, lp;
381
382	switch (type(p)) {
383	LEAF
384		lp = info(p);	/* look for high-water mark of subscripts */
385		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
386			maxsetvec *= 4;
387			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389			if (setvec == 0 || tmpset == 0)
390				overflo("out of space in first()");
391		}
392		if (setvec[lp] != 1) {
393			setvec[lp] = 1;
394			setcnt++;
395		}
396		if (type(p) == CCL && (*(char *) right(p)) == '\0')
397			return(0);		/* empty CCL */
398		else return(1);
399	case PLUS:
400		if (first(left(p)) == 0) return(0);
401		return(1);
402	case STAR:
403	case QUEST:
404		first(left(p));
405		return(0);
406	case CAT:
407		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408		return(1);
409	case OR:
410		b = first(right(p));
411		if (first(left(p)) == 0 || b == 0) return(0);
412		return(1);
413	}
414	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
415	return(-1);
416}
417
418void follow(Node *v)	/* collects leaves that can follow v into setvec */
419{
420	Node *p;
421
422	if (type(v) == FINAL)
423		return;
424	p = parent(v);
425	switch (type(p)) {
426	case STAR:
427	case PLUS:
428		first(v);
429		follow(p);
430		return;
431
432	case OR:
433	case QUEST:
434		follow(p);
435		return;
436
437	case CAT:
438		if (v == left(p)) {	/* v is left child of p */
439			if (first(right(p)) == 0) {
440				follow(p);
441				return;
442			}
443		} else		/* v is right child */
444			follow(p);
445		return;
446	}
447}
448
449int member(int c, char *sarg)	/* is c in s? */
450{
451	uschar *s = (uschar *) sarg;
452
453	while (*s)
454		if (c == *s++)
455			return(1);
456	return(0);
457}
458
459int match(fa *f, char *p0)	/* shortest match ? */
460{
461	int s, ns;
462	uschar *p = (uschar *) p0;
463
464	s = f->reset ? makeinit(f,0) : f->initstat;
465	if (f->out[s])
466		return(1);
467	do {
468		if ((ns = f->gototab[s][*p]) != 0)
469			s = ns;
470		else
471			s = cgoto(f, s, *p);
472		if (f->out[s])
473			return(1);
474	} while (*p++ != 0);
475	return(0);
476}
477
478int pmatch(fa *f, char *p0)	/* longest match, for sub */
479{
480	int s, ns;
481	uschar *p = (uschar *) p0;
482	uschar *q;
483	int i, k;
484
485	s = f->reset ? makeinit(f,1) : f->initstat;
486	patbeg = (char *) p;
487	patlen = -1;
488	do {
489		q = p;
490		do {
491			if (f->out[s])		/* final state */
492				patlen = q-p;
493			if ((ns = f->gototab[s][*q]) != 0)
494				s = ns;
495			else
496				s = cgoto(f, s, *q);
497			if (s == 1) {	/* no transition */
498				if (patlen >= 0) {
499					patbeg = (char *) p;
500					return(1);
501				}
502				else
503					goto nextin;	/* no match */
504			}
505		} while (*q++ != 0);
506		if (f->out[s])
507			patlen = q-p-1;	/* don't count $ */
508		if (patlen >= 0) {
509			patbeg = (char *) p;
510			return(1);
511		}
512	nextin:
513		s = 2;
514		if (f->reset) {
515			for (i = 2; i <= f->curstat; i++)
516				xfree(f->posns[i]);
517			k = *f->posns[0];
518			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
519				overflo("out of space in pmatch");
520			for (i = 0; i <= k; i++)
521				(f->posns[2])[i] = (f->posns[0])[i];
522			f->initstat = f->curstat = 2;
523			f->out[2] = f->out[0];
524			for (i = 0; i < NCHARS; i++)
525				f->gototab[2][i] = 0;
526		}
527	} while (*p++ != 0);
528	return (0);
529}
530
531int nematch(fa *f, char *p0)	/* non-empty match, for sub */
532{
533	int s, ns;
534	uschar *p = (uschar *) p0;
535	uschar *q;
536	int i, k;
537
538	s = f->reset ? makeinit(f,1) : f->initstat;
539	patlen = -1;
540	while (*p) {
541		q = p;
542		do {
543			if (f->out[s])		/* final state */
544				patlen = q-p;
545			if ((ns = f->gototab[s][*q]) != 0)
546				s = ns;
547			else
548				s = cgoto(f, s, *q);
549			if (s == 1) {	/* no transition */
550				if (patlen > 0) {
551					patbeg = (char *) p;
552					return(1);
553				} else
554					goto nnextin;	/* no nonempty match */
555			}
556		} while (*q++ != 0);
557		if (f->out[s])
558			patlen = q-p-1;	/* don't count $ */
559		if (patlen > 0 ) {
560			patbeg = (char *) p;
561			return(1);
562		}
563	nnextin:
564		s = 2;
565		if (f->reset) {
566			for (i = 2; i <= f->curstat; i++)
567				xfree(f->posns[i]);
568			k = *f->posns[0];
569			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
570				overflo("out of state space");
571			for (i = 0; i <= k; i++)
572				(f->posns[2])[i] = (f->posns[0])[i];
573			f->initstat = f->curstat = 2;
574			f->out[2] = f->out[0];
575			for (i = 0; i < NCHARS; i++)
576				f->gototab[2][i] = 0;
577		}
578		p++;
579	}
580	return (0);
581}
582
583Node *reparse(char *p)	/* parses regular expression pointed to by p */
584{			/* uses relex() to scan regular expression */
585	Node *np;
586
587	dprintf( ("reparse <%s>\n", p) );
588	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
589	rtok = relex();
590	if (rtok == '\0')
591		FATAL("empty regular expression");
592	np = regexp();
593	if (rtok != '\0')
594		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
595	return(np);
596}
597
598Node *regexp(void)	/* top-level parse of reg expr */
599{
600	return (alt(concat(primary())));
601}
602
603Node *primary(void)
604{
605	Node *np;
606
607	switch (rtok) {
608	case CHAR:
609		np = op2(CHAR, NIL, itonp(rlxval));
610		rtok = relex();
611		return (unary(np));
612	case ALL:
613		rtok = relex();
614		return (unary(op2(ALL, NIL, NIL)));
615	case DOT:
616		rtok = relex();
617		return (unary(op2(DOT, NIL, NIL)));
618	case CCL:
619		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
620		rtok = relex();
621		return (unary(np));
622	case NCCL:
623		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
624		rtok = relex();
625		return (unary(np));
626	case '^':
627		rtok = relex();
628		return (unary(op2(CHAR, NIL, itonp(HAT))));
629	case '$':
630		rtok = relex();
631		return (unary(op2(CHAR, NIL, NIL)));
632	case '(':
633		rtok = relex();
634		if (rtok == ')') {	/* special pleading for () */
635			rtok = relex();
636			return unary(op2(CCL, NIL, (Node *) tostring("")));
637		}
638		np = regexp();
639		if (rtok == ')') {
640			rtok = relex();
641			return (unary(np));
642		}
643		else
644			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
645	default:
646		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
647	}
648	return 0;	/*NOTREACHED*/
649}
650
651Node *concat(Node *np)
652{
653	switch (rtok) {
654	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
655		return (concat(op2(CAT, np, primary())));
656	}
657	return (np);
658}
659
660Node *alt(Node *np)
661{
662	if (rtok == OR) {
663		rtok = relex();
664		return (alt(op2(OR, np, concat(primary()))));
665	}
666	return (np);
667}
668
669Node *unary(Node *np)
670{
671	switch (rtok) {
672	case STAR:
673		rtok = relex();
674		return (unary(op2(STAR, np, NIL)));
675	case PLUS:
676		rtok = relex();
677		return (unary(op2(PLUS, np, NIL)));
678	case QUEST:
679		rtok = relex();
680		return (unary(op2(QUEST, np, NIL)));
681	default:
682		return (np);
683	}
684}
685
686int relex(void)		/* lexical analyzer for reparse */
687{
688	int c, n;
689	int cflag;
690	static uschar *buf = 0;
691	static int bufsz = 100;
692	uschar *bp;
693
694	switch (c = *prestr++) {
695	case '|': return OR;
696	case '*': return STAR;
697	case '+': return PLUS;
698	case '?': return QUEST;
699	case '.': return DOT;
700	case '\0': prestr--; return '\0';
701	case '^':
702	case '$':
703	case '(':
704	case ')':
705		return c;
706	case '\\':
707		rlxval = quoted((char **) &prestr);
708		return CHAR;
709	default:
710		rlxval = c;
711		return CHAR;
712	case '[':
713		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
714			FATAL("out of space in reg expr %.10s..", lastre);
715		bp = buf;
716		if (*prestr == '^') {
717			cflag = 1;
718			prestr++;
719		}
720		else
721			cflag = 0;
722		n = 2 * strlen(prestr)+1;
723		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
724			FATAL("out of space for reg expr %.10s...", lastre);
725		for (; ; ) {
726			if ((c = *prestr++) == '\\') {
727				*bp++ = '\\';
728				if ((c = *prestr++) == '\0')
729					FATAL("nonterminated character class %.20s...", lastre);
730				*bp++ = c;
731			/* } else if (c == '\n') { */
732			/* 	FATAL("newline in character class %.20s...", lastre); */
733			} else if (c == '\0') {
734				FATAL("nonterminated character class %.20s", lastre);
735			} else if (bp == buf) {	/* 1st char is special */
736				*bp++ = c;
737			} else if (c == ']') {
738				*bp++ = 0;
739				rlxstr = (uschar *) tostring((char *) buf);
740				if (cflag == 0)
741					return CCL;
742				else
743					return NCCL;
744			} else
745				*bp++ = c;
746		}
747	}
748}
749
750int cgoto(fa *f, int s, int c)
751{
752	int i, j, k;
753	int *p, *q;
754
755	if (c < 0 || c > 255)
756		FATAL("can't happen: neg char %d in cgoto", c);
757	while (f->accept >= maxsetvec) {	/* guessing here! */
758		maxsetvec *= 4;
759		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
760		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
761		if (setvec == 0 || tmpset == 0)
762			overflo("out of space in cgoto()");
763	}
764	for (i = 0; i <= f->accept; i++)
765		setvec[i] = 0;
766	setcnt = 0;
767	/* compute positions of gototab[s,c] into setvec */
768	p = f->posns[s];
769	for (i = 1; i <= *p; i++) {
770		if ((k = f->re[p[i]].ltype) != FINAL) {
771			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
772			 || (k == DOT && c != 0 && c != HAT)
773			 || (k == ALL && c != 0)
774			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
775			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
776				q = f->re[p[i]].lfollow;
777				for (j = 1; j <= *q; j++) {
778					if (q[j] >= maxsetvec) {
779						maxsetvec *= 4;
780						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
781						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
782						if (setvec == 0 || tmpset == 0)
783							overflo("cgoto overflow");
784					}
785					if (setvec[q[j]] == 0) {
786						setcnt++;
787						setvec[q[j]] = 1;
788					}
789				}
790			}
791		}
792	}
793	/* determine if setvec is a previous state */
794	tmpset[0] = setcnt;
795	j = 1;
796	for (i = f->accept; i >= 0; i--)
797		if (setvec[i]) {
798			tmpset[j++] = i;
799		}
800	/* tmpset == previous state? */
801	for (i = 1; i <= f->curstat; i++) {
802		p = f->posns[i];
803		if ((k = tmpset[0]) != p[0])
804			goto different;
805		for (j = 1; j <= k; j++)
806			if (tmpset[j] != p[j])
807				goto different;
808		/* setvec is state i */
809		f->gototab[s][c] = i;
810		return i;
811	  different:;
812	}
813
814	/* add tmpset to current set of states */
815	if (f->curstat >= NSTATES-1) {
816		f->curstat = 2;
817		f->reset = 1;
818		for (i = 2; i < NSTATES; i++)
819			xfree(f->posns[i]);
820	} else
821		++(f->curstat);
822	for (i = 0; i < NCHARS; i++)
823		f->gototab[f->curstat][i] = 0;
824	xfree(f->posns[f->curstat]);
825	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
826		overflo("out of space in cgoto");
827
828	f->posns[f->curstat] = p;
829	f->gototab[s][c] = f->curstat;
830	for (i = 0; i <= setcnt; i++)
831		p[i] = tmpset[i];
832	if (setvec[f->accept])
833		f->out[f->curstat] = 1;
834	else
835		f->out[f->curstat] = 0;
836	return f->curstat;
837}
838
839
840void freefa(fa *f)	/* free a finite automaton */
841{
842	int i;
843
844	if (f == NULL)
845		return;
846	for (i = 0; i <= f->curstat; i++)
847		xfree(f->posns[i]);
848	for (i = 0; i <= f->accept; i++) {
849		xfree(f->re[i].lfollow);
850		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
851			xfree((f->re[i].lval.np));
852	}
853	xfree(f->restr);
854	xfree(f);
855}
856