b.c revision 118194
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25/* lasciate ogne speranza, voi ch'entrate. */
26
27#define	DEBUG
28
29#include <ctype.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include "awk.h"
34#include "ytab.h"
35
36#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
37				/* NCHARS is 2**n */
38#define MAXLIN 22
39
40#define type(v)		(v)->nobj	/* badly overloaded here */
41#define info(v)		(v)->ntype	/* badly overloaded here */
42#define left(v)		(v)->narg[0]
43#define right(v)	(v)->narg[1]
44#define parent(v)	(v)->nnext
45
46#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47#define UNARY	case STAR: case PLUS: case QUEST:
48
49/* encoding in tree Nodes:
50	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51		left is index, right contains value or pointer to value
52	unary (STAR, PLUS, QUEST): left is child, right is null
53	binary (CAT, OR): left and right are children
54	parent contains pointer to parent
55*/
56
57
58int	*setvec;
59int	*tmpset;
60int	maxsetvec = 0;
61
62int	rtok;		/* next token in current re */
63int	rlxval;
64static uschar	*rlxstr;
65static uschar	*prestr;	/* current position in current re */
66static uschar	*lastre;	/* origin of last re */
67
68static	int setcnt;
69static	int poscnt;
70
71char	*patbeg;
72int	patlen;
73
74#define	NFA	20	/* cache this many dynamic fa's */
75fa	*fatab[NFA];
76int	nfatab	= 0;	/* entries in fatab */
77
78fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
79{
80	int i, use, nuse;
81	fa *pfa;
82	static int now = 1;
83
84	if (setvec == 0) {	/* first time through any RE */
85		maxsetvec = MAXLIN;
86		setvec = (int *) malloc(maxsetvec * sizeof(int));
87		tmpset = (int *) malloc(maxsetvec * sizeof(int));
88		if (setvec == 0 || tmpset == 0)
89			overflo("out of space initializing makedfa");
90	}
91
92	if (compile_time)	/* a constant for sure */
93		return mkdfa(s, anchor);
94	for (i = 0; i < nfatab; i++)	/* is it there already? */
95		if (fatab[i]->anchor == anchor
96		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
97			fatab[i]->use = now++;
98			return fatab[i];
99		}
100	pfa = mkdfa(s, anchor);
101	if (nfatab < NFA) {	/* room for another */
102		fatab[nfatab] = pfa;
103		fatab[nfatab]->use = now++;
104		nfatab++;
105		return pfa;
106	}
107	use = fatab[0]->use;	/* replace least-recently used */
108	nuse = 0;
109	for (i = 1; i < nfatab; i++)
110		if (fatab[i]->use < use) {
111			use = fatab[i]->use;
112			nuse = i;
113		}
114	freefa(fatab[nuse]);
115	fatab[nuse] = pfa;
116	pfa->use = now++;
117	return pfa;
118}
119
120fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
121				/* anchor = 1 for anchored matches, else 0 */
122{
123	Node *p, *p1;
124	fa *f;
125
126	p = reparse(s);
127	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128		/* put ALL STAR in front of reg.  exp. */
129	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130		/* put FINAL after reg.  exp. */
131
132	poscnt = 0;
133	penter(p1);	/* enter parent pointers and leaf indices */
134	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135		overflo("out of space for fa");
136	f->accept = poscnt-1;	/* penter has computed number of positions in re */
137	cfoll(f, p1);	/* set up follow sets */
138	freetr(p1);
139	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140			overflo("out of space in makedfa");
141	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142		overflo("out of space in makedfa");
143	*f->posns[1] = 0;
144	f->initstat = makeinit(f, anchor);
145	f->anchor = anchor;
146	f->restr = (uschar *) tostring(s);
147	return f;
148}
149
150int makeinit(fa *f, int anchor)
151{
152	int i, k;
153
154	f->curstat = 2;
155	f->out[2] = 0;
156	f->reset = 0;
157	k = *(f->re[0].lfollow);
158	xfree(f->posns[2]);
159	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160		overflo("out of space in makeinit");
161	for (i=0; i <= k; i++) {
162		(f->posns[2])[i] = (f->re[0].lfollow)[i];
163	}
164	if ((f->posns[2])[1] == f->accept)
165		f->out[2] = 1;
166	for (i=0; i < NCHARS; i++)
167		f->gototab[2][i] = 0;
168	f->curstat = cgoto(f, 2, HAT);
169	if (anchor) {
170		*f->posns[2] = k-1;	/* leave out position 0 */
171		for (i=0; i < k; i++) {
172			(f->posns[0])[i] = (f->posns[2])[i];
173		}
174
175		f->out[0] = f->out[2];
176		if (f->curstat != 2)
177			--(*f->posns[f->curstat]);
178	}
179	return f->curstat;
180}
181
182void penter(Node *p)	/* set up parent pointers and leaf indices */
183{
184	switch (type(p)) {
185	LEAF
186		info(p) = poscnt;
187		poscnt++;
188		break;
189	UNARY
190		penter(left(p));
191		parent(left(p)) = p;
192		break;
193	case CAT:
194	case OR:
195		penter(left(p));
196		penter(right(p));
197		parent(left(p)) = p;
198		parent(right(p)) = p;
199		break;
200	default:	/* can't happen */
201		FATAL("can't happen: unknown type %d in penter", type(p));
202		break;
203	}
204}
205
206void freetr(Node *p)	/* free parse tree */
207{
208	switch (type(p)) {
209	LEAF
210		xfree(p);
211		break;
212	UNARY
213		freetr(left(p));
214		xfree(p);
215		break;
216	case CAT:
217	case OR:
218		freetr(left(p));
219		freetr(right(p));
220		xfree(p);
221		break;
222	default:	/* can't happen */
223		FATAL("can't happen: unknown type %d in freetr", type(p));
224		break;
225	}
226}
227
228/* in the parsing of regular expressions, metacharacters like . have */
229/* to be seen literally;  \056 is not a metacharacter. */
230
231int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
232{			/* only pick up one 8-bit byte (2 chars) */
233	uschar *p;
234	int n = 0;
235	int i;
236
237	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238		if (isdigit(*p))
239			n = 16 * n + *p - '0';
240		else if (*p >= 'a' && *p <= 'f')
241			n = 16 * n + *p - 'a' + 10;
242		else if (*p >= 'A' && *p <= 'F')
243			n = 16 * n + *p - 'A' + 10;
244	}
245	*pp = (char *) p;
246	return n;
247}
248
249#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
250
251int quoted(char **pp)	/* pick up next thing after a \\ */
252			/* and increment *pp */
253{
254	char *p = *pp;
255	int c;
256
257	if ((c = *p++) == 't')
258		c = '\t';
259	else if (c == 'n')
260		c = '\n';
261	else if (c == 'f')
262		c = '\f';
263	else if (c == 'r')
264		c = '\r';
265	else if (c == 'b')
266		c = '\b';
267	else if (c == '\\')
268		c = '\\';
269	else if (c == 'x') {	/* hexadecimal goo follows */
270		c = hexstr(&p);	/* this adds a null if number is invalid */
271	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
272		int n = c - '0';
273		if (isoctdigit(*p)) {
274			n = 8 * n + *p++ - '0';
275			if (isoctdigit(*p))
276				n = 8 * n + *p++ - '0';
277		}
278		c = n;
279	} /* else */
280		/* c = c; */
281	*pp = p;
282	return c;
283}
284
285char *cclenter(const char *argp)	/* add a character class */
286{
287	int i, c, c2;
288	uschar *p = (uschar *) argp;
289	uschar *op, *bp;
290	static uschar *buf = 0;
291	static int bufsz = 100;
292
293	op = p;
294	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
295		FATAL("out of space for character class [%.10s...] 1", p);
296	bp = buf;
297	for (i = 0; (c = *p++) != 0; ) {
298		if (c == '\\') {
299			c = quoted((char **) &p);
300		} else if (c == '-' && i > 0 && bp[-1] != 0) {
301			if (*p != 0) {
302				c = bp[-1];
303				c2 = *p++;
304				if (c2 == '\\')
305					c2 = quoted((char **) &p);
306				if (c > c2) {	/* empty; ignore */
307					bp--;
308					i--;
309					continue;
310				}
311				while (c < c2) {
312					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
313						FATAL("out of space for character class [%.10s...] 2", p);
314					*bp++ = ++c;
315					i++;
316				}
317				continue;
318			}
319		}
320		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
321			FATAL("out of space for character class [%.10s...] 3", p);
322		*bp++ = c;
323		i++;
324	}
325	*bp = 0;
326	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327	xfree(op);
328	return (char *) tostring((char *) buf);
329}
330
331void overflo(const char *s)
332{
333	FATAL("regular expression too big: %.30s...", s);
334}
335
336void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
337{
338	int i;
339	int *p;
340
341	switch (type(v)) {
342	LEAF
343		f->re[info(v)].ltype = type(v);
344		f->re[info(v)].lval.np = right(v);
345		while (f->accept >= maxsetvec) {	/* guessing here! */
346			maxsetvec *= 4;
347			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349			if (setvec == 0 || tmpset == 0)
350				overflo("out of space in cfoll()");
351		}
352		for (i = 0; i <= f->accept; i++)
353			setvec[i] = 0;
354		setcnt = 0;
355		follow(v);	/* computes setvec and setcnt */
356		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
357			overflo("out of space building follow set");
358		f->re[info(v)].lfollow = p;
359		*p = setcnt;
360		for (i = f->accept; i >= 0; i--)
361			if (setvec[i] == 1)
362				*++p = i;
363		break;
364	UNARY
365		cfoll(f,left(v));
366		break;
367	case CAT:
368	case OR:
369		cfoll(f,left(v));
370		cfoll(f,right(v));
371		break;
372	default:	/* can't happen */
373		FATAL("can't happen: unknown type %d in cfoll", type(v));
374	}
375}
376
377int first(Node *p)	/* collects initially active leaves of p into setvec */
378			/* returns 1 if p matches empty string */
379{
380	int b, lp;
381
382	switch (type(p)) {
383	LEAF
384		lp = info(p);	/* look for high-water mark of subscripts */
385		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
386			maxsetvec *= 4;
387			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389			if (setvec == 0 || tmpset == 0)
390				overflo("out of space in first()");
391		}
392		if (setvec[lp] != 1) {
393			setvec[lp] = 1;
394			setcnt++;
395		}
396		if (type(p) == CCL && (*(char *) right(p)) == '\0')
397			return(0);		/* empty CCL */
398		else return(1);
399	case PLUS:
400		if (first(left(p)) == 0) return(0);
401		return(1);
402	case STAR:
403	case QUEST:
404		first(left(p));
405		return(0);
406	case CAT:
407		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408		return(1);
409	case OR:
410		b = first(right(p));
411		if (first(left(p)) == 0 || b == 0) return(0);
412		return(1);
413	}
414	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
415	return(-1);
416}
417
418void follow(Node *v)	/* collects leaves that can follow v into setvec */
419{
420	Node *p;
421
422	if (type(v) == FINAL)
423		return;
424	p = parent(v);
425	switch (type(p)) {
426	case STAR:
427	case PLUS:
428		first(v);
429		follow(p);
430		return;
431
432	case OR:
433	case QUEST:
434		follow(p);
435		return;
436
437	case CAT:
438		if (v == left(p)) {	/* v is left child of p */
439			if (first(right(p)) == 0) {
440				follow(p);
441				return;
442			}
443		} else		/* v is right child */
444			follow(p);
445		return;
446	}
447}
448
449int member(int c, const char *sarg)	/* is c in s? */
450{
451	uschar *s = (uschar *) sarg;
452
453	while (*s)
454		if (c == *s++)
455			return(1);
456	return(0);
457}
458
459int match(fa *f, const char *p0)	/* shortest match ? */
460{
461	int s, ns;
462	uschar *p = (uschar *) p0;
463
464	s = f->reset ? makeinit(f,0) : f->initstat;
465	if (f->out[s])
466		return(1);
467	do {
468		if ((ns = f->gototab[s][*p]) != 0)
469			s = ns;
470		else
471			s = cgoto(f, s, *p);
472		if (f->out[s])
473			return(1);
474	} while (*p++ != 0);
475	return(0);
476}
477
478int pmatch(fa *f, const char *p0)	/* longest match, for sub */
479{
480	int s, ns;
481	uschar *p = (uschar *) p0;
482	uschar *q;
483	int i, k;
484
485	s = f->reset ? makeinit(f,1) : f->initstat;
486	patbeg = (char *) p;
487	patlen = -1;
488	do {
489		q = p;
490		do {
491			if (f->out[s])		/* final state */
492				patlen = q-p;
493			if ((ns = f->gototab[s][*q]) != 0)
494				s = ns;
495			else
496				s = cgoto(f, s, *q);
497			if (s == 1) {	/* no transition */
498				if (patlen >= 0) {
499					patbeg = (char *) p;
500					return(1);
501				}
502				else
503					goto nextin;	/* no match */
504			}
505		} while (*q++ != 0);
506		if (f->out[s])
507			patlen = q-p-1;	/* don't count $ */
508		if (patlen >= 0) {
509			patbeg = (char *) p;
510			return(1);
511		}
512	nextin:
513		s = 2;
514		if (f->reset) {
515			for (i = 2; i <= f->curstat; i++)
516				xfree(f->posns[i]);
517			k = *f->posns[0];
518			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
519				overflo("out of space in pmatch");
520			for (i = 0; i <= k; i++)
521				(f->posns[2])[i] = (f->posns[0])[i];
522			f->initstat = f->curstat = 2;
523			f->out[2] = f->out[0];
524			for (i = 0; i < NCHARS; i++)
525				f->gototab[2][i] = 0;
526		}
527	} while (*p++ != 0);
528	return (0);
529}
530
531int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
532{
533	int s, ns;
534	uschar *p = (uschar *) p0;
535	uschar *q;
536	int i, k;
537
538	s = f->reset ? makeinit(f,1) : f->initstat;
539	patlen = -1;
540	while (*p) {
541		q = p;
542		do {
543			if (f->out[s])		/* final state */
544				patlen = q-p;
545			if ((ns = f->gototab[s][*q]) != 0)
546				s = ns;
547			else
548				s = cgoto(f, s, *q);
549			if (s == 1) {	/* no transition */
550				if (patlen > 0) {
551					patbeg = (char *) p;
552					return(1);
553				} else
554					goto nnextin;	/* no nonempty match */
555			}
556		} while (*q++ != 0);
557		if (f->out[s])
558			patlen = q-p-1;	/* don't count $ */
559		if (patlen > 0 ) {
560			patbeg = (char *) p;
561			return(1);
562		}
563	nnextin:
564		s = 2;
565		if (f->reset) {
566			for (i = 2; i <= f->curstat; i++)
567				xfree(f->posns[i]);
568			k = *f->posns[0];
569			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
570				overflo("out of state space");
571			for (i = 0; i <= k; i++)
572				(f->posns[2])[i] = (f->posns[0])[i];
573			f->initstat = f->curstat = 2;
574			f->out[2] = f->out[0];
575			for (i = 0; i < NCHARS; i++)
576				f->gototab[2][i] = 0;
577		}
578		p++;
579	}
580	return (0);
581}
582
583Node *reparse(const char *p)	/* parses regular expression pointed to by p */
584{			/* uses relex() to scan regular expression */
585	Node *np;
586
587	dprintf( ("reparse <%s>\n", p) );
588	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
589	rtok = relex();
590	/* GNU compatibility: an empty regexp matches anything */
591	if (rtok == '\0')
592		/* FATAL("empty regular expression"); previous */
593		return(op2(ALL, NIL, NIL));
594	np = regexp();
595	if (rtok != '\0')
596		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
597	return(np);
598}
599
600Node *regexp(void)	/* top-level parse of reg expr */
601{
602	return (alt(concat(primary())));
603}
604
605Node *primary(void)
606{
607	Node *np;
608
609	switch (rtok) {
610	case CHAR:
611		np = op2(CHAR, NIL, itonp(rlxval));
612		rtok = relex();
613		return (unary(np));
614	case ALL:
615		rtok = relex();
616		return (unary(op2(ALL, NIL, NIL)));
617	case DOT:
618		rtok = relex();
619		return (unary(op2(DOT, NIL, NIL)));
620	case CCL:
621		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
622		rtok = relex();
623		return (unary(np));
624	case NCCL:
625		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
626		rtok = relex();
627		return (unary(np));
628	case '^':
629		rtok = relex();
630		return (unary(op2(CHAR, NIL, itonp(HAT))));
631	case '$':
632		rtok = relex();
633		return (unary(op2(CHAR, NIL, NIL)));
634	case '(':
635		rtok = relex();
636		if (rtok == ')') {	/* special pleading for () */
637			rtok = relex();
638			return unary(op2(CCL, NIL, (Node *) tostring("")));
639		}
640		np = regexp();
641		if (rtok == ')') {
642			rtok = relex();
643			return (unary(np));
644		}
645		else
646			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
647	default:
648		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
649	}
650	return 0;	/*NOTREACHED*/
651}
652
653Node *concat(Node *np)
654{
655	switch (rtok) {
656	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
657		return (concat(op2(CAT, np, primary())));
658	}
659	return (np);
660}
661
662Node *alt(Node *np)
663{
664	if (rtok == OR) {
665		rtok = relex();
666		return (alt(op2(OR, np, concat(primary()))));
667	}
668	return (np);
669}
670
671Node *unary(Node *np)
672{
673	switch (rtok) {
674	case STAR:
675		rtok = relex();
676		return (unary(op2(STAR, np, NIL)));
677	case PLUS:
678		rtok = relex();
679		return (unary(op2(PLUS, np, NIL)));
680	case QUEST:
681		rtok = relex();
682		return (unary(op2(QUEST, np, NIL)));
683	default:
684		return (np);
685	}
686}
687
688/*
689 * Character class definitions conformant to the POSIX locale as
690 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
691 * and operating character sets are both ASCII (ISO646) or supersets
692 * thereof.
693 *
694 * Note that to avoid overflowing the temporary buffer used in
695 * relex(), the expanded character class (prior to range expansion)
696 * must be less than twice the size of their full name.
697 */
698
699/* Because isblank doesn't show up in any of the header files on any
700 * system i use, it's defined here.  if some other locale has a richer
701 * definition of "blank", define HAS_ISBLANK and provide your own
702 * version.
703 * the parentheses here are an attempt to find a path through the maze
704 * of macro definition and/or function and/or version provided.  thanks
705 * to nelson beebe for the suggestion; let's see if it works everywhere.
706 */
707
708#ifndef HAS_ISBLANK
709
710int (isblank)(int c)
711{
712	return c==' ' || c=='\t';
713}
714
715#endif
716
717struct charclass {
718	const char *cc_name;
719	int cc_namelen;
720	int (*cc_func)(int);
721} charclasses[] = {
722	{ "alnum",	5,	isalnum },
723	{ "alpha",	5,	isalpha },
724	{ "blank",	5,	isblank },
725	{ "cntrl",	5,	iscntrl },
726	{ "digit",	5,	isdigit },
727	{ "graph",	5,	isgraph },
728	{ "lower",	5,	islower },
729	{ "print",	5,	isprint },
730	{ "punct",	5,	ispunct },
731	{ "space",	5,	isspace },
732	{ "upper",	5,	isupper },
733	{ "xdigit",	6,	isxdigit },
734	{ NULL,		0,	NULL },
735};
736
737
738int relex(void)		/* lexical analyzer for reparse */
739{
740	int c, n;
741	int cflag;
742	static uschar *buf = 0;
743	static int bufsz = 100;
744	uschar *bp;
745	struct charclass *cc;
746	int i;
747
748	switch (c = *prestr++) {
749	case '|': return OR;
750	case '*': return STAR;
751	case '+': return PLUS;
752	case '?': return QUEST;
753	case '.': return DOT;
754	case '\0': prestr--; return '\0';
755	case '^':
756	case '$':
757	case '(':
758	case ')':
759		return c;
760	case '\\':
761		rlxval = quoted((char **) &prestr);
762		return CHAR;
763	default:
764		rlxval = c;
765		return CHAR;
766	case '[':
767		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
768			FATAL("out of space in reg expr %.10s..", lastre);
769		bp = buf;
770		if (*prestr == '^') {
771			cflag = 1;
772			prestr++;
773		}
774		else
775			cflag = 0;
776		n = 2 * strlen((const char *) prestr)+1;
777		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
778			FATAL("out of space for reg expr %.10s...", lastre);
779		for (; ; ) {
780			if ((c = *prestr++) == '\\') {
781				*bp++ = '\\';
782				if ((c = *prestr++) == '\0')
783					FATAL("nonterminated character class %.20s...", lastre);
784				*bp++ = c;
785			/* } else if (c == '\n') { */
786			/* 	FATAL("newline in character class %.20s...", lastre); */
787			} else if (c == '[' && *prestr == ':') {
788				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
789				for (cc = charclasses; cc->cc_name; cc++)
790					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
791						break;
792				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
793				    prestr[2 + cc->cc_namelen] == ']') {
794					prestr += cc->cc_namelen + 3;
795					for (i = 0; i < NCHARS; i++) {
796						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
797						    FATAL("out of space for reg expr %.10s...", lastre);
798						if (cc->cc_func(i)) {
799							*bp++ = i;
800							n++;
801						}
802					}
803				} else
804					*bp++ = c;
805			} else if (c == '\0') {
806				FATAL("nonterminated character class %.20s", lastre);
807			} else if (bp == buf) {	/* 1st char is special */
808				*bp++ = c;
809			} else if (c == ']') {
810				*bp++ = 0;
811				rlxstr = (uschar *) tostring((char *) buf);
812				if (cflag == 0)
813					return CCL;
814				else
815					return NCCL;
816			} else
817				*bp++ = c;
818		}
819	}
820}
821
822int cgoto(fa *f, int s, int c)
823{
824	int i, j, k;
825	int *p, *q;
826
827	while (f->accept >= maxsetvec) {	/* guessing here! */
828		maxsetvec *= 4;
829		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
830		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
831		if (setvec == 0 || tmpset == 0)
832			overflo("out of space in cgoto()");
833	}
834	for (i = 0; i <= f->accept; i++)
835		setvec[i] = 0;
836	setcnt = 0;
837	/* compute positions of gototab[s,c] into setvec */
838	p = f->posns[s];
839	for (i = 1; i <= *p; i++) {
840		if ((k = f->re[p[i]].ltype) != FINAL) {
841			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
842			 || (k == DOT && c != 0 && c != HAT)
843			 || (k == ALL && c != 0)
844			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
845			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
846				q = f->re[p[i]].lfollow;
847				for (j = 1; j <= *q; j++) {
848					if (q[j] >= maxsetvec) {
849						maxsetvec *= 4;
850						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
851						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
852						if (setvec == 0 || tmpset == 0)
853							overflo("cgoto overflow");
854					}
855					if (setvec[q[j]] == 0) {
856						setcnt++;
857						setvec[q[j]] = 1;
858					}
859				}
860			}
861		}
862	}
863	/* determine if setvec is a previous state */
864	tmpset[0] = setcnt;
865	j = 1;
866	for (i = f->accept; i >= 0; i--)
867		if (setvec[i]) {
868			tmpset[j++] = i;
869		}
870	/* tmpset == previous state? */
871	for (i = 1; i <= f->curstat; i++) {
872		p = f->posns[i];
873		if ((k = tmpset[0]) != p[0])
874			goto different;
875		for (j = 1; j <= k; j++)
876			if (tmpset[j] != p[j])
877				goto different;
878		/* setvec is state i */
879		f->gototab[s][c] = i;
880		return i;
881	  different:;
882	}
883
884	/* add tmpset to current set of states */
885	if (f->curstat >= NSTATES-1) {
886		f->curstat = 2;
887		f->reset = 1;
888		for (i = 2; i < NSTATES; i++)
889			xfree(f->posns[i]);
890	} else
891		++(f->curstat);
892	for (i = 0; i < NCHARS; i++)
893		f->gototab[f->curstat][i] = 0;
894	xfree(f->posns[f->curstat]);
895	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
896		overflo("out of space in cgoto");
897
898	f->posns[f->curstat] = p;
899	f->gototab[s][c] = f->curstat;
900	for (i = 0; i <= setcnt; i++)
901		p[i] = tmpset[i];
902	if (setvec[f->accept])
903		f->out[f->curstat] = 1;
904	else
905		f->out[f->curstat] = 0;
906	return f->curstat;
907}
908
909
910void freefa(fa *f)	/* free a finite automaton */
911{
912	int i;
913
914	if (f == NULL)
915		return;
916	for (i = 0; i <= f->curstat; i++)
917		xfree(f->posns[i]);
918	for (i = 0; i <= f->accept; i++) {
919		xfree(f->re[i].lfollow);
920		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
921			xfree((f->re[i].lval.np));
922	}
923	xfree(f->restr);
924	xfree(f);
925}
926