b.c revision 125601
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25/* lasciate ogne speranza, voi ch'entrate. */
26
27#define	DEBUG
28
29#include <ctype.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include "awk.h"
34#include "ytab.h"
35
36#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
37				/* NCHARS is 2**n */
38#define MAXLIN 22
39
40#define type(v)		(v)->nobj	/* badly overloaded here */
41#define info(v)		(v)->ntype	/* badly overloaded here */
42#define left(v)		(v)->narg[0]
43#define right(v)	(v)->narg[1]
44#define parent(v)	(v)->nnext
45
46#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47#define UNARY	case STAR: case PLUS: case QUEST:
48
49/* encoding in tree Nodes:
50	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51		left is index, right contains value or pointer to value
52	unary (STAR, PLUS, QUEST): left is child, right is null
53	binary (CAT, OR): left and right are children
54	parent contains pointer to parent
55*/
56
57
58int	*setvec;
59int	*tmpset;
60int	maxsetvec = 0;
61
62int	rtok;		/* next token in current re */
63int	rlxval;
64static uschar	*rlxstr;
65static uschar	*prestr;	/* current position in current re */
66static uschar	*lastre;	/* origin of last re */
67
68static	int setcnt;
69static	int poscnt;
70
71char	*patbeg;
72int	patlen;
73
74#define	NFA	20	/* cache this many dynamic fa's */
75fa	*fatab[NFA];
76int	nfatab	= 0;	/* entries in fatab */
77
78fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
79{
80	int i, use, nuse;
81	fa *pfa;
82	static int now = 1;
83
84	if (setvec == 0) {	/* first time through any RE */
85		maxsetvec = MAXLIN;
86		setvec = (int *) malloc(maxsetvec * sizeof(int));
87		tmpset = (int *) malloc(maxsetvec * sizeof(int));
88		if (setvec == 0 || tmpset == 0)
89			overflo("out of space initializing makedfa");
90	}
91
92	if (compile_time)	/* a constant for sure */
93		return mkdfa(s, anchor);
94	for (i = 0; i < nfatab; i++)	/* is it there already? */
95		if (fatab[i]->anchor == anchor
96		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
97			fatab[i]->use = now++;
98			return fatab[i];
99		}
100	pfa = mkdfa(s, anchor);
101	if (nfatab < NFA) {	/* room for another */
102		fatab[nfatab] = pfa;
103		fatab[nfatab]->use = now++;
104		nfatab++;
105		return pfa;
106	}
107	use = fatab[0]->use;	/* replace least-recently used */
108	nuse = 0;
109	for (i = 1; i < nfatab; i++)
110		if (fatab[i]->use < use) {
111			use = fatab[i]->use;
112			nuse = i;
113		}
114	freefa(fatab[nuse]);
115	fatab[nuse] = pfa;
116	pfa->use = now++;
117	return pfa;
118}
119
120fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
121				/* anchor = 1 for anchored matches, else 0 */
122{
123	Node *p, *p1;
124	fa *f;
125
126	p = reparse(s);
127	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128		/* put ALL STAR in front of reg.  exp. */
129	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130		/* put FINAL after reg.  exp. */
131
132	poscnt = 0;
133	penter(p1);	/* enter parent pointers and leaf indices */
134	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135		overflo("out of space for fa");
136	f->accept = poscnt-1;	/* penter has computed number of positions in re */
137	cfoll(f, p1);	/* set up follow sets */
138	freetr(p1);
139	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140			overflo("out of space in makedfa");
141	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142		overflo("out of space in makedfa");
143	*f->posns[1] = 0;
144	f->initstat = makeinit(f, anchor);
145	f->anchor = anchor;
146	f->restr = (uschar *) tostring(s);
147	return f;
148}
149
150int makeinit(fa *f, int anchor)
151{
152	int i, k;
153
154	f->curstat = 2;
155	f->out[2] = 0;
156	f->reset = 0;
157	k = *(f->re[0].lfollow);
158	xfree(f->posns[2]);
159	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160		overflo("out of space in makeinit");
161	for (i=0; i <= k; i++) {
162		(f->posns[2])[i] = (f->re[0].lfollow)[i];
163	}
164	if ((f->posns[2])[1] == f->accept)
165		f->out[2] = 1;
166	for (i=0; i < NCHARS; i++)
167		f->gototab[2][i] = 0;
168	f->curstat = cgoto(f, 2, HAT);
169	if (anchor) {
170		*f->posns[2] = k-1;	/* leave out position 0 */
171		for (i=0; i < k; i++) {
172			(f->posns[0])[i] = (f->posns[2])[i];
173		}
174
175		f->out[0] = f->out[2];
176		if (f->curstat != 2)
177			--(*f->posns[f->curstat]);
178	}
179	return f->curstat;
180}
181
182void penter(Node *p)	/* set up parent pointers and leaf indices */
183{
184	switch (type(p)) {
185	LEAF
186		info(p) = poscnt;
187		poscnt++;
188		break;
189	UNARY
190		penter(left(p));
191		parent(left(p)) = p;
192		break;
193	case CAT:
194	case OR:
195		penter(left(p));
196		penter(right(p));
197		parent(left(p)) = p;
198		parent(right(p)) = p;
199		break;
200	default:	/* can't happen */
201		FATAL("can't happen: unknown type %d in penter", type(p));
202		break;
203	}
204}
205
206void freetr(Node *p)	/* free parse tree */
207{
208	switch (type(p)) {
209	LEAF
210		xfree(p);
211		break;
212	UNARY
213		freetr(left(p));
214		xfree(p);
215		break;
216	case CAT:
217	case OR:
218		freetr(left(p));
219		freetr(right(p));
220		xfree(p);
221		break;
222	default:	/* can't happen */
223		FATAL("can't happen: unknown type %d in freetr", type(p));
224		break;
225	}
226}
227
228/* in the parsing of regular expressions, metacharacters like . have */
229/* to be seen literally;  \056 is not a metacharacter. */
230
231int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
232{			/* only pick up one 8-bit byte (2 chars) */
233	uschar *p;
234	int n = 0;
235	int i;
236
237	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238		if (isdigit(*p))
239			n = 16 * n + *p - '0';
240		else if (*p >= 'a' && *p <= 'f')
241			n = 16 * n + *p - 'a' + 10;
242		else if (*p >= 'A' && *p <= 'F')
243			n = 16 * n + *p - 'A' + 10;
244	}
245	*pp = (char *) p;
246	return n;
247}
248
249#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
250
251int quoted(char **pp)	/* pick up next thing after a \\ */
252			/* and increment *pp */
253{
254	char *p = *pp;
255	int c;
256
257	if ((c = *p++) == 't')
258		c = '\t';
259	else if (c == 'n')
260		c = '\n';
261	else if (c == 'f')
262		c = '\f';
263	else if (c == 'r')
264		c = '\r';
265	else if (c == 'b')
266		c = '\b';
267	else if (c == '\\')
268		c = '\\';
269	else if (c == 'x') {	/* hexadecimal goo follows */
270		c = hexstr(&p);	/* this adds a null if number is invalid */
271	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
272		int n = c - '0';
273		if (isoctdigit(*p)) {
274			n = 8 * n + *p++ - '0';
275			if (isoctdigit(*p))
276				n = 8 * n + *p++ - '0';
277		}
278		c = n;
279	} /* else */
280		/* c = c; */
281	*pp = p;
282	return c;
283}
284
285char *cclenter(const char *argp)	/* add a character class */
286{
287	int i, c, c2;
288	uschar *p = (uschar *) argp;
289	uschar *op, *bp;
290	static uschar *buf = 0;
291	static int bufsz = 100;
292
293	op = p;
294	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
295		FATAL("out of space for character class [%.10s...] 1", p);
296	bp = buf;
297	for (i = 0; (c = *p++) != 0; ) {
298		if (c == '\\') {
299			c = quoted((char **) &p);
300		} else if (c == '-' && i > 0 && bp[-1] != 0) {
301			if (*p != 0) {
302				c = bp[-1];
303				c2 = *p++;
304				if (c2 == '\\')
305					c2 = quoted((char **) &p);
306				if (c > c2) {	/* empty; ignore */
307					bp--;
308					i--;
309					continue;
310				}
311				while (c < c2) {
312					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
313						FATAL("out of space for character class [%.10s...] 2", p);
314					*bp++ = ++c;
315					i++;
316				}
317				continue;
318			}
319		}
320		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
321			FATAL("out of space for character class [%.10s...] 3", p);
322		*bp++ = c;
323		i++;
324	}
325	*bp = 0;
326	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327	xfree(op);
328	return (char *) tostring((char *) buf);
329}
330
331void overflo(const char *s)
332{
333	FATAL("regular expression too big: %.30s...", s);
334}
335
336void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
337{
338	int i;
339	int *p;
340
341	switch (type(v)) {
342	LEAF
343		f->re[info(v)].ltype = type(v);
344		f->re[info(v)].lval.np = right(v);
345		while (f->accept >= maxsetvec) {	/* guessing here! */
346			maxsetvec *= 4;
347			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349			if (setvec == 0 || tmpset == 0)
350				overflo("out of space in cfoll()");
351		}
352		for (i = 0; i <= f->accept; i++)
353			setvec[i] = 0;
354		setcnt = 0;
355		follow(v);	/* computes setvec and setcnt */
356		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
357			overflo("out of space building follow set");
358		f->re[info(v)].lfollow = p;
359		*p = setcnt;
360		for (i = f->accept; i >= 0; i--)
361			if (setvec[i] == 1)
362				*++p = i;
363		break;
364	UNARY
365		cfoll(f,left(v));
366		break;
367	case CAT:
368	case OR:
369		cfoll(f,left(v));
370		cfoll(f,right(v));
371		break;
372	default:	/* can't happen */
373		FATAL("can't happen: unknown type %d in cfoll", type(v));
374	}
375}
376
377int first(Node *p)	/* collects initially active leaves of p into setvec */
378			/* returns 1 if p matches empty string */
379{
380	int b, lp;
381
382	switch (type(p)) {
383	LEAF
384		lp = info(p);	/* look for high-water mark of subscripts */
385		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
386			maxsetvec *= 4;
387			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389			if (setvec == 0 || tmpset == 0)
390				overflo("out of space in first()");
391		}
392		if (setvec[lp] != 1) {
393			setvec[lp] = 1;
394			setcnt++;
395		}
396		if (type(p) == CCL && (*(char *) right(p)) == '\0')
397			return(0);		/* empty CCL */
398		else return(1);
399	case PLUS:
400		if (first(left(p)) == 0) return(0);
401		return(1);
402	case STAR:
403	case QUEST:
404		first(left(p));
405		return(0);
406	case CAT:
407		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408		return(1);
409	case OR:
410		b = first(right(p));
411		if (first(left(p)) == 0 || b == 0) return(0);
412		return(1);
413	}
414	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
415	return(-1);
416}
417
418void follow(Node *v)	/* collects leaves that can follow v into setvec */
419{
420	Node *p;
421
422	if (type(v) == FINAL)
423		return;
424	p = parent(v);
425	switch (type(p)) {
426	case STAR:
427	case PLUS:
428		first(v);
429		follow(p);
430		return;
431
432	case OR:
433	case QUEST:
434		follow(p);
435		return;
436
437	case CAT:
438		if (v == left(p)) {	/* v is left child of p */
439			if (first(right(p)) == 0) {
440				follow(p);
441				return;
442			}
443		} else		/* v is right child */
444			follow(p);
445		return;
446	}
447}
448
449int member(int c, const char *sarg)	/* is c in s? */
450{
451	uschar *s = (uschar *) sarg;
452
453	while (*s)
454		if (c == *s++)
455			return(1);
456	return(0);
457}
458
459int match(fa *f, const char *p0)	/* shortest match ? */
460{
461	int s, ns;
462	uschar *p = (uschar *) p0;
463
464	s = f->reset ? makeinit(f,0) : f->initstat;
465	if (f->out[s])
466		return(1);
467	do {
468		if ((ns = f->gototab[s][*p]) != 0)
469			s = ns;
470		else
471			s = cgoto(f, s, *p);
472		if (f->out[s])
473			return(1);
474	} while (*p++ != 0);
475	return(0);
476}
477
478int pmatch(fa *f, const char *p0)	/* longest match, for sub */
479{
480	int s, ns;
481	uschar *p = (uschar *) p0;
482	uschar *q;
483	int i, k;
484
485	/* s = f->reset ? makeinit(f,1) : f->initstat; */
486	if (f->reset) {
487		f->initstat = s = makeinit(f,1);
488	} else {
489		s = f->initstat;
490	}
491	patbeg = (char *) p;
492	patlen = -1;
493	do {
494		q = p;
495		do {
496			if (f->out[s])		/* final state */
497				patlen = q-p;
498			if ((ns = f->gototab[s][*q]) != 0)
499				s = ns;
500			else
501				s = cgoto(f, s, *q);
502			if (s == 1) {	/* no transition */
503				if (patlen >= 0) {
504					patbeg = (char *) p;
505					return(1);
506				}
507				else
508					goto nextin;	/* no match */
509			}
510		} while (*q++ != 0);
511		if (f->out[s])
512			patlen = q-p-1;	/* don't count $ */
513		if (patlen >= 0) {
514			patbeg = (char *) p;
515			return(1);
516		}
517	nextin:
518		s = 2;
519		if (f->reset) {
520			for (i = 2; i <= f->curstat; i++)
521				xfree(f->posns[i]);
522			k = *f->posns[0];
523			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
524				overflo("out of space in pmatch");
525			for (i = 0; i <= k; i++)
526				(f->posns[2])[i] = (f->posns[0])[i];
527			f->initstat = f->curstat = 2;
528			f->out[2] = f->out[0];
529			for (i = 0; i < NCHARS; i++)
530				f->gototab[2][i] = 0;
531		}
532	} while (*p++ != 0);
533	return (0);
534}
535
536int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
537{
538	int s, ns;
539	uschar *p = (uschar *) p0;
540	uschar *q;
541	int i, k;
542
543	/* s = f->reset ? makeinit(f,1) : f->initstat; */
544	if (f->reset) {
545		f->initstat = s = makeinit(f,1);
546	} else {
547		s = f->initstat;
548	}
549	patlen = -1;
550	while (*p) {
551		q = p;
552		do {
553			if (f->out[s])		/* final state */
554				patlen = q-p;
555			if ((ns = f->gototab[s][*q]) != 0)
556				s = ns;
557			else
558				s = cgoto(f, s, *q);
559			if (s == 1) {	/* no transition */
560				if (patlen > 0) {
561					patbeg = (char *) p;
562					return(1);
563				} else
564					goto nnextin;	/* no nonempty match */
565			}
566		} while (*q++ != 0);
567		if (f->out[s])
568			patlen = q-p-1;	/* don't count $ */
569		if (patlen > 0 ) {
570			patbeg = (char *) p;
571			return(1);
572		}
573	nnextin:
574		s = 2;
575		if (f->reset) {
576			for (i = 2; i <= f->curstat; i++)
577				xfree(f->posns[i]);
578			k = *f->posns[0];
579			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
580				overflo("out of state space");
581			for (i = 0; i <= k; i++)
582				(f->posns[2])[i] = (f->posns[0])[i];
583			f->initstat = f->curstat = 2;
584			f->out[2] = f->out[0];
585			for (i = 0; i < NCHARS; i++)
586				f->gototab[2][i] = 0;
587		}
588		p++;
589	}
590	return (0);
591}
592
593Node *reparse(const char *p)	/* parses regular expression pointed to by p */
594{			/* uses relex() to scan regular expression */
595	Node *np;
596
597	dprintf( ("reparse <%s>\n", p) );
598	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
599	rtok = relex();
600	/* GNU compatibility: an empty regexp matches anything */
601	if (rtok == '\0')
602		/* FATAL("empty regular expression"); previous */
603		return(op2(ALL, NIL, NIL));
604	np = regexp();
605	if (rtok != '\0')
606		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
607	return(np);
608}
609
610Node *regexp(void)	/* top-level parse of reg expr */
611{
612	return (alt(concat(primary())));
613}
614
615Node *primary(void)
616{
617	Node *np;
618
619	switch (rtok) {
620	case CHAR:
621		np = op2(CHAR, NIL, itonp(rlxval));
622		rtok = relex();
623		return (unary(np));
624	case ALL:
625		rtok = relex();
626		return (unary(op2(ALL, NIL, NIL)));
627	case DOT:
628		rtok = relex();
629		return (unary(op2(DOT, NIL, NIL)));
630	case CCL:
631		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
632		rtok = relex();
633		return (unary(np));
634	case NCCL:
635		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
636		rtok = relex();
637		return (unary(np));
638	case '^':
639		rtok = relex();
640		return (unary(op2(CHAR, NIL, itonp(HAT))));
641	case '$':
642		rtok = relex();
643		return (unary(op2(CHAR, NIL, NIL)));
644	case '(':
645		rtok = relex();
646		if (rtok == ')') {	/* special pleading for () */
647			rtok = relex();
648			return unary(op2(CCL, NIL, (Node *) tostring("")));
649		}
650		np = regexp();
651		if (rtok == ')') {
652			rtok = relex();
653			return (unary(np));
654		}
655		else
656			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
657	default:
658		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
659	}
660	return 0;	/*NOTREACHED*/
661}
662
663Node *concat(Node *np)
664{
665	switch (rtok) {
666	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
667		return (concat(op2(CAT, np, primary())));
668	}
669	return (np);
670}
671
672Node *alt(Node *np)
673{
674	if (rtok == OR) {
675		rtok = relex();
676		return (alt(op2(OR, np, concat(primary()))));
677	}
678	return (np);
679}
680
681Node *unary(Node *np)
682{
683	switch (rtok) {
684	case STAR:
685		rtok = relex();
686		return (unary(op2(STAR, np, NIL)));
687	case PLUS:
688		rtok = relex();
689		return (unary(op2(PLUS, np, NIL)));
690	case QUEST:
691		rtok = relex();
692		return (unary(op2(QUEST, np, NIL)));
693	default:
694		return (np);
695	}
696}
697
698/*
699 * Character class definitions conformant to the POSIX locale as
700 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
701 * and operating character sets are both ASCII (ISO646) or supersets
702 * thereof.
703 *
704 * Note that to avoid overflowing the temporary buffer used in
705 * relex(), the expanded character class (prior to range expansion)
706 * must be less than twice the size of their full name.
707 */
708
709/* Because isblank doesn't show up in any of the header files on any
710 * system i use, it's defined here.  if some other locale has a richer
711 * definition of "blank", define HAS_ISBLANK and provide your own
712 * version.
713 * the parentheses here are an attempt to find a path through the maze
714 * of macro definition and/or function and/or version provided.  thanks
715 * to nelson beebe for the suggestion; let's see if it works everywhere.
716 */
717
718#ifndef HAS_ISBLANK
719
720int (isblank)(int c)
721{
722	return c==' ' || c=='\t';
723}
724
725#endif
726
727struct charclass {
728	const char *cc_name;
729	int cc_namelen;
730	int (*cc_func)(int);
731} charclasses[] = {
732	{ "alnum",	5,	isalnum },
733	{ "alpha",	5,	isalpha },
734	{ "blank",	5,	isblank },
735	{ "cntrl",	5,	iscntrl },
736	{ "digit",	5,	isdigit },
737	{ "graph",	5,	isgraph },
738	{ "lower",	5,	islower },
739	{ "print",	5,	isprint },
740	{ "punct",	5,	ispunct },
741	{ "space",	5,	isspace },
742	{ "upper",	5,	isupper },
743	{ "xdigit",	6,	isxdigit },
744	{ NULL,		0,	NULL },
745};
746
747
748int relex(void)		/* lexical analyzer for reparse */
749{
750	int c, n;
751	int cflag;
752	static uschar *buf = 0;
753	static int bufsz = 100;
754	uschar *bp;
755	struct charclass *cc;
756	int i;
757
758	switch (c = *prestr++) {
759	case '|': return OR;
760	case '*': return STAR;
761	case '+': return PLUS;
762	case '?': return QUEST;
763	case '.': return DOT;
764	case '\0': prestr--; return '\0';
765	case '^':
766	case '$':
767	case '(':
768	case ')':
769		return c;
770	case '\\':
771		rlxval = quoted((char **) &prestr);
772		return CHAR;
773	default:
774		rlxval = c;
775		return CHAR;
776	case '[':
777		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
778			FATAL("out of space in reg expr %.10s..", lastre);
779		bp = buf;
780		if (*prestr == '^') {
781			cflag = 1;
782			prestr++;
783		}
784		else
785			cflag = 0;
786		n = 2 * strlen((const char *) prestr)+1;
787		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
788			FATAL("out of space for reg expr %.10s...", lastre);
789		for (; ; ) {
790			if ((c = *prestr++) == '\\') {
791				*bp++ = '\\';
792				if ((c = *prestr++) == '\0')
793					FATAL("nonterminated character class %.20s...", lastre);
794				*bp++ = c;
795			/* } else if (c == '\n') { */
796			/* 	FATAL("newline in character class %.20s...", lastre); */
797			} else if (c == '[' && *prestr == ':') {
798				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
799				for (cc = charclasses; cc->cc_name; cc++)
800					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
801						break;
802				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
803				    prestr[2 + cc->cc_namelen] == ']') {
804					prestr += cc->cc_namelen + 3;
805					for (i = 0; i < NCHARS; i++) {
806						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
807						    FATAL("out of space for reg expr %.10s...", lastre);
808						if (cc->cc_func(i)) {
809							*bp++ = i;
810							n++;
811						}
812					}
813				} else
814					*bp++ = c;
815			} else if (c == '\0') {
816				FATAL("nonterminated character class %.20s", lastre);
817			} else if (bp == buf) {	/* 1st char is special */
818				*bp++ = c;
819			} else if (c == ']') {
820				*bp++ = 0;
821				rlxstr = (uschar *) tostring((char *) buf);
822				if (cflag == 0)
823					return CCL;
824				else
825					return NCCL;
826			} else
827				*bp++ = c;
828		}
829	}
830}
831
832int cgoto(fa *f, int s, int c)
833{
834	int i, j, k;
835	int *p, *q;
836
837	while (f->accept >= maxsetvec) {	/* guessing here! */
838		maxsetvec *= 4;
839		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
840		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
841		if (setvec == 0 || tmpset == 0)
842			overflo("out of space in cgoto()");
843	}
844	for (i = 0; i <= f->accept; i++)
845		setvec[i] = 0;
846	setcnt = 0;
847	/* compute positions of gototab[s,c] into setvec */
848	p = f->posns[s];
849	for (i = 1; i <= *p; i++) {
850		if ((k = f->re[p[i]].ltype) != FINAL) {
851			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
852			 || (k == DOT && c != 0 && c != HAT)
853			 || (k == ALL && c != 0)
854			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
855			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
856				q = f->re[p[i]].lfollow;
857				for (j = 1; j <= *q; j++) {
858					if (q[j] >= maxsetvec) {
859						maxsetvec *= 4;
860						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
861						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
862						if (setvec == 0 || tmpset == 0)
863							overflo("cgoto overflow");
864					}
865					if (setvec[q[j]] == 0) {
866						setcnt++;
867						setvec[q[j]] = 1;
868					}
869				}
870			}
871		}
872	}
873	/* determine if setvec is a previous state */
874	tmpset[0] = setcnt;
875	j = 1;
876	for (i = f->accept; i >= 0; i--)
877		if (setvec[i]) {
878			tmpset[j++] = i;
879		}
880	/* tmpset == previous state? */
881	for (i = 1; i <= f->curstat; i++) {
882		p = f->posns[i];
883		if ((k = tmpset[0]) != p[0])
884			goto different;
885		for (j = 1; j <= k; j++)
886			if (tmpset[j] != p[j])
887				goto different;
888		/* setvec is state i */
889		f->gototab[s][c] = i;
890		return i;
891	  different:;
892	}
893
894	/* add tmpset to current set of states */
895	if (f->curstat >= NSTATES-1) {
896		f->curstat = 2;
897		f->reset = 1;
898		for (i = 2; i < NSTATES; i++)
899			xfree(f->posns[i]);
900	} else
901		++(f->curstat);
902	for (i = 0; i < NCHARS; i++)
903		f->gototab[f->curstat][i] = 0;
904	xfree(f->posns[f->curstat]);
905	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
906		overflo("out of space in cgoto");
907
908	f->posns[f->curstat] = p;
909	f->gototab[s][c] = f->curstat;
910	for (i = 0; i <= setcnt; i++)
911		p[i] = tmpset[i];
912	if (setvec[f->accept])
913		f->out[f->curstat] = 1;
914	else
915		f->out[f->curstat] = 0;
916	return f->curstat;
917}
918
919
920void freefa(fa *f)	/* free a finite automaton */
921{
922	int i;
923
924	if (f == NULL)
925		return;
926	for (i = 0; i <= f->curstat; i++)
927		xfree(f->posns[i]);
928	for (i = 0; i <= f->accept; i++) {
929		xfree(f->re[i].lfollow);
930		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
931			xfree((f->re[i].lval.np));
932	}
933	xfree(f->restr);
934	xfree(f);
935}
936