b.c revision 170331
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25/* lasciate ogne speranza, voi ch'intrate. */
26
27#define	DEBUG
28
29#include <ctype.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include "awk.h"
34#include "ytab.h"
35
36#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
37				/* NCHARS is 2**n */
38#define MAXLIN 22
39
40#define type(v)		(v)->nobj	/* badly overloaded here */
41#define info(v)		(v)->ntype	/* badly overloaded here */
42#define left(v)		(v)->narg[0]
43#define right(v)	(v)->narg[1]
44#define parent(v)	(v)->nnext
45
46#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47#define ELEAF	case EMPTYRE:		/* empty string in regexp */
48#define UNARY	case STAR: case PLUS: case QUEST:
49
50/* encoding in tree Nodes:
51	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
52		left is index, right contains value or pointer to value
53	unary (STAR, PLUS, QUEST): left is child, right is null
54	binary (CAT, OR): left and right are children
55	parent contains pointer to parent
56*/
57
58
59int	*setvec;
60int	*tmpset;
61int	maxsetvec = 0;
62
63int	rtok;		/* next token in current re */
64int	rlxval;
65static uschar	*rlxstr;
66static uschar	*prestr;	/* current position in current re */
67static uschar	*lastre;	/* origin of last re */
68
69static	int setcnt;
70static	int poscnt;
71
72char	*patbeg;
73int	patlen;
74
75#define	NFA	20	/* cache this many dynamic fa's */
76fa	*fatab[NFA];
77int	nfatab	= 0;	/* entries in fatab */
78
79fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
80{
81	int i, use, nuse;
82	fa *pfa;
83	static int now = 1;
84
85	if (setvec == 0) {	/* first time through any RE */
86		maxsetvec = MAXLIN;
87		setvec = (int *) malloc(maxsetvec * sizeof(int));
88		tmpset = (int *) malloc(maxsetvec * sizeof(int));
89		if (setvec == 0 || tmpset == 0)
90			overflo("out of space initializing makedfa");
91	}
92
93	if (compile_time)	/* a constant for sure */
94		return mkdfa(s, anchor);
95	for (i = 0; i < nfatab; i++)	/* is it there already? */
96		if (fatab[i]->anchor == anchor
97		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
98			fatab[i]->use = now++;
99			return fatab[i];
100		}
101	pfa = mkdfa(s, anchor);
102	if (nfatab < NFA) {	/* room for another */
103		fatab[nfatab] = pfa;
104		fatab[nfatab]->use = now++;
105		nfatab++;
106		return pfa;
107	}
108	use = fatab[0]->use;	/* replace least-recently used */
109	nuse = 0;
110	for (i = 1; i < nfatab; i++)
111		if (fatab[i]->use < use) {
112			use = fatab[i]->use;
113			nuse = i;
114		}
115	freefa(fatab[nuse]);
116	fatab[nuse] = pfa;
117	pfa->use = now++;
118	return pfa;
119}
120
121fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
122				/* anchor = 1 for anchored matches, else 0 */
123{
124	Node *p, *p1;
125	fa *f;
126
127	p = reparse(s);
128	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129		/* put ALL STAR in front of reg.  exp. */
130	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131		/* put FINAL after reg.  exp. */
132
133	poscnt = 0;
134	penter(p1);	/* enter parent pointers and leaf indices */
135	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136		overflo("out of space for fa");
137	f->accept = poscnt-1;	/* penter has computed number of positions in re */
138	cfoll(f, p1);	/* set up follow sets */
139	freetr(p1);
140	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
141			overflo("out of space in makedfa");
142	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
143		overflo("out of space in makedfa");
144	*f->posns[1] = 0;
145	f->initstat = makeinit(f, anchor);
146	f->anchor = anchor;
147	f->restr = (uschar *) tostring(s);
148	return f;
149}
150
151int makeinit(fa *f, int anchor)
152{
153	int i, k;
154
155	f->curstat = 2;
156	f->out[2] = 0;
157	f->reset = 0;
158	k = *(f->re[0].lfollow);
159	xfree(f->posns[2]);
160	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
161		overflo("out of space in makeinit");
162	for (i=0; i <= k; i++) {
163		(f->posns[2])[i] = (f->re[0].lfollow)[i];
164	}
165	if ((f->posns[2])[1] == f->accept)
166		f->out[2] = 1;
167	for (i=0; i < NCHARS; i++)
168		f->gototab[2][i] = 0;
169	f->curstat = cgoto(f, 2, HAT);
170	if (anchor) {
171		*f->posns[2] = k-1;	/* leave out position 0 */
172		for (i=0; i < k; i++) {
173			(f->posns[0])[i] = (f->posns[2])[i];
174		}
175
176		f->out[0] = f->out[2];
177		if (f->curstat != 2)
178			--(*f->posns[f->curstat]);
179	}
180	return f->curstat;
181}
182
183void penter(Node *p)	/* set up parent pointers and leaf indices */
184{
185	switch (type(p)) {
186	ELEAF
187	LEAF
188		info(p) = poscnt;
189		poscnt++;
190		break;
191	UNARY
192		penter(left(p));
193		parent(left(p)) = p;
194		break;
195	case CAT:
196	case OR:
197		penter(left(p));
198		penter(right(p));
199		parent(left(p)) = p;
200		parent(right(p)) = p;
201		break;
202	default:	/* can't happen */
203		FATAL("can't happen: unknown type %d in penter", type(p));
204		break;
205	}
206}
207
208void freetr(Node *p)	/* free parse tree */
209{
210	switch (type(p)) {
211	ELEAF
212	LEAF
213		xfree(p);
214		break;
215	UNARY
216		freetr(left(p));
217		xfree(p);
218		break;
219	case CAT:
220	case OR:
221		freetr(left(p));
222		freetr(right(p));
223		xfree(p);
224		break;
225	default:	/* can't happen */
226		FATAL("can't happen: unknown type %d in freetr", type(p));
227		break;
228	}
229}
230
231/* in the parsing of regular expressions, metacharacters like . have */
232/* to be seen literally;  \056 is not a metacharacter. */
233
234int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
235{			/* only pick up one 8-bit byte (2 chars) */
236	uschar *p;
237	int n = 0;
238	int i;
239
240	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
241		if (isdigit(*p))
242			n = 16 * n + *p - '0';
243		else if (*p >= 'a' && *p <= 'f')
244			n = 16 * n + *p - 'a' + 10;
245		else if (*p >= 'A' && *p <= 'F')
246			n = 16 * n + *p - 'A' + 10;
247	}
248	*pp = (char *) p;
249	return n;
250}
251
252#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
253
254int quoted(char **pp)	/* pick up next thing after a \\ */
255			/* and increment *pp */
256{
257	char *p = *pp;
258	int c;
259
260	if ((c = *p++) == 't')
261		c = '\t';
262	else if (c == 'n')
263		c = '\n';
264	else if (c == 'f')
265		c = '\f';
266	else if (c == 'r')
267		c = '\r';
268	else if (c == 'b')
269		c = '\b';
270	else if (c == '\\')
271		c = '\\';
272	else if (c == 'x') {	/* hexadecimal goo follows */
273		c = hexstr(&p);	/* this adds a null if number is invalid */
274	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
275		int n = c - '0';
276		if (isoctdigit(*p)) {
277			n = 8 * n + *p++ - '0';
278			if (isoctdigit(*p))
279				n = 8 * n + *p++ - '0';
280		}
281		c = n;
282	} /* else */
283		/* c = c; */
284	*pp = p;
285	return c;
286}
287
288char *cclenter(const char *argp)	/* add a character class */
289{
290	int i, c, c2;
291	uschar *p = (uschar *) argp;
292	uschar *op, *bp;
293	static uschar *buf = 0;
294	static int bufsz = 100;
295
296	op = p;
297	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
298		FATAL("out of space for character class [%.10s...] 1", p);
299	bp = buf;
300	for (i = 0; (c = *p++) != 0; ) {
301		if (c == '\\') {
302			c = quoted((char **) &p);
303		} else if (c == '-' && i > 0 && bp[-1] != 0) {
304			if (*p != 0) {
305				c = bp[-1];
306				c2 = *p++;
307				if (c2 == '\\')
308					c2 = quoted((char **) &p);
309				if (c > c2) {	/* empty; ignore */
310					bp--;
311					i--;
312					continue;
313				}
314				while (c < c2) {
315					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
316						FATAL("out of space for character class [%.10s...] 2", p);
317					*bp++ = ++c;
318					i++;
319				}
320				continue;
321			}
322		}
323		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
324			FATAL("out of space for character class [%.10s...] 3", p);
325		*bp++ = c;
326		i++;
327	}
328	*bp = 0;
329	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
330	xfree(op);
331	return (char *) tostring((char *) buf);
332}
333
334void overflo(const char *s)
335{
336	FATAL("regular expression too big: %.30s...", s);
337}
338
339void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
340{
341	int i;
342	int *p;
343
344	switch (type(v)) {
345	ELEAF
346	LEAF
347		f->re[info(v)].ltype = type(v);
348		f->re[info(v)].lval.np = right(v);
349		while (f->accept >= maxsetvec) {	/* guessing here! */
350			maxsetvec *= 4;
351			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
352			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
353			if (setvec == 0 || tmpset == 0)
354				overflo("out of space in cfoll()");
355		}
356		for (i = 0; i <= f->accept; i++)
357			setvec[i] = 0;
358		setcnt = 0;
359		follow(v);	/* computes setvec and setcnt */
360		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
361			overflo("out of space building follow set");
362		f->re[info(v)].lfollow = p;
363		*p = setcnt;
364		for (i = f->accept; i >= 0; i--)
365			if (setvec[i] == 1)
366				*++p = i;
367		break;
368	UNARY
369		cfoll(f,left(v));
370		break;
371	case CAT:
372	case OR:
373		cfoll(f,left(v));
374		cfoll(f,right(v));
375		break;
376	default:	/* can't happen */
377		FATAL("can't happen: unknown type %d in cfoll", type(v));
378	}
379}
380
381int first(Node *p)	/* collects initially active leaves of p into setvec */
382			/* returns 0 if p matches empty string */
383{
384	int b, lp;
385
386	switch (type(p)) {
387	ELEAF
388	LEAF
389		lp = info(p);	/* look for high-water mark of subscripts */
390		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
391			maxsetvec *= 4;
392			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
393			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
394			if (setvec == 0 || tmpset == 0)
395				overflo("out of space in first()");
396		}
397		if (type(p) == EMPTYRE) {
398			setvec[lp] = 0;
399			return(0);
400		}
401		if (setvec[lp] != 1) {
402			setvec[lp] = 1;
403			setcnt++;
404		}
405		if (type(p) == CCL && (*(char *) right(p)) == '\0')
406			return(0);		/* empty CCL */
407		else return(1);
408	case PLUS:
409		if (first(left(p)) == 0) return(0);
410		return(1);
411	case STAR:
412	case QUEST:
413		first(left(p));
414		return(0);
415	case CAT:
416		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
417		return(1);
418	case OR:
419		b = first(right(p));
420		if (first(left(p)) == 0 || b == 0) return(0);
421		return(1);
422	}
423	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
424	return(-1);
425}
426
427void follow(Node *v)	/* collects leaves that can follow v into setvec */
428{
429	Node *p;
430
431	if (type(v) == FINAL)
432		return;
433	p = parent(v);
434	switch (type(p)) {
435	case STAR:
436	case PLUS:
437		first(v);
438		follow(p);
439		return;
440
441	case OR:
442	case QUEST:
443		follow(p);
444		return;
445
446	case CAT:
447		if (v == left(p)) {	/* v is left child of p */
448			if (first(right(p)) == 0) {
449				follow(p);
450				return;
451			}
452		} else		/* v is right child */
453			follow(p);
454		return;
455	}
456}
457
458int member(int c, const char *sarg)	/* is c in s? */
459{
460	uschar *s = (uschar *) sarg;
461
462	while (*s)
463		if (c == *s++)
464			return(1);
465	return(0);
466}
467
468int match(fa *f, const char *p0)	/* shortest match ? */
469{
470	int s, ns;
471	uschar *p = (uschar *) p0;
472
473	s = f->reset ? makeinit(f,0) : f->initstat;
474	if (f->out[s])
475		return(1);
476	do {
477		/* assert(*p < NCHARS); */
478		if ((ns = f->gototab[s][*p]) != 0)
479			s = ns;
480		else
481			s = cgoto(f, s, *p);
482		if (f->out[s])
483			return(1);
484	} while (*p++ != 0);
485	return(0);
486}
487
488int pmatch(fa *f, const char *p0)	/* longest match, for sub */
489{
490	int s, ns;
491	uschar *p = (uschar *) p0;
492	uschar *q;
493	int i, k;
494
495	/* s = f->reset ? makeinit(f,1) : f->initstat; */
496	if (f->reset) {
497		f->initstat = s = makeinit(f,1);
498	} else {
499		s = f->initstat;
500	}
501	patbeg = (char *) p;
502	patlen = -1;
503	do {
504		q = p;
505		do {
506			if (f->out[s])		/* final state */
507				patlen = q-p;
508			/* assert(*q < NCHARS); */
509			if ((ns = f->gototab[s][*q]) != 0)
510				s = ns;
511			else
512				s = cgoto(f, s, *q);
513			if (s == 1) {	/* no transition */
514				if (patlen >= 0) {
515					patbeg = (char *) p;
516					return(1);
517				}
518				else
519					goto nextin;	/* no match */
520			}
521		} while (*q++ != 0);
522		if (f->out[s])
523			patlen = q-p-1;	/* don't count $ */
524		if (patlen >= 0) {
525			patbeg = (char *) p;
526			return(1);
527		}
528	nextin:
529		s = 2;
530		if (f->reset) {
531			for (i = 2; i <= f->curstat; i++)
532				xfree(f->posns[i]);
533			k = *f->posns[0];
534			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
535				overflo("out of space in pmatch");
536			for (i = 0; i <= k; i++)
537				(f->posns[2])[i] = (f->posns[0])[i];
538			f->initstat = f->curstat = 2;
539			f->out[2] = f->out[0];
540			for (i = 0; i < NCHARS; i++)
541				f->gototab[2][i] = 0;
542		}
543	} while (*p++ != 0);
544	return (0);
545}
546
547int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
548{
549	int s, ns;
550	uschar *p = (uschar *) p0;
551	uschar *q;
552	int i, k;
553
554	/* s = f->reset ? makeinit(f,1) : f->initstat; */
555	if (f->reset) {
556		f->initstat = s = makeinit(f,1);
557	} else {
558		s = f->initstat;
559	}
560	patlen = -1;
561	while (*p) {
562		q = p;
563		do {
564			if (f->out[s])		/* final state */
565				patlen = q-p;
566			/* assert(*q < NCHARS); */
567			if ((ns = f->gototab[s][*q]) != 0)
568				s = ns;
569			else
570				s = cgoto(f, s, *q);
571			if (s == 1) {	/* no transition */
572				if (patlen > 0) {
573					patbeg = (char *) p;
574					return(1);
575				} else
576					goto nnextin;	/* no nonempty match */
577			}
578		} while (*q++ != 0);
579		if (f->out[s])
580			patlen = q-p-1;	/* don't count $ */
581		if (patlen > 0 ) {
582			patbeg = (char *) p;
583			return(1);
584		}
585	nnextin:
586		s = 2;
587		if (f->reset) {
588			for (i = 2; i <= f->curstat; i++)
589				xfree(f->posns[i]);
590			k = *f->posns[0];
591			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
592				overflo("out of state space");
593			for (i = 0; i <= k; i++)
594				(f->posns[2])[i] = (f->posns[0])[i];
595			f->initstat = f->curstat = 2;
596			f->out[2] = f->out[0];
597			for (i = 0; i < NCHARS; i++)
598				f->gototab[2][i] = 0;
599		}
600		p++;
601	}
602	return (0);
603}
604
605Node *reparse(const char *p)	/* parses regular expression pointed to by p */
606{			/* uses relex() to scan regular expression */
607	Node *np;
608
609	dprintf( ("reparse <%s>\n", p) );
610	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
611	rtok = relex();
612	/* GNU compatibility: an empty regexp matches anything */
613	if (rtok == '\0') {
614		/* FATAL("empty regular expression"); previous */
615		return(op2(EMPTYRE, NIL, NIL));
616	}
617	np = regexp();
618	if (rtok != '\0')
619		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
620	return(np);
621}
622
623Node *regexp(void)	/* top-level parse of reg expr */
624{
625	return (alt(concat(primary())));
626}
627
628Node *primary(void)
629{
630	Node *np;
631
632	switch (rtok) {
633	case CHAR:
634		np = op2(CHAR, NIL, itonp(rlxval));
635		rtok = relex();
636		return (unary(np));
637	case ALL:
638		rtok = relex();
639		return (unary(op2(ALL, NIL, NIL)));
640	case EMPTYRE:
641		rtok = relex();
642		return (unary(op2(ALL, NIL, NIL)));
643	case DOT:
644		rtok = relex();
645		return (unary(op2(DOT, NIL, NIL)));
646	case CCL:
647		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
648		rtok = relex();
649		return (unary(np));
650	case NCCL:
651		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
652		rtok = relex();
653		return (unary(np));
654	case '^':
655		rtok = relex();
656		return (unary(op2(CHAR, NIL, itonp(HAT))));
657	case '$':
658		rtok = relex();
659		return (unary(op2(CHAR, NIL, NIL)));
660	case '(':
661		rtok = relex();
662		if (rtok == ')') {	/* special pleading for () */
663			rtok = relex();
664			return unary(op2(CCL, NIL, (Node *) tostring("")));
665		}
666		np = regexp();
667		if (rtok == ')') {
668			rtok = relex();
669			return (unary(np));
670		}
671		else
672			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
673	default:
674		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
675	}
676	return 0;	/*NOTREACHED*/
677}
678
679Node *concat(Node *np)
680{
681	switch (rtok) {
682	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
683		return (concat(op2(CAT, np, primary())));
684	}
685	return (np);
686}
687
688Node *alt(Node *np)
689{
690	if (rtok == OR) {
691		rtok = relex();
692		return (alt(op2(OR, np, concat(primary()))));
693	}
694	return (np);
695}
696
697Node *unary(Node *np)
698{
699	switch (rtok) {
700	case STAR:
701		rtok = relex();
702		return (unary(op2(STAR, np, NIL)));
703	case PLUS:
704		rtok = relex();
705		return (unary(op2(PLUS, np, NIL)));
706	case QUEST:
707		rtok = relex();
708		return (unary(op2(QUEST, np, NIL)));
709	default:
710		return (np);
711	}
712}
713
714/*
715 * Character class definitions conformant to the POSIX locale as
716 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
717 * and operating character sets are both ASCII (ISO646) or supersets
718 * thereof.
719 *
720 * Note that to avoid overflowing the temporary buffer used in
721 * relex(), the expanded character class (prior to range expansion)
722 * must be less than twice the size of their full name.
723 */
724
725/* Because isblank doesn't show up in any of the header files on any
726 * system i use, it's defined here.  if some other locale has a richer
727 * definition of "blank", define HAS_ISBLANK and provide your own
728 * version.
729 * the parentheses here are an attempt to find a path through the maze
730 * of macro definition and/or function and/or version provided.  thanks
731 * to nelson beebe for the suggestion; let's see if it works everywhere.
732 */
733
734#ifndef HAS_ISBLANK
735
736int (isblank)(int c)
737{
738	return c==' ' || c=='\t';
739}
740
741#endif
742
743struct charclass {
744	const char *cc_name;
745	int cc_namelen;
746	int (*cc_func)(int);
747} charclasses[] = {
748	{ "alnum",	5,	isalnum },
749	{ "alpha",	5,	isalpha },
750	{ "blank",	5,	isblank },
751	{ "cntrl",	5,	iscntrl },
752	{ "digit",	5,	isdigit },
753	{ "graph",	5,	isgraph },
754	{ "lower",	5,	islower },
755	{ "print",	5,	isprint },
756	{ "punct",	5,	ispunct },
757	{ "space",	5,	isspace },
758	{ "upper",	5,	isupper },
759	{ "xdigit",	6,	isxdigit },
760	{ NULL,		0,	NULL },
761};
762
763
764int relex(void)		/* lexical analyzer for reparse */
765{
766	int c, n;
767	int cflag;
768	static uschar *buf = 0;
769	static int bufsz = 100;
770	uschar *bp;
771	struct charclass *cc;
772	int i;
773
774	switch (c = *prestr++) {
775	case '|': return OR;
776	case '*': return STAR;
777	case '+': return PLUS;
778	case '?': return QUEST;
779	case '.': return DOT;
780	case '\0': prestr--; return '\0';
781	case '^':
782	case '$':
783	case '(':
784	case ')':
785		return c;
786	case '\\':
787		rlxval = quoted((char **) &prestr);
788		return CHAR;
789	default:
790		rlxval = c;
791		return CHAR;
792	case '[':
793		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
794			FATAL("out of space in reg expr %.10s..", lastre);
795		bp = buf;
796		if (*prestr == '^') {
797			cflag = 1;
798			prestr++;
799		}
800		else
801			cflag = 0;
802		n = 2 * strlen((const char *) prestr)+1;
803		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
804			FATAL("out of space for reg expr %.10s...", lastre);
805		for (; ; ) {
806			if ((c = *prestr++) == '\\') {
807				*bp++ = '\\';
808				if ((c = *prestr++) == '\0')
809					FATAL("nonterminated character class %.20s...", lastre);
810				*bp++ = c;
811			/* } else if (c == '\n') { */
812			/* 	FATAL("newline in character class %.20s...", lastre); */
813			} else if (c == '[' && *prestr == ':') {
814				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
815				for (cc = charclasses; cc->cc_name; cc++)
816					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
817						break;
818				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
819				    prestr[2 + cc->cc_namelen] == ']') {
820					prestr += cc->cc_namelen + 3;
821					for (i = 0; i < NCHARS; i++) {
822						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
823						    FATAL("out of space for reg expr %.10s...", lastre);
824						if (cc->cc_func(i)) {
825							*bp++ = i;
826							n++;
827						}
828					}
829				} else
830					*bp++ = c;
831			} else if (c == '\0') {
832				FATAL("nonterminated character class %.20s", lastre);
833			} else if (bp == buf) {	/* 1st char is special */
834				*bp++ = c;
835			} else if (c == ']') {
836				*bp++ = 0;
837				rlxstr = (uschar *) tostring((char *) buf);
838				if (cflag == 0)
839					return CCL;
840				else
841					return NCCL;
842			} else
843				*bp++ = c;
844		}
845	}
846}
847
848int cgoto(fa *f, int s, int c)
849{
850	int i, j, k;
851	int *p, *q;
852
853	assert(c == HAT || c < NCHARS);
854	while (f->accept >= maxsetvec) {	/* guessing here! */
855		maxsetvec *= 4;
856		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
857		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
858		if (setvec == 0 || tmpset == 0)
859			overflo("out of space in cgoto()");
860	}
861	for (i = 0; i <= f->accept; i++)
862		setvec[i] = 0;
863	setcnt = 0;
864	/* compute positions of gototab[s,c] into setvec */
865	p = f->posns[s];
866	for (i = 1; i <= *p; i++) {
867		if ((k = f->re[p[i]].ltype) != FINAL) {
868			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
869			 || (k == DOT && c != 0 && c != HAT)
870			 || (k == ALL && c != 0)
871			 || (k == EMPTYRE && c != 0)
872			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
873			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
874				q = f->re[p[i]].lfollow;
875				for (j = 1; j <= *q; j++) {
876					if (q[j] >= maxsetvec) {
877						maxsetvec *= 4;
878						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
879						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
880						if (setvec == 0 || tmpset == 0)
881							overflo("cgoto overflow");
882					}
883					if (setvec[q[j]] == 0) {
884						setcnt++;
885						setvec[q[j]] = 1;
886					}
887				}
888			}
889		}
890	}
891	/* determine if setvec is a previous state */
892	tmpset[0] = setcnt;
893	j = 1;
894	for (i = f->accept; i >= 0; i--)
895		if (setvec[i]) {
896			tmpset[j++] = i;
897		}
898	/* tmpset == previous state? */
899	for (i = 1; i <= f->curstat; i++) {
900		p = f->posns[i];
901		if ((k = tmpset[0]) != p[0])
902			goto different;
903		for (j = 1; j <= k; j++)
904			if (tmpset[j] != p[j])
905				goto different;
906		/* setvec is state i */
907		f->gototab[s][c] = i;
908		return i;
909	  different:;
910	}
911
912	/* add tmpset to current set of states */
913	if (f->curstat >= NSTATES-1) {
914		f->curstat = 2;
915		f->reset = 1;
916		for (i = 2; i < NSTATES; i++)
917			xfree(f->posns[i]);
918	} else
919		++(f->curstat);
920	for (i = 0; i < NCHARS; i++)
921		f->gototab[f->curstat][i] = 0;
922	xfree(f->posns[f->curstat]);
923	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
924		overflo("out of space in cgoto");
925
926	f->posns[f->curstat] = p;
927	f->gototab[s][c] = f->curstat;
928	for (i = 0; i <= setcnt; i++)
929		p[i] = tmpset[i];
930	if (setvec[f->accept])
931		f->out[f->curstat] = 1;
932	else
933		f->out[f->curstat] = 0;
934	return f->curstat;
935}
936
937
938void freefa(fa *f)	/* free a finite automaton */
939{
940	int i;
941
942	if (f == NULL)
943		return;
944	for (i = 0; i <= f->curstat; i++)
945		xfree(f->posns[i]);
946	for (i = 0; i <= f->accept; i++) {
947		xfree(f->re[i].lfollow);
948		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
949			xfree((f->re[i].lval.np));
950	}
951	xfree(f->restr);
952	xfree(f);
953}
954