Deleted Added
full compact
b.c (108072) b.c (112336)
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25/* lasciate ogne speranza, voi ch'entrate. */
26
27#define DEBUG
28
29#include <ctype.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include "awk.h"
34#include "ytab.h"
35
36#define HAT (NCHARS-2) /* matches ^ in regular expr */
37 /* NCHARS is 2**n */
38#define MAXLIN 22
39
40#define type(v) (v)->nobj /* badly overloaded here */
41#define info(v) (v)->ntype /* badly overloaded here */
42#define left(v) (v)->narg[0]
43#define right(v) (v)->narg[1]
44#define parent(v) (v)->nnext
45
46#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47#define UNARY case STAR: case PLUS: case QUEST:
48
49/* encoding in tree Nodes:
50 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51 left is index, right contains value or pointer to value
52 unary (STAR, PLUS, QUEST): left is child, right is null
53 binary (CAT, OR): left and right are children
54 parent contains pointer to parent
55*/
56
57
58int *setvec;
59int *tmpset;
60int maxsetvec = 0;
61
62int rtok; /* next token in current re */
63int rlxval;
64static uschar *rlxstr;
65static uschar *prestr; /* current position in current re */
66static uschar *lastre; /* origin of last re */
67
68static int setcnt;
69static int poscnt;
70
71char *patbeg;
72int patlen;
73
74#define NFA 20 /* cache this many dynamic fa's */
75fa *fatab[NFA];
76int nfatab = 0; /* entries in fatab */
77
78fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
79{
80 int i, use, nuse;
81 fa *pfa;
82 static int now = 1;
83
84 if (setvec == 0) { /* first time through any RE */
85 maxsetvec = MAXLIN;
86 setvec = (int *) malloc(maxsetvec * sizeof(int));
87 tmpset = (int *) malloc(maxsetvec * sizeof(int));
88 if (setvec == 0 || tmpset == 0)
89 overflo("out of space initializing makedfa");
90 }
91
92 if (compile_time) /* a constant for sure */
93 return mkdfa(s, anchor);
94 for (i = 0; i < nfatab; i++) /* is it there already? */
95 if (fatab[i]->anchor == anchor
96 && strcmp((const char *) fatab[i]->restr, s) == 0) {
97 fatab[i]->use = now++;
98 return fatab[i];
99 }
100 pfa = mkdfa(s, anchor);
101 if (nfatab < NFA) { /* room for another */
102 fatab[nfatab] = pfa;
103 fatab[nfatab]->use = now++;
104 nfatab++;
105 return pfa;
106 }
107 use = fatab[0]->use; /* replace least-recently used */
108 nuse = 0;
109 for (i = 1; i < nfatab; i++)
110 if (fatab[i]->use < use) {
111 use = fatab[i]->use;
112 nuse = i;
113 }
114 freefa(fatab[nuse]);
115 fatab[nuse] = pfa;
116 pfa->use = now++;
117 return pfa;
118}
119
120fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
121 /* anchor = 1 for anchored matches, else 0 */
122{
123 Node *p, *p1;
124 fa *f;
125
126 p = reparse(s);
127 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128 /* put ALL STAR in front of reg. exp. */
129 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130 /* put FINAL after reg. exp. */
131
132 poscnt = 0;
133 penter(p1); /* enter parent pointers and leaf indices */
134 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135 overflo("out of space for fa");
136 f->accept = poscnt-1; /* penter has computed number of positions in re */
137 cfoll(f, p1); /* set up follow sets */
138 freetr(p1);
139 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140 overflo("out of space in makedfa");
141 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142 overflo("out of space in makedfa");
143 *f->posns[1] = 0;
144 f->initstat = makeinit(f, anchor);
145 f->anchor = anchor;
146 f->restr = (uschar *) tostring(s);
147 return f;
148}
149
150int makeinit(fa *f, int anchor)
151{
152 int i, k;
153
154 f->curstat = 2;
155 f->out[2] = 0;
156 f->reset = 0;
157 k = *(f->re[0].lfollow);
158 xfree(f->posns[2]);
159 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160 overflo("out of space in makeinit");
161 for (i=0; i <= k; i++) {
162 (f->posns[2])[i] = (f->re[0].lfollow)[i];
163 }
164 if ((f->posns[2])[1] == f->accept)
165 f->out[2] = 1;
166 for (i=0; i < NCHARS; i++)
167 f->gototab[2][i] = 0;
168 f->curstat = cgoto(f, 2, HAT);
169 if (anchor) {
170 *f->posns[2] = k-1; /* leave out position 0 */
171 for (i=0; i < k; i++) {
172 (f->posns[0])[i] = (f->posns[2])[i];
173 }
174
175 f->out[0] = f->out[2];
176 if (f->curstat != 2)
177 --(*f->posns[f->curstat]);
178 }
179 return f->curstat;
180}
181
182void penter(Node *p) /* set up parent pointers and leaf indices */
183{
184 switch (type(p)) {
185 LEAF
186 info(p) = poscnt;
187 poscnt++;
188 break;
189 UNARY
190 penter(left(p));
191 parent(left(p)) = p;
192 break;
193 case CAT:
194 case OR:
195 penter(left(p));
196 penter(right(p));
197 parent(left(p)) = p;
198 parent(right(p)) = p;
199 break;
200 default: /* can't happen */
201 FATAL("can't happen: unknown type %d in penter", type(p));
202 break;
203 }
204}
205
206void freetr(Node *p) /* free parse tree */
207{
208 switch (type(p)) {
209 LEAF
210 xfree(p);
211 break;
212 UNARY
213 freetr(left(p));
214 xfree(p);
215 break;
216 case CAT:
217 case OR:
218 freetr(left(p));
219 freetr(right(p));
220 xfree(p);
221 break;
222 default: /* can't happen */
223 FATAL("can't happen: unknown type %d in freetr", type(p));
224 break;
225 }
226}
227
228/* in the parsing of regular expressions, metacharacters like . have */
229/* to be seen literally; \056 is not a metacharacter. */
230
231int hexstr(char **pp) /* find and eval hex string at pp, return new p */
232{ /* only pick up one 8-bit byte (2 chars) */
233 uschar *p;
234 int n = 0;
235 int i;
236
237 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238 if (isdigit(*p))
239 n = 16 * n + *p - '0';
240 else if (*p >= 'a' && *p <= 'f')
241 n = 16 * n + *p - 'a' + 10;
242 else if (*p >= 'A' && *p <= 'F')
243 n = 16 * n + *p - 'A' + 10;
244 }
245 *pp = (char *) p;
246 return n;
247}
248
249#define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
250
251int quoted(char **pp) /* pick up next thing after a \\ */
252 /* and increment *pp */
253{
254 char *p = *pp;
255 int c;
256
257 if ((c = *p++) == 't')
258 c = '\t';
259 else if (c == 'n')
260 c = '\n';
261 else if (c == 'f')
262 c = '\f';
263 else if (c == 'r')
264 c = '\r';
265 else if (c == 'b')
266 c = '\b';
267 else if (c == '\\')
268 c = '\\';
269 else if (c == 'x') { /* hexadecimal goo follows */
270 c = hexstr(&p); /* this adds a null if number is invalid */
271 } else if (isoctdigit(c)) { /* \d \dd \ddd */
272 int n = c - '0';
273 if (isoctdigit(*p)) {
274 n = 8 * n + *p++ - '0';
275 if (isoctdigit(*p))
276 n = 8 * n + *p++ - '0';
277 }
278 c = n;
279 } /* else */
280 /* c = c; */
281 *pp = p;
282 return c;
283}
284
1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25/* lasciate ogne speranza, voi ch'entrate. */
26
27#define DEBUG
28
29#include <ctype.h>
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include "awk.h"
34#include "ytab.h"
35
36#define HAT (NCHARS-2) /* matches ^ in regular expr */
37 /* NCHARS is 2**n */
38#define MAXLIN 22
39
40#define type(v) (v)->nobj /* badly overloaded here */
41#define info(v) (v)->ntype /* badly overloaded here */
42#define left(v) (v)->narg[0]
43#define right(v) (v)->narg[1]
44#define parent(v) (v)->nnext
45
46#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47#define UNARY case STAR: case PLUS: case QUEST:
48
49/* encoding in tree Nodes:
50 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51 left is index, right contains value or pointer to value
52 unary (STAR, PLUS, QUEST): left is child, right is null
53 binary (CAT, OR): left and right are children
54 parent contains pointer to parent
55*/
56
57
58int *setvec;
59int *tmpset;
60int maxsetvec = 0;
61
62int rtok; /* next token in current re */
63int rlxval;
64static uschar *rlxstr;
65static uschar *prestr; /* current position in current re */
66static uschar *lastre; /* origin of last re */
67
68static int setcnt;
69static int poscnt;
70
71char *patbeg;
72int patlen;
73
74#define NFA 20 /* cache this many dynamic fa's */
75fa *fatab[NFA];
76int nfatab = 0; /* entries in fatab */
77
78fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
79{
80 int i, use, nuse;
81 fa *pfa;
82 static int now = 1;
83
84 if (setvec == 0) { /* first time through any RE */
85 maxsetvec = MAXLIN;
86 setvec = (int *) malloc(maxsetvec * sizeof(int));
87 tmpset = (int *) malloc(maxsetvec * sizeof(int));
88 if (setvec == 0 || tmpset == 0)
89 overflo("out of space initializing makedfa");
90 }
91
92 if (compile_time) /* a constant for sure */
93 return mkdfa(s, anchor);
94 for (i = 0; i < nfatab; i++) /* is it there already? */
95 if (fatab[i]->anchor == anchor
96 && strcmp((const char *) fatab[i]->restr, s) == 0) {
97 fatab[i]->use = now++;
98 return fatab[i];
99 }
100 pfa = mkdfa(s, anchor);
101 if (nfatab < NFA) { /* room for another */
102 fatab[nfatab] = pfa;
103 fatab[nfatab]->use = now++;
104 nfatab++;
105 return pfa;
106 }
107 use = fatab[0]->use; /* replace least-recently used */
108 nuse = 0;
109 for (i = 1; i < nfatab; i++)
110 if (fatab[i]->use < use) {
111 use = fatab[i]->use;
112 nuse = i;
113 }
114 freefa(fatab[nuse]);
115 fatab[nuse] = pfa;
116 pfa->use = now++;
117 return pfa;
118}
119
120fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
121 /* anchor = 1 for anchored matches, else 0 */
122{
123 Node *p, *p1;
124 fa *f;
125
126 p = reparse(s);
127 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128 /* put ALL STAR in front of reg. exp. */
129 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130 /* put FINAL after reg. exp. */
131
132 poscnt = 0;
133 penter(p1); /* enter parent pointers and leaf indices */
134 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135 overflo("out of space for fa");
136 f->accept = poscnt-1; /* penter has computed number of positions in re */
137 cfoll(f, p1); /* set up follow sets */
138 freetr(p1);
139 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140 overflo("out of space in makedfa");
141 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142 overflo("out of space in makedfa");
143 *f->posns[1] = 0;
144 f->initstat = makeinit(f, anchor);
145 f->anchor = anchor;
146 f->restr = (uschar *) tostring(s);
147 return f;
148}
149
150int makeinit(fa *f, int anchor)
151{
152 int i, k;
153
154 f->curstat = 2;
155 f->out[2] = 0;
156 f->reset = 0;
157 k = *(f->re[0].lfollow);
158 xfree(f->posns[2]);
159 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160 overflo("out of space in makeinit");
161 for (i=0; i <= k; i++) {
162 (f->posns[2])[i] = (f->re[0].lfollow)[i];
163 }
164 if ((f->posns[2])[1] == f->accept)
165 f->out[2] = 1;
166 for (i=0; i < NCHARS; i++)
167 f->gototab[2][i] = 0;
168 f->curstat = cgoto(f, 2, HAT);
169 if (anchor) {
170 *f->posns[2] = k-1; /* leave out position 0 */
171 for (i=0; i < k; i++) {
172 (f->posns[0])[i] = (f->posns[2])[i];
173 }
174
175 f->out[0] = f->out[2];
176 if (f->curstat != 2)
177 --(*f->posns[f->curstat]);
178 }
179 return f->curstat;
180}
181
182void penter(Node *p) /* set up parent pointers and leaf indices */
183{
184 switch (type(p)) {
185 LEAF
186 info(p) = poscnt;
187 poscnt++;
188 break;
189 UNARY
190 penter(left(p));
191 parent(left(p)) = p;
192 break;
193 case CAT:
194 case OR:
195 penter(left(p));
196 penter(right(p));
197 parent(left(p)) = p;
198 parent(right(p)) = p;
199 break;
200 default: /* can't happen */
201 FATAL("can't happen: unknown type %d in penter", type(p));
202 break;
203 }
204}
205
206void freetr(Node *p) /* free parse tree */
207{
208 switch (type(p)) {
209 LEAF
210 xfree(p);
211 break;
212 UNARY
213 freetr(left(p));
214 xfree(p);
215 break;
216 case CAT:
217 case OR:
218 freetr(left(p));
219 freetr(right(p));
220 xfree(p);
221 break;
222 default: /* can't happen */
223 FATAL("can't happen: unknown type %d in freetr", type(p));
224 break;
225 }
226}
227
228/* in the parsing of regular expressions, metacharacters like . have */
229/* to be seen literally; \056 is not a metacharacter. */
230
231int hexstr(char **pp) /* find and eval hex string at pp, return new p */
232{ /* only pick up one 8-bit byte (2 chars) */
233 uschar *p;
234 int n = 0;
235 int i;
236
237 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238 if (isdigit(*p))
239 n = 16 * n + *p - '0';
240 else if (*p >= 'a' && *p <= 'f')
241 n = 16 * n + *p - 'a' + 10;
242 else if (*p >= 'A' && *p <= 'F')
243 n = 16 * n + *p - 'A' + 10;
244 }
245 *pp = (char *) p;
246 return n;
247}
248
249#define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
250
251int quoted(char **pp) /* pick up next thing after a \\ */
252 /* and increment *pp */
253{
254 char *p = *pp;
255 int c;
256
257 if ((c = *p++) == 't')
258 c = '\t';
259 else if (c == 'n')
260 c = '\n';
261 else if (c == 'f')
262 c = '\f';
263 else if (c == 'r')
264 c = '\r';
265 else if (c == 'b')
266 c = '\b';
267 else if (c == '\\')
268 c = '\\';
269 else if (c == 'x') { /* hexadecimal goo follows */
270 c = hexstr(&p); /* this adds a null if number is invalid */
271 } else if (isoctdigit(c)) { /* \d \dd \ddd */
272 int n = c - '0';
273 if (isoctdigit(*p)) {
274 n = 8 * n + *p++ - '0';
275 if (isoctdigit(*p))
276 n = 8 * n + *p++ - '0';
277 }
278 c = n;
279 } /* else */
280 /* c = c; */
281 *pp = p;
282 return c;
283}
284
285static int collate_range_cmp(int a, int b)
286{
287 int r;
288 static char s[2][2];
289
290 if ((uschar)a == (uschar)b)
291 return 0;
292 s[0][0] = a;
293 s[1][0] = b;
294 if ((r = strcoll(s[0], s[1])) == 0)
295 r = (uschar)a - (uschar)b;
296 return r;
297}
298
285char *cclenter(const char *argp) /* add a character class */
286{
287 int i, c, c2;
299char *cclenter(const char *argp) /* add a character class */
300{
301 int i, c, c2;
302 int j;
288 uschar *p = (uschar *) argp;
289 uschar *op, *bp;
290 static uschar *buf = 0;
291 static int bufsz = 100;
292
293 op = p;
294 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
295 FATAL("out of space for character class [%.10s...] 1", p);
296 bp = buf;
297 for (i = 0; (c = *p++) != 0; ) {
298 if (c == '\\') {
299 c = quoted((char **) &p);
300 } else if (c == '-' && i > 0 && bp[-1] != 0) {
301 if (*p != 0) {
302 c = bp[-1];
303 c2 = *p++;
304 if (c2 == '\\')
305 c2 = quoted((char **) &p);
303 uschar *p = (uschar *) argp;
304 uschar *op, *bp;
305 static uschar *buf = 0;
306 static int bufsz = 100;
307
308 op = p;
309 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
310 FATAL("out of space for character class [%.10s...] 1", p);
311 bp = buf;
312 for (i = 0; (c = *p++) != 0; ) {
313 if (c == '\\') {
314 c = quoted((char **) &p);
315 } else if (c == '-' && i > 0 && bp[-1] != 0) {
316 if (*p != 0) {
317 c = bp[-1];
318 c2 = *p++;
319 if (c2 == '\\')
320 c2 = quoted((char **) &p);
306 if (c > c2) { /* empty; ignore */
321 if (collate_range_cmp(c, c2) > 0) { /* empty; ignore */
307 bp--;
308 i--;
309 continue;
310 }
322 bp--;
323 i--;
324 continue;
325 }
311 while (c < c2) {
326 for (j = 0; j < NCHARS; j++) {
327 if ((collate_range_cmp(c, j) > 0) ||
328 collate_range_cmp(j, c2) > 0)
329 continue;
312 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
313 FATAL("out of space for character class [%.10s...] 2", p);
330 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
331 FATAL("out of space for character class [%.10s...] 2", p);
314 *bp++ = ++c;
332 *bp++ = j;
315 i++;
316 }
317 continue;
318 }
319 }
320 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
321 FATAL("out of space for character class [%.10s...] 3", p);
322 *bp++ = c;
323 i++;
324 }
325 *bp = 0;
326 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327 xfree(op);
328 return (char *) tostring((char *) buf);
329}
330
331void overflo(const char *s)
332{
333 FATAL("regular expression too big: %.30s...", s);
334}
335
336void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
337{
338 int i;
339 int *p;
340
341 switch (type(v)) {
342 LEAF
343 f->re[info(v)].ltype = type(v);
344 f->re[info(v)].lval.np = right(v);
345 while (f->accept >= maxsetvec) { /* guessing here! */
346 maxsetvec *= 4;
347 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349 if (setvec == 0 || tmpset == 0)
350 overflo("out of space in cfoll()");
351 }
352 for (i = 0; i <= f->accept; i++)
353 setvec[i] = 0;
354 setcnt = 0;
355 follow(v); /* computes setvec and setcnt */
356 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
357 overflo("out of space building follow set");
358 f->re[info(v)].lfollow = p;
359 *p = setcnt;
360 for (i = f->accept; i >= 0; i--)
361 if (setvec[i] == 1)
362 *++p = i;
363 break;
364 UNARY
365 cfoll(f,left(v));
366 break;
367 case CAT:
368 case OR:
369 cfoll(f,left(v));
370 cfoll(f,right(v));
371 break;
372 default: /* can't happen */
373 FATAL("can't happen: unknown type %d in cfoll", type(v));
374 }
375}
376
377int first(Node *p) /* collects initially active leaves of p into setvec */
378 /* returns 1 if p matches empty string */
379{
380 int b, lp;
381
382 switch (type(p)) {
383 LEAF
384 lp = info(p); /* look for high-water mark of subscripts */
385 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
386 maxsetvec *= 4;
387 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389 if (setvec == 0 || tmpset == 0)
390 overflo("out of space in first()");
391 }
392 if (setvec[lp] != 1) {
393 setvec[lp] = 1;
394 setcnt++;
395 }
396 if (type(p) == CCL && (*(char *) right(p)) == '\0')
397 return(0); /* empty CCL */
398 else return(1);
399 case PLUS:
400 if (first(left(p)) == 0) return(0);
401 return(1);
402 case STAR:
403 case QUEST:
404 first(left(p));
405 return(0);
406 case CAT:
407 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408 return(1);
409 case OR:
410 b = first(right(p));
411 if (first(left(p)) == 0 || b == 0) return(0);
412 return(1);
413 }
414 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
415 return(-1);
416}
417
418void follow(Node *v) /* collects leaves that can follow v into setvec */
419{
420 Node *p;
421
422 if (type(v) == FINAL)
423 return;
424 p = parent(v);
425 switch (type(p)) {
426 case STAR:
427 case PLUS:
428 first(v);
429 follow(p);
430 return;
431
432 case OR:
433 case QUEST:
434 follow(p);
435 return;
436
437 case CAT:
438 if (v == left(p)) { /* v is left child of p */
439 if (first(right(p)) == 0) {
440 follow(p);
441 return;
442 }
443 } else /* v is right child */
444 follow(p);
445 return;
446 }
447}
448
449int member(int c, const char *sarg) /* is c in s? */
450{
451 uschar *s = (uschar *) sarg;
452
453 while (*s)
454 if (c == *s++)
455 return(1);
456 return(0);
457}
458
459int match(fa *f, const char *p0) /* shortest match ? */
460{
461 int s, ns;
462 uschar *p = (uschar *) p0;
463
464 s = f->reset ? makeinit(f,0) : f->initstat;
465 if (f->out[s])
466 return(1);
467 do {
468 if ((ns = f->gototab[s][*p]) != 0)
469 s = ns;
470 else
471 s = cgoto(f, s, *p);
472 if (f->out[s])
473 return(1);
474 } while (*p++ != 0);
475 return(0);
476}
477
478int pmatch(fa *f, const char *p0) /* longest match, for sub */
479{
480 int s, ns;
481 uschar *p = (uschar *) p0;
482 uschar *q;
483 int i, k;
484
485 s = f->reset ? makeinit(f,1) : f->initstat;
486 patbeg = (char *) p;
487 patlen = -1;
488 do {
489 q = p;
490 do {
491 if (f->out[s]) /* final state */
492 patlen = q-p;
493 if ((ns = f->gototab[s][*q]) != 0)
494 s = ns;
495 else
496 s = cgoto(f, s, *q);
497 if (s == 1) { /* no transition */
498 if (patlen >= 0) {
499 patbeg = (char *) p;
500 return(1);
501 }
502 else
503 goto nextin; /* no match */
504 }
505 } while (*q++ != 0);
506 if (f->out[s])
507 patlen = q-p-1; /* don't count $ */
508 if (patlen >= 0) {
509 patbeg = (char *) p;
510 return(1);
511 }
512 nextin:
513 s = 2;
514 if (f->reset) {
515 for (i = 2; i <= f->curstat; i++)
516 xfree(f->posns[i]);
517 k = *f->posns[0];
518 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
519 overflo("out of space in pmatch");
520 for (i = 0; i <= k; i++)
521 (f->posns[2])[i] = (f->posns[0])[i];
522 f->initstat = f->curstat = 2;
523 f->out[2] = f->out[0];
524 for (i = 0; i < NCHARS; i++)
525 f->gototab[2][i] = 0;
526 }
527 } while (*p++ != 0);
528 return (0);
529}
530
531int nematch(fa *f, const char *p0) /* non-empty match, for sub */
532{
533 int s, ns;
534 uschar *p = (uschar *) p0;
535 uschar *q;
536 int i, k;
537
538 s = f->reset ? makeinit(f,1) : f->initstat;
539 patlen = -1;
540 while (*p) {
541 q = p;
542 do {
543 if (f->out[s]) /* final state */
544 patlen = q-p;
545 if ((ns = f->gototab[s][*q]) != 0)
546 s = ns;
547 else
548 s = cgoto(f, s, *q);
549 if (s == 1) { /* no transition */
550 if (patlen > 0) {
551 patbeg = (char *) p;
552 return(1);
553 } else
554 goto nnextin; /* no nonempty match */
555 }
556 } while (*q++ != 0);
557 if (f->out[s])
558 patlen = q-p-1; /* don't count $ */
559 if (patlen > 0 ) {
560 patbeg = (char *) p;
561 return(1);
562 }
563 nnextin:
564 s = 2;
565 if (f->reset) {
566 for (i = 2; i <= f->curstat; i++)
567 xfree(f->posns[i]);
568 k = *f->posns[0];
569 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
570 overflo("out of state space");
571 for (i = 0; i <= k; i++)
572 (f->posns[2])[i] = (f->posns[0])[i];
573 f->initstat = f->curstat = 2;
574 f->out[2] = f->out[0];
575 for (i = 0; i < NCHARS; i++)
576 f->gototab[2][i] = 0;
577 }
578 p++;
579 }
580 return (0);
581}
582
583Node *reparse(const char *p) /* parses regular expression pointed to by p */
584{ /* uses relex() to scan regular expression */
585 Node *np;
586
587 dprintf( ("reparse <%s>\n", p) );
588 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
589 rtok = relex();
590 /* GNU compatibility: an empty regexp matches anything */
591 if (rtok == '\0')
592 /* FATAL("empty regular expression"); previous */
593 return(op2(ALL, NIL, NIL));
594 np = regexp();
595 if (rtok != '\0')
596 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
597 return(np);
598}
599
600Node *regexp(void) /* top-level parse of reg expr */
601{
602 return (alt(concat(primary())));
603}
604
605Node *primary(void)
606{
607 Node *np;
608
609 switch (rtok) {
610 case CHAR:
611 np = op2(CHAR, NIL, itonp(rlxval));
612 rtok = relex();
613 return (unary(np));
614 case ALL:
615 rtok = relex();
616 return (unary(op2(ALL, NIL, NIL)));
617 case DOT:
618 rtok = relex();
619 return (unary(op2(DOT, NIL, NIL)));
620 case CCL:
621 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
622 rtok = relex();
623 return (unary(np));
624 case NCCL:
625 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
626 rtok = relex();
627 return (unary(np));
628 case '^':
629 rtok = relex();
630 return (unary(op2(CHAR, NIL, itonp(HAT))));
631 case '$':
632 rtok = relex();
633 return (unary(op2(CHAR, NIL, NIL)));
634 case '(':
635 rtok = relex();
636 if (rtok == ')') { /* special pleading for () */
637 rtok = relex();
638 return unary(op2(CCL, NIL, (Node *) tostring("")));
639 }
640 np = regexp();
641 if (rtok == ')') {
642 rtok = relex();
643 return (unary(np));
644 }
645 else
646 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
647 default:
648 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
649 }
650 return 0; /*NOTREACHED*/
651}
652
653Node *concat(Node *np)
654{
655 switch (rtok) {
656 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
657 return (concat(op2(CAT, np, primary())));
658 }
659 return (np);
660}
661
662Node *alt(Node *np)
663{
664 if (rtok == OR) {
665 rtok = relex();
666 return (alt(op2(OR, np, concat(primary()))));
667 }
668 return (np);
669}
670
671Node *unary(Node *np)
672{
673 switch (rtok) {
674 case STAR:
675 rtok = relex();
676 return (unary(op2(STAR, np, NIL)));
677 case PLUS:
678 rtok = relex();
679 return (unary(op2(PLUS, np, NIL)));
680 case QUEST:
681 rtok = relex();
682 return (unary(op2(QUEST, np, NIL)));
683 default:
684 return (np);
685 }
686}
687
688/*
689 * Character class definitions conformant to the POSIX locale as
690 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
691 * and operating character sets are both ASCII (ISO646) or supersets
692 * thereof.
693 *
694 * Note that to avoid overflowing the temporary buffer used in
695 * relex(), the expanded character class (prior to range expansion)
696 * must be less than twice the size of their full name.
697 */
333 i++;
334 }
335 continue;
336 }
337 }
338 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
339 FATAL("out of space for character class [%.10s...] 3", p);
340 *bp++ = c;
341 i++;
342 }
343 *bp = 0;
344 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
345 xfree(op);
346 return (char *) tostring((char *) buf);
347}
348
349void overflo(const char *s)
350{
351 FATAL("regular expression too big: %.30s...", s);
352}
353
354void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
355{
356 int i;
357 int *p;
358
359 switch (type(v)) {
360 LEAF
361 f->re[info(v)].ltype = type(v);
362 f->re[info(v)].lval.np = right(v);
363 while (f->accept >= maxsetvec) { /* guessing here! */
364 maxsetvec *= 4;
365 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
366 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
367 if (setvec == 0 || tmpset == 0)
368 overflo("out of space in cfoll()");
369 }
370 for (i = 0; i <= f->accept; i++)
371 setvec[i] = 0;
372 setcnt = 0;
373 follow(v); /* computes setvec and setcnt */
374 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
375 overflo("out of space building follow set");
376 f->re[info(v)].lfollow = p;
377 *p = setcnt;
378 for (i = f->accept; i >= 0; i--)
379 if (setvec[i] == 1)
380 *++p = i;
381 break;
382 UNARY
383 cfoll(f,left(v));
384 break;
385 case CAT:
386 case OR:
387 cfoll(f,left(v));
388 cfoll(f,right(v));
389 break;
390 default: /* can't happen */
391 FATAL("can't happen: unknown type %d in cfoll", type(v));
392 }
393}
394
395int first(Node *p) /* collects initially active leaves of p into setvec */
396 /* returns 1 if p matches empty string */
397{
398 int b, lp;
399
400 switch (type(p)) {
401 LEAF
402 lp = info(p); /* look for high-water mark of subscripts */
403 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
404 maxsetvec *= 4;
405 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
406 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
407 if (setvec == 0 || tmpset == 0)
408 overflo("out of space in first()");
409 }
410 if (setvec[lp] != 1) {
411 setvec[lp] = 1;
412 setcnt++;
413 }
414 if (type(p) == CCL && (*(char *) right(p)) == '\0')
415 return(0); /* empty CCL */
416 else return(1);
417 case PLUS:
418 if (first(left(p)) == 0) return(0);
419 return(1);
420 case STAR:
421 case QUEST:
422 first(left(p));
423 return(0);
424 case CAT:
425 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
426 return(1);
427 case OR:
428 b = first(right(p));
429 if (first(left(p)) == 0 || b == 0) return(0);
430 return(1);
431 }
432 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
433 return(-1);
434}
435
436void follow(Node *v) /* collects leaves that can follow v into setvec */
437{
438 Node *p;
439
440 if (type(v) == FINAL)
441 return;
442 p = parent(v);
443 switch (type(p)) {
444 case STAR:
445 case PLUS:
446 first(v);
447 follow(p);
448 return;
449
450 case OR:
451 case QUEST:
452 follow(p);
453 return;
454
455 case CAT:
456 if (v == left(p)) { /* v is left child of p */
457 if (first(right(p)) == 0) {
458 follow(p);
459 return;
460 }
461 } else /* v is right child */
462 follow(p);
463 return;
464 }
465}
466
467int member(int c, const char *sarg) /* is c in s? */
468{
469 uschar *s = (uschar *) sarg;
470
471 while (*s)
472 if (c == *s++)
473 return(1);
474 return(0);
475}
476
477int match(fa *f, const char *p0) /* shortest match ? */
478{
479 int s, ns;
480 uschar *p = (uschar *) p0;
481
482 s = f->reset ? makeinit(f,0) : f->initstat;
483 if (f->out[s])
484 return(1);
485 do {
486 if ((ns = f->gototab[s][*p]) != 0)
487 s = ns;
488 else
489 s = cgoto(f, s, *p);
490 if (f->out[s])
491 return(1);
492 } while (*p++ != 0);
493 return(0);
494}
495
496int pmatch(fa *f, const char *p0) /* longest match, for sub */
497{
498 int s, ns;
499 uschar *p = (uschar *) p0;
500 uschar *q;
501 int i, k;
502
503 s = f->reset ? makeinit(f,1) : f->initstat;
504 patbeg = (char *) p;
505 patlen = -1;
506 do {
507 q = p;
508 do {
509 if (f->out[s]) /* final state */
510 patlen = q-p;
511 if ((ns = f->gototab[s][*q]) != 0)
512 s = ns;
513 else
514 s = cgoto(f, s, *q);
515 if (s == 1) { /* no transition */
516 if (patlen >= 0) {
517 patbeg = (char *) p;
518 return(1);
519 }
520 else
521 goto nextin; /* no match */
522 }
523 } while (*q++ != 0);
524 if (f->out[s])
525 patlen = q-p-1; /* don't count $ */
526 if (patlen >= 0) {
527 patbeg = (char *) p;
528 return(1);
529 }
530 nextin:
531 s = 2;
532 if (f->reset) {
533 for (i = 2; i <= f->curstat; i++)
534 xfree(f->posns[i]);
535 k = *f->posns[0];
536 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
537 overflo("out of space in pmatch");
538 for (i = 0; i <= k; i++)
539 (f->posns[2])[i] = (f->posns[0])[i];
540 f->initstat = f->curstat = 2;
541 f->out[2] = f->out[0];
542 for (i = 0; i < NCHARS; i++)
543 f->gototab[2][i] = 0;
544 }
545 } while (*p++ != 0);
546 return (0);
547}
548
549int nematch(fa *f, const char *p0) /* non-empty match, for sub */
550{
551 int s, ns;
552 uschar *p = (uschar *) p0;
553 uschar *q;
554 int i, k;
555
556 s = f->reset ? makeinit(f,1) : f->initstat;
557 patlen = -1;
558 while (*p) {
559 q = p;
560 do {
561 if (f->out[s]) /* final state */
562 patlen = q-p;
563 if ((ns = f->gototab[s][*q]) != 0)
564 s = ns;
565 else
566 s = cgoto(f, s, *q);
567 if (s == 1) { /* no transition */
568 if (patlen > 0) {
569 patbeg = (char *) p;
570 return(1);
571 } else
572 goto nnextin; /* no nonempty match */
573 }
574 } while (*q++ != 0);
575 if (f->out[s])
576 patlen = q-p-1; /* don't count $ */
577 if (patlen > 0 ) {
578 patbeg = (char *) p;
579 return(1);
580 }
581 nnextin:
582 s = 2;
583 if (f->reset) {
584 for (i = 2; i <= f->curstat; i++)
585 xfree(f->posns[i]);
586 k = *f->posns[0];
587 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
588 overflo("out of state space");
589 for (i = 0; i <= k; i++)
590 (f->posns[2])[i] = (f->posns[0])[i];
591 f->initstat = f->curstat = 2;
592 f->out[2] = f->out[0];
593 for (i = 0; i < NCHARS; i++)
594 f->gototab[2][i] = 0;
595 }
596 p++;
597 }
598 return (0);
599}
600
601Node *reparse(const char *p) /* parses regular expression pointed to by p */
602{ /* uses relex() to scan regular expression */
603 Node *np;
604
605 dprintf( ("reparse <%s>\n", p) );
606 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
607 rtok = relex();
608 /* GNU compatibility: an empty regexp matches anything */
609 if (rtok == '\0')
610 /* FATAL("empty regular expression"); previous */
611 return(op2(ALL, NIL, NIL));
612 np = regexp();
613 if (rtok != '\0')
614 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
615 return(np);
616}
617
618Node *regexp(void) /* top-level parse of reg expr */
619{
620 return (alt(concat(primary())));
621}
622
623Node *primary(void)
624{
625 Node *np;
626
627 switch (rtok) {
628 case CHAR:
629 np = op2(CHAR, NIL, itonp(rlxval));
630 rtok = relex();
631 return (unary(np));
632 case ALL:
633 rtok = relex();
634 return (unary(op2(ALL, NIL, NIL)));
635 case DOT:
636 rtok = relex();
637 return (unary(op2(DOT, NIL, NIL)));
638 case CCL:
639 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
640 rtok = relex();
641 return (unary(np));
642 case NCCL:
643 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
644 rtok = relex();
645 return (unary(np));
646 case '^':
647 rtok = relex();
648 return (unary(op2(CHAR, NIL, itonp(HAT))));
649 case '$':
650 rtok = relex();
651 return (unary(op2(CHAR, NIL, NIL)));
652 case '(':
653 rtok = relex();
654 if (rtok == ')') { /* special pleading for () */
655 rtok = relex();
656 return unary(op2(CCL, NIL, (Node *) tostring("")));
657 }
658 np = regexp();
659 if (rtok == ')') {
660 rtok = relex();
661 return (unary(np));
662 }
663 else
664 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
665 default:
666 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
667 }
668 return 0; /*NOTREACHED*/
669}
670
671Node *concat(Node *np)
672{
673 switch (rtok) {
674 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
675 return (concat(op2(CAT, np, primary())));
676 }
677 return (np);
678}
679
680Node *alt(Node *np)
681{
682 if (rtok == OR) {
683 rtok = relex();
684 return (alt(op2(OR, np, concat(primary()))));
685 }
686 return (np);
687}
688
689Node *unary(Node *np)
690{
691 switch (rtok) {
692 case STAR:
693 rtok = relex();
694 return (unary(op2(STAR, np, NIL)));
695 case PLUS:
696 rtok = relex();
697 return (unary(op2(PLUS, np, NIL)));
698 case QUEST:
699 rtok = relex();
700 return (unary(op2(QUEST, np, NIL)));
701 default:
702 return (np);
703 }
704}
705
706/*
707 * Character class definitions conformant to the POSIX locale as
708 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
709 * and operating character sets are both ASCII (ISO646) or supersets
710 * thereof.
711 *
712 * Note that to avoid overflowing the temporary buffer used in
713 * relex(), the expanded character class (prior to range expansion)
714 * must be less than twice the size of their full name.
715 */
716
717/* Because isblank doesn't show up in any of the header files on any
718 * system i use, it's defined here. if some other locale has a richer
719 * definition of "blank", define HAS_ISBLANK and provide your own
720 * version.
721 */
722
723#ifndef HAS_ISBLANK
724
725int isblank(int c)
726{
727 return c==' ' || c=='\t';
728}
729
730#endif
731
698struct charclass {
699 const char *cc_name;
700 int cc_namelen;
732struct charclass {
733 const char *cc_name;
734 int cc_namelen;
701 const char *cc_expand;
735 int (*cc_func)(int);
702} charclasses[] = {
736} charclasses[] = {
703 { "alnum", 5, "0-9A-Za-z" },
704 { "alpha", 5, "A-Za-z" },
705 { "blank", 5, " \t" },
706 { "cntrl", 5, "\000-\037\177" },
707 { "digit", 5, "0-9" },
708 { "graph", 5, "\041-\176" },
709 { "lower", 5, "a-z" },
710 { "print", 5, " \041-\176" },
711 { "punct", 5, "\041-\057\072-\100\133-\140\173-\176" },
712 { "space", 5, " \f\n\r\t\v" },
713 { "upper", 5, "A-Z" },
714 { "xdigit", 6, "0-9A-Fa-f" },
737 { "alnum", 5, isalnum },
738 { "alpha", 5, isalpha },
739 { "blank", 5, isblank },
740 { "cntrl", 5, iscntrl },
741 { "digit", 5, isdigit },
742 { "graph", 5, isgraph },
743 { "lower", 5, islower },
744 { "print", 5, isprint },
745 { "punct", 5, ispunct },
746 { "space", 5, isspace },
747 { "upper", 5, isupper },
748 { "xdigit", 6, isxdigit },
715 { NULL, 0, NULL },
716};
717
718
719int relex(void) /* lexical analyzer for reparse */
720{
721 int c, n;
722 int cflag;
723 static uschar *buf = 0;
724 static int bufsz = 100;
725 uschar *bp;
726 struct charclass *cc;
749 { NULL, 0, NULL },
750};
751
752
753int relex(void) /* lexical analyzer for reparse */
754{
755 int c, n;
756 int cflag;
757 static uschar *buf = 0;
758 static int bufsz = 100;
759 uschar *bp;
760 struct charclass *cc;
727 const uschar *p;
761 int i;
728
729 switch (c = *prestr++) {
730 case '|': return OR;
731 case '*': return STAR;
732 case '+': return PLUS;
733 case '?': return QUEST;
734 case '.': return DOT;
735 case '\0': prestr--; return '\0';
736 case '^':
737 case '$':
738 case '(':
739 case ')':
740 return c;
741 case '\\':
742 rlxval = quoted((char **) &prestr);
743 return CHAR;
744 default:
745 rlxval = c;
746 return CHAR;
747 case '[':
748 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
749 FATAL("out of space in reg expr %.10s..", lastre);
750 bp = buf;
751 if (*prestr == '^') {
752 cflag = 1;
753 prestr++;
754 }
755 else
756 cflag = 0;
757 n = 2 * strlen((const char *) prestr)+1;
758 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
759 FATAL("out of space for reg expr %.10s...", lastre);
760 for (; ; ) {
761 if ((c = *prestr++) == '\\') {
762 *bp++ = '\\';
763 if ((c = *prestr++) == '\0')
764 FATAL("nonterminated character class %.20s...", lastre);
765 *bp++ = c;
766 /* } else if (c == '\n') { */
767 /* FATAL("newline in character class %.20s...", lastre); */
768 } else if (c == '[' && *prestr == ':') {
769 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
770 for (cc = charclasses; cc->cc_name; cc++)
771 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
772 break;
773 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
774 prestr[2 + cc->cc_namelen] == ']') {
775 prestr += cc->cc_namelen + 3;
762
763 switch (c = *prestr++) {
764 case '|': return OR;
765 case '*': return STAR;
766 case '+': return PLUS;
767 case '?': return QUEST;
768 case '.': return DOT;
769 case '\0': prestr--; return '\0';
770 case '^':
771 case '$':
772 case '(':
773 case ')':
774 return c;
775 case '\\':
776 rlxval = quoted((char **) &prestr);
777 return CHAR;
778 default:
779 rlxval = c;
780 return CHAR;
781 case '[':
782 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
783 FATAL("out of space in reg expr %.10s..", lastre);
784 bp = buf;
785 if (*prestr == '^') {
786 cflag = 1;
787 prestr++;
788 }
789 else
790 cflag = 0;
791 n = 2 * strlen((const char *) prestr)+1;
792 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
793 FATAL("out of space for reg expr %.10s...", lastre);
794 for (; ; ) {
795 if ((c = *prestr++) == '\\') {
796 *bp++ = '\\';
797 if ((c = *prestr++) == '\0')
798 FATAL("nonterminated character class %.20s...", lastre);
799 *bp++ = c;
800 /* } else if (c == '\n') { */
801 /* FATAL("newline in character class %.20s...", lastre); */
802 } else if (c == '[' && *prestr == ':') {
803 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
804 for (cc = charclasses; cc->cc_name; cc++)
805 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
806 break;
807 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
808 prestr[2 + cc->cc_namelen] == ']') {
809 prestr += cc->cc_namelen + 3;
776 for (p = (const uschar *) cc->cc_expand; *p; p++)
777 *bp++ = *p;
810 for (i = 0; i < NCHARS; i++) {
811 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
812 FATAL("out of space for reg expr %.10s...", lastre);
813 if (cc->cc_func(i)) {
814 *bp++ = i;
815 n++;
816 }
817 }
778 } else
779 *bp++ = c;
780 } else if (c == '\0') {
781 FATAL("nonterminated character class %.20s", lastre);
782 } else if (bp == buf) { /* 1st char is special */
783 *bp++ = c;
784 } else if (c == ']') {
785 *bp++ = 0;
786 rlxstr = (uschar *) tostring((char *) buf);
787 if (cflag == 0)
788 return CCL;
789 else
790 return NCCL;
791 } else
792 *bp++ = c;
793 }
794 }
795}
796
797int cgoto(fa *f, int s, int c)
798{
799 int i, j, k;
800 int *p, *q;
801
802 if (c < 0 || c > 255)
803 FATAL("can't happen: neg char %d in cgoto", c);
804 while (f->accept >= maxsetvec) { /* guessing here! */
805 maxsetvec *= 4;
806 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
807 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
808 if (setvec == 0 || tmpset == 0)
809 overflo("out of space in cgoto()");
810 }
811 for (i = 0; i <= f->accept; i++)
812 setvec[i] = 0;
813 setcnt = 0;
814 /* compute positions of gototab[s,c] into setvec */
815 p = f->posns[s];
816 for (i = 1; i <= *p; i++) {
817 if ((k = f->re[p[i]].ltype) != FINAL) {
818 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
819 || (k == DOT && c != 0 && c != HAT)
820 || (k == ALL && c != 0)
821 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
822 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
823 q = f->re[p[i]].lfollow;
824 for (j = 1; j <= *q; j++) {
825 if (q[j] >= maxsetvec) {
826 maxsetvec *= 4;
827 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
828 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
829 if (setvec == 0 || tmpset == 0)
830 overflo("cgoto overflow");
831 }
832 if (setvec[q[j]] == 0) {
833 setcnt++;
834 setvec[q[j]] = 1;
835 }
836 }
837 }
838 }
839 }
840 /* determine if setvec is a previous state */
841 tmpset[0] = setcnt;
842 j = 1;
843 for (i = f->accept; i >= 0; i--)
844 if (setvec[i]) {
845 tmpset[j++] = i;
846 }
847 /* tmpset == previous state? */
848 for (i = 1; i <= f->curstat; i++) {
849 p = f->posns[i];
850 if ((k = tmpset[0]) != p[0])
851 goto different;
852 for (j = 1; j <= k; j++)
853 if (tmpset[j] != p[j])
854 goto different;
855 /* setvec is state i */
856 f->gototab[s][c] = i;
857 return i;
858 different:;
859 }
860
861 /* add tmpset to current set of states */
862 if (f->curstat >= NSTATES-1) {
863 f->curstat = 2;
864 f->reset = 1;
865 for (i = 2; i < NSTATES; i++)
866 xfree(f->posns[i]);
867 } else
868 ++(f->curstat);
869 for (i = 0; i < NCHARS; i++)
870 f->gototab[f->curstat][i] = 0;
871 xfree(f->posns[f->curstat]);
872 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
873 overflo("out of space in cgoto");
874
875 f->posns[f->curstat] = p;
876 f->gototab[s][c] = f->curstat;
877 for (i = 0; i <= setcnt; i++)
878 p[i] = tmpset[i];
879 if (setvec[f->accept])
880 f->out[f->curstat] = 1;
881 else
882 f->out[f->curstat] = 0;
883 return f->curstat;
884}
885
886
887void freefa(fa *f) /* free a finite automaton */
888{
889 int i;
890
891 if (f == NULL)
892 return;
893 for (i = 0; i <= f->curstat; i++)
894 xfree(f->posns[i]);
895 for (i = 0; i <= f->accept; i++) {
896 xfree(f->re[i].lfollow);
897 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
898 xfree((f->re[i].lval.np));
899 }
900 xfree(f->restr);
901 xfree(f);
902}
818 } else
819 *bp++ = c;
820 } else if (c == '\0') {
821 FATAL("nonterminated character class %.20s", lastre);
822 } else if (bp == buf) { /* 1st char is special */
823 *bp++ = c;
824 } else if (c == ']') {
825 *bp++ = 0;
826 rlxstr = (uschar *) tostring((char *) buf);
827 if (cflag == 0)
828 return CCL;
829 else
830 return NCCL;
831 } else
832 *bp++ = c;
833 }
834 }
835}
836
837int cgoto(fa *f, int s, int c)
838{
839 int i, j, k;
840 int *p, *q;
841
842 if (c < 0 || c > 255)
843 FATAL("can't happen: neg char %d in cgoto", c);
844 while (f->accept >= maxsetvec) { /* guessing here! */
845 maxsetvec *= 4;
846 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
847 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
848 if (setvec == 0 || tmpset == 0)
849 overflo("out of space in cgoto()");
850 }
851 for (i = 0; i <= f->accept; i++)
852 setvec[i] = 0;
853 setcnt = 0;
854 /* compute positions of gototab[s,c] into setvec */
855 p = f->posns[s];
856 for (i = 1; i <= *p; i++) {
857 if ((k = f->re[p[i]].ltype) != FINAL) {
858 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
859 || (k == DOT && c != 0 && c != HAT)
860 || (k == ALL && c != 0)
861 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
862 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
863 q = f->re[p[i]].lfollow;
864 for (j = 1; j <= *q; j++) {
865 if (q[j] >= maxsetvec) {
866 maxsetvec *= 4;
867 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
868 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
869 if (setvec == 0 || tmpset == 0)
870 overflo("cgoto overflow");
871 }
872 if (setvec[q[j]] == 0) {
873 setcnt++;
874 setvec[q[j]] = 1;
875 }
876 }
877 }
878 }
879 }
880 /* determine if setvec is a previous state */
881 tmpset[0] = setcnt;
882 j = 1;
883 for (i = f->accept; i >= 0; i--)
884 if (setvec[i]) {
885 tmpset[j++] = i;
886 }
887 /* tmpset == previous state? */
888 for (i = 1; i <= f->curstat; i++) {
889 p = f->posns[i];
890 if ((k = tmpset[0]) != p[0])
891 goto different;
892 for (j = 1; j <= k; j++)
893 if (tmpset[j] != p[j])
894 goto different;
895 /* setvec is state i */
896 f->gototab[s][c] = i;
897 return i;
898 different:;
899 }
900
901 /* add tmpset to current set of states */
902 if (f->curstat >= NSTATES-1) {
903 f->curstat = 2;
904 f->reset = 1;
905 for (i = 2; i < NSTATES; i++)
906 xfree(f->posns[i]);
907 } else
908 ++(f->curstat);
909 for (i = 0; i < NCHARS; i++)
910 f->gototab[f->curstat][i] = 0;
911 xfree(f->posns[f->curstat]);
912 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
913 overflo("out of space in cgoto");
914
915 f->posns[f->curstat] = p;
916 f->gototab[s][c] = f->curstat;
917 for (i = 0; i <= setcnt; i++)
918 p[i] = tmpset[i];
919 if (setvec[f->accept])
920 f->out[f->curstat] = 1;
921 else
922 f->out[f->curstat] = 0;
923 return f->curstat;
924}
925
926
927void freefa(fa *f) /* free a finite automaton */
928{
929 int i;
930
931 if (f == NULL)
932 return;
933 for (i = 0; i <= f->curstat; i++)
934 xfree(f->posns[i]);
935 for (i = 0; i <= f->accept; i++) {
936 xfree(f->re[i].lfollow);
937 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
938 xfree((f->re[i].lval.np));
939 }
940 xfree(f->restr);
941 xfree(f);
942}