Deleted Added
full compact
regcomp.c (49094) regcomp.c (62232)
1/*-
2 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Henry Spencer.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
1/*-
2 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Henry Spencer.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
38 *
39 * $FreeBSD: head/lib/libc/regex/regcomp.c 62232 2000-06-29 04:48:34Z dcs $
38 */
39
40#if defined(LIBC_SCCS) && !defined(lint)
41static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
42#endif /* LIBC_SCCS and not lint */
43
44#include <sys/types.h>
45#include <stdio.h>
46#include <string.h>
47#include <ctype.h>
48#include <limits.h>
49#include <stdlib.h>
50#include <regex.h>
51
52#include "collate.h"
53
54#include "utils.h"
55#include "regex2.h"
56
57#include "cclass.h"
58#include "cname.h"
59
60/*
61 * parse structure, passed up and down to avoid global variables and
62 * other clumsinesses
63 */
64struct parse {
65 char *next; /* next character in RE */
66 char *end; /* end of string (-> NUL normally) */
67 int error; /* has an error been seen? */
68 sop *strip; /* malloced strip */
69 sopno ssize; /* malloced strip size (allocated) */
70 sopno slen; /* malloced strip length (used) */
71 int ncsalloc; /* number of csets allocated */
72 struct re_guts *g;
73# define NPAREN 10 /* we need to remember () 1-9 for back refs */
74 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
75 sopno pend[NPAREN]; /* -> ) ([0] unused) */
76};
77
78/* ========= begin header generated by ./mkh ========= */
79#ifdef __cplusplus
80extern "C" {
81#endif
82
83/* === regcomp.c === */
84static void p_ere __P((struct parse *p, int stop));
85static void p_ere_exp __P((struct parse *p));
86static void p_str __P((struct parse *p));
87static void p_bre __P((struct parse *p, int end1, int end2));
88static int p_simp_re __P((struct parse *p, int starordinary));
89static int p_count __P((struct parse *p));
90static void p_bracket __P((struct parse *p));
91static void p_b_term __P((struct parse *p, cset *cs));
92static void p_b_cclass __P((struct parse *p, cset *cs));
93static void p_b_eclass __P((struct parse *p, cset *cs));
94static char p_b_symbol __P((struct parse *p));
95static char p_b_coll_elem __P((struct parse *p, int endc));
96static char othercase __P((int ch));
97static void bothcases __P((struct parse *p, int ch));
98static void ordinary __P((struct parse *p, int ch));
99static void nonnewline __P((struct parse *p));
100static void repeat __P((struct parse *p, sopno start, int from, int to));
101static int seterr __P((struct parse *p, int e));
102static cset *allocset __P((struct parse *p));
103static void freeset __P((struct parse *p, cset *cs));
104static int freezeset __P((struct parse *p, cset *cs));
105static int firstch __P((struct parse *p, cset *cs));
106static int nch __P((struct parse *p, cset *cs));
107static void mcadd __P((struct parse *p, cset *cs, char *cp));
108#if used
109static void mcsub __P((cset *cs, char *cp));
110static int mcin __P((cset *cs, char *cp));
111static char *mcfind __P((cset *cs, char *cp));
112#endif
113static void mcinvert __P((struct parse *p, cset *cs));
114static void mccase __P((struct parse *p, cset *cs));
115static int isinsets __P((struct re_guts *g, int c));
116static int samesets __P((struct re_guts *g, int c1, int c2));
117static void categorize __P((struct parse *p, struct re_guts *g));
118static sopno dupl __P((struct parse *p, sopno start, sopno finish));
119static void doemit __P((struct parse *p, sop op, size_t opnd));
120static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
121static void dofwd __P((struct parse *p, sopno pos, sop value));
122static void enlarge __P((struct parse *p, sopno size));
123static void stripsnug __P((struct parse *p, struct re_guts *g));
124static void findmust __P((struct parse *p, struct re_guts *g));
40 */
41
42#if defined(LIBC_SCCS) && !defined(lint)
43static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
44#endif /* LIBC_SCCS and not lint */
45
46#include <sys/types.h>
47#include <stdio.h>
48#include <string.h>
49#include <ctype.h>
50#include <limits.h>
51#include <stdlib.h>
52#include <regex.h>
53
54#include "collate.h"
55
56#include "utils.h"
57#include "regex2.h"
58
59#include "cclass.h"
60#include "cname.h"
61
62/*
63 * parse structure, passed up and down to avoid global variables and
64 * other clumsinesses
65 */
66struct parse {
67 char *next; /* next character in RE */
68 char *end; /* end of string (-> NUL normally) */
69 int error; /* has an error been seen? */
70 sop *strip; /* malloced strip */
71 sopno ssize; /* malloced strip size (allocated) */
72 sopno slen; /* malloced strip length (used) */
73 int ncsalloc; /* number of csets allocated */
74 struct re_guts *g;
75# define NPAREN 10 /* we need to remember () 1-9 for back refs */
76 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
77 sopno pend[NPAREN]; /* -> ) ([0] unused) */
78};
79
80/* ========= begin header generated by ./mkh ========= */
81#ifdef __cplusplus
82extern "C" {
83#endif
84
85/* === regcomp.c === */
86static void p_ere __P((struct parse *p, int stop));
87static void p_ere_exp __P((struct parse *p));
88static void p_str __P((struct parse *p));
89static void p_bre __P((struct parse *p, int end1, int end2));
90static int p_simp_re __P((struct parse *p, int starordinary));
91static int p_count __P((struct parse *p));
92static void p_bracket __P((struct parse *p));
93static void p_b_term __P((struct parse *p, cset *cs));
94static void p_b_cclass __P((struct parse *p, cset *cs));
95static void p_b_eclass __P((struct parse *p, cset *cs));
96static char p_b_symbol __P((struct parse *p));
97static char p_b_coll_elem __P((struct parse *p, int endc));
98static char othercase __P((int ch));
99static void bothcases __P((struct parse *p, int ch));
100static void ordinary __P((struct parse *p, int ch));
101static void nonnewline __P((struct parse *p));
102static void repeat __P((struct parse *p, sopno start, int from, int to));
103static int seterr __P((struct parse *p, int e));
104static cset *allocset __P((struct parse *p));
105static void freeset __P((struct parse *p, cset *cs));
106static int freezeset __P((struct parse *p, cset *cs));
107static int firstch __P((struct parse *p, cset *cs));
108static int nch __P((struct parse *p, cset *cs));
109static void mcadd __P((struct parse *p, cset *cs, char *cp));
110#if used
111static void mcsub __P((cset *cs, char *cp));
112static int mcin __P((cset *cs, char *cp));
113static char *mcfind __P((cset *cs, char *cp));
114#endif
115static void mcinvert __P((struct parse *p, cset *cs));
116static void mccase __P((struct parse *p, cset *cs));
117static int isinsets __P((struct re_guts *g, int c));
118static int samesets __P((struct re_guts *g, int c1, int c2));
119static void categorize __P((struct parse *p, struct re_guts *g));
120static sopno dupl __P((struct parse *p, sopno start, sopno finish));
121static void doemit __P((struct parse *p, sop op, size_t opnd));
122static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
123static void dofwd __P((struct parse *p, sopno pos, sop value));
124static void enlarge __P((struct parse *p, sopno size));
125static void stripsnug __P((struct parse *p, struct re_guts *g));
126static void findmust __P((struct parse *p, struct re_guts *g));
127static void computejumps __P((struct parse *p, struct re_guts *g));
128static void computematchjumps __P((struct parse *p, struct re_guts *g));
125static sopno pluscount __P((struct parse *p, struct re_guts *g));
126
127#ifdef __cplusplus
128}
129#endif
130/* ========= end header generated by ./mkh ========= */
131
132static char nuls[10]; /* place to point scanner in event of error */
133
134/*
135 * macros for use with parse structure
136 * BEWARE: these know that the parse structure is named `p' !!!
137 */
138#define PEEK() (*p->next)
139#define PEEK2() (*(p->next+1))
140#define MORE() (p->next < p->end)
141#define MORE2() (p->next+1 < p->end)
142#define SEE(c) (MORE() && PEEK() == (c))
143#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
144#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
145#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
146#define NEXT() (p->next++)
147#define NEXT2() (p->next += 2)
148#define NEXTn(n) (p->next += (n))
149#define GETNEXT() (*p->next++)
150#define SETERROR(e) seterr(p, (e))
151#define REQUIRE(co, e) ((co) || SETERROR(e))
152#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
153#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
154#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
155#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
156#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
157#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
158#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
159#define HERE() (p->slen)
160#define THERE() (p->slen - 1)
161#define THERETHERE() (p->slen - 2)
162#define DROP(n) (p->slen -= (n))
163
164#ifndef NDEBUG
165static int never = 0; /* for use in asserts; shuts lint up */
166#else
167#define never 0 /* some <assert.h>s have bugs too */
168#endif
169
129static sopno pluscount __P((struct parse *p, struct re_guts *g));
130
131#ifdef __cplusplus
132}
133#endif
134/* ========= end header generated by ./mkh ========= */
135
136static char nuls[10]; /* place to point scanner in event of error */
137
138/*
139 * macros for use with parse structure
140 * BEWARE: these know that the parse structure is named `p' !!!
141 */
142#define PEEK() (*p->next)
143#define PEEK2() (*(p->next+1))
144#define MORE() (p->next < p->end)
145#define MORE2() (p->next+1 < p->end)
146#define SEE(c) (MORE() && PEEK() == (c))
147#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
148#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
149#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
150#define NEXT() (p->next++)
151#define NEXT2() (p->next += 2)
152#define NEXTn(n) (p->next += (n))
153#define GETNEXT() (*p->next++)
154#define SETERROR(e) seterr(p, (e))
155#define REQUIRE(co, e) ((co) || SETERROR(e))
156#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
157#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
158#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
159#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
160#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
161#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
162#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
163#define HERE() (p->slen)
164#define THERE() (p->slen - 1)
165#define THERETHERE() (p->slen - 2)
166#define DROP(n) (p->slen -= (n))
167
168#ifndef NDEBUG
169static int never = 0; /* for use in asserts; shuts lint up */
170#else
171#define never 0 /* some <assert.h>s have bugs too */
172#endif
173
174/* Macro used by computejump()/computematchjump() */
175#define MIN(a,b) ((a)<(b)?(a):(b))
176
170/*
171 - regcomp - interface for parser and compilation
172 = extern int regcomp(regex_t *, const char *, int);
173 = #define REG_BASIC 0000
174 = #define REG_EXTENDED 0001
175 = #define REG_ICASE 0002
176 = #define REG_NOSUB 0004
177 = #define REG_NEWLINE 0010
178 = #define REG_NOSPEC 0020
179 = #define REG_PEND 0040
180 = #define REG_DUMP 0200
181 */
182int /* 0 success, otherwise REG_something */
183regcomp(preg, pattern, cflags)
184regex_t *preg;
185const char *pattern;
186int cflags;
187{
188 struct parse pa;
189 register struct re_guts *g;
190 register struct parse *p = &pa;
191 register int i;
192 register size_t len;
193#ifdef REDEBUG
194# define GOODFLAGS(f) (f)
195#else
196# define GOODFLAGS(f) ((f)&~REG_DUMP)
197#endif
198
199 cflags = GOODFLAGS(cflags);
200 if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
201 return(REG_INVARG);
202
203 if (cflags&REG_PEND) {
204 if (preg->re_endp < pattern)
205 return(REG_INVARG);
206 len = preg->re_endp - pattern;
207 } else
208 len = strlen((char *)pattern);
209
210 /* do the mallocs early so failure handling is easy */
211 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
212 (NC-1)*sizeof(cat_t));
213 if (g == NULL)
214 return(REG_ESPACE);
215 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
216 p->strip = (sop *)malloc(p->ssize * sizeof(sop));
217 p->slen = 0;
218 if (p->strip == NULL) {
219 free((char *)g);
220 return(REG_ESPACE);
221 }
222
223 /* set things up */
224 p->g = g;
225 p->next = (char *)pattern; /* convenience; we do not modify it */
226 p->end = p->next + len;
227 p->error = 0;
228 p->ncsalloc = 0;
229 for (i = 0; i < NPAREN; i++) {
230 p->pbegin[i] = 0;
231 p->pend[i] = 0;
232 }
233 g->csetsize = NC;
234 g->sets = NULL;
235 g->setbits = NULL;
236 g->ncsets = 0;
237 g->cflags = cflags;
238 g->iflags = 0;
239 g->nbol = 0;
240 g->neol = 0;
241 g->must = NULL;
242 g->mlen = 0;
243 g->nsub = 0;
244 g->ncategories = 1; /* category 0 is "everything else" */
245 g->categories = &g->catspace[-(CHAR_MIN)];
246 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
247 g->backrefs = 0;
248
249 /* do it */
250 EMIT(OEND, 0);
251 g->firststate = THERE();
252 if (cflags&REG_EXTENDED)
253 p_ere(p, OUT);
254 else if (cflags&REG_NOSPEC)
255 p_str(p);
256 else
257 p_bre(p, OUT, OUT);
258 EMIT(OEND, 0);
259 g->laststate = THERE();
260
261 /* tidy up loose ends and fill things in */
262 categorize(p, g);
263 stripsnug(p, g);
264 findmust(p, g);
177/*
178 - regcomp - interface for parser and compilation
179 = extern int regcomp(regex_t *, const char *, int);
180 = #define REG_BASIC 0000
181 = #define REG_EXTENDED 0001
182 = #define REG_ICASE 0002
183 = #define REG_NOSUB 0004
184 = #define REG_NEWLINE 0010
185 = #define REG_NOSPEC 0020
186 = #define REG_PEND 0040
187 = #define REG_DUMP 0200
188 */
189int /* 0 success, otherwise REG_something */
190regcomp(preg, pattern, cflags)
191regex_t *preg;
192const char *pattern;
193int cflags;
194{
195 struct parse pa;
196 register struct re_guts *g;
197 register struct parse *p = &pa;
198 register int i;
199 register size_t len;
200#ifdef REDEBUG
201# define GOODFLAGS(f) (f)
202#else
203# define GOODFLAGS(f) ((f)&~REG_DUMP)
204#endif
205
206 cflags = GOODFLAGS(cflags);
207 if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
208 return(REG_INVARG);
209
210 if (cflags&REG_PEND) {
211 if (preg->re_endp < pattern)
212 return(REG_INVARG);
213 len = preg->re_endp - pattern;
214 } else
215 len = strlen((char *)pattern);
216
217 /* do the mallocs early so failure handling is easy */
218 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
219 (NC-1)*sizeof(cat_t));
220 if (g == NULL)
221 return(REG_ESPACE);
222 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
223 p->strip = (sop *)malloc(p->ssize * sizeof(sop));
224 p->slen = 0;
225 if (p->strip == NULL) {
226 free((char *)g);
227 return(REG_ESPACE);
228 }
229
230 /* set things up */
231 p->g = g;
232 p->next = (char *)pattern; /* convenience; we do not modify it */
233 p->end = p->next + len;
234 p->error = 0;
235 p->ncsalloc = 0;
236 for (i = 0; i < NPAREN; i++) {
237 p->pbegin[i] = 0;
238 p->pend[i] = 0;
239 }
240 g->csetsize = NC;
241 g->sets = NULL;
242 g->setbits = NULL;
243 g->ncsets = 0;
244 g->cflags = cflags;
245 g->iflags = 0;
246 g->nbol = 0;
247 g->neol = 0;
248 g->must = NULL;
249 g->mlen = 0;
250 g->nsub = 0;
251 g->ncategories = 1; /* category 0 is "everything else" */
252 g->categories = &g->catspace[-(CHAR_MIN)];
253 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
254 g->backrefs = 0;
255
256 /* do it */
257 EMIT(OEND, 0);
258 g->firststate = THERE();
259 if (cflags&REG_EXTENDED)
260 p_ere(p, OUT);
261 else if (cflags&REG_NOSPEC)
262 p_str(p);
263 else
264 p_bre(p, OUT, OUT);
265 EMIT(OEND, 0);
266 g->laststate = THERE();
267
268 /* tidy up loose ends and fill things in */
269 categorize(p, g);
270 stripsnug(p, g);
271 findmust(p, g);
272 /* only use Boyer-Moore algorithm if the pattern is bigger
273 * than three characters
274 */
275 if(g->mlen > 3) {
276 computejumps(p, g);
277 computematchjumps(p, g);
278 if(g->matchjump == NULL) {
279 free(g->charjump);
280 g->charjump = NULL;
281 }
282 }
265 g->nplus = pluscount(p, g);
266 g->magic = MAGIC2;
267 preg->re_nsub = g->nsub;
268 preg->re_g = g;
269 preg->re_magic = MAGIC1;
270#ifndef REDEBUG
271 /* not debugging, so can't rely on the assert() in regexec() */
272 if (g->iflags&BAD)
273 SETERROR(REG_ASSERT);
274#endif
275
276 /* win or lose, we're done */
277 if (p->error != 0) /* lose */
278 regfree(preg);
279 return(p->error);
280}
281
282/*
283 - p_ere - ERE parser top level, concatenation and alternation
284 == static void p_ere(register struct parse *p, int stop);
285 */
286static void
287p_ere(p, stop)
288register struct parse *p;
289int stop; /* character this ERE should end at */
290{
291 register char c;
292 register sopno prevback;
293 register sopno prevfwd;
294 register sopno conc;
295 register int first = 1; /* is this the first alternative? */
296
297 for (;;) {
298 /* do a bunch of concatenated expressions */
299 conc = HERE();
300 while (MORE() && (c = PEEK()) != '|' && c != stop)
301 p_ere_exp(p);
302 (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
303
304 if (!EAT('|'))
305 break; /* NOTE BREAK OUT */
306
307 if (first) {
308 INSERT(OCH_, conc); /* offset is wrong */
309 prevfwd = conc;
310 prevback = conc;
311 first = 0;
312 }
313 ASTERN(OOR1, prevback);
314 prevback = THERE();
315 AHEAD(prevfwd); /* fix previous offset */
316 prevfwd = HERE();
317 EMIT(OOR2, 0); /* offset is very wrong */
318 }
319
320 if (!first) { /* tail-end fixups */
321 AHEAD(prevfwd);
322 ASTERN(O_CH, prevback);
323 }
324
325 assert(!MORE() || SEE(stop));
326}
327
328/*
329 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
330 == static void p_ere_exp(register struct parse *p);
331 */
332static void
333p_ere_exp(p)
334register struct parse *p;
335{
336 register char c;
337 register sopno pos;
338 register int count;
339 register int count2;
340 register sopno subno;
341 int wascaret = 0;
342
343 assert(MORE()); /* caller should have ensured this */
344 c = GETNEXT();
345
346 pos = HERE();
347 switch (c) {
348 case '(':
349 (void)REQUIRE(MORE(), REG_EPAREN);
350 p->g->nsub++;
351 subno = p->g->nsub;
352 if (subno < NPAREN)
353 p->pbegin[subno] = HERE();
354 EMIT(OLPAREN, subno);
355 if (!SEE(')'))
356 p_ere(p, ')');
357 if (subno < NPAREN) {
358 p->pend[subno] = HERE();
359 assert(p->pend[subno] != 0);
360 }
361 EMIT(ORPAREN, subno);
362 (void)MUSTEAT(')', REG_EPAREN);
363 break;
364#ifndef POSIX_MISTAKE
365 case ')': /* happens only if no current unmatched ( */
366 /*
367 * You may ask, why the ifndef? Because I didn't notice
368 * this until slightly too late for 1003.2, and none of the
369 * other 1003.2 regular-expression reviewers noticed it at
370 * all. So an unmatched ) is legal POSIX, at least until
371 * we can get it fixed.
372 */
373 SETERROR(REG_EPAREN);
374 break;
375#endif
376 case '^':
377 EMIT(OBOL, 0);
378 p->g->iflags |= USEBOL;
379 p->g->nbol++;
380 wascaret = 1;
381 break;
382 case '$':
383 EMIT(OEOL, 0);
384 p->g->iflags |= USEEOL;
385 p->g->neol++;
386 break;
387 case '|':
388 SETERROR(REG_EMPTY);
389 break;
390 case '*':
391 case '+':
392 case '?':
393 SETERROR(REG_BADRPT);
394 break;
395 case '.':
396 if (p->g->cflags&REG_NEWLINE)
397 nonnewline(p);
398 else
399 EMIT(OANY, 0);
400 break;
401 case '[':
402 p_bracket(p);
403 break;
404 case '\\':
405 (void)REQUIRE(MORE(), REG_EESCAPE);
406 c = GETNEXT();
407 ordinary(p, c);
408 break;
409 case '{': /* okay as ordinary except if digit follows */
410 (void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
411 /* FALLTHROUGH */
412 default:
413 ordinary(p, c);
414 break;
415 }
416
417 if (!MORE())
418 return;
419 c = PEEK();
420 /* we call { a repetition if followed by a digit */
421 if (!( c == '*' || c == '+' || c == '?' ||
422 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
423 return; /* no repetition, we're done */
424 NEXT();
425
426 (void)REQUIRE(!wascaret, REG_BADRPT);
427 switch (c) {
428 case '*': /* implemented as +? */
429 /* this case does not require the (y|) trick, noKLUDGE */
430 INSERT(OPLUS_, pos);
431 ASTERN(O_PLUS, pos);
432 INSERT(OQUEST_, pos);
433 ASTERN(O_QUEST, pos);
434 break;
435 case '+':
436 INSERT(OPLUS_, pos);
437 ASTERN(O_PLUS, pos);
438 break;
439 case '?':
440 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
441 INSERT(OCH_, pos); /* offset slightly wrong */
442 ASTERN(OOR1, pos); /* this one's right */
443 AHEAD(pos); /* fix the OCH_ */
444 EMIT(OOR2, 0); /* offset very wrong... */
445 AHEAD(THERE()); /* ...so fix it */
446 ASTERN(O_CH, THERETHERE());
447 break;
448 case '{':
449 count = p_count(p);
450 if (EAT(',')) {
451 if (isdigit((uch)PEEK())) {
452 count2 = p_count(p);
453 (void)REQUIRE(count <= count2, REG_BADBR);
454 } else /* single number with comma */
455 count2 = INFINITY;
456 } else /* just a single number */
457 count2 = count;
458 repeat(p, pos, count, count2);
459 if (!EAT('}')) { /* error heuristics */
460 while (MORE() && PEEK() != '}')
461 NEXT();
462 (void)REQUIRE(MORE(), REG_EBRACE);
463 SETERROR(REG_BADBR);
464 }
465 break;
466 }
467
468 if (!MORE())
469 return;
470 c = PEEK();
471 if (!( c == '*' || c == '+' || c == '?' ||
472 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
473 return;
474 SETERROR(REG_BADRPT);
475}
476
477/*
478 - p_str - string (no metacharacters) "parser"
479 == static void p_str(register struct parse *p);
480 */
481static void
482p_str(p)
483register struct parse *p;
484{
485 (void)REQUIRE(MORE(), REG_EMPTY);
486 while (MORE())
487 ordinary(p, GETNEXT());
488}
489
490/*
491 - p_bre - BRE parser top level, anchoring and concatenation
492 == static void p_bre(register struct parse *p, register int end1, \
493 == register int end2);
494 * Giving end1 as OUT essentially eliminates the end1/end2 check.
495 *
496 * This implementation is a bit of a kludge, in that a trailing $ is first
497 * taken as an ordinary character and then revised to be an anchor. The
498 * only undesirable side effect is that '$' gets included as a character
499 * category in such cases. This is fairly harmless; not worth fixing.
500 * The amount of lookahead needed to avoid this kludge is excessive.
501 */
502static void
503p_bre(p, end1, end2)
504register struct parse *p;
505register int end1; /* first terminating character */
506register int end2; /* second terminating character */
507{
508 register sopno start = HERE();
509 register int first = 1; /* first subexpression? */
510 register int wasdollar = 0;
511
512 if (EAT('^')) {
513 EMIT(OBOL, 0);
514 p->g->iflags |= USEBOL;
515 p->g->nbol++;
516 }
517 while (MORE() && !SEETWO(end1, end2)) {
518 wasdollar = p_simp_re(p, first);
519 first = 0;
520 }
521 if (wasdollar) { /* oops, that was a trailing anchor */
522 DROP(1);
523 EMIT(OEOL, 0);
524 p->g->iflags |= USEEOL;
525 p->g->neol++;
526 }
527
528 (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
529}
530
531/*
532 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
533 == static int p_simp_re(register struct parse *p, int starordinary);
534 */
535static int /* was the simple RE an unbackslashed $? */
536p_simp_re(p, starordinary)
537register struct parse *p;
538int starordinary; /* is a leading * an ordinary character? */
539{
540 register int c;
541 register int count;
542 register int count2;
543 register sopno pos;
544 register int i;
545 register sopno subno;
546# define BACKSL (1<<CHAR_BIT)
547
548 pos = HERE(); /* repetion op, if any, covers from here */
549
550 assert(MORE()); /* caller should have ensured this */
551 c = GETNEXT();
552 if (c == '\\') {
553 (void)REQUIRE(MORE(), REG_EESCAPE);
554 c = BACKSL | GETNEXT();
555 }
556 switch (c) {
557 case '.':
558 if (p->g->cflags&REG_NEWLINE)
559 nonnewline(p);
560 else
561 EMIT(OANY, 0);
562 break;
563 case '[':
564 p_bracket(p);
565 break;
566 case BACKSL|'{':
567 SETERROR(REG_BADRPT);
568 break;
569 case BACKSL|'(':
570 p->g->nsub++;
571 subno = p->g->nsub;
572 if (subno < NPAREN)
573 p->pbegin[subno] = HERE();
574 EMIT(OLPAREN, subno);
575 /* the MORE here is an error heuristic */
576 if (MORE() && !SEETWO('\\', ')'))
577 p_bre(p, '\\', ')');
578 if (subno < NPAREN) {
579 p->pend[subno] = HERE();
580 assert(p->pend[subno] != 0);
581 }
582 EMIT(ORPAREN, subno);
583 (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
584 break;
585 case BACKSL|')': /* should not get here -- must be user */
586 case BACKSL|'}':
587 SETERROR(REG_EPAREN);
588 break;
589 case BACKSL|'1':
590 case BACKSL|'2':
591 case BACKSL|'3':
592 case BACKSL|'4':
593 case BACKSL|'5':
594 case BACKSL|'6':
595 case BACKSL|'7':
596 case BACKSL|'8':
597 case BACKSL|'9':
598 i = (c&~BACKSL) - '0';
599 assert(i < NPAREN);
600 if (p->pend[i] != 0) {
601 assert(i <= p->g->nsub);
602 EMIT(OBACK_, i);
603 assert(p->pbegin[i] != 0);
604 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
605 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
606 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
607 EMIT(O_BACK, i);
608 } else
609 SETERROR(REG_ESUBREG);
610 p->g->backrefs = 1;
611 break;
612 case '*':
613 (void)REQUIRE(starordinary, REG_BADRPT);
614 /* FALLTHROUGH */
615 default:
616 ordinary(p, (char)c);
617 break;
618 }
619
620 if (EAT('*')) { /* implemented as +? */
621 /* this case does not require the (y|) trick, noKLUDGE */
622 INSERT(OPLUS_, pos);
623 ASTERN(O_PLUS, pos);
624 INSERT(OQUEST_, pos);
625 ASTERN(O_QUEST, pos);
626 } else if (EATTWO('\\', '{')) {
627 count = p_count(p);
628 if (EAT(',')) {
629 if (MORE() && isdigit((uch)PEEK())) {
630 count2 = p_count(p);
631 (void)REQUIRE(count <= count2, REG_BADBR);
632 } else /* single number with comma */
633 count2 = INFINITY;
634 } else /* just a single number */
635 count2 = count;
636 repeat(p, pos, count, count2);
637 if (!EATTWO('\\', '}')) { /* error heuristics */
638 while (MORE() && !SEETWO('\\', '}'))
639 NEXT();
640 (void)REQUIRE(MORE(), REG_EBRACE);
641 SETERROR(REG_BADBR);
642 }
643 } else if (c == '$') /* $ (but not \$) ends it */
644 return(1);
645
646 return(0);
647}
648
649/*
650 - p_count - parse a repetition count
651 == static int p_count(register struct parse *p);
652 */
653static int /* the value */
654p_count(p)
655register struct parse *p;
656{
657 register int count = 0;
658 register int ndigits = 0;
659
660 while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
661 count = count*10 + (GETNEXT() - '0');
662 ndigits++;
663 }
664
665 (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
666 return(count);
667}
668
669/*
670 - p_bracket - parse a bracketed character list
671 == static void p_bracket(register struct parse *p);
672 *
673 * Note a significant property of this code: if the allocset() did SETERROR,
674 * no set operations are done.
675 */
676static void
677p_bracket(p)
678register struct parse *p;
679{
680 register cset *cs = allocset(p);
681 register int invert = 0;
682
683 /* Dept of Truly Sickening Special-Case Kludges */
684 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
685 EMIT(OBOW, 0);
686 NEXTn(6);
687 return;
688 }
689 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
690 EMIT(OEOW, 0);
691 NEXTn(6);
692 return;
693 }
694
695 if (EAT('^'))
696 invert++; /* make note to invert set at end */
697 if (EAT(']'))
698 CHadd(cs, ']');
699 else if (EAT('-'))
700 CHadd(cs, '-');
701 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
702 p_b_term(p, cs);
703 if (EAT('-'))
704 CHadd(cs, '-');
705 (void)MUSTEAT(']', REG_EBRACK);
706
707 if (p->error != 0) /* don't mess things up further */
708 return;
709
710 if (p->g->cflags&REG_ICASE) {
711 register int i;
712 register int ci;
713
714 for (i = p->g->csetsize - 1; i >= 0; i--)
715 if (CHIN(cs, i) && isalpha(i)) {
716 ci = othercase(i);
717 if (ci != i)
718 CHadd(cs, ci);
719 }
720 if (cs->multis != NULL)
721 mccase(p, cs);
722 }
723 if (invert) {
724 register int i;
725
726 for (i = p->g->csetsize - 1; i >= 0; i--)
727 if (CHIN(cs, i))
728 CHsub(cs, i);
729 else
730 CHadd(cs, i);
731 if (p->g->cflags&REG_NEWLINE)
732 CHsub(cs, '\n');
733 if (cs->multis != NULL)
734 mcinvert(p, cs);
735 }
736
737 assert(cs->multis == NULL); /* xxx */
738
739 if (nch(p, cs) == 1) { /* optimize singleton sets */
740 ordinary(p, firstch(p, cs));
741 freeset(p, cs);
742 } else
743 EMIT(OANYOF, freezeset(p, cs));
744}
745
746/*
747 - p_b_term - parse one term of a bracketed character list
748 == static void p_b_term(register struct parse *p, register cset *cs);
749 */
750static void
751p_b_term(p, cs)
752register struct parse *p;
753register cset *cs;
754{
755 register char c;
756 register char start, finish;
757 register int i;
758
759 /* classify what we've got */
760 switch ((MORE()) ? PEEK() : '\0') {
761 case '[':
762 c = (MORE2()) ? PEEK2() : '\0';
763 break;
764 case '-':
765 SETERROR(REG_ERANGE);
766 return; /* NOTE RETURN */
767 break;
768 default:
769 c = '\0';
770 break;
771 }
772
773 switch (c) {
774 case ':': /* character class */
775 NEXT2();
776 (void)REQUIRE(MORE(), REG_EBRACK);
777 c = PEEK();
778 (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
779 p_b_cclass(p, cs);
780 (void)REQUIRE(MORE(), REG_EBRACK);
781 (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
782 break;
783 case '=': /* equivalence class */
784 NEXT2();
785 (void)REQUIRE(MORE(), REG_EBRACK);
786 c = PEEK();
787 (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
788 p_b_eclass(p, cs);
789 (void)REQUIRE(MORE(), REG_EBRACK);
790 (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
791 break;
792 default: /* symbol, ordinary character, or range */
793/* xxx revision needed for multichar stuff */
794 start = p_b_symbol(p);
795 if (SEE('-') && MORE2() && PEEK2() != ']') {
796 /* range */
797 NEXT();
798 if (EAT('-'))
799 finish = '-';
800 else
801 finish = p_b_symbol(p);
802 } else
803 finish = start;
804 if (start == finish)
805 CHadd(cs, start);
806 else {
807 if (__collate_load_error) {
808 (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
809 for (i = (uch)start; i <= (uch)finish; i++)
810 CHadd(cs, i);
811 } else {
812 (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
813 for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
814 if ( __collate_range_cmp(start, i) <= 0
815 && __collate_range_cmp(i, finish) <= 0
816 )
817 CHadd(cs, i);
818 }
819 }
820 }
821 break;
822 }
823}
824
825/*
826 - p_b_cclass - parse a character-class name and deal with it
827 == static void p_b_cclass(register struct parse *p, register cset *cs);
828 */
829static void
830p_b_cclass(p, cs)
831register struct parse *p;
832register cset *cs;
833{
834 register int c;
835 register char *sp = p->next;
836 register struct cclass *cp;
837 register size_t len;
838
839 while (MORE() && isalpha((uch)PEEK()))
840 NEXT();
841 len = p->next - sp;
842 for (cp = cclasses; cp->name != NULL; cp++)
843 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
844 break;
845 if (cp->name == NULL) {
846 /* oops, didn't find it */
847 SETERROR(REG_ECTYPE);
848 return;
849 }
850
851 switch (cp->fidx) {
852 case CALNUM:
853 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
854 if (isalnum((uch)c))
855 CHadd(cs, c);
856 break;
857 case CALPHA:
858 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
859 if (isalpha((uch)c))
860 CHadd(cs, c);
861 break;
862 case CBLANK:
863 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
864 if (isblank((uch)c))
865 CHadd(cs, c);
866 break;
867 case CCNTRL:
868 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
869 if (iscntrl((uch)c))
870 CHadd(cs, c);
871 break;
872 case CDIGIT:
873 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
874 if (isdigit((uch)c))
875 CHadd(cs, c);
876 break;
877 case CGRAPH:
878 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
879 if (isgraph((uch)c))
880 CHadd(cs, c);
881 break;
882 case CLOWER:
883 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
884 if (islower((uch)c))
885 CHadd(cs, c);
886 break;
887 case CPRINT:
888 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
889 if (isprint((uch)c))
890 CHadd(cs, c);
891 break;
892 case CPUNCT:
893 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
894 if (ispunct((uch)c))
895 CHadd(cs, c);
896 break;
897 case CSPACE:
898 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
899 if (isspace((uch)c))
900 CHadd(cs, c);
901 break;
902 case CUPPER:
903 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
904 if (isupper((uch)c))
905 CHadd(cs, c);
906 break;
907 case CXDIGIT:
908 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
909 if (isxdigit((uch)c))
910 CHadd(cs, c);
911 break;
912 }
913#if 0
914 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
915 MCadd(p, cs, u);
916#endif
917}
918
919/*
920 - p_b_eclass - parse an equivalence-class name and deal with it
921 == static void p_b_eclass(register struct parse *p, register cset *cs);
922 *
923 * This implementation is incomplete. xxx
924 */
925static void
926p_b_eclass(p, cs)
927register struct parse *p;
928register cset *cs;
929{
930 register char c;
931
932 c = p_b_coll_elem(p, '=');
933 CHadd(cs, c);
934}
935
936/*
937 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
938 == static char p_b_symbol(register struct parse *p);
939 */
940static char /* value of symbol */
941p_b_symbol(p)
942register struct parse *p;
943{
944 register char value;
945
946 (void)REQUIRE(MORE(), REG_EBRACK);
947 if (!EATTWO('[', '.'))
948 return(GETNEXT());
949
950 /* collating symbol */
951 value = p_b_coll_elem(p, '.');
952 (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
953 return(value);
954}
955
956/*
957 - p_b_coll_elem - parse a collating-element name and look it up
958 == static char p_b_coll_elem(register struct parse *p, int endc);
959 */
960static char /* value of collating element */
961p_b_coll_elem(p, endc)
962register struct parse *p;
963int endc; /* name ended by endc,']' */
964{
965 register char *sp = p->next;
966 register struct cname *cp;
967 register int len;
968
969 while (MORE() && !SEETWO(endc, ']'))
970 NEXT();
971 if (!MORE()) {
972 SETERROR(REG_EBRACK);
973 return(0);
974 }
975 len = p->next - sp;
976 for (cp = cnames; cp->name != NULL; cp++)
977 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
978 return(cp->code); /* known name */
979 if (len == 1)
980 return(*sp); /* single character */
981 SETERROR(REG_ECOLLATE); /* neither */
982 return(0);
983}
984
985/*
986 - othercase - return the case counterpart of an alphabetic
987 == static char othercase(int ch);
988 */
989static char /* if no counterpart, return ch */
990othercase(ch)
991int ch;
992{
993 ch = (uch)ch;
994 assert(isalpha(ch));
995 if (isupper(ch))
996 return(tolower(ch));
997 else if (islower(ch))
998 return(toupper(ch));
999 else /* peculiar, but could happen */
1000 return(ch);
1001}
1002
1003/*
1004 - bothcases - emit a dualcase version of a two-case character
1005 == static void bothcases(register struct parse *p, int ch);
1006 *
1007 * Boy, is this implementation ever a kludge...
1008 */
1009static void
1010bothcases(p, ch)
1011register struct parse *p;
1012int ch;
1013{
1014 register char *oldnext = p->next;
1015 register char *oldend = p->end;
1016 char bracket[3];
1017
1018 ch = (uch)ch;
1019 assert(othercase(ch) != ch); /* p_bracket() would recurse */
1020 p->next = bracket;
1021 p->end = bracket+2;
1022 bracket[0] = ch;
1023 bracket[1] = ']';
1024 bracket[2] = '\0';
1025 p_bracket(p);
1026 assert(p->next == bracket+2);
1027 p->next = oldnext;
1028 p->end = oldend;
1029}
1030
1031/*
1032 - ordinary - emit an ordinary character
1033 == static void ordinary(register struct parse *p, register int ch);
1034 */
1035static void
1036ordinary(p, ch)
1037register struct parse *p;
1038register int ch;
1039{
1040 register cat_t *cap = p->g->categories;
1041
1042 if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
1043 bothcases(p, ch);
1044 else {
1045 EMIT(OCHAR, (uch)ch);
1046 if (cap[ch] == 0)
1047 cap[ch] = p->g->ncategories++;
1048 }
1049}
1050
1051/*
1052 - nonnewline - emit REG_NEWLINE version of OANY
1053 == static void nonnewline(register struct parse *p);
1054 *
1055 * Boy, is this implementation ever a kludge...
1056 */
1057static void
1058nonnewline(p)
1059register struct parse *p;
1060{
1061 register char *oldnext = p->next;
1062 register char *oldend = p->end;
1063 char bracket[4];
1064
1065 p->next = bracket;
1066 p->end = bracket+3;
1067 bracket[0] = '^';
1068 bracket[1] = '\n';
1069 bracket[2] = ']';
1070 bracket[3] = '\0';
1071 p_bracket(p);
1072 assert(p->next == bracket+3);
1073 p->next = oldnext;
1074 p->end = oldend;
1075}
1076
1077/*
1078 - repeat - generate code for a bounded repetition, recursively if needed
1079 == static void repeat(register struct parse *p, sopno start, int from, int to);
1080 */
1081static void
1082repeat(p, start, from, to)
1083register struct parse *p;
1084sopno start; /* operand from here to end of strip */
1085int from; /* repeated from this number */
1086int to; /* to this number of times (maybe INFINITY) */
1087{
1088 register sopno finish = HERE();
1089# define N 2
1090# define INF 3
1091# define REP(f, t) ((f)*8 + (t))
1092# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1093 register sopno copy;
1094
1095 if (p->error != 0) /* head off possible runaway recursion */
1096 return;
1097
1098 assert(from <= to);
1099
1100 switch (REP(MAP(from), MAP(to))) {
1101 case REP(0, 0): /* must be user doing this */
1102 DROP(finish-start); /* drop the operand */
1103 break;
1104 case REP(0, 1): /* as x{1,1}? */
1105 case REP(0, N): /* as x{1,n}? */
1106 case REP(0, INF): /* as x{1,}? */
1107 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1108 INSERT(OCH_, start); /* offset is wrong... */
1109 repeat(p, start+1, 1, to);
1110 ASTERN(OOR1, start);
1111 AHEAD(start); /* ... fix it */
1112 EMIT(OOR2, 0);
1113 AHEAD(THERE());
1114 ASTERN(O_CH, THERETHERE());
1115 break;
1116 case REP(1, 1): /* trivial case */
1117 /* done */
1118 break;
1119 case REP(1, N): /* as x?x{1,n-1} */
1120 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1121 INSERT(OCH_, start);
1122 ASTERN(OOR1, start);
1123 AHEAD(start);
1124 EMIT(OOR2, 0); /* offset very wrong... */
1125 AHEAD(THERE()); /* ...so fix it */
1126 ASTERN(O_CH, THERETHERE());
1127 copy = dupl(p, start+1, finish+1);
1128 assert(copy == finish+4);
1129 repeat(p, copy, 1, to-1);
1130 break;
1131 case REP(1, INF): /* as x+ */
1132 INSERT(OPLUS_, start);
1133 ASTERN(O_PLUS, start);
1134 break;
1135 case REP(N, N): /* as xx{m-1,n-1} */
1136 copy = dupl(p, start, finish);
1137 repeat(p, copy, from-1, to-1);
1138 break;
1139 case REP(N, INF): /* as xx{n-1,INF} */
1140 copy = dupl(p, start, finish);
1141 repeat(p, copy, from-1, to);
1142 break;
1143 default: /* "can't happen" */
1144 SETERROR(REG_ASSERT); /* just in case */
1145 break;
1146 }
1147}
1148
1149/*
1150 - seterr - set an error condition
1151 == static int seterr(register struct parse *p, int e);
1152 */
1153static int /* useless but makes type checking happy */
1154seterr(p, e)
1155register struct parse *p;
1156int e;
1157{
1158 if (p->error == 0) /* keep earliest error condition */
1159 p->error = e;
1160 p->next = nuls; /* try to bring things to a halt */
1161 p->end = nuls;
1162 return(0); /* make the return value well-defined */
1163}
1164
1165/*
1166 - allocset - allocate a set of characters for []
1167 == static cset *allocset(register struct parse *p);
1168 */
1169static cset *
1170allocset(p)
1171register struct parse *p;
1172{
1173 register int no = p->g->ncsets++;
1174 register size_t nc;
1175 register size_t nbytes;
1176 register cset *cs;
1177 register size_t css = (size_t)p->g->csetsize;
1178 register int i;
1179
1180 if (no >= p->ncsalloc) { /* need another column of space */
1181 p->ncsalloc += CHAR_BIT;
1182 nc = p->ncsalloc;
1183 assert(nc % CHAR_BIT == 0);
1184 nbytes = nc / CHAR_BIT * css;
1185 if (p->g->sets == NULL)
1186 p->g->sets = (cset *)malloc(nc * sizeof(cset));
1187 else
1188 p->g->sets = (cset *)reallocf((char *)p->g->sets,
1189 nc * sizeof(cset));
1190 if (p->g->setbits == NULL)
1191 p->g->setbits = (uch *)malloc(nbytes);
1192 else {
1193 p->g->setbits = (uch *)reallocf((char *)p->g->setbits,
1194 nbytes);
1195 /* xxx this isn't right if setbits is now NULL */
1196 for (i = 0; i < no; i++)
1197 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1198 }
1199 if (p->g->sets != NULL && p->g->setbits != NULL)
1200 (void) memset((char *)p->g->setbits + (nbytes - css),
1201 0, css);
1202 else {
1203 no = 0;
1204 SETERROR(REG_ESPACE);
1205 /* caller's responsibility not to do set ops */
1206 }
1207 }
1208
1209 assert(p->g->sets != NULL); /* xxx */
1210 cs = &p->g->sets[no];
1211 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1212 cs->mask = 1 << ((no) % CHAR_BIT);
1213 cs->hash = 0;
1214 cs->smultis = 0;
1215 cs->multis = NULL;
1216
1217 return(cs);
1218}
1219
1220/*
1221 - freeset - free a now-unused set
1222 == static void freeset(register struct parse *p, register cset *cs);
1223 */
1224static void
1225freeset(p, cs)
1226register struct parse *p;
1227register cset *cs;
1228{
1229 register int i;
1230 register cset *top = &p->g->sets[p->g->ncsets];
1231 register size_t css = (size_t)p->g->csetsize;
1232
1233 for (i = 0; i < css; i++)
1234 CHsub(cs, i);
1235 if (cs == top-1) /* recover only the easy case */
1236 p->g->ncsets--;
1237}
1238
1239/*
1240 - freezeset - final processing on a set of characters
1241 == static int freezeset(register struct parse *p, register cset *cs);
1242 *
1243 * The main task here is merging identical sets. This is usually a waste
1244 * of time (although the hash code minimizes the overhead), but can win
1245 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1246 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1247 * the same value!
1248 */
1249static int /* set number */
1250freezeset(p, cs)
1251register struct parse *p;
1252register cset *cs;
1253{
1254 register short h = cs->hash;
1255 register int i;
1256 register cset *top = &p->g->sets[p->g->ncsets];
1257 register cset *cs2;
1258 register size_t css = (size_t)p->g->csetsize;
1259
1260 /* look for an earlier one which is the same */
1261 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1262 if (cs2->hash == h && cs2 != cs) {
1263 /* maybe */
1264 for (i = 0; i < css; i++)
1265 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1266 break; /* no */
1267 if (i == css)
1268 break; /* yes */
1269 }
1270
1271 if (cs2 < top) { /* found one */
1272 freeset(p, cs);
1273 cs = cs2;
1274 }
1275
1276 return((int)(cs - p->g->sets));
1277}
1278
1279/*
1280 - firstch - return first character in a set (which must have at least one)
1281 == static int firstch(register struct parse *p, register cset *cs);
1282 */
1283static int /* character; there is no "none" value */
1284firstch(p, cs)
1285register struct parse *p;
1286register cset *cs;
1287{
1288 register int i;
1289 register size_t css = (size_t)p->g->csetsize;
1290
1291 for (i = 0; i < css; i++)
1292 if (CHIN(cs, i))
1293 return((char)i);
1294 assert(never);
1295 return(0); /* arbitrary */
1296}
1297
1298/*
1299 - nch - number of characters in a set
1300 == static int nch(register struct parse *p, register cset *cs);
1301 */
1302static int
1303nch(p, cs)
1304register struct parse *p;
1305register cset *cs;
1306{
1307 register int i;
1308 register size_t css = (size_t)p->g->csetsize;
1309 register int n = 0;
1310
1311 for (i = 0; i < css; i++)
1312 if (CHIN(cs, i))
1313 n++;
1314 return(n);
1315}
1316
1317/*
1318 - mcadd - add a collating element to a cset
1319 == static void mcadd(register struct parse *p, register cset *cs, \
1320 == register char *cp);
1321 */
1322static void
1323mcadd(p, cs, cp)
1324register struct parse *p;
1325register cset *cs;
1326register char *cp;
1327{
1328 register size_t oldend = cs->smultis;
1329
1330 cs->smultis += strlen(cp) + 1;
1331 if (cs->multis == NULL)
1332 cs->multis = malloc(cs->smultis);
1333 else
1334 cs->multis = reallocf(cs->multis, cs->smultis);
1335 if (cs->multis == NULL) {
1336 SETERROR(REG_ESPACE);
1337 return;
1338 }
1339
1340 (void) strcpy(cs->multis + oldend - 1, cp);
1341 cs->multis[cs->smultis - 1] = '\0';
1342}
1343
1344#if used
1345/*
1346 - mcsub - subtract a collating element from a cset
1347 == static void mcsub(register cset *cs, register char *cp);
1348 */
1349static void
1350mcsub(cs, cp)
1351register cset *cs;
1352register char *cp;
1353{
1354 register char *fp = mcfind(cs, cp);
1355 register size_t len = strlen(fp);
1356
1357 assert(fp != NULL);
1358 (void) memmove(fp, fp + len + 1,
1359 cs->smultis - (fp + len + 1 - cs->multis));
1360 cs->smultis -= len;
1361
1362 if (cs->smultis == 0) {
1363 free(cs->multis);
1364 cs->multis = NULL;
1365 return;
1366 }
1367
1368 cs->multis = reallocf(cs->multis, cs->smultis);
1369 assert(cs->multis != NULL);
1370}
1371
1372/*
1373 - mcin - is a collating element in a cset?
1374 == static int mcin(register cset *cs, register char *cp);
1375 */
1376static int
1377mcin(cs, cp)
1378register cset *cs;
1379register char *cp;
1380{
1381 return(mcfind(cs, cp) != NULL);
1382}
1383
1384/*
1385 - mcfind - find a collating element in a cset
1386 == static char *mcfind(register cset *cs, register char *cp);
1387 */
1388static char *
1389mcfind(cs, cp)
1390register cset *cs;
1391register char *cp;
1392{
1393 register char *p;
1394
1395 if (cs->multis == NULL)
1396 return(NULL);
1397 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1398 if (strcmp(cp, p) == 0)
1399 return(p);
1400 return(NULL);
1401}
1402#endif
1403
1404/*
1405 - mcinvert - invert the list of collating elements in a cset
1406 == static void mcinvert(register struct parse *p, register cset *cs);
1407 *
1408 * This would have to know the set of possibilities. Implementation
1409 * is deferred.
1410 */
1411static void
1412mcinvert(p, cs)
1413register struct parse *p;
1414register cset *cs;
1415{
1416 assert(cs->multis == NULL); /* xxx */
1417}
1418
1419/*
1420 - mccase - add case counterparts of the list of collating elements in a cset
1421 == static void mccase(register struct parse *p, register cset *cs);
1422 *
1423 * This would have to know the set of possibilities. Implementation
1424 * is deferred.
1425 */
1426static void
1427mccase(p, cs)
1428register struct parse *p;
1429register cset *cs;
1430{
1431 assert(cs->multis == NULL); /* xxx */
1432}
1433
1434/*
1435 - isinsets - is this character in any sets?
1436 == static int isinsets(register struct re_guts *g, int c);
1437 */
1438static int /* predicate */
1439isinsets(g, c)
1440register struct re_guts *g;
1441int c;
1442{
1443 register uch *col;
1444 register int i;
1445 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1446 register unsigned uc = (uch)c;
1447
1448 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1449 if (col[uc] != 0)
1450 return(1);
1451 return(0);
1452}
1453
1454/*
1455 - samesets - are these two characters in exactly the same sets?
1456 == static int samesets(register struct re_guts *g, int c1, int c2);
1457 */
1458static int /* predicate */
1459samesets(g, c1, c2)
1460register struct re_guts *g;
1461int c1;
1462int c2;
1463{
1464 register uch *col;
1465 register int i;
1466 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1467 register unsigned uc1 = (uch)c1;
1468 register unsigned uc2 = (uch)c2;
1469
1470 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1471 if (col[uc1] != col[uc2])
1472 return(0);
1473 return(1);
1474}
1475
1476/*
1477 - categorize - sort out character categories
1478 == static void categorize(struct parse *p, register struct re_guts *g);
1479 */
1480static void
1481categorize(p, g)
1482struct parse *p;
1483register struct re_guts *g;
1484{
1485 register cat_t *cats = g->categories;
1486 register int c;
1487 register int c2;
1488 register cat_t cat;
1489
1490 /* avoid making error situations worse */
1491 if (p->error != 0)
1492 return;
1493
1494 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1495 if (cats[c] == 0 && isinsets(g, c)) {
1496 cat = g->ncategories++;
1497 cats[c] = cat;
1498 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1499 if (cats[c2] == 0 && samesets(g, c, c2))
1500 cats[c2] = cat;
1501 }
1502}
1503
1504/*
1505 - dupl - emit a duplicate of a bunch of sops
1506 == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1507 */
1508static sopno /* start of duplicate */
1509dupl(p, start, finish)
1510register struct parse *p;
1511sopno start; /* from here */
1512sopno finish; /* to this less one */
1513{
1514 register sopno ret = HERE();
1515 register sopno len = finish - start;
1516
1517 assert(finish >= start);
1518 if (len == 0)
1519 return(ret);
1520 enlarge(p, p->ssize + len); /* this many unexpected additions */
1521 assert(p->ssize >= p->slen + len);
1522 (void) memcpy((char *)(p->strip + p->slen),
1523 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1524 p->slen += len;
1525 return(ret);
1526}
1527
1528/*
1529 - doemit - emit a strip operator
1530 == static void doemit(register struct parse *p, sop op, size_t opnd);
1531 *
1532 * It might seem better to implement this as a macro with a function as
1533 * hard-case backup, but it's just too big and messy unless there are
1534 * some changes to the data structures. Maybe later.
1535 */
1536static void
1537doemit(p, op, opnd)
1538register struct parse *p;
1539sop op;
1540size_t opnd;
1541{
1542 /* avoid making error situations worse */
1543 if (p->error != 0)
1544 return;
1545
1546 /* deal with oversize operands ("can't happen", more or less) */
1547 assert(opnd < 1<<OPSHIFT);
1548
1549 /* deal with undersized strip */
1550 if (p->slen >= p->ssize)
1551 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
1552 assert(p->slen < p->ssize);
1553
1554 /* finally, it's all reduced to the easy case */
1555 p->strip[p->slen++] = SOP(op, opnd);
1556}
1557
1558/*
1559 - doinsert - insert a sop into the strip
1560 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1561 */
1562static void
1563doinsert(p, op, opnd, pos)
1564register struct parse *p;
1565sop op;
1566size_t opnd;
1567sopno pos;
1568{
1569 register sopno sn;
1570 register sop s;
1571 register int i;
1572
1573 /* avoid making error situations worse */
1574 if (p->error != 0)
1575 return;
1576
1577 sn = HERE();
1578 EMIT(op, opnd); /* do checks, ensure space */
1579 assert(HERE() == sn+1);
1580 s = p->strip[sn];
1581
1582 /* adjust paren pointers */
1583 assert(pos > 0);
1584 for (i = 1; i < NPAREN; i++) {
1585 if (p->pbegin[i] >= pos) {
1586 p->pbegin[i]++;
1587 }
1588 if (p->pend[i] >= pos) {
1589 p->pend[i]++;
1590 }
1591 }
1592
1593 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1594 (HERE()-pos-1)*sizeof(sop));
1595 p->strip[pos] = s;
1596}
1597
1598/*
1599 - dofwd - complete a forward reference
1600 == static void dofwd(register struct parse *p, sopno pos, sop value);
1601 */
1602static void
1603dofwd(p, pos, value)
1604register struct parse *p;
1605register sopno pos;
1606sop value;
1607{
1608 /* avoid making error situations worse */
1609 if (p->error != 0)
1610 return;
1611
1612 assert(value < 1<<OPSHIFT);
1613 p->strip[pos] = OP(p->strip[pos]) | value;
1614}
1615
1616/*
1617 - enlarge - enlarge the strip
1618 == static void enlarge(register struct parse *p, sopno size);
1619 */
1620static void
1621enlarge(p, size)
1622register struct parse *p;
1623register sopno size;
1624{
1625 register sop *sp;
1626
1627 if (p->ssize >= size)
1628 return;
1629
1630 sp = (sop *)realloc(p->strip, size*sizeof(sop));
1631 if (sp == NULL) {
1632 SETERROR(REG_ESPACE);
1633 return;
1634 }
1635 p->strip = sp;
1636 p->ssize = size;
1637}
1638
1639/*
1640 - stripsnug - compact the strip
1641 == static void stripsnug(register struct parse *p, register struct re_guts *g);
1642 */
1643static void
1644stripsnug(p, g)
1645register struct parse *p;
1646register struct re_guts *g;
1647{
1648 g->nstates = p->slen;
1649 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1650 if (g->strip == NULL) {
1651 SETERROR(REG_ESPACE);
1652 g->strip = p->strip;
1653 }
1654}
1655
1656/*
1657 - findmust - fill in must and mlen with longest mandatory literal string
1658 == static void findmust(register struct parse *p, register struct re_guts *g);
1659 *
1660 * This algorithm could do fancy things like analyzing the operands of |
1661 * for common subsequences. Someday. This code is simple and finds most
1662 * of the interesting cases.
1663 *
1664 * Note that must and mlen got initialized during setup.
1665 */
1666static void
1667findmust(p, g)
1668struct parse *p;
1669register struct re_guts *g;
1670{
1671 register sop *scan;
1672 sop *start;
1673 register sop *newstart;
1674 register sopno newlen;
1675 register sop s;
1676 register char *cp;
1677 register sopno i;
1678
1679 /* avoid making error situations worse */
1680 if (p->error != 0)
1681 return;
1682
1683 /* find the longest OCHAR sequence in strip */
1684 newlen = 0;
1685 scan = g->strip + 1;
1686 do {
1687 s = *scan++;
1688 switch (OP(s)) {
1689 case OCHAR: /* sequence member */
1690 if (newlen == 0) /* new sequence */
1691 newstart = scan - 1;
1692 newlen++;
1693 break;
1694 case OPLUS_: /* things that don't break one */
1695 case OLPAREN:
1696 case ORPAREN:
1697 break;
1698 case OQUEST_: /* things that must be skipped */
1699 case OCH_:
1700 scan--;
1701 do {
1702 scan += OPND(s);
1703 s = *scan;
1704 /* assert() interferes w debug printouts */
1705 if (OP(s) != O_QUEST && OP(s) != O_CH &&
1706 OP(s) != OOR2) {
1707 g->iflags |= BAD;
1708 return;
1709 }
1710 } while (OP(s) != O_QUEST && OP(s) != O_CH);
1711 /* fallthrough */
1712 default: /* things that break a sequence */
1713 if (newlen > g->mlen) { /* ends one */
1714 start = newstart;
1715 g->mlen = newlen;
1716 }
1717 newlen = 0;
1718 break;
1719 }
1720 } while (OP(s) != OEND);
1721
1722 if (g->mlen == 0) /* there isn't one */
1723 return;
1724
1725 /* turn it into a character string */
1726 g->must = malloc((size_t)g->mlen + 1);
1727 if (g->must == NULL) { /* argh; just forget it */
1728 g->mlen = 0;
1729 return;
1730 }
1731 cp = g->must;
1732 scan = start;
1733 for (i = g->mlen; i > 0; i--) {
1734 while (OP(s = *scan++) != OCHAR)
1735 continue;
1736 assert(cp < g->must + g->mlen);
1737 *cp++ = (char)OPND(s);
1738 }
1739 assert(cp == g->must + g->mlen);
1740 *cp++ = '\0'; /* just on general principles */
1741}
1742
1743/*
283 g->nplus = pluscount(p, g);
284 g->magic = MAGIC2;
285 preg->re_nsub = g->nsub;
286 preg->re_g = g;
287 preg->re_magic = MAGIC1;
288#ifndef REDEBUG
289 /* not debugging, so can't rely on the assert() in regexec() */
290 if (g->iflags&BAD)
291 SETERROR(REG_ASSERT);
292#endif
293
294 /* win or lose, we're done */
295 if (p->error != 0) /* lose */
296 regfree(preg);
297 return(p->error);
298}
299
300/*
301 - p_ere - ERE parser top level, concatenation and alternation
302 == static void p_ere(register struct parse *p, int stop);
303 */
304static void
305p_ere(p, stop)
306register struct parse *p;
307int stop; /* character this ERE should end at */
308{
309 register char c;
310 register sopno prevback;
311 register sopno prevfwd;
312 register sopno conc;
313 register int first = 1; /* is this the first alternative? */
314
315 for (;;) {
316 /* do a bunch of concatenated expressions */
317 conc = HERE();
318 while (MORE() && (c = PEEK()) != '|' && c != stop)
319 p_ere_exp(p);
320 (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
321
322 if (!EAT('|'))
323 break; /* NOTE BREAK OUT */
324
325 if (first) {
326 INSERT(OCH_, conc); /* offset is wrong */
327 prevfwd = conc;
328 prevback = conc;
329 first = 0;
330 }
331 ASTERN(OOR1, prevback);
332 prevback = THERE();
333 AHEAD(prevfwd); /* fix previous offset */
334 prevfwd = HERE();
335 EMIT(OOR2, 0); /* offset is very wrong */
336 }
337
338 if (!first) { /* tail-end fixups */
339 AHEAD(prevfwd);
340 ASTERN(O_CH, prevback);
341 }
342
343 assert(!MORE() || SEE(stop));
344}
345
346/*
347 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
348 == static void p_ere_exp(register struct parse *p);
349 */
350static void
351p_ere_exp(p)
352register struct parse *p;
353{
354 register char c;
355 register sopno pos;
356 register int count;
357 register int count2;
358 register sopno subno;
359 int wascaret = 0;
360
361 assert(MORE()); /* caller should have ensured this */
362 c = GETNEXT();
363
364 pos = HERE();
365 switch (c) {
366 case '(':
367 (void)REQUIRE(MORE(), REG_EPAREN);
368 p->g->nsub++;
369 subno = p->g->nsub;
370 if (subno < NPAREN)
371 p->pbegin[subno] = HERE();
372 EMIT(OLPAREN, subno);
373 if (!SEE(')'))
374 p_ere(p, ')');
375 if (subno < NPAREN) {
376 p->pend[subno] = HERE();
377 assert(p->pend[subno] != 0);
378 }
379 EMIT(ORPAREN, subno);
380 (void)MUSTEAT(')', REG_EPAREN);
381 break;
382#ifndef POSIX_MISTAKE
383 case ')': /* happens only if no current unmatched ( */
384 /*
385 * You may ask, why the ifndef? Because I didn't notice
386 * this until slightly too late for 1003.2, and none of the
387 * other 1003.2 regular-expression reviewers noticed it at
388 * all. So an unmatched ) is legal POSIX, at least until
389 * we can get it fixed.
390 */
391 SETERROR(REG_EPAREN);
392 break;
393#endif
394 case '^':
395 EMIT(OBOL, 0);
396 p->g->iflags |= USEBOL;
397 p->g->nbol++;
398 wascaret = 1;
399 break;
400 case '$':
401 EMIT(OEOL, 0);
402 p->g->iflags |= USEEOL;
403 p->g->neol++;
404 break;
405 case '|':
406 SETERROR(REG_EMPTY);
407 break;
408 case '*':
409 case '+':
410 case '?':
411 SETERROR(REG_BADRPT);
412 break;
413 case '.':
414 if (p->g->cflags&REG_NEWLINE)
415 nonnewline(p);
416 else
417 EMIT(OANY, 0);
418 break;
419 case '[':
420 p_bracket(p);
421 break;
422 case '\\':
423 (void)REQUIRE(MORE(), REG_EESCAPE);
424 c = GETNEXT();
425 ordinary(p, c);
426 break;
427 case '{': /* okay as ordinary except if digit follows */
428 (void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
429 /* FALLTHROUGH */
430 default:
431 ordinary(p, c);
432 break;
433 }
434
435 if (!MORE())
436 return;
437 c = PEEK();
438 /* we call { a repetition if followed by a digit */
439 if (!( c == '*' || c == '+' || c == '?' ||
440 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
441 return; /* no repetition, we're done */
442 NEXT();
443
444 (void)REQUIRE(!wascaret, REG_BADRPT);
445 switch (c) {
446 case '*': /* implemented as +? */
447 /* this case does not require the (y|) trick, noKLUDGE */
448 INSERT(OPLUS_, pos);
449 ASTERN(O_PLUS, pos);
450 INSERT(OQUEST_, pos);
451 ASTERN(O_QUEST, pos);
452 break;
453 case '+':
454 INSERT(OPLUS_, pos);
455 ASTERN(O_PLUS, pos);
456 break;
457 case '?':
458 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
459 INSERT(OCH_, pos); /* offset slightly wrong */
460 ASTERN(OOR1, pos); /* this one's right */
461 AHEAD(pos); /* fix the OCH_ */
462 EMIT(OOR2, 0); /* offset very wrong... */
463 AHEAD(THERE()); /* ...so fix it */
464 ASTERN(O_CH, THERETHERE());
465 break;
466 case '{':
467 count = p_count(p);
468 if (EAT(',')) {
469 if (isdigit((uch)PEEK())) {
470 count2 = p_count(p);
471 (void)REQUIRE(count <= count2, REG_BADBR);
472 } else /* single number with comma */
473 count2 = INFINITY;
474 } else /* just a single number */
475 count2 = count;
476 repeat(p, pos, count, count2);
477 if (!EAT('}')) { /* error heuristics */
478 while (MORE() && PEEK() != '}')
479 NEXT();
480 (void)REQUIRE(MORE(), REG_EBRACE);
481 SETERROR(REG_BADBR);
482 }
483 break;
484 }
485
486 if (!MORE())
487 return;
488 c = PEEK();
489 if (!( c == '*' || c == '+' || c == '?' ||
490 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
491 return;
492 SETERROR(REG_BADRPT);
493}
494
495/*
496 - p_str - string (no metacharacters) "parser"
497 == static void p_str(register struct parse *p);
498 */
499static void
500p_str(p)
501register struct parse *p;
502{
503 (void)REQUIRE(MORE(), REG_EMPTY);
504 while (MORE())
505 ordinary(p, GETNEXT());
506}
507
508/*
509 - p_bre - BRE parser top level, anchoring and concatenation
510 == static void p_bre(register struct parse *p, register int end1, \
511 == register int end2);
512 * Giving end1 as OUT essentially eliminates the end1/end2 check.
513 *
514 * This implementation is a bit of a kludge, in that a trailing $ is first
515 * taken as an ordinary character and then revised to be an anchor. The
516 * only undesirable side effect is that '$' gets included as a character
517 * category in such cases. This is fairly harmless; not worth fixing.
518 * The amount of lookahead needed to avoid this kludge is excessive.
519 */
520static void
521p_bre(p, end1, end2)
522register struct parse *p;
523register int end1; /* first terminating character */
524register int end2; /* second terminating character */
525{
526 register sopno start = HERE();
527 register int first = 1; /* first subexpression? */
528 register int wasdollar = 0;
529
530 if (EAT('^')) {
531 EMIT(OBOL, 0);
532 p->g->iflags |= USEBOL;
533 p->g->nbol++;
534 }
535 while (MORE() && !SEETWO(end1, end2)) {
536 wasdollar = p_simp_re(p, first);
537 first = 0;
538 }
539 if (wasdollar) { /* oops, that was a trailing anchor */
540 DROP(1);
541 EMIT(OEOL, 0);
542 p->g->iflags |= USEEOL;
543 p->g->neol++;
544 }
545
546 (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
547}
548
549/*
550 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
551 == static int p_simp_re(register struct parse *p, int starordinary);
552 */
553static int /* was the simple RE an unbackslashed $? */
554p_simp_re(p, starordinary)
555register struct parse *p;
556int starordinary; /* is a leading * an ordinary character? */
557{
558 register int c;
559 register int count;
560 register int count2;
561 register sopno pos;
562 register int i;
563 register sopno subno;
564# define BACKSL (1<<CHAR_BIT)
565
566 pos = HERE(); /* repetion op, if any, covers from here */
567
568 assert(MORE()); /* caller should have ensured this */
569 c = GETNEXT();
570 if (c == '\\') {
571 (void)REQUIRE(MORE(), REG_EESCAPE);
572 c = BACKSL | GETNEXT();
573 }
574 switch (c) {
575 case '.':
576 if (p->g->cflags&REG_NEWLINE)
577 nonnewline(p);
578 else
579 EMIT(OANY, 0);
580 break;
581 case '[':
582 p_bracket(p);
583 break;
584 case BACKSL|'{':
585 SETERROR(REG_BADRPT);
586 break;
587 case BACKSL|'(':
588 p->g->nsub++;
589 subno = p->g->nsub;
590 if (subno < NPAREN)
591 p->pbegin[subno] = HERE();
592 EMIT(OLPAREN, subno);
593 /* the MORE here is an error heuristic */
594 if (MORE() && !SEETWO('\\', ')'))
595 p_bre(p, '\\', ')');
596 if (subno < NPAREN) {
597 p->pend[subno] = HERE();
598 assert(p->pend[subno] != 0);
599 }
600 EMIT(ORPAREN, subno);
601 (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
602 break;
603 case BACKSL|')': /* should not get here -- must be user */
604 case BACKSL|'}':
605 SETERROR(REG_EPAREN);
606 break;
607 case BACKSL|'1':
608 case BACKSL|'2':
609 case BACKSL|'3':
610 case BACKSL|'4':
611 case BACKSL|'5':
612 case BACKSL|'6':
613 case BACKSL|'7':
614 case BACKSL|'8':
615 case BACKSL|'9':
616 i = (c&~BACKSL) - '0';
617 assert(i < NPAREN);
618 if (p->pend[i] != 0) {
619 assert(i <= p->g->nsub);
620 EMIT(OBACK_, i);
621 assert(p->pbegin[i] != 0);
622 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
623 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
624 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
625 EMIT(O_BACK, i);
626 } else
627 SETERROR(REG_ESUBREG);
628 p->g->backrefs = 1;
629 break;
630 case '*':
631 (void)REQUIRE(starordinary, REG_BADRPT);
632 /* FALLTHROUGH */
633 default:
634 ordinary(p, (char)c);
635 break;
636 }
637
638 if (EAT('*')) { /* implemented as +? */
639 /* this case does not require the (y|) trick, noKLUDGE */
640 INSERT(OPLUS_, pos);
641 ASTERN(O_PLUS, pos);
642 INSERT(OQUEST_, pos);
643 ASTERN(O_QUEST, pos);
644 } else if (EATTWO('\\', '{')) {
645 count = p_count(p);
646 if (EAT(',')) {
647 if (MORE() && isdigit((uch)PEEK())) {
648 count2 = p_count(p);
649 (void)REQUIRE(count <= count2, REG_BADBR);
650 } else /* single number with comma */
651 count2 = INFINITY;
652 } else /* just a single number */
653 count2 = count;
654 repeat(p, pos, count, count2);
655 if (!EATTWO('\\', '}')) { /* error heuristics */
656 while (MORE() && !SEETWO('\\', '}'))
657 NEXT();
658 (void)REQUIRE(MORE(), REG_EBRACE);
659 SETERROR(REG_BADBR);
660 }
661 } else if (c == '$') /* $ (but not \$) ends it */
662 return(1);
663
664 return(0);
665}
666
667/*
668 - p_count - parse a repetition count
669 == static int p_count(register struct parse *p);
670 */
671static int /* the value */
672p_count(p)
673register struct parse *p;
674{
675 register int count = 0;
676 register int ndigits = 0;
677
678 while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
679 count = count*10 + (GETNEXT() - '0');
680 ndigits++;
681 }
682
683 (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
684 return(count);
685}
686
687/*
688 - p_bracket - parse a bracketed character list
689 == static void p_bracket(register struct parse *p);
690 *
691 * Note a significant property of this code: if the allocset() did SETERROR,
692 * no set operations are done.
693 */
694static void
695p_bracket(p)
696register struct parse *p;
697{
698 register cset *cs = allocset(p);
699 register int invert = 0;
700
701 /* Dept of Truly Sickening Special-Case Kludges */
702 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
703 EMIT(OBOW, 0);
704 NEXTn(6);
705 return;
706 }
707 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
708 EMIT(OEOW, 0);
709 NEXTn(6);
710 return;
711 }
712
713 if (EAT('^'))
714 invert++; /* make note to invert set at end */
715 if (EAT(']'))
716 CHadd(cs, ']');
717 else if (EAT('-'))
718 CHadd(cs, '-');
719 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
720 p_b_term(p, cs);
721 if (EAT('-'))
722 CHadd(cs, '-');
723 (void)MUSTEAT(']', REG_EBRACK);
724
725 if (p->error != 0) /* don't mess things up further */
726 return;
727
728 if (p->g->cflags&REG_ICASE) {
729 register int i;
730 register int ci;
731
732 for (i = p->g->csetsize - 1; i >= 0; i--)
733 if (CHIN(cs, i) && isalpha(i)) {
734 ci = othercase(i);
735 if (ci != i)
736 CHadd(cs, ci);
737 }
738 if (cs->multis != NULL)
739 mccase(p, cs);
740 }
741 if (invert) {
742 register int i;
743
744 for (i = p->g->csetsize - 1; i >= 0; i--)
745 if (CHIN(cs, i))
746 CHsub(cs, i);
747 else
748 CHadd(cs, i);
749 if (p->g->cflags&REG_NEWLINE)
750 CHsub(cs, '\n');
751 if (cs->multis != NULL)
752 mcinvert(p, cs);
753 }
754
755 assert(cs->multis == NULL); /* xxx */
756
757 if (nch(p, cs) == 1) { /* optimize singleton sets */
758 ordinary(p, firstch(p, cs));
759 freeset(p, cs);
760 } else
761 EMIT(OANYOF, freezeset(p, cs));
762}
763
764/*
765 - p_b_term - parse one term of a bracketed character list
766 == static void p_b_term(register struct parse *p, register cset *cs);
767 */
768static void
769p_b_term(p, cs)
770register struct parse *p;
771register cset *cs;
772{
773 register char c;
774 register char start, finish;
775 register int i;
776
777 /* classify what we've got */
778 switch ((MORE()) ? PEEK() : '\0') {
779 case '[':
780 c = (MORE2()) ? PEEK2() : '\0';
781 break;
782 case '-':
783 SETERROR(REG_ERANGE);
784 return; /* NOTE RETURN */
785 break;
786 default:
787 c = '\0';
788 break;
789 }
790
791 switch (c) {
792 case ':': /* character class */
793 NEXT2();
794 (void)REQUIRE(MORE(), REG_EBRACK);
795 c = PEEK();
796 (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
797 p_b_cclass(p, cs);
798 (void)REQUIRE(MORE(), REG_EBRACK);
799 (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
800 break;
801 case '=': /* equivalence class */
802 NEXT2();
803 (void)REQUIRE(MORE(), REG_EBRACK);
804 c = PEEK();
805 (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
806 p_b_eclass(p, cs);
807 (void)REQUIRE(MORE(), REG_EBRACK);
808 (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
809 break;
810 default: /* symbol, ordinary character, or range */
811/* xxx revision needed for multichar stuff */
812 start = p_b_symbol(p);
813 if (SEE('-') && MORE2() && PEEK2() != ']') {
814 /* range */
815 NEXT();
816 if (EAT('-'))
817 finish = '-';
818 else
819 finish = p_b_symbol(p);
820 } else
821 finish = start;
822 if (start == finish)
823 CHadd(cs, start);
824 else {
825 if (__collate_load_error) {
826 (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
827 for (i = (uch)start; i <= (uch)finish; i++)
828 CHadd(cs, i);
829 } else {
830 (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
831 for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
832 if ( __collate_range_cmp(start, i) <= 0
833 && __collate_range_cmp(i, finish) <= 0
834 )
835 CHadd(cs, i);
836 }
837 }
838 }
839 break;
840 }
841}
842
843/*
844 - p_b_cclass - parse a character-class name and deal with it
845 == static void p_b_cclass(register struct parse *p, register cset *cs);
846 */
847static void
848p_b_cclass(p, cs)
849register struct parse *p;
850register cset *cs;
851{
852 register int c;
853 register char *sp = p->next;
854 register struct cclass *cp;
855 register size_t len;
856
857 while (MORE() && isalpha((uch)PEEK()))
858 NEXT();
859 len = p->next - sp;
860 for (cp = cclasses; cp->name != NULL; cp++)
861 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
862 break;
863 if (cp->name == NULL) {
864 /* oops, didn't find it */
865 SETERROR(REG_ECTYPE);
866 return;
867 }
868
869 switch (cp->fidx) {
870 case CALNUM:
871 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
872 if (isalnum((uch)c))
873 CHadd(cs, c);
874 break;
875 case CALPHA:
876 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
877 if (isalpha((uch)c))
878 CHadd(cs, c);
879 break;
880 case CBLANK:
881 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
882 if (isblank((uch)c))
883 CHadd(cs, c);
884 break;
885 case CCNTRL:
886 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
887 if (iscntrl((uch)c))
888 CHadd(cs, c);
889 break;
890 case CDIGIT:
891 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
892 if (isdigit((uch)c))
893 CHadd(cs, c);
894 break;
895 case CGRAPH:
896 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
897 if (isgraph((uch)c))
898 CHadd(cs, c);
899 break;
900 case CLOWER:
901 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
902 if (islower((uch)c))
903 CHadd(cs, c);
904 break;
905 case CPRINT:
906 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
907 if (isprint((uch)c))
908 CHadd(cs, c);
909 break;
910 case CPUNCT:
911 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
912 if (ispunct((uch)c))
913 CHadd(cs, c);
914 break;
915 case CSPACE:
916 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
917 if (isspace((uch)c))
918 CHadd(cs, c);
919 break;
920 case CUPPER:
921 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
922 if (isupper((uch)c))
923 CHadd(cs, c);
924 break;
925 case CXDIGIT:
926 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
927 if (isxdigit((uch)c))
928 CHadd(cs, c);
929 break;
930 }
931#if 0
932 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
933 MCadd(p, cs, u);
934#endif
935}
936
937/*
938 - p_b_eclass - parse an equivalence-class name and deal with it
939 == static void p_b_eclass(register struct parse *p, register cset *cs);
940 *
941 * This implementation is incomplete. xxx
942 */
943static void
944p_b_eclass(p, cs)
945register struct parse *p;
946register cset *cs;
947{
948 register char c;
949
950 c = p_b_coll_elem(p, '=');
951 CHadd(cs, c);
952}
953
954/*
955 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
956 == static char p_b_symbol(register struct parse *p);
957 */
958static char /* value of symbol */
959p_b_symbol(p)
960register struct parse *p;
961{
962 register char value;
963
964 (void)REQUIRE(MORE(), REG_EBRACK);
965 if (!EATTWO('[', '.'))
966 return(GETNEXT());
967
968 /* collating symbol */
969 value = p_b_coll_elem(p, '.');
970 (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
971 return(value);
972}
973
974/*
975 - p_b_coll_elem - parse a collating-element name and look it up
976 == static char p_b_coll_elem(register struct parse *p, int endc);
977 */
978static char /* value of collating element */
979p_b_coll_elem(p, endc)
980register struct parse *p;
981int endc; /* name ended by endc,']' */
982{
983 register char *sp = p->next;
984 register struct cname *cp;
985 register int len;
986
987 while (MORE() && !SEETWO(endc, ']'))
988 NEXT();
989 if (!MORE()) {
990 SETERROR(REG_EBRACK);
991 return(0);
992 }
993 len = p->next - sp;
994 for (cp = cnames; cp->name != NULL; cp++)
995 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
996 return(cp->code); /* known name */
997 if (len == 1)
998 return(*sp); /* single character */
999 SETERROR(REG_ECOLLATE); /* neither */
1000 return(0);
1001}
1002
1003/*
1004 - othercase - return the case counterpart of an alphabetic
1005 == static char othercase(int ch);
1006 */
1007static char /* if no counterpart, return ch */
1008othercase(ch)
1009int ch;
1010{
1011 ch = (uch)ch;
1012 assert(isalpha(ch));
1013 if (isupper(ch))
1014 return(tolower(ch));
1015 else if (islower(ch))
1016 return(toupper(ch));
1017 else /* peculiar, but could happen */
1018 return(ch);
1019}
1020
1021/*
1022 - bothcases - emit a dualcase version of a two-case character
1023 == static void bothcases(register struct parse *p, int ch);
1024 *
1025 * Boy, is this implementation ever a kludge...
1026 */
1027static void
1028bothcases(p, ch)
1029register struct parse *p;
1030int ch;
1031{
1032 register char *oldnext = p->next;
1033 register char *oldend = p->end;
1034 char bracket[3];
1035
1036 ch = (uch)ch;
1037 assert(othercase(ch) != ch); /* p_bracket() would recurse */
1038 p->next = bracket;
1039 p->end = bracket+2;
1040 bracket[0] = ch;
1041 bracket[1] = ']';
1042 bracket[2] = '\0';
1043 p_bracket(p);
1044 assert(p->next == bracket+2);
1045 p->next = oldnext;
1046 p->end = oldend;
1047}
1048
1049/*
1050 - ordinary - emit an ordinary character
1051 == static void ordinary(register struct parse *p, register int ch);
1052 */
1053static void
1054ordinary(p, ch)
1055register struct parse *p;
1056register int ch;
1057{
1058 register cat_t *cap = p->g->categories;
1059
1060 if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
1061 bothcases(p, ch);
1062 else {
1063 EMIT(OCHAR, (uch)ch);
1064 if (cap[ch] == 0)
1065 cap[ch] = p->g->ncategories++;
1066 }
1067}
1068
1069/*
1070 - nonnewline - emit REG_NEWLINE version of OANY
1071 == static void nonnewline(register struct parse *p);
1072 *
1073 * Boy, is this implementation ever a kludge...
1074 */
1075static void
1076nonnewline(p)
1077register struct parse *p;
1078{
1079 register char *oldnext = p->next;
1080 register char *oldend = p->end;
1081 char bracket[4];
1082
1083 p->next = bracket;
1084 p->end = bracket+3;
1085 bracket[0] = '^';
1086 bracket[1] = '\n';
1087 bracket[2] = ']';
1088 bracket[3] = '\0';
1089 p_bracket(p);
1090 assert(p->next == bracket+3);
1091 p->next = oldnext;
1092 p->end = oldend;
1093}
1094
1095/*
1096 - repeat - generate code for a bounded repetition, recursively if needed
1097 == static void repeat(register struct parse *p, sopno start, int from, int to);
1098 */
1099static void
1100repeat(p, start, from, to)
1101register struct parse *p;
1102sopno start; /* operand from here to end of strip */
1103int from; /* repeated from this number */
1104int to; /* to this number of times (maybe INFINITY) */
1105{
1106 register sopno finish = HERE();
1107# define N 2
1108# define INF 3
1109# define REP(f, t) ((f)*8 + (t))
1110# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1111 register sopno copy;
1112
1113 if (p->error != 0) /* head off possible runaway recursion */
1114 return;
1115
1116 assert(from <= to);
1117
1118 switch (REP(MAP(from), MAP(to))) {
1119 case REP(0, 0): /* must be user doing this */
1120 DROP(finish-start); /* drop the operand */
1121 break;
1122 case REP(0, 1): /* as x{1,1}? */
1123 case REP(0, N): /* as x{1,n}? */
1124 case REP(0, INF): /* as x{1,}? */
1125 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1126 INSERT(OCH_, start); /* offset is wrong... */
1127 repeat(p, start+1, 1, to);
1128 ASTERN(OOR1, start);
1129 AHEAD(start); /* ... fix it */
1130 EMIT(OOR2, 0);
1131 AHEAD(THERE());
1132 ASTERN(O_CH, THERETHERE());
1133 break;
1134 case REP(1, 1): /* trivial case */
1135 /* done */
1136 break;
1137 case REP(1, N): /* as x?x{1,n-1} */
1138 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1139 INSERT(OCH_, start);
1140 ASTERN(OOR1, start);
1141 AHEAD(start);
1142 EMIT(OOR2, 0); /* offset very wrong... */
1143 AHEAD(THERE()); /* ...so fix it */
1144 ASTERN(O_CH, THERETHERE());
1145 copy = dupl(p, start+1, finish+1);
1146 assert(copy == finish+4);
1147 repeat(p, copy, 1, to-1);
1148 break;
1149 case REP(1, INF): /* as x+ */
1150 INSERT(OPLUS_, start);
1151 ASTERN(O_PLUS, start);
1152 break;
1153 case REP(N, N): /* as xx{m-1,n-1} */
1154 copy = dupl(p, start, finish);
1155 repeat(p, copy, from-1, to-1);
1156 break;
1157 case REP(N, INF): /* as xx{n-1,INF} */
1158 copy = dupl(p, start, finish);
1159 repeat(p, copy, from-1, to);
1160 break;
1161 default: /* "can't happen" */
1162 SETERROR(REG_ASSERT); /* just in case */
1163 break;
1164 }
1165}
1166
1167/*
1168 - seterr - set an error condition
1169 == static int seterr(register struct parse *p, int e);
1170 */
1171static int /* useless but makes type checking happy */
1172seterr(p, e)
1173register struct parse *p;
1174int e;
1175{
1176 if (p->error == 0) /* keep earliest error condition */
1177 p->error = e;
1178 p->next = nuls; /* try to bring things to a halt */
1179 p->end = nuls;
1180 return(0); /* make the return value well-defined */
1181}
1182
1183/*
1184 - allocset - allocate a set of characters for []
1185 == static cset *allocset(register struct parse *p);
1186 */
1187static cset *
1188allocset(p)
1189register struct parse *p;
1190{
1191 register int no = p->g->ncsets++;
1192 register size_t nc;
1193 register size_t nbytes;
1194 register cset *cs;
1195 register size_t css = (size_t)p->g->csetsize;
1196 register int i;
1197
1198 if (no >= p->ncsalloc) { /* need another column of space */
1199 p->ncsalloc += CHAR_BIT;
1200 nc = p->ncsalloc;
1201 assert(nc % CHAR_BIT == 0);
1202 nbytes = nc / CHAR_BIT * css;
1203 if (p->g->sets == NULL)
1204 p->g->sets = (cset *)malloc(nc * sizeof(cset));
1205 else
1206 p->g->sets = (cset *)reallocf((char *)p->g->sets,
1207 nc * sizeof(cset));
1208 if (p->g->setbits == NULL)
1209 p->g->setbits = (uch *)malloc(nbytes);
1210 else {
1211 p->g->setbits = (uch *)reallocf((char *)p->g->setbits,
1212 nbytes);
1213 /* xxx this isn't right if setbits is now NULL */
1214 for (i = 0; i < no; i++)
1215 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1216 }
1217 if (p->g->sets != NULL && p->g->setbits != NULL)
1218 (void) memset((char *)p->g->setbits + (nbytes - css),
1219 0, css);
1220 else {
1221 no = 0;
1222 SETERROR(REG_ESPACE);
1223 /* caller's responsibility not to do set ops */
1224 }
1225 }
1226
1227 assert(p->g->sets != NULL); /* xxx */
1228 cs = &p->g->sets[no];
1229 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1230 cs->mask = 1 << ((no) % CHAR_BIT);
1231 cs->hash = 0;
1232 cs->smultis = 0;
1233 cs->multis = NULL;
1234
1235 return(cs);
1236}
1237
1238/*
1239 - freeset - free a now-unused set
1240 == static void freeset(register struct parse *p, register cset *cs);
1241 */
1242static void
1243freeset(p, cs)
1244register struct parse *p;
1245register cset *cs;
1246{
1247 register int i;
1248 register cset *top = &p->g->sets[p->g->ncsets];
1249 register size_t css = (size_t)p->g->csetsize;
1250
1251 for (i = 0; i < css; i++)
1252 CHsub(cs, i);
1253 if (cs == top-1) /* recover only the easy case */
1254 p->g->ncsets--;
1255}
1256
1257/*
1258 - freezeset - final processing on a set of characters
1259 == static int freezeset(register struct parse *p, register cset *cs);
1260 *
1261 * The main task here is merging identical sets. This is usually a waste
1262 * of time (although the hash code minimizes the overhead), but can win
1263 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1264 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1265 * the same value!
1266 */
1267static int /* set number */
1268freezeset(p, cs)
1269register struct parse *p;
1270register cset *cs;
1271{
1272 register short h = cs->hash;
1273 register int i;
1274 register cset *top = &p->g->sets[p->g->ncsets];
1275 register cset *cs2;
1276 register size_t css = (size_t)p->g->csetsize;
1277
1278 /* look for an earlier one which is the same */
1279 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1280 if (cs2->hash == h && cs2 != cs) {
1281 /* maybe */
1282 for (i = 0; i < css; i++)
1283 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1284 break; /* no */
1285 if (i == css)
1286 break; /* yes */
1287 }
1288
1289 if (cs2 < top) { /* found one */
1290 freeset(p, cs);
1291 cs = cs2;
1292 }
1293
1294 return((int)(cs - p->g->sets));
1295}
1296
1297/*
1298 - firstch - return first character in a set (which must have at least one)
1299 == static int firstch(register struct parse *p, register cset *cs);
1300 */
1301static int /* character; there is no "none" value */
1302firstch(p, cs)
1303register struct parse *p;
1304register cset *cs;
1305{
1306 register int i;
1307 register size_t css = (size_t)p->g->csetsize;
1308
1309 for (i = 0; i < css; i++)
1310 if (CHIN(cs, i))
1311 return((char)i);
1312 assert(never);
1313 return(0); /* arbitrary */
1314}
1315
1316/*
1317 - nch - number of characters in a set
1318 == static int nch(register struct parse *p, register cset *cs);
1319 */
1320static int
1321nch(p, cs)
1322register struct parse *p;
1323register cset *cs;
1324{
1325 register int i;
1326 register size_t css = (size_t)p->g->csetsize;
1327 register int n = 0;
1328
1329 for (i = 0; i < css; i++)
1330 if (CHIN(cs, i))
1331 n++;
1332 return(n);
1333}
1334
1335/*
1336 - mcadd - add a collating element to a cset
1337 == static void mcadd(register struct parse *p, register cset *cs, \
1338 == register char *cp);
1339 */
1340static void
1341mcadd(p, cs, cp)
1342register struct parse *p;
1343register cset *cs;
1344register char *cp;
1345{
1346 register size_t oldend = cs->smultis;
1347
1348 cs->smultis += strlen(cp) + 1;
1349 if (cs->multis == NULL)
1350 cs->multis = malloc(cs->smultis);
1351 else
1352 cs->multis = reallocf(cs->multis, cs->smultis);
1353 if (cs->multis == NULL) {
1354 SETERROR(REG_ESPACE);
1355 return;
1356 }
1357
1358 (void) strcpy(cs->multis + oldend - 1, cp);
1359 cs->multis[cs->smultis - 1] = '\0';
1360}
1361
1362#if used
1363/*
1364 - mcsub - subtract a collating element from a cset
1365 == static void mcsub(register cset *cs, register char *cp);
1366 */
1367static void
1368mcsub(cs, cp)
1369register cset *cs;
1370register char *cp;
1371{
1372 register char *fp = mcfind(cs, cp);
1373 register size_t len = strlen(fp);
1374
1375 assert(fp != NULL);
1376 (void) memmove(fp, fp + len + 1,
1377 cs->smultis - (fp + len + 1 - cs->multis));
1378 cs->smultis -= len;
1379
1380 if (cs->smultis == 0) {
1381 free(cs->multis);
1382 cs->multis = NULL;
1383 return;
1384 }
1385
1386 cs->multis = reallocf(cs->multis, cs->smultis);
1387 assert(cs->multis != NULL);
1388}
1389
1390/*
1391 - mcin - is a collating element in a cset?
1392 == static int mcin(register cset *cs, register char *cp);
1393 */
1394static int
1395mcin(cs, cp)
1396register cset *cs;
1397register char *cp;
1398{
1399 return(mcfind(cs, cp) != NULL);
1400}
1401
1402/*
1403 - mcfind - find a collating element in a cset
1404 == static char *mcfind(register cset *cs, register char *cp);
1405 */
1406static char *
1407mcfind(cs, cp)
1408register cset *cs;
1409register char *cp;
1410{
1411 register char *p;
1412
1413 if (cs->multis == NULL)
1414 return(NULL);
1415 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1416 if (strcmp(cp, p) == 0)
1417 return(p);
1418 return(NULL);
1419}
1420#endif
1421
1422/*
1423 - mcinvert - invert the list of collating elements in a cset
1424 == static void mcinvert(register struct parse *p, register cset *cs);
1425 *
1426 * This would have to know the set of possibilities. Implementation
1427 * is deferred.
1428 */
1429static void
1430mcinvert(p, cs)
1431register struct parse *p;
1432register cset *cs;
1433{
1434 assert(cs->multis == NULL); /* xxx */
1435}
1436
1437/*
1438 - mccase - add case counterparts of the list of collating elements in a cset
1439 == static void mccase(register struct parse *p, register cset *cs);
1440 *
1441 * This would have to know the set of possibilities. Implementation
1442 * is deferred.
1443 */
1444static void
1445mccase(p, cs)
1446register struct parse *p;
1447register cset *cs;
1448{
1449 assert(cs->multis == NULL); /* xxx */
1450}
1451
1452/*
1453 - isinsets - is this character in any sets?
1454 == static int isinsets(register struct re_guts *g, int c);
1455 */
1456static int /* predicate */
1457isinsets(g, c)
1458register struct re_guts *g;
1459int c;
1460{
1461 register uch *col;
1462 register int i;
1463 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1464 register unsigned uc = (uch)c;
1465
1466 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1467 if (col[uc] != 0)
1468 return(1);
1469 return(0);
1470}
1471
1472/*
1473 - samesets - are these two characters in exactly the same sets?
1474 == static int samesets(register struct re_guts *g, int c1, int c2);
1475 */
1476static int /* predicate */
1477samesets(g, c1, c2)
1478register struct re_guts *g;
1479int c1;
1480int c2;
1481{
1482 register uch *col;
1483 register int i;
1484 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1485 register unsigned uc1 = (uch)c1;
1486 register unsigned uc2 = (uch)c2;
1487
1488 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1489 if (col[uc1] != col[uc2])
1490 return(0);
1491 return(1);
1492}
1493
1494/*
1495 - categorize - sort out character categories
1496 == static void categorize(struct parse *p, register struct re_guts *g);
1497 */
1498static void
1499categorize(p, g)
1500struct parse *p;
1501register struct re_guts *g;
1502{
1503 register cat_t *cats = g->categories;
1504 register int c;
1505 register int c2;
1506 register cat_t cat;
1507
1508 /* avoid making error situations worse */
1509 if (p->error != 0)
1510 return;
1511
1512 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1513 if (cats[c] == 0 && isinsets(g, c)) {
1514 cat = g->ncategories++;
1515 cats[c] = cat;
1516 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1517 if (cats[c2] == 0 && samesets(g, c, c2))
1518 cats[c2] = cat;
1519 }
1520}
1521
1522/*
1523 - dupl - emit a duplicate of a bunch of sops
1524 == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1525 */
1526static sopno /* start of duplicate */
1527dupl(p, start, finish)
1528register struct parse *p;
1529sopno start; /* from here */
1530sopno finish; /* to this less one */
1531{
1532 register sopno ret = HERE();
1533 register sopno len = finish - start;
1534
1535 assert(finish >= start);
1536 if (len == 0)
1537 return(ret);
1538 enlarge(p, p->ssize + len); /* this many unexpected additions */
1539 assert(p->ssize >= p->slen + len);
1540 (void) memcpy((char *)(p->strip + p->slen),
1541 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1542 p->slen += len;
1543 return(ret);
1544}
1545
1546/*
1547 - doemit - emit a strip operator
1548 == static void doemit(register struct parse *p, sop op, size_t opnd);
1549 *
1550 * It might seem better to implement this as a macro with a function as
1551 * hard-case backup, but it's just too big and messy unless there are
1552 * some changes to the data structures. Maybe later.
1553 */
1554static void
1555doemit(p, op, opnd)
1556register struct parse *p;
1557sop op;
1558size_t opnd;
1559{
1560 /* avoid making error situations worse */
1561 if (p->error != 0)
1562 return;
1563
1564 /* deal with oversize operands ("can't happen", more or less) */
1565 assert(opnd < 1<<OPSHIFT);
1566
1567 /* deal with undersized strip */
1568 if (p->slen >= p->ssize)
1569 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
1570 assert(p->slen < p->ssize);
1571
1572 /* finally, it's all reduced to the easy case */
1573 p->strip[p->slen++] = SOP(op, opnd);
1574}
1575
1576/*
1577 - doinsert - insert a sop into the strip
1578 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1579 */
1580static void
1581doinsert(p, op, opnd, pos)
1582register struct parse *p;
1583sop op;
1584size_t opnd;
1585sopno pos;
1586{
1587 register sopno sn;
1588 register sop s;
1589 register int i;
1590
1591 /* avoid making error situations worse */
1592 if (p->error != 0)
1593 return;
1594
1595 sn = HERE();
1596 EMIT(op, opnd); /* do checks, ensure space */
1597 assert(HERE() == sn+1);
1598 s = p->strip[sn];
1599
1600 /* adjust paren pointers */
1601 assert(pos > 0);
1602 for (i = 1; i < NPAREN; i++) {
1603 if (p->pbegin[i] >= pos) {
1604 p->pbegin[i]++;
1605 }
1606 if (p->pend[i] >= pos) {
1607 p->pend[i]++;
1608 }
1609 }
1610
1611 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1612 (HERE()-pos-1)*sizeof(sop));
1613 p->strip[pos] = s;
1614}
1615
1616/*
1617 - dofwd - complete a forward reference
1618 == static void dofwd(register struct parse *p, sopno pos, sop value);
1619 */
1620static void
1621dofwd(p, pos, value)
1622register struct parse *p;
1623register sopno pos;
1624sop value;
1625{
1626 /* avoid making error situations worse */
1627 if (p->error != 0)
1628 return;
1629
1630 assert(value < 1<<OPSHIFT);
1631 p->strip[pos] = OP(p->strip[pos]) | value;
1632}
1633
1634/*
1635 - enlarge - enlarge the strip
1636 == static void enlarge(register struct parse *p, sopno size);
1637 */
1638static void
1639enlarge(p, size)
1640register struct parse *p;
1641register sopno size;
1642{
1643 register sop *sp;
1644
1645 if (p->ssize >= size)
1646 return;
1647
1648 sp = (sop *)realloc(p->strip, size*sizeof(sop));
1649 if (sp == NULL) {
1650 SETERROR(REG_ESPACE);
1651 return;
1652 }
1653 p->strip = sp;
1654 p->ssize = size;
1655}
1656
1657/*
1658 - stripsnug - compact the strip
1659 == static void stripsnug(register struct parse *p, register struct re_guts *g);
1660 */
1661static void
1662stripsnug(p, g)
1663register struct parse *p;
1664register struct re_guts *g;
1665{
1666 g->nstates = p->slen;
1667 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1668 if (g->strip == NULL) {
1669 SETERROR(REG_ESPACE);
1670 g->strip = p->strip;
1671 }
1672}
1673
1674/*
1675 - findmust - fill in must and mlen with longest mandatory literal string
1676 == static void findmust(register struct parse *p, register struct re_guts *g);
1677 *
1678 * This algorithm could do fancy things like analyzing the operands of |
1679 * for common subsequences. Someday. This code is simple and finds most
1680 * of the interesting cases.
1681 *
1682 * Note that must and mlen got initialized during setup.
1683 */
1684static void
1685findmust(p, g)
1686struct parse *p;
1687register struct re_guts *g;
1688{
1689 register sop *scan;
1690 sop *start;
1691 register sop *newstart;
1692 register sopno newlen;
1693 register sop s;
1694 register char *cp;
1695 register sopno i;
1696
1697 /* avoid making error situations worse */
1698 if (p->error != 0)
1699 return;
1700
1701 /* find the longest OCHAR sequence in strip */
1702 newlen = 0;
1703 scan = g->strip + 1;
1704 do {
1705 s = *scan++;
1706 switch (OP(s)) {
1707 case OCHAR: /* sequence member */
1708 if (newlen == 0) /* new sequence */
1709 newstart = scan - 1;
1710 newlen++;
1711 break;
1712 case OPLUS_: /* things that don't break one */
1713 case OLPAREN:
1714 case ORPAREN:
1715 break;
1716 case OQUEST_: /* things that must be skipped */
1717 case OCH_:
1718 scan--;
1719 do {
1720 scan += OPND(s);
1721 s = *scan;
1722 /* assert() interferes w debug printouts */
1723 if (OP(s) != O_QUEST && OP(s) != O_CH &&
1724 OP(s) != OOR2) {
1725 g->iflags |= BAD;
1726 return;
1727 }
1728 } while (OP(s) != O_QUEST && OP(s) != O_CH);
1729 /* fallthrough */
1730 default: /* things that break a sequence */
1731 if (newlen > g->mlen) { /* ends one */
1732 start = newstart;
1733 g->mlen = newlen;
1734 }
1735 newlen = 0;
1736 break;
1737 }
1738 } while (OP(s) != OEND);
1739
1740 if (g->mlen == 0) /* there isn't one */
1741 return;
1742
1743 /* turn it into a character string */
1744 g->must = malloc((size_t)g->mlen + 1);
1745 if (g->must == NULL) { /* argh; just forget it */
1746 g->mlen = 0;
1747 return;
1748 }
1749 cp = g->must;
1750 scan = start;
1751 for (i = g->mlen; i > 0; i--) {
1752 while (OP(s = *scan++) != OCHAR)
1753 continue;
1754 assert(cp < g->must + g->mlen);
1755 *cp++ = (char)OPND(s);
1756 }
1757 assert(cp == g->must + g->mlen);
1758 *cp++ = '\0'; /* just on general principles */
1759}
1760
1761/*
1762 - computejumps - compute char jumps for BM scan
1763 == static void computejumps(register struct parse *p, register struct re_guts *g);
1764 *
1765 * This algorithm assumes g->must exists and is has size greater than
1766 * zero. It's based on the algorithm found on Computer Algorithms by
1767 * Sara Baase.
1768 *
1769 * A char jump is the number of characters one needs to jump based on
1770 * the value of the character from the text that was mismatched.
1771 */
1772static void
1773computejumps(p, g)
1774struct parse *p;
1775struct re_guts *g;
1776{
1777 int ch;
1778 int mindex;
1779
1780 /* Avoid making errors worse */
1781 if (p->error != 0)
1782 return;
1783
1784 g->charjump = malloc(256 * sizeof(int));
1785 if (g->charjump == NULL) /* Not a fatal error */
1786 return;
1787
1788 /* If the character does not exist in the pattern, the jump
1789 * is equal to the number of characters in the pattern.
1790 */
1791 for (ch = 0; ch < 256; ch++)
1792 g->charjump[ch] = g->mlen;
1793
1794 /* If the character does exist, compute the jump that would
1795 * take us to the last character in the pattern equal to it
1796 * (notice that we match right to left, so that last character
1797 * is the first one that would be matched).
1798 */
1799 for (mindex = 0; mindex < g->mlen; mindex++)
1800 g->charjump[g->must[mindex]] = g->mlen - mindex - 1;
1801}
1802
1803/*
1804 - computematchjumps - compute match jumps for BM scan
1805 == static void computematchjumps(register struct parse *p, register struct re_guts *g);
1806 *
1807 * This algorithm assumes g->must exists and is has size greater than
1808 * zero. It's based on the algorithm found on Computer Algorithms by
1809 * Sara Baase.
1810 *
1811 * A match jump is the number of characters one needs to advance based
1812 * on the already-matched suffix.
1813 * Notice that all values here are minus (g->mlen-1), because of the way
1814 * the search algorithm works.
1815 */
1816static void
1817computematchjumps(p, g)
1818struct parse *p;
1819struct re_guts *g;
1820{
1821 int mindex; /* General "must" iterator */
1822 int suffix; /* Keeps track of matching suffix */
1823 int ssuffix; /* Keeps track of suffixes' suffix */
1824 int* pmatches; /* pmatches[k] points to the next i
1825 * such that i+1...mlen is a substring
1826 * of k+1...k+mlen-i-1
1827 */
1828
1829 /* Avoid making errors worse */
1830 if (p->error != 0)
1831 return;
1832
1833 pmatches = malloc(g->mlen * sizeof(unsigned int));
1834 if (pmatches == NULL) {
1835 g->matchjump = NULL;
1836 return;
1837 }
1838
1839 g->matchjump = malloc(g->mlen * sizeof(unsigned int));
1840 if (g->matchjump == NULL) /* Not a fatal error */
1841 return;
1842
1843 /* Set maximum possible jump for each character in the pattern */
1844 for (mindex = 0; mindex < g->mlen; mindex++)
1845 g->matchjump[mindex] = 2*g->mlen - mindex - 1;
1846
1847 /* Compute pmatches[] */
1848 for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
1849 mindex--, suffix--) {
1850 pmatches[mindex] = suffix;
1851
1852 /* If a mismatch is found, interrupting the substring,
1853 * compute the matchjump for that position. If no
1854 * mismatch is found, then a text substring mismatched
1855 * against the suffix will also mismatch against the
1856 * substring.
1857 */
1858 while (suffix < g->mlen
1859 && g->must[mindex] != g->must[suffix]) {
1860 g->matchjump[suffix] = MIN(g->matchjump[suffix],
1861 g->mlen - mindex - 1);
1862 suffix = pmatches[suffix];
1863 }
1864 }
1865
1866 /* Compute the matchjump up to the last substring found to jump
1867 * to the beginning of the largest must pattern prefix matching
1868 * it's own suffix.
1869 */
1870 for (mindex = 0; mindex <= suffix; mindex++)
1871 g->matchjump[mindex] = MIN(g->matchjump[mindex],
1872 g->mlen + suffix - mindex);
1873
1874 ssuffix = pmatches[suffix];
1875 while(suffix < g->mlen) {
1876 while(suffix <= ssuffix) {
1877 g->matchjump[suffix] = MIN(g->matchjump[suffix],
1878 g->mlen + ssuffix - suffix);
1879 suffix++;
1880 }
1881 ssuffix = pmatches[ssuffix];
1882 }
1883
1884 free(pmatches);
1885}
1886
1887/*
1744 - pluscount - count + nesting
1745 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1746 */
1747static sopno /* nesting depth */
1748pluscount(p, g)
1749struct parse *p;
1750register struct re_guts *g;
1751{
1752 register sop *scan;
1753 register sop s;
1754 register sopno plusnest = 0;
1755 register sopno maxnest = 0;
1756
1757 if (p->error != 0)
1758 return(0); /* there may not be an OEND */
1759
1760 scan = g->strip + 1;
1761 do {
1762 s = *scan++;
1763 switch (OP(s)) {
1764 case OPLUS_:
1765 plusnest++;
1766 break;
1767 case O_PLUS:
1768 if (plusnest > maxnest)
1769 maxnest = plusnest;
1770 plusnest--;
1771 break;
1772 }
1773 } while (OP(s) != OEND);
1774 if (plusnest != 0)
1775 g->iflags |= BAD;
1776 return(maxnest);
1777}
1888 - pluscount - count + nesting
1889 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1890 */
1891static sopno /* nesting depth */
1892pluscount(p, g)
1893struct parse *p;
1894register struct re_guts *g;
1895{
1896 register sop *scan;
1897 register sop s;
1898 register sopno plusnest = 0;
1899 register sopno maxnest = 0;
1900
1901 if (p->error != 0)
1902 return(0); /* there may not be an OEND */
1903
1904 scan = g->strip + 1;
1905 do {
1906 s = *scan++;
1907 switch (OP(s)) {
1908 case OPLUS_:
1909 plusnest++;
1910 break;
1911 case O_PLUS:
1912 if (plusnest > maxnest)
1913 maxnest = plusnest;
1914 plusnest--;
1915 break;
1916 }
1917 } while (OP(s) != OEND);
1918 if (plusnest != 0)
1919 g->iflags |= BAD;
1920 return(maxnest);
1921}