engine.c revision 1.6
1/*	$OpenBSD: engine.c,v 1.6 2002/05/25 09:11:02 deraadt Exp $	*/
2
3/*-
4 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5 * Copyright (c) 1992, 1993, 1994
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Henry Spencer.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	@(#)engine.c	8.5 (Berkeley) 3/20/94
40 */
41
42#if defined(SNAMES) && defined(LIBC_SCCS) && !defined(lint)
43static char enginercsid[] = "$OpenBSD: engine.c,v 1.6 2002/05/25 09:11:02 deraadt Exp $";
44#endif /* SNAMES and LIBC_SCCS and not lint */
45
46/*
47 * The matching engine and friends.  This file is #included by regexec.c
48 * after suitable #defines of a variety of macros used herein, so that
49 * different state representations can be used without duplicating masses
50 * of code.
51 */
52
53#ifdef SNAMES
54#define	matcher	smatcher
55#define	fast	sfast
56#define	slow	sslow
57#define	dissect	sdissect
58#define	backref	sbackref
59#define	step	sstep
60#define	print	sprint
61#define	at	sat
62#define	match	smat
63#endif
64#ifdef LNAMES
65#define	matcher	lmatcher
66#define	fast	lfast
67#define	slow	lslow
68#define	dissect	ldissect
69#define	backref	lbackref
70#define	step	lstep
71#define	print	lprint
72#define	at	lat
73#define	match	lmat
74#endif
75
76/* another structure passed up and down to avoid zillions of parameters */
77struct match {
78	struct re_guts *g;
79	int eflags;
80	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
81	char *offp;		/* offsets work from here */
82	char *beginp;		/* start of string -- virtual NUL precedes */
83	char *endp;		/* end of string -- virtual NUL here */
84	char *coldp;		/* can be no match starting before here */
85	char **lastpos;		/* [nplus+1] */
86	STATEVARS;
87	states st;		/* current states */
88	states fresh;		/* states for a fresh start */
89	states tmp;		/* temporary */
90	states empty;		/* empty set of states */
91};
92
93/* ========= begin header generated by ./mkh ========= */
94#ifdef __cplusplus
95extern "C" {
96#endif
97
98/* === engine.c === */
99static int matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
100static char *dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
101static char *backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
102static char *fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
103static char *slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
104static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
105#define	BOL	(OUT+1)
106#define	EOL	(BOL+1)
107#define	BOLEOL	(BOL+2)
108#define	NOTHING	(BOL+3)
109#define	BOW	(BOL+4)
110#define	EOW	(BOL+5)
111#define	CODEMAX	(BOL+5)		/* highest code used */
112#define	NONCHAR(c)	((c) > CHAR_MAX)
113#define	NNONCHAR	(CODEMAX-CHAR_MAX)
114#ifdef REDEBUG
115static void print(struct match *m, char *caption, states st, int ch, FILE *d);
116#endif
117#ifdef REDEBUG
118static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
119#endif
120#ifdef REDEBUG
121static char *pchar(int ch);
122#endif
123
124#ifdef __cplusplus
125}
126#endif
127/* ========= end header generated by ./mkh ========= */
128
129#ifdef REDEBUG
130#define	SP(t, s, c)	print(m, t, s, c, stdout)
131#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
132#define	NOTE(str)	{ if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
133#else
134#define	SP(t, s, c)	/* nothing */
135#define	AT(t, p1, p2, s1, s2)	/* nothing */
136#define	NOTE(s)	/* nothing */
137#endif
138
139/*
140 - matcher - the actual matching engine
141 == static int matcher(register struct re_guts *g, char *string, \
142 ==	size_t nmatch, regmatch_t pmatch[], int eflags);
143 */
144static int			/* 0 success, REG_NOMATCH failure */
145matcher(g, string, nmatch, pmatch, eflags)
146register struct re_guts *g;
147char *string;
148size_t nmatch;
149regmatch_t pmatch[];
150int eflags;
151{
152	register char *endp;
153	register int i;
154	struct match mv;
155	register struct match *m = &mv;
156	register char *dp;
157	const register sopno gf = g->firststate+1;	/* +1 for OEND */
158	const register sopno gl = g->laststate;
159	char *start;
160	char *stop;
161
162	/* simplify the situation where possible */
163	if (g->cflags&REG_NOSUB)
164		nmatch = 0;
165	if (eflags&REG_STARTEND) {
166		start = string + pmatch[0].rm_so;
167		stop = string + pmatch[0].rm_eo;
168	} else {
169		start = string;
170		stop = start + strlen(start);
171	}
172	if (stop < start)
173		return(REG_INVARG);
174
175	/* prescreening; this does wonders for this rather slow code */
176	if (g->must != NULL) {
177		for (dp = start; dp < stop; dp++)
178			if (*dp == g->must[0] && stop - dp >= g->mlen &&
179				memcmp(dp, g->must, (size_t)g->mlen) == 0)
180				break;
181		if (dp == stop)		/* we didn't find g->must */
182			return(REG_NOMATCH);
183	}
184
185	/* match struct setup */
186	m->g = g;
187	m->eflags = eflags;
188	m->pmatch = NULL;
189	m->lastpos = NULL;
190	m->offp = string;
191	m->beginp = start;
192	m->endp = stop;
193	STATESETUP(m, 4);
194	SETUP(m->st);
195	SETUP(m->fresh);
196	SETUP(m->tmp);
197	SETUP(m->empty);
198	CLEAR(m->empty);
199
200	/* this loop does only one repetition except for backrefs */
201	for (;;) {
202		endp = fast(m, start, stop, gf, gl);
203		if (endp == NULL) {		/* a miss */
204			STATETEARDOWN(m);
205			return(REG_NOMATCH);
206		}
207		if (nmatch == 0 && !g->backrefs)
208			break;		/* no further info needed */
209
210		/* where? */
211		assert(m->coldp != NULL);
212		for (;;) {
213			NOTE("finding start");
214			endp = slow(m, m->coldp, stop, gf, gl);
215			if (endp != NULL)
216				break;
217			assert(m->coldp < m->endp);
218			m->coldp++;
219		}
220		if (nmatch == 1 && !g->backrefs)
221			break;		/* no further info needed */
222
223		/* oh my, he wants the subexpressions... */
224		if (m->pmatch == NULL)
225			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
226							sizeof(regmatch_t));
227		if (m->pmatch == NULL) {
228			STATETEARDOWN(m);
229			return(REG_ESPACE);
230		}
231		for (i = 1; i <= m->g->nsub; i++)
232			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
233		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
234			NOTE("dissecting");
235			dp = dissect(m, m->coldp, endp, gf, gl);
236		} else {
237			if (g->nplus > 0 && m->lastpos == NULL)
238				m->lastpos = (char **)malloc((g->nplus+1) *
239							sizeof(char *));
240			if (g->nplus > 0 && m->lastpos == NULL) {
241				free(m->pmatch);
242				STATETEARDOWN(m);
243				return(REG_ESPACE);
244			}
245			NOTE("backref dissect");
246			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
247		}
248		if (dp != NULL)
249			break;
250
251		/* uh-oh... we couldn't find a subexpression-level match */
252		assert(g->backrefs);	/* must be back references doing it */
253		assert(g->nplus == 0 || m->lastpos != NULL);
254		for (;;) {
255			if (dp != NULL || endp <= m->coldp)
256				break;		/* defeat */
257			NOTE("backoff");
258			endp = slow(m, m->coldp, endp-1, gf, gl);
259			if (endp == NULL)
260				break;		/* defeat */
261			/* try it on a shorter possibility */
262#ifndef NDEBUG
263			for (i = 1; i <= m->g->nsub; i++) {
264				assert(m->pmatch[i].rm_so == -1);
265				assert(m->pmatch[i].rm_eo == -1);
266			}
267#endif
268			NOTE("backoff dissect");
269			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
270		}
271		assert(dp == NULL || dp == endp);
272		if (dp != NULL)		/* found a shorter one */
273			break;
274
275		/* despite initial appearances, there is no match here */
276		NOTE("false alarm");
277		start = m->coldp + 1;	/* recycle starting later */
278		assert(start <= stop);
279	}
280
281	/* fill in the details if requested */
282	if (nmatch > 0) {
283		pmatch[0].rm_so = m->coldp - m->offp;
284		pmatch[0].rm_eo = endp - m->offp;
285	}
286	if (nmatch > 1) {
287		assert(m->pmatch != NULL);
288		for (i = 1; i < nmatch; i++)
289			if (i <= m->g->nsub)
290				pmatch[i] = m->pmatch[i];
291			else {
292				pmatch[i].rm_so = -1;
293				pmatch[i].rm_eo = -1;
294			}
295	}
296
297	if (m->pmatch != NULL)
298		free((char *)m->pmatch);
299	if (m->lastpos != NULL)
300		free((char *)m->lastpos);
301	STATETEARDOWN(m);
302	return(0);
303}
304
305/*
306 - dissect - figure out what matched what, no back references
307 == static char *dissect(register struct match *m, char *start, \
308 ==	char *stop, sopno startst, sopno stopst);
309 */
310static char *			/* == stop (success) always */
311dissect(m, start, stop, startst, stopst)
312register struct match *m;
313char *start;
314char *stop;
315sopno startst;
316sopno stopst;
317{
318	register int i;
319	register sopno ss;	/* start sop of current subRE */
320	register sopno es;	/* end sop of current subRE */
321	register char *sp;	/* start of string matched by it */
322	register char *stp;	/* string matched by it cannot pass here */
323	register char *rest;	/* start of rest of string */
324	register char *tail;	/* string unmatched by rest of RE */
325	register sopno ssub;	/* start sop of subsubRE */
326	register sopno esub;	/* end sop of subsubRE */
327	register char *ssp;	/* start of string matched by subsubRE */
328	register char *sep;	/* end of string matched by subsubRE */
329	register char *oldssp;	/* previous ssp */
330	register char *dp;
331
332	AT("diss", start, stop, startst, stopst);
333	sp = start;
334	for (ss = startst; ss < stopst; ss = es) {
335		/* identify end of subRE */
336		es = ss;
337		switch (OP(m->g->strip[es])) {
338		case OPLUS_:
339		case OQUEST_:
340			es += OPND(m->g->strip[es]);
341			break;
342		case OCH_:
343			while (OP(m->g->strip[es]) != O_CH)
344				es += OPND(m->g->strip[es]);
345			break;
346		}
347		es++;
348
349		/* figure out what it matched */
350		switch (OP(m->g->strip[ss])) {
351		case OEND:
352			assert(nope);
353			break;
354		case OCHAR:
355			sp++;
356			break;
357		case OBOL:
358		case OEOL:
359		case OBOW:
360		case OEOW:
361			break;
362		case OANY:
363		case OANYOF:
364			sp++;
365			break;
366		case OBACK_:
367		case O_BACK:
368			assert(nope);
369			break;
370		/* cases where length of match is hard to find */
371		case OQUEST_:
372			stp = stop;
373			for (;;) {
374				/* how long could this one be? */
375				rest = slow(m, sp, stp, ss, es);
376				assert(rest != NULL);	/* it did match */
377				/* could the rest match the rest? */
378				tail = slow(m, rest, stop, es, stopst);
379				if (tail == stop)
380					break;		/* yes! */
381				/* no -- try a shorter match for this one */
382				stp = rest - 1;
383				assert(stp >= sp);	/* it did work */
384			}
385			ssub = ss + 1;
386			esub = es - 1;
387			/* did innards match? */
388			if (slow(m, sp, rest, ssub, esub) != NULL) {
389				dp = dissect(m, sp, rest, ssub, esub);
390				assert(dp == rest);
391			} else		/* no */
392				assert(sp == rest);
393			sp = rest;
394			break;
395		case OPLUS_:
396			stp = stop;
397			for (;;) {
398				/* how long could this one be? */
399				rest = slow(m, sp, stp, ss, es);
400				assert(rest != NULL);	/* it did match */
401				/* could the rest match the rest? */
402				tail = slow(m, rest, stop, es, stopst);
403				if (tail == stop)
404					break;		/* yes! */
405				/* no -- try a shorter match for this one */
406				stp = rest - 1;
407				assert(stp >= sp);	/* it did work */
408			}
409			ssub = ss + 1;
410			esub = es - 1;
411			ssp = sp;
412			oldssp = ssp;
413			for (;;) {	/* find last match of innards */
414				sep = slow(m, ssp, rest, ssub, esub);
415				if (sep == NULL || sep == ssp)
416					break;	/* failed or matched null */
417				oldssp = ssp;	/* on to next try */
418				ssp = sep;
419			}
420			if (sep == NULL) {
421				/* last successful match */
422				sep = ssp;
423				ssp = oldssp;
424			}
425			assert(sep == rest);	/* must exhaust substring */
426			assert(slow(m, ssp, sep, ssub, esub) == rest);
427			dp = dissect(m, ssp, sep, ssub, esub);
428			assert(dp == sep);
429			sp = rest;
430			break;
431		case OCH_:
432			stp = stop;
433			for (;;) {
434				/* how long could this one be? */
435				rest = slow(m, sp, stp, ss, es);
436				assert(rest != NULL);	/* it did match */
437				/* could the rest match the rest? */
438				tail = slow(m, rest, stop, es, stopst);
439				if (tail == stop)
440					break;		/* yes! */
441				/* no -- try a shorter match for this one */
442				stp = rest - 1;
443				assert(stp >= sp);	/* it did work */
444			}
445			ssub = ss + 1;
446			esub = ss + OPND(m->g->strip[ss]) - 1;
447			assert(OP(m->g->strip[esub]) == OOR1);
448			for (;;) {	/* find first matching branch */
449				if (slow(m, sp, rest, ssub, esub) == rest)
450					break;	/* it matched all of it */
451				/* that one missed, try next one */
452				assert(OP(m->g->strip[esub]) == OOR1);
453				esub++;
454				assert(OP(m->g->strip[esub]) == OOR2);
455				ssub = esub + 1;
456				esub += OPND(m->g->strip[esub]);
457				if (OP(m->g->strip[esub]) == OOR2)
458					esub--;
459				else
460					assert(OP(m->g->strip[esub]) == O_CH);
461			}
462			dp = dissect(m, sp, rest, ssub, esub);
463			assert(dp == rest);
464			sp = rest;
465			break;
466		case O_PLUS:
467		case O_QUEST:
468		case OOR1:
469		case OOR2:
470		case O_CH:
471			assert(nope);
472			break;
473		case OLPAREN:
474			i = OPND(m->g->strip[ss]);
475			assert(0 < i && i <= m->g->nsub);
476			m->pmatch[i].rm_so = sp - m->offp;
477			break;
478		case ORPAREN:
479			i = OPND(m->g->strip[ss]);
480			assert(0 < i && i <= m->g->nsub);
481			m->pmatch[i].rm_eo = sp - m->offp;
482			break;
483		default:		/* uh oh */
484			assert(nope);
485			break;
486		}
487	}
488
489	assert(sp == stop);
490	return(sp);
491}
492
493/*
494 - backref - figure out what matched what, figuring in back references
495 == static char *backref(register struct match *m, char *start, \
496 ==	char *stop, sopno startst, sopno stopst, sopno lev);
497 */
498static char *			/* == stop (success) or NULL (failure) */
499backref(m, start, stop, startst, stopst, lev)
500register struct match *m;
501char *start;
502char *stop;
503sopno startst;
504sopno stopst;
505sopno lev;			/* PLUS nesting level */
506{
507	register int i;
508	register sopno ss;	/* start sop of current subRE */
509	register char *sp;	/* start of string matched by it */
510	register sopno ssub;	/* start sop of subsubRE */
511	register sopno esub;	/* end sop of subsubRE */
512	register char *ssp;	/* start of string matched by subsubRE */
513	register char *dp;
514	register size_t len;
515	register int hard;
516	register sop s;
517	register regoff_t offsave;
518	register cset *cs;
519
520	AT("back", start, stop, startst, stopst);
521	sp = start;
522
523	/* get as far as we can with easy stuff */
524	hard = 0;
525	for (ss = startst; !hard && ss < stopst; ss++)
526		switch (OP(s = m->g->strip[ss])) {
527		case OCHAR:
528			if (sp == stop || *sp++ != (char)OPND(s))
529				return(NULL);
530			break;
531		case OANY:
532			if (sp == stop)
533				return(NULL);
534			sp++;
535			break;
536		case OANYOF:
537			cs = &m->g->sets[OPND(s)];
538			if (sp == stop || !CHIN(cs, *sp++))
539				return(NULL);
540			break;
541		case OBOL:
542			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
543					(sp < m->endp && *(sp-1) == '\n' &&
544						(m->g->cflags&REG_NEWLINE)) )
545				{ /* yes */ }
546			else
547				return(NULL);
548			break;
549		case OEOL:
550			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
551					(sp < m->endp && *sp == '\n' &&
552						(m->g->cflags&REG_NEWLINE)) )
553				{ /* yes */ }
554			else
555				return(NULL);
556			break;
557		case OBOW:
558			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
559					(sp < m->endp && *(sp-1) == '\n' &&
560						(m->g->cflags&REG_NEWLINE)) ||
561					(sp > m->beginp &&
562							!ISWORD(*(sp-1))) ) &&
563					(sp < m->endp && ISWORD(*sp)) )
564				{ /* yes */ }
565			else
566				return(NULL);
567			break;
568		case OEOW:
569			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
570					(sp < m->endp && *sp == '\n' &&
571						(m->g->cflags&REG_NEWLINE)) ||
572					(sp < m->endp && !ISWORD(*sp)) ) &&
573					(sp > m->beginp && ISWORD(*(sp-1))) )
574				{ /* yes */ }
575			else
576				return(NULL);
577			break;
578		case O_QUEST:
579			break;
580		case OOR1:	/* matches null but needs to skip */
581			ss++;
582			s = m->g->strip[ss];
583			do {
584				assert(OP(s) == OOR2);
585				ss += OPND(s);
586			} while (OP(s = m->g->strip[ss]) != O_CH);
587			/* note that the ss++ gets us past the O_CH */
588			break;
589		default:	/* have to make a choice */
590			hard = 1;
591			break;
592		}
593	if (!hard) {		/* that was it! */
594		if (sp != stop)
595			return(NULL);
596		return(sp);
597	}
598	ss--;			/* adjust for the for's final increment */
599
600	/* the hard stuff */
601	AT("hard", sp, stop, ss, stopst);
602	s = m->g->strip[ss];
603	switch (OP(s)) {
604	case OBACK_:		/* the vilest depths */
605		i = OPND(s);
606		assert(0 < i && i <= m->g->nsub);
607		if (m->pmatch[i].rm_eo == -1)
608			return(NULL);
609		assert(m->pmatch[i].rm_so != -1);
610		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
611		assert(stop - m->beginp >= len);
612		if (sp > stop - len)
613			return(NULL);	/* not enough left to match */
614		ssp = m->offp + m->pmatch[i].rm_so;
615		if (memcmp(sp, ssp, len) != 0)
616			return(NULL);
617		while (m->g->strip[ss] != SOP(O_BACK, i))
618			ss++;
619		return(backref(m, sp+len, stop, ss+1, stopst, lev));
620		break;
621	case OQUEST_:		/* to null or not */
622		dp = backref(m, sp, stop, ss+1, stopst, lev);
623		if (dp != NULL)
624			return(dp);	/* not */
625		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
626		break;
627	case OPLUS_:
628		assert(m->lastpos != NULL);
629		assert(lev+1 <= m->g->nplus);
630		m->lastpos[lev+1] = sp;
631		return(backref(m, sp, stop, ss+1, stopst, lev+1));
632		break;
633	case O_PLUS:
634		if (sp == m->lastpos[lev])	/* last pass matched null */
635			return(backref(m, sp, stop, ss+1, stopst, lev-1));
636		/* try another pass */
637		m->lastpos[lev] = sp;
638		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
639		if (dp == NULL)
640			return(backref(m, sp, stop, ss+1, stopst, lev-1));
641		else
642			return(dp);
643		break;
644	case OCH_:		/* find the right one, if any */
645		ssub = ss + 1;
646		esub = ss + OPND(s) - 1;
647		assert(OP(m->g->strip[esub]) == OOR1);
648		for (;;) {	/* find first matching branch */
649			dp = backref(m, sp, stop, ssub, esub, lev);
650			if (dp != NULL)
651				return(dp);
652			/* that one missed, try next one */
653			if (OP(m->g->strip[esub]) == O_CH)
654				return(NULL);	/* there is none */
655			esub++;
656			assert(OP(m->g->strip[esub]) == OOR2);
657			ssub = esub + 1;
658			esub += OPND(m->g->strip[esub]);
659			if (OP(m->g->strip[esub]) == OOR2)
660				esub--;
661			else
662				assert(OP(m->g->strip[esub]) == O_CH);
663		}
664		break;
665	case OLPAREN:		/* must undo assignment if rest fails */
666		i = OPND(s);
667		assert(0 < i && i <= m->g->nsub);
668		offsave = m->pmatch[i].rm_so;
669		m->pmatch[i].rm_so = sp - m->offp;
670		dp = backref(m, sp, stop, ss+1, stopst, lev);
671		if (dp != NULL)
672			return(dp);
673		m->pmatch[i].rm_so = offsave;
674		return(NULL);
675		break;
676	case ORPAREN:		/* must undo assignment if rest fails */
677		i = OPND(s);
678		assert(0 < i && i <= m->g->nsub);
679		offsave = m->pmatch[i].rm_eo;
680		m->pmatch[i].rm_eo = sp - m->offp;
681		dp = backref(m, sp, stop, ss+1, stopst, lev);
682		if (dp != NULL)
683			return(dp);
684		m->pmatch[i].rm_eo = offsave;
685		return(NULL);
686		break;
687	default:		/* uh oh */
688		assert(nope);
689		break;
690	}
691
692	/* "can't happen" */
693	assert(nope);
694	/* NOTREACHED */
695}
696
697/*
698 - fast - step through the string at top speed
699 == static char *fast(register struct match *m, char *start, \
700 ==	char *stop, sopno startst, sopno stopst);
701 */
702static char *			/* where tentative match ended, or NULL */
703fast(m, start, stop, startst, stopst)
704register struct match *m;
705char *start;
706char *stop;
707sopno startst;
708sopno stopst;
709{
710	register states st = m->st;
711	register states fresh = m->fresh;
712	register states tmp = m->tmp;
713	register char *p = start;
714	register int c = (start == m->beginp) ? OUT : *(start-1);
715	register int lastc;	/* previous c */
716	register int flagch;
717	register int i;
718	register char *coldp;	/* last p after which no match was underway */
719
720	CLEAR(st);
721	SET1(st, startst);
722	st = step(m->g, startst, stopst, st, NOTHING, st);
723	ASSIGN(fresh, st);
724	SP("start", st, *p);
725	coldp = NULL;
726	for (;;) {
727		/* next character */
728		lastc = c;
729		c = (p == m->endp) ? OUT : *p;
730		if (EQ(st, fresh))
731			coldp = p;
732
733		/* is there an EOL and/or BOL between lastc and c? */
734		flagch = '\0';
735		i = 0;
736		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
737				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
738			flagch = BOL;
739			i = m->g->nbol;
740		}
741		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
742				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
743			flagch = (flagch == BOL) ? BOLEOL : EOL;
744			i += m->g->neol;
745		}
746		if (i != 0) {
747			for (; i > 0; i--)
748				st = step(m->g, startst, stopst, st, flagch, st);
749			SP("boleol", st, c);
750		}
751
752		/* how about a word boundary? */
753		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
754					(c != OUT && ISWORD(c)) ) {
755			flagch = BOW;
756		}
757		if ( (lastc != OUT && ISWORD(lastc)) &&
758				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
759			flagch = EOW;
760		}
761		if (flagch == BOW || flagch == EOW) {
762			st = step(m->g, startst, stopst, st, flagch, st);
763			SP("boweow", st, c);
764		}
765
766		/* are we done? */
767		if (ISSET(st, stopst) || p == stop)
768			break;		/* NOTE BREAK OUT */
769
770		/* no, we must deal with this character */
771		ASSIGN(tmp, st);
772		ASSIGN(st, fresh);
773		assert(c != OUT);
774		st = step(m->g, startst, stopst, tmp, c, st);
775		SP("aft", st, c);
776		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
777		p++;
778	}
779
780	assert(coldp != NULL);
781	m->coldp = coldp;
782	if (ISSET(st, stopst))
783		return(p+1);
784	else
785		return(NULL);
786}
787
788/*
789 - slow - step through the string more deliberately
790 == static char *slow(register struct match *m, char *start, \
791 ==	char *stop, sopno startst, sopno stopst);
792 */
793static char *			/* where it ended */
794slow(m, start, stop, startst, stopst)
795register struct match *m;
796char *start;
797char *stop;
798sopno startst;
799sopno stopst;
800{
801	register states st = m->st;
802	register states empty = m->empty;
803	register states tmp = m->tmp;
804	register char *p = start;
805	register int c = (start == m->beginp) ? OUT : *(start-1);
806	register int lastc;	/* previous c */
807	register int flagch;
808	register int i;
809	register char *matchp;	/* last p at which a match ended */
810
811	AT("slow", start, stop, startst, stopst);
812	CLEAR(st);
813	SET1(st, startst);
814	SP("sstart", st, *p);
815	st = step(m->g, startst, stopst, st, NOTHING, st);
816	matchp = NULL;
817	for (;;) {
818		/* next character */
819		lastc = c;
820		c = (p == m->endp) ? OUT : *p;
821
822		/* is there an EOL and/or BOL between lastc and c? */
823		flagch = '\0';
824		i = 0;
825		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
826				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
827			flagch = BOL;
828			i = m->g->nbol;
829		}
830		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
831				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
832			flagch = (flagch == BOL) ? BOLEOL : EOL;
833			i += m->g->neol;
834		}
835		if (i != 0) {
836			for (; i > 0; i--)
837				st = step(m->g, startst, stopst, st, flagch, st);
838			SP("sboleol", st, c);
839		}
840
841		/* how about a word boundary? */
842		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
843					(c != OUT && ISWORD(c)) ) {
844			flagch = BOW;
845		}
846		if ( (lastc != OUT && ISWORD(lastc)) &&
847				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
848			flagch = EOW;
849		}
850		if (flagch == BOW || flagch == EOW) {
851			st = step(m->g, startst, stopst, st, flagch, st);
852			SP("sboweow", st, c);
853		}
854
855		/* are we done? */
856		if (ISSET(st, stopst))
857			matchp = p;
858		if (EQ(st, empty) || p == stop)
859			break;		/* NOTE BREAK OUT */
860
861		/* no, we must deal with this character */
862		ASSIGN(tmp, st);
863		ASSIGN(st, empty);
864		assert(c != OUT);
865		st = step(m->g, startst, stopst, tmp, c, st);
866		SP("saft", st, c);
867		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
868		p++;
869	}
870
871	return(matchp);
872}
873
874
875/*
876 - step - map set of states reachable before char to set reachable after
877 == static states step(register struct re_guts *g, sopno start, sopno stop, \
878 ==	register states bef, int ch, register states aft);
879 == #define	BOL	(OUT+1)
880 == #define	EOL	(BOL+1)
881 == #define	BOLEOL	(BOL+2)
882 == #define	NOTHING	(BOL+3)
883 == #define	BOW	(BOL+4)
884 == #define	EOW	(BOL+5)
885 == #define	CODEMAX	(BOL+5)		// highest code used
886 == #define	NONCHAR(c)	((c) > CHAR_MAX)
887 == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
888 */
889static states
890step(g, start, stop, bef, ch, aft)
891register struct re_guts *g;
892sopno start;			/* start state within strip */
893sopno stop;			/* state after stop state within strip */
894register states bef;		/* states reachable before */
895int ch;				/* character or NONCHAR code */
896register states aft;		/* states already known reachable after */
897{
898	register cset *cs;
899	register sop s;
900	register sopno pc;
901	register onestate here;		/* note, macros know this name */
902	register sopno look;
903	register int i;
904
905	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
906		s = g->strip[pc];
907		switch (OP(s)) {
908		case OEND:
909			assert(pc == stop-1);
910			break;
911		case OCHAR:
912			/* only characters can match */
913			assert(!NONCHAR(ch) || ch != (char)OPND(s));
914			if (ch == (char)OPND(s))
915				FWD(aft, bef, 1);
916			break;
917		case OBOL:
918			if (ch == BOL || ch == BOLEOL)
919				FWD(aft, bef, 1);
920			break;
921		case OEOL:
922			if (ch == EOL || ch == BOLEOL)
923				FWD(aft, bef, 1);
924			break;
925		case OBOW:
926			if (ch == BOW)
927				FWD(aft, bef, 1);
928			break;
929		case OEOW:
930			if (ch == EOW)
931				FWD(aft, bef, 1);
932			break;
933		case OANY:
934			if (!NONCHAR(ch))
935				FWD(aft, bef, 1);
936			break;
937		case OANYOF:
938			cs = &g->sets[OPND(s)];
939			if (!NONCHAR(ch) && CHIN(cs, ch))
940				FWD(aft, bef, 1);
941			break;
942		case OBACK_:		/* ignored here */
943		case O_BACK:
944			FWD(aft, aft, 1);
945			break;
946		case OPLUS_:		/* forward, this is just an empty */
947			FWD(aft, aft, 1);
948			break;
949		case O_PLUS:		/* both forward and back */
950			FWD(aft, aft, 1);
951			i = ISSETBACK(aft, OPND(s));
952			BACK(aft, aft, OPND(s));
953			if (!i && ISSETBACK(aft, OPND(s))) {
954				/* oho, must reconsider loop body */
955				pc -= OPND(s) + 1;
956				INIT(here, pc);
957			}
958			break;
959		case OQUEST_:		/* two branches, both forward */
960			FWD(aft, aft, 1);
961			FWD(aft, aft, OPND(s));
962			break;
963		case O_QUEST:		/* just an empty */
964			FWD(aft, aft, 1);
965			break;
966		case OLPAREN:		/* not significant here */
967		case ORPAREN:
968			FWD(aft, aft, 1);
969			break;
970		case OCH_:		/* mark the first two branches */
971			FWD(aft, aft, 1);
972			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
973			FWD(aft, aft, OPND(s));
974			break;
975		case OOR1:		/* done a branch, find the O_CH */
976			if (ISSTATEIN(aft, here)) {
977				for (look = 1;
978						OP(s = g->strip[pc+look]) != O_CH;
979						look += OPND(s))
980					assert(OP(s) == OOR2);
981				FWD(aft, aft, look);
982			}
983			break;
984		case OOR2:		/* propagate OCH_'s marking */
985			FWD(aft, aft, 1);
986			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
987				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
988				FWD(aft, aft, OPND(s));
989			}
990			break;
991		case O_CH:		/* just empty */
992			FWD(aft, aft, 1);
993			break;
994		default:		/* ooooops... */
995			assert(nope);
996			break;
997		}
998	}
999
1000	return(aft);
1001}
1002
1003#ifdef REDEBUG
1004/*
1005 - print - print a set of states
1006 == #ifdef REDEBUG
1007 == static void print(struct match *m, char *caption, states st, \
1008 ==	int ch, FILE *d);
1009 == #endif
1010 */
1011static void
1012print(m, caption, st, ch, d)
1013struct match *m;
1014char *caption;
1015states st;
1016int ch;
1017FILE *d;
1018{
1019	register struct re_guts *g = m->g;
1020	register int i;
1021	register int first = 1;
1022
1023	if (!(m->eflags&REG_TRACE))
1024		return;
1025
1026	(void)fprintf(d, "%s", caption);
1027	if (ch != '\0')
1028		(void)fprintf(d, " %s", pchar(ch));
1029	for (i = 0; i < g->nstates; i++)
1030		if (ISSET(st, i)) {
1031			(void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1032			first = 0;
1033		}
1034	(void)fprintf(d, "\n");
1035}
1036
1037/*
1038 - at - print current situation
1039 == #ifdef REDEBUG
1040 == static void at(struct match *m, char *title, char *start, char *stop, \
1041 ==						sopno startst, sopno stopst);
1042 == #endif
1043 */
1044static void
1045at(m, title, start, stop, startst, stopst)
1046struct match *m;
1047char *title;
1048char *start;
1049char *stop;
1050sopno startst;
1051sopno stopst;
1052{
1053	if (!(m->eflags&REG_TRACE))
1054		return;
1055
1056	(void)printf("%s %s-", title, pchar(*start));
1057	(void)printf("%s ", pchar(*stop));
1058	(void)printf("%ld-%ld\n", (long)startst, (long)stopst);
1059}
1060
1061#ifndef PCHARDONE
1062#define	PCHARDONE	/* never again */
1063/*
1064 - pchar - make a character printable
1065 == #ifdef REDEBUG
1066 == static char *pchar(int ch);
1067 == #endif
1068 *
1069 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1070 * duplicate here avoids having a debugging-capable regexec.o tied to
1071 * a matching debug.o, and this is convenient.  It all disappears in
1072 * the non-debug compilation anyway, so it doesn't matter much.
1073 */
1074static char *			/* -> representation */
1075pchar(ch)
1076int ch;
1077{
1078	static char pbuf[10];
1079
1080	if (isprint(ch) || ch == ' ')
1081		(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
1082	else
1083		(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
1084	return(pbuf);
1085}
1086#endif
1087#endif
1088
1089#undef	matcher
1090#undef	fast
1091#undef	slow
1092#undef	dissect
1093#undef	backref
1094#undef	step
1095#undef	print
1096#undef	at
1097#undef	match
1098