engine.c revision 92889
11573Srgrimes/*-
21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
31573Srgrimes * Copyright (c) 1992, 1993, 1994
41573Srgrimes *	The Regents of the University of California.  All rights reserved.
51573Srgrimes *
61573Srgrimes * This code is derived from software contributed to Berkeley by
71573Srgrimes * Henry Spencer.
81573Srgrimes *
91573Srgrimes * Redistribution and use in source and binary forms, with or without
101573Srgrimes * modification, are permitted provided that the following conditions
111573Srgrimes * are met:
121573Srgrimes * 1. Redistributions of source code must retain the above copyright
131573Srgrimes *    notice, this list of conditions and the following disclaimer.
141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151573Srgrimes *    notice, this list of conditions and the following disclaimer in the
161573Srgrimes *    documentation and/or other materials provided with the distribution.
171573Srgrimes * 3. All advertising materials mentioning features or use of this software
181573Srgrimes *    must display the following acknowledgement:
191573Srgrimes *	This product includes software developed by the University of
201573Srgrimes *	California, Berkeley and its contributors.
211573Srgrimes * 4. Neither the name of the University nor the names of its contributors
221573Srgrimes *    may be used to endorse or promote products derived from this software
231573Srgrimes *    without specific prior written permission.
241573Srgrimes *
251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351573Srgrimes * SUCH DAMAGE.
361573Srgrimes *
371573Srgrimes *	@(#)engine.c	8.5 (Berkeley) 3/20/94
3862232Sdcs *
3962232Sdcs * $FreeBSD: head/lib/libc/regex/engine.c 92889 2002-03-21 18:49:23Z obrien $
401573Srgrimes */
411573Srgrimes
421573Srgrimes/*
431573Srgrimes * The matching engine and friends.  This file is #included by regexec.c
441573Srgrimes * after suitable #defines of a variety of macros used herein, so that
451573Srgrimes * different state representations can be used without duplicating masses
461573Srgrimes * of code.
471573Srgrimes */
481573Srgrimes
491573Srgrimes#ifdef SNAMES
501573Srgrimes#define	matcher	smatcher
511573Srgrimes#define	fast	sfast
521573Srgrimes#define	slow	sslow
531573Srgrimes#define	dissect	sdissect
541573Srgrimes#define	backref	sbackref
551573Srgrimes#define	step	sstep
561573Srgrimes#define	print	sprint
571573Srgrimes#define	at	sat
581573Srgrimes#define	match	smat
591573Srgrimes#endif
601573Srgrimes#ifdef LNAMES
611573Srgrimes#define	matcher	lmatcher
621573Srgrimes#define	fast	lfast
631573Srgrimes#define	slow	lslow
641573Srgrimes#define	dissect	ldissect
651573Srgrimes#define	backref	lbackref
661573Srgrimes#define	step	lstep
671573Srgrimes#define	print	lprint
681573Srgrimes#define	at	lat
691573Srgrimes#define	match	lmat
701573Srgrimes#endif
711573Srgrimes
721573Srgrimes/* another structure passed up and down to avoid zillions of parameters */
731573Srgrimesstruct match {
741573Srgrimes	struct re_guts *g;
751573Srgrimes	int eflags;
761573Srgrimes	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
771573Srgrimes	char *offp;		/* offsets work from here */
781573Srgrimes	char *beginp;		/* start of string -- virtual NUL precedes */
791573Srgrimes	char *endp;		/* end of string -- virtual NUL here */
801573Srgrimes	char *coldp;		/* can be no match starting before here */
811573Srgrimes	char **lastpos;		/* [nplus+1] */
821573Srgrimes	STATEVARS;
831573Srgrimes	states st;		/* current states */
841573Srgrimes	states fresh;		/* states for a fresh start */
851573Srgrimes	states tmp;		/* temporary */
861573Srgrimes	states empty;		/* empty set of states */
871573Srgrimes};
881573Srgrimes
891573Srgrimes/* ========= begin header generated by ./mkh ========= */
901573Srgrimes#ifdef __cplusplus
911573Srgrimesextern "C" {
921573Srgrimes#endif
931573Srgrimes
941573Srgrimes/* === engine.c === */
951573Srgrimesstatic int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
961573Srgrimesstatic char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
971573Srgrimesstatic char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));
981573Srgrimesstatic char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
991573Srgrimesstatic char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
1001573Srgrimesstatic states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));
1011573Srgrimes#define	BOL	(OUT+1)
1021573Srgrimes#define	EOL	(BOL+1)
1031573Srgrimes#define	BOLEOL	(BOL+2)
1041573Srgrimes#define	NOTHING	(BOL+3)
1051573Srgrimes#define	BOW	(BOL+4)
1061573Srgrimes#define	EOW	(BOL+5)
1071573Srgrimes#define	CODEMAX	(BOL+5)		/* highest code used */
1081573Srgrimes#define	NONCHAR(c)	((c) > CHAR_MAX)
1091573Srgrimes#define	NNONCHAR	(CODEMAX-CHAR_MAX)
1101573Srgrimes#ifdef REDEBUG
1111573Srgrimesstatic void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
1121573Srgrimes#endif
1131573Srgrimes#ifdef REDEBUG
1141573Srgrimesstatic void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
1151573Srgrimes#endif
1161573Srgrimes#ifdef REDEBUG
1171573Srgrimesstatic char *pchar __P((int ch));
1181573Srgrimes#endif
1191573Srgrimes
1201573Srgrimes#ifdef __cplusplus
1211573Srgrimes}
1221573Srgrimes#endif
1231573Srgrimes/* ========= end header generated by ./mkh ========= */
1241573Srgrimes
1251573Srgrimes#ifdef REDEBUG
1261573Srgrimes#define	SP(t, s, c)	print(m, t, s, c, stdout)
1271573Srgrimes#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
1281573Srgrimes#define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
1291573Srgrimes#else
1301573Srgrimes#define	SP(t, s, c)	/* nothing */
1311573Srgrimes#define	AT(t, p1, p2, s1, s2)	/* nothing */
1321573Srgrimes#define	NOTE(s)	/* nothing */
1331573Srgrimes#endif
1341573Srgrimes
1351573Srgrimes/*
1361573Srgrimes - matcher - the actual matching engine
13792889Sobrien == static int matcher(struct re_guts *g, char *string, \
1381573Srgrimes ==	size_t nmatch, regmatch_t pmatch[], int eflags);
1391573Srgrimes */
1401573Srgrimesstatic int			/* 0 success, REG_NOMATCH failure */
1411573Srgrimesmatcher(g, string, nmatch, pmatch, eflags)
14292889Sobrienstruct re_guts *g;
1431573Srgrimeschar *string;
1441573Srgrimessize_t nmatch;
1451573Srgrimesregmatch_t pmatch[];
1461573Srgrimesint eflags;
1471573Srgrimes{
14892889Sobrien	char *endp;
14992889Sobrien	int i;
1501573Srgrimes	struct match mv;
15192889Sobrien	struct match *m = &mv;
15292889Sobrien	char *dp;
15392889Sobrien	const sopno gf = g->firststate+1;	/* +1 for OEND */
15492889Sobrien	const sopno gl = g->laststate;
1551573Srgrimes	char *start;
1561573Srgrimes	char *stop;
15762232Sdcs	/* Boyer-Moore algorithms variables */
15892889Sobrien	char *pp;
15962232Sdcs	int cj, mj;
16092889Sobrien	char *mustfirst;
16192889Sobrien	char *mustlast;
16292889Sobrien	int *matchjump;
16392889Sobrien	int *charjump;
1641573Srgrimes
1651573Srgrimes	/* simplify the situation where possible */
1661573Srgrimes	if (g->cflags&REG_NOSUB)
1671573Srgrimes		nmatch = 0;
1681573Srgrimes	if (eflags&REG_STARTEND) {
1691573Srgrimes		start = string + pmatch[0].rm_so;
1701573Srgrimes		stop = string + pmatch[0].rm_eo;
1711573Srgrimes	} else {
1721573Srgrimes		start = string;
1731573Srgrimes		stop = start + strlen(start);
1741573Srgrimes	}
1751573Srgrimes	if (stop < start)
1761573Srgrimes		return(REG_INVARG);
1771573Srgrimes
1781573Srgrimes	/* prescreening; this does wonders for this rather slow code */
1791573Srgrimes	if (g->must != NULL) {
18062391Sdcs		if (g->charjump != NULL && g->matchjump != NULL) {
18162232Sdcs			mustfirst = g->must;
18262232Sdcs			mustlast = g->must + g->mlen - 1;
18362232Sdcs			charjump = g->charjump;
18462232Sdcs			matchjump = g->matchjump;
18562232Sdcs			pp = mustlast;
18662754Sdcs			for (dp = start+g->mlen-1; dp < stop;) {
18762232Sdcs				/* Fast skip non-matches */
18862754Sdcs				while (dp < stop && charjump[*dp])
18962754Sdcs					dp += charjump[*dp];
19062232Sdcs
19162754Sdcs				if (dp >= stop)
19262232Sdcs					break;
19362232Sdcs
19462232Sdcs				/* Greedy matcher */
19562232Sdcs				/* We depend on not being used for
19662232Sdcs				 * for strings of length 1
19762232Sdcs				 */
19862754Sdcs				while (*--dp == *--pp && pp != mustfirst);
19962232Sdcs
20062754Sdcs				if (*dp == *pp)
20162232Sdcs					break;
20262232Sdcs
20362232Sdcs				/* Jump to next possible match */
20462232Sdcs				mj = matchjump[pp - mustfirst];
20562754Sdcs				cj = charjump[*dp];
20662754Sdcs				dp += (cj < mj ? mj : cj);
20762754Sdcs				pp = mustlast;
20862232Sdcs			}
20962391Sdcs			if (pp != mustfirst)
21062232Sdcs				return(REG_NOMATCH);
21162232Sdcs		} else {
21262232Sdcs			for (dp = start; dp < stop; dp++)
21362232Sdcs				if (*dp == g->must[0] &&
21462232Sdcs				    stop - dp >= g->mlen &&
21562232Sdcs				    memcmp(dp, g->must, (size_t)g->mlen) == 0)
21662232Sdcs					break;
21762232Sdcs			if (dp == stop)		/* we didn't find g->must */
21862232Sdcs				return(REG_NOMATCH);
21962232Sdcs		}
2201573Srgrimes	}
2211573Srgrimes
2221573Srgrimes	/* match struct setup */
2231573Srgrimes	m->g = g;
2241573Srgrimes	m->eflags = eflags;
2251573Srgrimes	m->pmatch = NULL;
2261573Srgrimes	m->lastpos = NULL;
2271573Srgrimes	m->offp = string;
2281573Srgrimes	m->beginp = start;
2291573Srgrimes	m->endp = stop;
2301573Srgrimes	STATESETUP(m, 4);
2311573Srgrimes	SETUP(m->st);
2321573Srgrimes	SETUP(m->fresh);
2331573Srgrimes	SETUP(m->tmp);
2341573Srgrimes	SETUP(m->empty);
2351573Srgrimes	CLEAR(m->empty);
2361573Srgrimes
23762391Sdcs	/* Adjust start according to moffset, to speed things up */
23862391Sdcs	if (g->moffset > -1)
23962854Sdcs		start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
24062391Sdcs
2411573Srgrimes	/* this loop does only one repetition except for backrefs */
2421573Srgrimes	for (;;) {
2431573Srgrimes		endp = fast(m, start, stop, gf, gl);
2441573Srgrimes		if (endp == NULL) {		/* a miss */
2451573Srgrimes			STATETEARDOWN(m);
2461573Srgrimes			return(REG_NOMATCH);
2471573Srgrimes		}
2481573Srgrimes		if (nmatch == 0 && !g->backrefs)
2491573Srgrimes			break;		/* no further info needed */
2501573Srgrimes
2511573Srgrimes		/* where? */
2521573Srgrimes		assert(m->coldp != NULL);
2531573Srgrimes		for (;;) {
2541573Srgrimes			NOTE("finding start");
2551573Srgrimes			endp = slow(m, m->coldp, stop, gf, gl);
2561573Srgrimes			if (endp != NULL)
2571573Srgrimes				break;
2581573Srgrimes			assert(m->coldp < m->endp);
2591573Srgrimes			m->coldp++;
2601573Srgrimes		}
2611573Srgrimes		if (nmatch == 1 && !g->backrefs)
2621573Srgrimes			break;		/* no further info needed */
2631573Srgrimes
2641573Srgrimes		/* oh my, he wants the subexpressions... */
2651573Srgrimes		if (m->pmatch == NULL)
2661573Srgrimes			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
2671573Srgrimes							sizeof(regmatch_t));
2681573Srgrimes		if (m->pmatch == NULL) {
2691573Srgrimes			STATETEARDOWN(m);
2701573Srgrimes			return(REG_ESPACE);
2711573Srgrimes		}
2721573Srgrimes		for (i = 1; i <= m->g->nsub; i++)
2731573Srgrimes			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
2741573Srgrimes		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
2751573Srgrimes			NOTE("dissecting");
2761573Srgrimes			dp = dissect(m, m->coldp, endp, gf, gl);
2771573Srgrimes		} else {
2781573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL)
2791573Srgrimes				m->lastpos = (char **)malloc((g->nplus+1) *
2801573Srgrimes							sizeof(char *));
2811573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL) {
2821573Srgrimes				free(m->pmatch);
2831573Srgrimes				STATETEARDOWN(m);
2841573Srgrimes				return(REG_ESPACE);
2851573Srgrimes			}
2861573Srgrimes			NOTE("backref dissect");
2871573Srgrimes			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
2881573Srgrimes		}
2891573Srgrimes		if (dp != NULL)
2901573Srgrimes			break;
2911573Srgrimes
2921573Srgrimes		/* uh-oh... we couldn't find a subexpression-level match */
2931573Srgrimes		assert(g->backrefs);	/* must be back references doing it */
2941573Srgrimes		assert(g->nplus == 0 || m->lastpos != NULL);
2951573Srgrimes		for (;;) {
2961573Srgrimes			if (dp != NULL || endp <= m->coldp)
2971573Srgrimes				break;		/* defeat */
2981573Srgrimes			NOTE("backoff");
2991573Srgrimes			endp = slow(m, m->coldp, endp-1, gf, gl);
3001573Srgrimes			if (endp == NULL)
3011573Srgrimes				break;		/* defeat */
3021573Srgrimes			/* try it on a shorter possibility */
3031573Srgrimes#ifndef NDEBUG
3041573Srgrimes			for (i = 1; i <= m->g->nsub; i++) {
3051573Srgrimes				assert(m->pmatch[i].rm_so == -1);
3061573Srgrimes				assert(m->pmatch[i].rm_eo == -1);
3071573Srgrimes			}
3081573Srgrimes#endif
3091573Srgrimes			NOTE("backoff dissect");
3101573Srgrimes			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
3111573Srgrimes		}
3121573Srgrimes		assert(dp == NULL || dp == endp);
3131573Srgrimes		if (dp != NULL)		/* found a shorter one */
3141573Srgrimes			break;
3151573Srgrimes
3161573Srgrimes		/* despite initial appearances, there is no match here */
3171573Srgrimes		NOTE("false alarm");
3181573Srgrimes		start = m->coldp + 1;	/* recycle starting later */
3191573Srgrimes		assert(start <= stop);
3201573Srgrimes	}
3211573Srgrimes
3221573Srgrimes	/* fill in the details if requested */
3231573Srgrimes	if (nmatch > 0) {
3241573Srgrimes		pmatch[0].rm_so = m->coldp - m->offp;
3251573Srgrimes		pmatch[0].rm_eo = endp - m->offp;
3261573Srgrimes	}
3271573Srgrimes	if (nmatch > 1) {
3281573Srgrimes		assert(m->pmatch != NULL);
3291573Srgrimes		for (i = 1; i < nmatch; i++)
3301573Srgrimes			if (i <= m->g->nsub)
3311573Srgrimes				pmatch[i] = m->pmatch[i];
3321573Srgrimes			else {
3331573Srgrimes				pmatch[i].rm_so = -1;
3341573Srgrimes				pmatch[i].rm_eo = -1;
3351573Srgrimes			}
3361573Srgrimes	}
3371573Srgrimes
3381573Srgrimes	if (m->pmatch != NULL)
3391573Srgrimes		free((char *)m->pmatch);
3401573Srgrimes	if (m->lastpos != NULL)
3411573Srgrimes		free((char *)m->lastpos);
3421573Srgrimes	STATETEARDOWN(m);
3431573Srgrimes	return(0);
3441573Srgrimes}
3451573Srgrimes
3461573Srgrimes/*
3471573Srgrimes - dissect - figure out what matched what, no back references
34892889Sobrien == static char *dissect(struct match *m, char *start, \
3491573Srgrimes ==	char *stop, sopno startst, sopno stopst);
3501573Srgrimes */
3511573Srgrimesstatic char *			/* == stop (success) always */
3521573Srgrimesdissect(m, start, stop, startst, stopst)
35392889Sobrienstruct match *m;
3541573Srgrimeschar *start;
3551573Srgrimeschar *stop;
3561573Srgrimessopno startst;
3571573Srgrimessopno stopst;
3581573Srgrimes{
35992889Sobrien	int i;
36092889Sobrien	sopno ss;		/* start sop of current subRE */
36192889Sobrien	sopno es;		/* end sop of current subRE */
36292889Sobrien	char *sp;		/* start of string matched by it */
36392889Sobrien	char *stp;		/* string matched by it cannot pass here */
36492889Sobrien	char *rest;		/* start of rest of string */
36592889Sobrien	char *tail;		/* string unmatched by rest of RE */
36692889Sobrien	sopno ssub;		/* start sop of subsubRE */
36792889Sobrien	sopno esub;		/* end sop of subsubRE */
36892889Sobrien	char *ssp;		/* start of string matched by subsubRE */
36992889Sobrien	char *sep;		/* end of string matched by subsubRE */
37092889Sobrien	char *oldssp;		/* previous ssp */
37192889Sobrien	char *dp;
3721573Srgrimes
3731573Srgrimes	AT("diss", start, stop, startst, stopst);
3741573Srgrimes	sp = start;
3751573Srgrimes	for (ss = startst; ss < stopst; ss = es) {
3761573Srgrimes		/* identify end of subRE */
3771573Srgrimes		es = ss;
3781573Srgrimes		switch (OP(m->g->strip[es])) {
3791573Srgrimes		case OPLUS_:
3801573Srgrimes		case OQUEST_:
3811573Srgrimes			es += OPND(m->g->strip[es]);
3821573Srgrimes			break;
3831573Srgrimes		case OCH_:
3841573Srgrimes			while (OP(m->g->strip[es]) != O_CH)
3851573Srgrimes				es += OPND(m->g->strip[es]);
3861573Srgrimes			break;
3871573Srgrimes		}
3881573Srgrimes		es++;
3891573Srgrimes
3901573Srgrimes		/* figure out what it matched */
3911573Srgrimes		switch (OP(m->g->strip[ss])) {
3921573Srgrimes		case OEND:
3931573Srgrimes			assert(nope);
3941573Srgrimes			break;
3951573Srgrimes		case OCHAR:
3961573Srgrimes			sp++;
3971573Srgrimes			break;
3981573Srgrimes		case OBOL:
3991573Srgrimes		case OEOL:
4001573Srgrimes		case OBOW:
4011573Srgrimes		case OEOW:
4021573Srgrimes			break;
4031573Srgrimes		case OANY:
4041573Srgrimes		case OANYOF:
4051573Srgrimes			sp++;
4061573Srgrimes			break;
4071573Srgrimes		case OBACK_:
4081573Srgrimes		case O_BACK:
4091573Srgrimes			assert(nope);
4101573Srgrimes			break;
4111573Srgrimes		/* cases where length of match is hard to find */
4121573Srgrimes		case OQUEST_:
4131573Srgrimes			stp = stop;
4141573Srgrimes			for (;;) {
4151573Srgrimes				/* how long could this one be? */
4161573Srgrimes				rest = slow(m, sp, stp, ss, es);
4171573Srgrimes				assert(rest != NULL);	/* it did match */
4181573Srgrimes				/* could the rest match the rest? */
4191573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4201573Srgrimes				if (tail == stop)
4211573Srgrimes					break;		/* yes! */
4221573Srgrimes				/* no -- try a shorter match for this one */
4231573Srgrimes				stp = rest - 1;
4241573Srgrimes				assert(stp >= sp);	/* it did work */
4251573Srgrimes			}
4261573Srgrimes			ssub = ss + 1;
4271573Srgrimes			esub = es - 1;
4281573Srgrimes			/* did innards match? */
4291573Srgrimes			if (slow(m, sp, rest, ssub, esub) != NULL) {
4301573Srgrimes				dp = dissect(m, sp, rest, ssub, esub);
4311573Srgrimes				assert(dp == rest);
4321573Srgrimes			} else		/* no */
4331573Srgrimes				assert(sp == rest);
4341573Srgrimes			sp = rest;
4351573Srgrimes			break;
4361573Srgrimes		case OPLUS_:
4371573Srgrimes			stp = stop;
4381573Srgrimes			for (;;) {
4391573Srgrimes				/* how long could this one be? */
4401573Srgrimes				rest = slow(m, sp, stp, ss, es);
4411573Srgrimes				assert(rest != NULL);	/* it did match */
4421573Srgrimes				/* could the rest match the rest? */
4431573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4441573Srgrimes				if (tail == stop)
4451573Srgrimes					break;		/* yes! */
4461573Srgrimes				/* no -- try a shorter match for this one */
4471573Srgrimes				stp = rest - 1;
4481573Srgrimes				assert(stp >= sp);	/* it did work */
4491573Srgrimes			}
4501573Srgrimes			ssub = ss + 1;
4511573Srgrimes			esub = es - 1;
4521573Srgrimes			ssp = sp;
4531573Srgrimes			oldssp = ssp;
4541573Srgrimes			for (;;) {	/* find last match of innards */
4551573Srgrimes				sep = slow(m, ssp, rest, ssub, esub);
4561573Srgrimes				if (sep == NULL || sep == ssp)
4571573Srgrimes					break;	/* failed or matched null */
4581573Srgrimes				oldssp = ssp;	/* on to next try */
4591573Srgrimes				ssp = sep;
4601573Srgrimes			}
4611573Srgrimes			if (sep == NULL) {
4621573Srgrimes				/* last successful match */
4631573Srgrimes				sep = ssp;
4641573Srgrimes				ssp = oldssp;
4651573Srgrimes			}
4661573Srgrimes			assert(sep == rest);	/* must exhaust substring */
4671573Srgrimes			assert(slow(m, ssp, sep, ssub, esub) == rest);
4681573Srgrimes			dp = dissect(m, ssp, sep, ssub, esub);
4691573Srgrimes			assert(dp == sep);
4701573Srgrimes			sp = rest;
4711573Srgrimes			break;
4721573Srgrimes		case OCH_:
4731573Srgrimes			stp = stop;
4741573Srgrimes			for (;;) {
4751573Srgrimes				/* how long could this one be? */
4761573Srgrimes				rest = slow(m, sp, stp, ss, es);
4771573Srgrimes				assert(rest != NULL);	/* it did match */
4781573Srgrimes				/* could the rest match the rest? */
4791573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4801573Srgrimes				if (tail == stop)
4811573Srgrimes					break;		/* yes! */
4821573Srgrimes				/* no -- try a shorter match for this one */
4831573Srgrimes				stp = rest - 1;
4841573Srgrimes				assert(stp >= sp);	/* it did work */
4851573Srgrimes			}
4861573Srgrimes			ssub = ss + 1;
4871573Srgrimes			esub = ss + OPND(m->g->strip[ss]) - 1;
4881573Srgrimes			assert(OP(m->g->strip[esub]) == OOR1);
4891573Srgrimes			for (;;) {	/* find first matching branch */
4901573Srgrimes				if (slow(m, sp, rest, ssub, esub) == rest)
4911573Srgrimes					break;	/* it matched all of it */
4921573Srgrimes				/* that one missed, try next one */
4931573Srgrimes				assert(OP(m->g->strip[esub]) == OOR1);
4941573Srgrimes				esub++;
4951573Srgrimes				assert(OP(m->g->strip[esub]) == OOR2);
4961573Srgrimes				ssub = esub + 1;
4971573Srgrimes				esub += OPND(m->g->strip[esub]);
4981573Srgrimes				if (OP(m->g->strip[esub]) == OOR2)
4991573Srgrimes					esub--;
5001573Srgrimes				else
5011573Srgrimes					assert(OP(m->g->strip[esub]) == O_CH);
5021573Srgrimes			}
5031573Srgrimes			dp = dissect(m, sp, rest, ssub, esub);
5041573Srgrimes			assert(dp == rest);
5051573Srgrimes			sp = rest;
5061573Srgrimes			break;
5071573Srgrimes		case O_PLUS:
5081573Srgrimes		case O_QUEST:
5091573Srgrimes		case OOR1:
5101573Srgrimes		case OOR2:
5111573Srgrimes		case O_CH:
5121573Srgrimes			assert(nope);
5131573Srgrimes			break;
5141573Srgrimes		case OLPAREN:
5151573Srgrimes			i = OPND(m->g->strip[ss]);
5161573Srgrimes			assert(0 < i && i <= m->g->nsub);
5171573Srgrimes			m->pmatch[i].rm_so = sp - m->offp;
5181573Srgrimes			break;
5191573Srgrimes		case ORPAREN:
5201573Srgrimes			i = OPND(m->g->strip[ss]);
5211573Srgrimes			assert(0 < i && i <= m->g->nsub);
5221573Srgrimes			m->pmatch[i].rm_eo = sp - m->offp;
5231573Srgrimes			break;
5241573Srgrimes		default:		/* uh oh */
5251573Srgrimes			assert(nope);
5261573Srgrimes			break;
5271573Srgrimes		}
5281573Srgrimes	}
5291573Srgrimes
5301573Srgrimes	assert(sp == stop);
5311573Srgrimes	return(sp);
5321573Srgrimes}
5331573Srgrimes
5341573Srgrimes/*
5351573Srgrimes - backref - figure out what matched what, figuring in back references
53692889Sobrien == static char *backref(struct match *m, char *start, \
5371573Srgrimes ==	char *stop, sopno startst, sopno stopst, sopno lev);
5381573Srgrimes */
5391573Srgrimesstatic char *			/* == stop (success) or NULL (failure) */
5401573Srgrimesbackref(m, start, stop, startst, stopst, lev)
54192889Sobrienstruct match *m;
5421573Srgrimeschar *start;
5431573Srgrimeschar *stop;
5441573Srgrimessopno startst;
5451573Srgrimessopno stopst;
5461573Srgrimessopno lev;			/* PLUS nesting level */
5471573Srgrimes{
54892889Sobrien	int i;
54992889Sobrien	sopno ss;		/* start sop of current subRE */
55092889Sobrien	char *sp;		/* start of string matched by it */
55192889Sobrien	sopno ssub;		/* start sop of subsubRE */
55292889Sobrien	sopno esub;		/* end sop of subsubRE */
55392889Sobrien	char *ssp;		/* start of string matched by subsubRE */
55492889Sobrien	char *dp;
55592889Sobrien	size_t len;
55692889Sobrien	int hard;
55792889Sobrien	sop s;
55892889Sobrien	regoff_t offsave;
55992889Sobrien	cset *cs;
5601573Srgrimes
5611573Srgrimes	AT("back", start, stop, startst, stopst);
5621573Srgrimes	sp = start;
5631573Srgrimes
5641573Srgrimes	/* get as far as we can with easy stuff */
5651573Srgrimes	hard = 0;
5661573Srgrimes	for (ss = startst; !hard && ss < stopst; ss++)
5671573Srgrimes		switch (OP(s = m->g->strip[ss])) {
5681573Srgrimes		case OCHAR:
5691573Srgrimes			if (sp == stop || *sp++ != (char)OPND(s))
5701573Srgrimes				return(NULL);
5711573Srgrimes			break;
5721573Srgrimes		case OANY:
5731573Srgrimes			if (sp == stop)
5741573Srgrimes				return(NULL);
5751573Srgrimes			sp++;
5761573Srgrimes			break;
5771573Srgrimes		case OANYOF:
5781573Srgrimes			cs = &m->g->sets[OPND(s)];
5791573Srgrimes			if (sp == stop || !CHIN(cs, *sp++))
5801573Srgrimes				return(NULL);
5811573Srgrimes			break;
5821573Srgrimes		case OBOL:
5831573Srgrimes			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
5841573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
5851573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
5861573Srgrimes				{ /* yes */ }
5871573Srgrimes			else
5881573Srgrimes				return(NULL);
5891573Srgrimes			break;
5901573Srgrimes		case OEOL:
5911573Srgrimes			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
5921573Srgrimes					(sp < m->endp && *sp == '\n' &&
5931573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
5941573Srgrimes				{ /* yes */ }
5951573Srgrimes			else
5961573Srgrimes				return(NULL);
5971573Srgrimes			break;
5981573Srgrimes		case OBOW:
5991573Srgrimes			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
6001573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
6011573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6021573Srgrimes					(sp > m->beginp &&
6031573Srgrimes							!ISWORD(*(sp-1))) ) &&
6041573Srgrimes					(sp < m->endp && ISWORD(*sp)) )
6051573Srgrimes				{ /* yes */ }
6061573Srgrimes			else
6071573Srgrimes				return(NULL);
6081573Srgrimes			break;
6091573Srgrimes		case OEOW:
6101573Srgrimes			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
6111573Srgrimes					(sp < m->endp && *sp == '\n' &&
6121573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6131573Srgrimes					(sp < m->endp && !ISWORD(*sp)) ) &&
6141573Srgrimes					(sp > m->beginp && ISWORD(*(sp-1))) )
6151573Srgrimes				{ /* yes */ }
6161573Srgrimes			else
6171573Srgrimes				return(NULL);
6181573Srgrimes			break;
6191573Srgrimes		case O_QUEST:
6201573Srgrimes			break;
6211573Srgrimes		case OOR1:	/* matches null but needs to skip */
6221573Srgrimes			ss++;
6231573Srgrimes			s = m->g->strip[ss];
6241573Srgrimes			do {
6251573Srgrimes				assert(OP(s) == OOR2);
6261573Srgrimes				ss += OPND(s);
6271573Srgrimes			} while (OP(s = m->g->strip[ss]) != O_CH);
6281573Srgrimes			/* note that the ss++ gets us past the O_CH */
6291573Srgrimes			break;
6301573Srgrimes		default:	/* have to make a choice */
6311573Srgrimes			hard = 1;
6321573Srgrimes			break;
6331573Srgrimes		}
6341573Srgrimes	if (!hard) {		/* that was it! */
6351573Srgrimes		if (sp != stop)
6361573Srgrimes			return(NULL);
6371573Srgrimes		return(sp);
6381573Srgrimes	}
6391573Srgrimes	ss--;			/* adjust for the for's final increment */
6401573Srgrimes
6411573Srgrimes	/* the hard stuff */
6421573Srgrimes	AT("hard", sp, stop, ss, stopst);
6431573Srgrimes	s = m->g->strip[ss];
6441573Srgrimes	switch (OP(s)) {
6451573Srgrimes	case OBACK_:		/* the vilest depths */
6461573Srgrimes		i = OPND(s);
6471573Srgrimes		assert(0 < i && i <= m->g->nsub);
6481573Srgrimes		if (m->pmatch[i].rm_eo == -1)
6491573Srgrimes			return(NULL);
6501573Srgrimes		assert(m->pmatch[i].rm_so != -1);
6511573Srgrimes		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
6521573Srgrimes		assert(stop - m->beginp >= len);
6531573Srgrimes		if (sp > stop - len)
6541573Srgrimes			return(NULL);	/* not enough left to match */
6551573Srgrimes		ssp = m->offp + m->pmatch[i].rm_so;
6561573Srgrimes		if (memcmp(sp, ssp, len) != 0)
6571573Srgrimes			return(NULL);
6581573Srgrimes		while (m->g->strip[ss] != SOP(O_BACK, i))
6591573Srgrimes			ss++;
6601573Srgrimes		return(backref(m, sp+len, stop, ss+1, stopst, lev));
6611573Srgrimes		break;
6621573Srgrimes	case OQUEST_:		/* to null or not */
6631573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
6641573Srgrimes		if (dp != NULL)
6651573Srgrimes			return(dp);	/* not */
6661573Srgrimes		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
6671573Srgrimes		break;
6681573Srgrimes	case OPLUS_:
6691573Srgrimes		assert(m->lastpos != NULL);
6701573Srgrimes		assert(lev+1 <= m->g->nplus);
6711573Srgrimes		m->lastpos[lev+1] = sp;
6721573Srgrimes		return(backref(m, sp, stop, ss+1, stopst, lev+1));
6731573Srgrimes		break;
6741573Srgrimes	case O_PLUS:
6751573Srgrimes		if (sp == m->lastpos[lev])	/* last pass matched null */
6761573Srgrimes			return(backref(m, sp, stop, ss+1, stopst, lev-1));
6771573Srgrimes		/* try another pass */
6781573Srgrimes		m->lastpos[lev] = sp;
6791573Srgrimes		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
6801573Srgrimes		if (dp == NULL)
6811573Srgrimes			return(backref(m, sp, stop, ss+1, stopst, lev-1));
6821573Srgrimes		else
6831573Srgrimes			return(dp);
6841573Srgrimes		break;
6851573Srgrimes	case OCH_:		/* find the right one, if any */
6861573Srgrimes		ssub = ss + 1;
6871573Srgrimes		esub = ss + OPND(s) - 1;
6881573Srgrimes		assert(OP(m->g->strip[esub]) == OOR1);
6891573Srgrimes		for (;;) {	/* find first matching branch */
6901573Srgrimes			dp = backref(m, sp, stop, ssub, esub, lev);
6911573Srgrimes			if (dp != NULL)
6921573Srgrimes				return(dp);
6931573Srgrimes			/* that one missed, try next one */
6941573Srgrimes			if (OP(m->g->strip[esub]) == O_CH)
6951573Srgrimes				return(NULL);	/* there is none */
6961573Srgrimes			esub++;
6971573Srgrimes			assert(OP(m->g->strip[esub]) == OOR2);
6981573Srgrimes			ssub = esub + 1;
6991573Srgrimes			esub += OPND(m->g->strip[esub]);
7001573Srgrimes			if (OP(m->g->strip[esub]) == OOR2)
7011573Srgrimes				esub--;
7021573Srgrimes			else
7031573Srgrimes				assert(OP(m->g->strip[esub]) == O_CH);
7041573Srgrimes		}
7051573Srgrimes		break;
7061573Srgrimes	case OLPAREN:		/* must undo assignment if rest fails */
7071573Srgrimes		i = OPND(s);
7081573Srgrimes		assert(0 < i && i <= m->g->nsub);
7091573Srgrimes		offsave = m->pmatch[i].rm_so;
7101573Srgrimes		m->pmatch[i].rm_so = sp - m->offp;
7111573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
7121573Srgrimes		if (dp != NULL)
7131573Srgrimes			return(dp);
7141573Srgrimes		m->pmatch[i].rm_so = offsave;
7151573Srgrimes		return(NULL);
7161573Srgrimes		break;
7171573Srgrimes	case ORPAREN:		/* must undo assignment if rest fails */
7181573Srgrimes		i = OPND(s);
7191573Srgrimes		assert(0 < i && i <= m->g->nsub);
7201573Srgrimes		offsave = m->pmatch[i].rm_eo;
7211573Srgrimes		m->pmatch[i].rm_eo = sp - m->offp;
7221573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
7231573Srgrimes		if (dp != NULL)
7241573Srgrimes			return(dp);
7251573Srgrimes		m->pmatch[i].rm_eo = offsave;
7261573Srgrimes		return(NULL);
7271573Srgrimes		break;
7281573Srgrimes	default:		/* uh oh */
7291573Srgrimes		assert(nope);
7301573Srgrimes		break;
7311573Srgrimes	}
7321573Srgrimes
7331573Srgrimes	/* "can't happen" */
7341573Srgrimes	assert(nope);
7351573Srgrimes	/* NOTREACHED */
73611664Sphk	return "shut up gcc";
7371573Srgrimes}
7381573Srgrimes
7391573Srgrimes/*
7401573Srgrimes - fast - step through the string at top speed
74192889Sobrien == static char *fast(struct match *m, char *start, \
7421573Srgrimes ==	char *stop, sopno startst, sopno stopst);
7431573Srgrimes */
7441573Srgrimesstatic char *			/* where tentative match ended, or NULL */
7451573Srgrimesfast(m, start, stop, startst, stopst)
74692889Sobrienstruct match *m;
7471573Srgrimeschar *start;
7481573Srgrimeschar *stop;
7491573Srgrimessopno startst;
7501573Srgrimessopno stopst;
7511573Srgrimes{
75292889Sobrien	states st = m->st;
75392889Sobrien	states fresh = m->fresh;
75492889Sobrien	states tmp = m->tmp;
75592889Sobrien	char *p = start;
75692889Sobrien	int c = (start == m->beginp) ? OUT : *(start-1);
75792889Sobrien	int lastc;		/* previous c */
75892889Sobrien	int flagch;
75992889Sobrien	int i;
76092889Sobrien	char *coldp;		/* last p after which no match was underway */
7611573Srgrimes
7621573Srgrimes	CLEAR(st);
7631573Srgrimes	SET1(st, startst);
7641573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
7651573Srgrimes	ASSIGN(fresh, st);
7661573Srgrimes	SP("start", st, *p);
7671573Srgrimes	coldp = NULL;
7681573Srgrimes	for (;;) {
7691573Srgrimes		/* next character */
7701573Srgrimes		lastc = c;
7711573Srgrimes		c = (p == m->endp) ? OUT : *p;
7721573Srgrimes		if (EQ(st, fresh))
7731573Srgrimes			coldp = p;
7741573Srgrimes
7751573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
7761573Srgrimes		flagch = '\0';
7771573Srgrimes		i = 0;
7781573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
7791573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
7801573Srgrimes			flagch = BOL;
7811573Srgrimes			i = m->g->nbol;
7821573Srgrimes		}
7831573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
7841573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
7851573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
7861573Srgrimes			i += m->g->neol;
7871573Srgrimes		}
7881573Srgrimes		if (i != 0) {
7891573Srgrimes			for (; i > 0; i--)
7901573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
7911573Srgrimes			SP("boleol", st, c);
7921573Srgrimes		}
7931573Srgrimes
7941573Srgrimes		/* how about a word boundary? */
7951573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
7961573Srgrimes					(c != OUT && ISWORD(c)) ) {
7971573Srgrimes			flagch = BOW;
7981573Srgrimes		}
7991573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
8001573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
8011573Srgrimes			flagch = EOW;
8021573Srgrimes		}
8031573Srgrimes		if (flagch == BOW || flagch == EOW) {
8041573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
8051573Srgrimes			SP("boweow", st, c);
8061573Srgrimes		}
8071573Srgrimes
8081573Srgrimes		/* are we done? */
8091573Srgrimes		if (ISSET(st, stopst) || p == stop)
8101573Srgrimes			break;		/* NOTE BREAK OUT */
8111573Srgrimes
8121573Srgrimes		/* no, we must deal with this character */
8131573Srgrimes		ASSIGN(tmp, st);
8141573Srgrimes		ASSIGN(st, fresh);
8151573Srgrimes		assert(c != OUT);
8161573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
8171573Srgrimes		SP("aft", st, c);
8181573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
8191573Srgrimes		p++;
8201573Srgrimes	}
8211573Srgrimes
8221573Srgrimes	assert(coldp != NULL);
8231573Srgrimes	m->coldp = coldp;
8241573Srgrimes	if (ISSET(st, stopst))
8251573Srgrimes		return(p+1);
8261573Srgrimes	else
8271573Srgrimes		return(NULL);
8281573Srgrimes}
8291573Srgrimes
8301573Srgrimes/*
8311573Srgrimes - slow - step through the string more deliberately
83292889Sobrien == static char *slow(struct match *m, char *start, \
8331573Srgrimes ==	char *stop, sopno startst, sopno stopst);
8341573Srgrimes */
8351573Srgrimesstatic char *			/* where it ended */
8361573Srgrimesslow(m, start, stop, startst, stopst)
83792889Sobrienstruct match *m;
8381573Srgrimeschar *start;
8391573Srgrimeschar *stop;
8401573Srgrimessopno startst;
8411573Srgrimessopno stopst;
8421573Srgrimes{
84392889Sobrien	states st = m->st;
84492889Sobrien	states empty = m->empty;
84592889Sobrien	states tmp = m->tmp;
84692889Sobrien	char *p = start;
84792889Sobrien	int c = (start == m->beginp) ? OUT : *(start-1);
84892889Sobrien	int lastc;		/* previous c */
84992889Sobrien	int flagch;
85092889Sobrien	int i;
85192889Sobrien	char *matchp;		/* last p at which a match ended */
8521573Srgrimes
8531573Srgrimes	AT("slow", start, stop, startst, stopst);
8541573Srgrimes	CLEAR(st);
8551573Srgrimes	SET1(st, startst);
8561573Srgrimes	SP("sstart", st, *p);
8571573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
8581573Srgrimes	matchp = NULL;
8591573Srgrimes	for (;;) {
8601573Srgrimes		/* next character */
8611573Srgrimes		lastc = c;
8621573Srgrimes		c = (p == m->endp) ? OUT : *p;
8631573Srgrimes
8641573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
8651573Srgrimes		flagch = '\0';
8661573Srgrimes		i = 0;
8671573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
8681573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
8691573Srgrimes			flagch = BOL;
8701573Srgrimes			i = m->g->nbol;
8711573Srgrimes		}
8721573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
8731573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
8741573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
8751573Srgrimes			i += m->g->neol;
8761573Srgrimes		}
8771573Srgrimes		if (i != 0) {
8781573Srgrimes			for (; i > 0; i--)
8791573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
8801573Srgrimes			SP("sboleol", st, c);
8811573Srgrimes		}
8821573Srgrimes
8831573Srgrimes		/* how about a word boundary? */
8841573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
8851573Srgrimes					(c != OUT && ISWORD(c)) ) {
8861573Srgrimes			flagch = BOW;
8871573Srgrimes		}
8881573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
8891573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
8901573Srgrimes			flagch = EOW;
8911573Srgrimes		}
8921573Srgrimes		if (flagch == BOW || flagch == EOW) {
8931573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
8941573Srgrimes			SP("sboweow", st, c);
8951573Srgrimes		}
8961573Srgrimes
8971573Srgrimes		/* are we done? */
8981573Srgrimes		if (ISSET(st, stopst))
8991573Srgrimes			matchp = p;
9001573Srgrimes		if (EQ(st, empty) || p == stop)
9011573Srgrimes			break;		/* NOTE BREAK OUT */
9021573Srgrimes
9031573Srgrimes		/* no, we must deal with this character */
9041573Srgrimes		ASSIGN(tmp, st);
9051573Srgrimes		ASSIGN(st, empty);
9061573Srgrimes		assert(c != OUT);
9071573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
9081573Srgrimes		SP("saft", st, c);
9091573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
9101573Srgrimes		p++;
9111573Srgrimes	}
9121573Srgrimes
9131573Srgrimes	return(matchp);
9141573Srgrimes}
9151573Srgrimes
9161573Srgrimes
9171573Srgrimes/*
9181573Srgrimes - step - map set of states reachable before char to set reachable after
91992889Sobrien == static states step(struct re_guts *g, sopno start, sopno stop, \
92092889Sobrien ==	states bef, int ch, states aft);
9211573Srgrimes == #define	BOL	(OUT+1)
9221573Srgrimes == #define	EOL	(BOL+1)
9231573Srgrimes == #define	BOLEOL	(BOL+2)
9241573Srgrimes == #define	NOTHING	(BOL+3)
9251573Srgrimes == #define	BOW	(BOL+4)
9261573Srgrimes == #define	EOW	(BOL+5)
9271573Srgrimes == #define	CODEMAX	(BOL+5)		// highest code used
9281573Srgrimes == #define	NONCHAR(c)	((c) > CHAR_MAX)
9291573Srgrimes == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
9301573Srgrimes */
9311573Srgrimesstatic states
9321573Srgrimesstep(g, start, stop, bef, ch, aft)
93392889Sobrienstruct re_guts *g;
9341573Srgrimessopno start;			/* start state within strip */
9351573Srgrimessopno stop;			/* state after stop state within strip */
93692889Sobrienstates bef;			/* states reachable before */
9371573Srgrimesint ch;				/* character or NONCHAR code */
93892889Sobrienstates aft;			/* states already known reachable after */
9391573Srgrimes{
94092889Sobrien	cset *cs;
94192889Sobrien	sop s;
94292889Sobrien	sopno pc;
94392889Sobrien	onestate here;		/* note, macros know this name */
94492889Sobrien	sopno look;
94592889Sobrien	int i;
9461573Srgrimes
9471573Srgrimes	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
9481573Srgrimes		s = g->strip[pc];
9491573Srgrimes		switch (OP(s)) {
9501573Srgrimes		case OEND:
9511573Srgrimes			assert(pc == stop-1);
9521573Srgrimes			break;
9531573Srgrimes		case OCHAR:
9541573Srgrimes			/* only characters can match */
9551573Srgrimes			assert(!NONCHAR(ch) || ch != (char)OPND(s));
9561573Srgrimes			if (ch == (char)OPND(s))
9571573Srgrimes				FWD(aft, bef, 1);
9581573Srgrimes			break;
9591573Srgrimes		case OBOL:
9601573Srgrimes			if (ch == BOL || ch == BOLEOL)
9611573Srgrimes				FWD(aft, bef, 1);
9621573Srgrimes			break;
9631573Srgrimes		case OEOL:
9641573Srgrimes			if (ch == EOL || ch == BOLEOL)
9651573Srgrimes				FWD(aft, bef, 1);
9661573Srgrimes			break;
9671573Srgrimes		case OBOW:
9681573Srgrimes			if (ch == BOW)
9691573Srgrimes				FWD(aft, bef, 1);
9701573Srgrimes			break;
9711573Srgrimes		case OEOW:
9721573Srgrimes			if (ch == EOW)
9731573Srgrimes				FWD(aft, bef, 1);
9741573Srgrimes			break;
9751573Srgrimes		case OANY:
9761573Srgrimes			if (!NONCHAR(ch))
9771573Srgrimes				FWD(aft, bef, 1);
9781573Srgrimes			break;
9791573Srgrimes		case OANYOF:
9801573Srgrimes			cs = &g->sets[OPND(s)];
9811573Srgrimes			if (!NONCHAR(ch) && CHIN(cs, ch))
9821573Srgrimes				FWD(aft, bef, 1);
9831573Srgrimes			break;
9841573Srgrimes		case OBACK_:		/* ignored here */
9851573Srgrimes		case O_BACK:
9861573Srgrimes			FWD(aft, aft, 1);
9871573Srgrimes			break;
9881573Srgrimes		case OPLUS_:		/* forward, this is just an empty */
9891573Srgrimes			FWD(aft, aft, 1);
9901573Srgrimes			break;
9911573Srgrimes		case O_PLUS:		/* both forward and back */
9921573Srgrimes			FWD(aft, aft, 1);
9931573Srgrimes			i = ISSETBACK(aft, OPND(s));
9941573Srgrimes			BACK(aft, aft, OPND(s));
9951573Srgrimes			if (!i && ISSETBACK(aft, OPND(s))) {
9961573Srgrimes				/* oho, must reconsider loop body */
9971573Srgrimes				pc -= OPND(s) + 1;
9981573Srgrimes				INIT(here, pc);
9991573Srgrimes			}
10001573Srgrimes			break;
10011573Srgrimes		case OQUEST_:		/* two branches, both forward */
10021573Srgrimes			FWD(aft, aft, 1);
10031573Srgrimes			FWD(aft, aft, OPND(s));
10041573Srgrimes			break;
10051573Srgrimes		case O_QUEST:		/* just an empty */
10061573Srgrimes			FWD(aft, aft, 1);
10071573Srgrimes			break;
10081573Srgrimes		case OLPAREN:		/* not significant here */
10091573Srgrimes		case ORPAREN:
10101573Srgrimes			FWD(aft, aft, 1);
10111573Srgrimes			break;
10121573Srgrimes		case OCH_:		/* mark the first two branches */
10131573Srgrimes			FWD(aft, aft, 1);
10141573Srgrimes			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10151573Srgrimes			FWD(aft, aft, OPND(s));
10161573Srgrimes			break;
10171573Srgrimes		case OOR1:		/* done a branch, find the O_CH */
10181573Srgrimes			if (ISSTATEIN(aft, here)) {
10191573Srgrimes				for (look = 1;
10201573Srgrimes						OP(s = g->strip[pc+look]) != O_CH;
10211573Srgrimes						look += OPND(s))
10221573Srgrimes					assert(OP(s) == OOR2);
10231573Srgrimes				FWD(aft, aft, look);
10241573Srgrimes			}
10251573Srgrimes			break;
10261573Srgrimes		case OOR2:		/* propagate OCH_'s marking */
10271573Srgrimes			FWD(aft, aft, 1);
10281573Srgrimes			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
10291573Srgrimes				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10301573Srgrimes				FWD(aft, aft, OPND(s));
10311573Srgrimes			}
10321573Srgrimes			break;
10331573Srgrimes		case O_CH:		/* just empty */
10341573Srgrimes			FWD(aft, aft, 1);
10351573Srgrimes			break;
10361573Srgrimes		default:		/* ooooops... */
10371573Srgrimes			assert(nope);
10381573Srgrimes			break;
10391573Srgrimes		}
10401573Srgrimes	}
10411573Srgrimes
10421573Srgrimes	return(aft);
10431573Srgrimes}
10441573Srgrimes
10451573Srgrimes#ifdef REDEBUG
10461573Srgrimes/*
10471573Srgrimes - print - print a set of states
10481573Srgrimes == #ifdef REDEBUG
10491573Srgrimes == static void print(struct match *m, char *caption, states st, \
10501573Srgrimes ==	int ch, FILE *d);
10511573Srgrimes == #endif
10521573Srgrimes */
10531573Srgrimesstatic void
10541573Srgrimesprint(m, caption, st, ch, d)
10551573Srgrimesstruct match *m;
10561573Srgrimeschar *caption;
10571573Srgrimesstates st;
10581573Srgrimesint ch;
10591573SrgrimesFILE *d;
10601573Srgrimes{
106192889Sobrien	struct re_guts *g = m->g;
106292889Sobrien	int i;
106392889Sobrien	int first = 1;
10641573Srgrimes
10651573Srgrimes	if (!(m->eflags&REG_TRACE))
10661573Srgrimes		return;
10671573Srgrimes
10681573Srgrimes	fprintf(d, "%s", caption);
10691573Srgrimes	if (ch != '\0')
10701573Srgrimes		fprintf(d, " %s", pchar(ch));
10711573Srgrimes	for (i = 0; i < g->nstates; i++)
10721573Srgrimes		if (ISSET(st, i)) {
10731573Srgrimes			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
10741573Srgrimes			first = 0;
10751573Srgrimes		}
10761573Srgrimes	fprintf(d, "\n");
10771573Srgrimes}
10781573Srgrimes
10798870Srgrimes/*
10801573Srgrimes - at - print current situation
10811573Srgrimes == #ifdef REDEBUG
10821573Srgrimes == static void at(struct match *m, char *title, char *start, char *stop, \
10831573Srgrimes ==						sopno startst, sopno stopst);
10841573Srgrimes == #endif
10851573Srgrimes */
10861573Srgrimesstatic void
10871573Srgrimesat(m, title, start, stop, startst, stopst)
10881573Srgrimesstruct match *m;
10891573Srgrimeschar *title;
10901573Srgrimeschar *start;
10911573Srgrimeschar *stop;
10921573Srgrimessopno startst;
10931573Srgrimessopno stopst;
10941573Srgrimes{
10951573Srgrimes	if (!(m->eflags&REG_TRACE))
10961573Srgrimes		return;
10971573Srgrimes
10981573Srgrimes	printf("%s %s-", title, pchar(*start));
10991573Srgrimes	printf("%s ", pchar(*stop));
11001573Srgrimes	printf("%ld-%ld\n", (long)startst, (long)stopst);
11011573Srgrimes}
11021573Srgrimes
11031573Srgrimes#ifndef PCHARDONE
11041573Srgrimes#define	PCHARDONE	/* never again */
11051573Srgrimes/*
11061573Srgrimes - pchar - make a character printable
11071573Srgrimes == #ifdef REDEBUG
11081573Srgrimes == static char *pchar(int ch);
11091573Srgrimes == #endif
11101573Srgrimes *
11111573Srgrimes * Is this identical to regchar() over in debug.c?  Well, yes.  But a
11121573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to
11131573Srgrimes * a matching debug.o, and this is convenient.  It all disappears in
11141573Srgrimes * the non-debug compilation anyway, so it doesn't matter much.
11151573Srgrimes */
11161573Srgrimesstatic char *			/* -> representation */
11171573Srgrimespchar(ch)
11181573Srgrimesint ch;
11191573Srgrimes{
11201573Srgrimes	static char pbuf[10];
11211573Srgrimes
112217508Sache	if (isprint((uch)ch) || ch == ' ')
11231573Srgrimes		sprintf(pbuf, "%c", ch);
11241573Srgrimes	else
11251573Srgrimes		sprintf(pbuf, "\\%o", ch);
11261573Srgrimes	return(pbuf);
11271573Srgrimes}
11281573Srgrimes#endif
11291573Srgrimes#endif
11301573Srgrimes
11311573Srgrimes#undef	matcher
11321573Srgrimes#undef	fast
11331573Srgrimes#undef	slow
11341573Srgrimes#undef	dissect
11351573Srgrimes#undef	backref
11361573Srgrimes#undef	step
11371573Srgrimes#undef	print
11381573Srgrimes#undef	at
11391573Srgrimes#undef	match
1140