engine.c revision 62232
11573Srgrimes/*-
21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
31573Srgrimes * Copyright (c) 1992, 1993, 1994
41573Srgrimes *	The Regents of the University of California.  All rights reserved.
51573Srgrimes *
61573Srgrimes * This code is derived from software contributed to Berkeley by
71573Srgrimes * Henry Spencer.
81573Srgrimes *
91573Srgrimes * Redistribution and use in source and binary forms, with or without
101573Srgrimes * modification, are permitted provided that the following conditions
111573Srgrimes * are met:
121573Srgrimes * 1. Redistributions of source code must retain the above copyright
131573Srgrimes *    notice, this list of conditions and the following disclaimer.
141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151573Srgrimes *    notice, this list of conditions and the following disclaimer in the
161573Srgrimes *    documentation and/or other materials provided with the distribution.
171573Srgrimes * 3. All advertising materials mentioning features or use of this software
181573Srgrimes *    must display the following acknowledgement:
191573Srgrimes *	This product includes software developed by the University of
201573Srgrimes *	California, Berkeley and its contributors.
211573Srgrimes * 4. Neither the name of the University nor the names of its contributors
221573Srgrimes *    may be used to endorse or promote products derived from this software
231573Srgrimes *    without specific prior written permission.
241573Srgrimes *
251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351573Srgrimes * SUCH DAMAGE.
361573Srgrimes *
371573Srgrimes *	@(#)engine.c	8.5 (Berkeley) 3/20/94
3862232Sdcs *
3962232Sdcs * $FreeBSD: head/lib/libc/regex/engine.c 62232 2000-06-29 04:48:34Z dcs $
401573Srgrimes */
411573Srgrimes
421573Srgrimes/*
431573Srgrimes * The matching engine and friends.  This file is #included by regexec.c
441573Srgrimes * after suitable #defines of a variety of macros used herein, so that
451573Srgrimes * different state representations can be used without duplicating masses
461573Srgrimes * of code.
471573Srgrimes */
481573Srgrimes
491573Srgrimes#ifdef SNAMES
501573Srgrimes#define	matcher	smatcher
511573Srgrimes#define	fast	sfast
521573Srgrimes#define	slow	sslow
531573Srgrimes#define	dissect	sdissect
541573Srgrimes#define	backref	sbackref
551573Srgrimes#define	step	sstep
561573Srgrimes#define	print	sprint
571573Srgrimes#define	at	sat
581573Srgrimes#define	match	smat
591573Srgrimes#endif
601573Srgrimes#ifdef LNAMES
611573Srgrimes#define	matcher	lmatcher
621573Srgrimes#define	fast	lfast
631573Srgrimes#define	slow	lslow
641573Srgrimes#define	dissect	ldissect
651573Srgrimes#define	backref	lbackref
661573Srgrimes#define	step	lstep
671573Srgrimes#define	print	lprint
681573Srgrimes#define	at	lat
691573Srgrimes#define	match	lmat
701573Srgrimes#endif
711573Srgrimes
721573Srgrimes/* another structure passed up and down to avoid zillions of parameters */
731573Srgrimesstruct match {
741573Srgrimes	struct re_guts *g;
751573Srgrimes	int eflags;
761573Srgrimes	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
771573Srgrimes	char *offp;		/* offsets work from here */
781573Srgrimes	char *beginp;		/* start of string -- virtual NUL precedes */
791573Srgrimes	char *endp;		/* end of string -- virtual NUL here */
801573Srgrimes	char *coldp;		/* can be no match starting before here */
811573Srgrimes	char **lastpos;		/* [nplus+1] */
821573Srgrimes	STATEVARS;
831573Srgrimes	states st;		/* current states */
841573Srgrimes	states fresh;		/* states for a fresh start */
851573Srgrimes	states tmp;		/* temporary */
861573Srgrimes	states empty;		/* empty set of states */
871573Srgrimes};
881573Srgrimes
891573Srgrimes/* ========= begin header generated by ./mkh ========= */
901573Srgrimes#ifdef __cplusplus
911573Srgrimesextern "C" {
921573Srgrimes#endif
931573Srgrimes
941573Srgrimes/* === engine.c === */
951573Srgrimesstatic int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
961573Srgrimesstatic char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
971573Srgrimesstatic char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));
981573Srgrimesstatic char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
991573Srgrimesstatic char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
1001573Srgrimesstatic states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));
1011573Srgrimes#define	BOL	(OUT+1)
1021573Srgrimes#define	EOL	(BOL+1)
1031573Srgrimes#define	BOLEOL	(BOL+2)
1041573Srgrimes#define	NOTHING	(BOL+3)
1051573Srgrimes#define	BOW	(BOL+4)
1061573Srgrimes#define	EOW	(BOL+5)
1071573Srgrimes#define	CODEMAX	(BOL+5)		/* highest code used */
1081573Srgrimes#define	NONCHAR(c)	((c) > CHAR_MAX)
1091573Srgrimes#define	NNONCHAR	(CODEMAX-CHAR_MAX)
1101573Srgrimes#ifdef REDEBUG
1111573Srgrimesstatic void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
1121573Srgrimes#endif
1131573Srgrimes#ifdef REDEBUG
1141573Srgrimesstatic void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
1151573Srgrimes#endif
1161573Srgrimes#ifdef REDEBUG
1171573Srgrimesstatic char *pchar __P((int ch));
1181573Srgrimes#endif
1191573Srgrimes
1201573Srgrimes#ifdef __cplusplus
1211573Srgrimes}
1221573Srgrimes#endif
1231573Srgrimes/* ========= end header generated by ./mkh ========= */
1241573Srgrimes
1251573Srgrimes#ifdef REDEBUG
1261573Srgrimes#define	SP(t, s, c)	print(m, t, s, c, stdout)
1271573Srgrimes#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
1281573Srgrimes#define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
1291573Srgrimes#else
1301573Srgrimes#define	SP(t, s, c)	/* nothing */
1311573Srgrimes#define	AT(t, p1, p2, s1, s2)	/* nothing */
1321573Srgrimes#define	NOTE(s)	/* nothing */
1331573Srgrimes#endif
1341573Srgrimes
1351573Srgrimes/*
1361573Srgrimes - matcher - the actual matching engine
1371573Srgrimes == static int matcher(register struct re_guts *g, char *string, \
1381573Srgrimes ==	size_t nmatch, regmatch_t pmatch[], int eflags);
1391573Srgrimes */
1401573Srgrimesstatic int			/* 0 success, REG_NOMATCH failure */
1411573Srgrimesmatcher(g, string, nmatch, pmatch, eflags)
1421573Srgrimesregister struct re_guts *g;
1431573Srgrimeschar *string;
1441573Srgrimessize_t nmatch;
1451573Srgrimesregmatch_t pmatch[];
1461573Srgrimesint eflags;
1471573Srgrimes{
1481573Srgrimes	register char *endp;
1491573Srgrimes	register int i;
1501573Srgrimes	struct match mv;
1511573Srgrimes	register struct match *m = &mv;
1521573Srgrimes	register char *dp;
15317141Sjkh	register const sopno gf = g->firststate+1;	/* +1 for OEND */
15417141Sjkh	register const sopno gl = g->laststate;
1551573Srgrimes	char *start;
1561573Srgrimes	char *stop;
15762232Sdcs	/* Boyer-Moore algorithms variables */
15862232Sdcs	register unsigned char *pp;
15962232Sdcs	int cj, mj;
16062232Sdcs	register unsigned char *mustfirst;
16162232Sdcs	register unsigned char *mustlast;
16262232Sdcs	register int mustlen;
16362232Sdcs	register int *matchjump;
16462232Sdcs	register int *charjump;
16562232Sdcs	register unsigned char *bmp;
16662232Sdcs	register unsigned char *bmps;
1671573Srgrimes
1681573Srgrimes	/* simplify the situation where possible */
1691573Srgrimes	if (g->cflags&REG_NOSUB)
1701573Srgrimes		nmatch = 0;
1711573Srgrimes	if (eflags&REG_STARTEND) {
1721573Srgrimes		start = string + pmatch[0].rm_so;
1731573Srgrimes		stop = string + pmatch[0].rm_eo;
1741573Srgrimes	} else {
1751573Srgrimes		start = string;
1761573Srgrimes		stop = start + strlen(start);
1771573Srgrimes	}
1781573Srgrimes	if (stop < start)
1791573Srgrimes		return(REG_INVARG);
1801573Srgrimes
1811573Srgrimes	/* prescreening; this does wonders for this rather slow code */
1821573Srgrimes	if (g->must != NULL) {
18362232Sdcs		if(g->charjump != NULL && g->matchjump != NULL) {
18462232Sdcs			mustlen = -g->mlen;
18562232Sdcs			mustfirst = g->must;
18662232Sdcs			mustlast = g->must + g->mlen - 1;
18762232Sdcs			charjump = g->charjump;
18862232Sdcs			matchjump = g->matchjump;
18962232Sdcs			bmps = stop;
19062232Sdcs			pp = mustlast;
19162232Sdcs			for(bmp = start+g->mlen-1; bmp < bmps;) {
19262232Sdcs				/* Fast skip non-matches */
19362232Sdcs				while(bmp < bmps && charjump[*bmp])
19462232Sdcs					bmp += charjump[*bmp];
19562232Sdcs
19662232Sdcs				if(bmp >= bmps)
19762232Sdcs					break;
19862232Sdcs
19962232Sdcs				/* Greedy matcher */
20062232Sdcs				/* We depend on not being used for
20162232Sdcs				 * for strings of length 1
20262232Sdcs				 */
20362232Sdcs				while(*--bmp == *--pp && pp != mustfirst);
20462232Sdcs
20562232Sdcs				if(*bmp == *pp)
20662232Sdcs					break;
20762232Sdcs
20862232Sdcs				/* Jump to next possible match */
20962232Sdcs				mj = matchjump[pp - mustfirst];
21062232Sdcs				cj = charjump[*bmp];
21162232Sdcs				bmp += (cj < mj ? mj : cj);
21262232Sdcs				pp = mustlast;
21362232Sdcs			}
21462232Sdcs			if(pp != mustfirst)
21562232Sdcs				return(REG_NOMATCH);
21662232Sdcs		} else {
21762232Sdcs			for (dp = start; dp < stop; dp++)
21862232Sdcs				if (*dp == g->must[0] &&
21962232Sdcs				    stop - dp >= g->mlen &&
22062232Sdcs				    memcmp(dp, g->must, (size_t)g->mlen) == 0)
22162232Sdcs					break;
22262232Sdcs			if (dp == stop)		/* we didn't find g->must */
22362232Sdcs				return(REG_NOMATCH);
22462232Sdcs		}
2251573Srgrimes	}
2261573Srgrimes
2271573Srgrimes	/* match struct setup */
2281573Srgrimes	m->g = g;
2291573Srgrimes	m->eflags = eflags;
2301573Srgrimes	m->pmatch = NULL;
2311573Srgrimes	m->lastpos = NULL;
2321573Srgrimes	m->offp = string;
2331573Srgrimes	m->beginp = start;
2341573Srgrimes	m->endp = stop;
2351573Srgrimes	STATESETUP(m, 4);
2361573Srgrimes	SETUP(m->st);
2371573Srgrimes	SETUP(m->fresh);
2381573Srgrimes	SETUP(m->tmp);
2391573Srgrimes	SETUP(m->empty);
2401573Srgrimes	CLEAR(m->empty);
2411573Srgrimes
2421573Srgrimes	/* this loop does only one repetition except for backrefs */
2431573Srgrimes	for (;;) {
2441573Srgrimes		endp = fast(m, start, stop, gf, gl);
2451573Srgrimes		if (endp == NULL) {		/* a miss */
2461573Srgrimes			STATETEARDOWN(m);
2471573Srgrimes			return(REG_NOMATCH);
2481573Srgrimes		}
2491573Srgrimes		if (nmatch == 0 && !g->backrefs)
2501573Srgrimes			break;		/* no further info needed */
2511573Srgrimes
2521573Srgrimes		/* where? */
2531573Srgrimes		assert(m->coldp != NULL);
2541573Srgrimes		for (;;) {
2551573Srgrimes			NOTE("finding start");
2561573Srgrimes			endp = slow(m, m->coldp, stop, gf, gl);
2571573Srgrimes			if (endp != NULL)
2581573Srgrimes				break;
2591573Srgrimes			assert(m->coldp < m->endp);
2601573Srgrimes			m->coldp++;
2611573Srgrimes		}
2621573Srgrimes		if (nmatch == 1 && !g->backrefs)
2631573Srgrimes			break;		/* no further info needed */
2641573Srgrimes
2651573Srgrimes		/* oh my, he wants the subexpressions... */
2661573Srgrimes		if (m->pmatch == NULL)
2671573Srgrimes			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
2681573Srgrimes							sizeof(regmatch_t));
2691573Srgrimes		if (m->pmatch == NULL) {
2701573Srgrimes			STATETEARDOWN(m);
2711573Srgrimes			return(REG_ESPACE);
2721573Srgrimes		}
2731573Srgrimes		for (i = 1; i <= m->g->nsub; i++)
2741573Srgrimes			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
2751573Srgrimes		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
2761573Srgrimes			NOTE("dissecting");
2771573Srgrimes			dp = dissect(m, m->coldp, endp, gf, gl);
2781573Srgrimes		} else {
2791573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL)
2801573Srgrimes				m->lastpos = (char **)malloc((g->nplus+1) *
2811573Srgrimes							sizeof(char *));
2821573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL) {
2831573Srgrimes				free(m->pmatch);
2841573Srgrimes				STATETEARDOWN(m);
2851573Srgrimes				return(REG_ESPACE);
2861573Srgrimes			}
2871573Srgrimes			NOTE("backref dissect");
2881573Srgrimes			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
2891573Srgrimes		}
2901573Srgrimes		if (dp != NULL)
2911573Srgrimes			break;
2921573Srgrimes
2931573Srgrimes		/* uh-oh... we couldn't find a subexpression-level match */
2941573Srgrimes		assert(g->backrefs);	/* must be back references doing it */
2951573Srgrimes		assert(g->nplus == 0 || m->lastpos != NULL);
2961573Srgrimes		for (;;) {
2971573Srgrimes			if (dp != NULL || endp <= m->coldp)
2981573Srgrimes				break;		/* defeat */
2991573Srgrimes			NOTE("backoff");
3001573Srgrimes			endp = slow(m, m->coldp, endp-1, gf, gl);
3011573Srgrimes			if (endp == NULL)
3021573Srgrimes				break;		/* defeat */
3031573Srgrimes			/* try it on a shorter possibility */
3041573Srgrimes#ifndef NDEBUG
3051573Srgrimes			for (i = 1; i <= m->g->nsub; i++) {
3061573Srgrimes				assert(m->pmatch[i].rm_so == -1);
3071573Srgrimes				assert(m->pmatch[i].rm_eo == -1);
3081573Srgrimes			}
3091573Srgrimes#endif
3101573Srgrimes			NOTE("backoff dissect");
3111573Srgrimes			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
3121573Srgrimes		}
3131573Srgrimes		assert(dp == NULL || dp == endp);
3141573Srgrimes		if (dp != NULL)		/* found a shorter one */
3151573Srgrimes			break;
3161573Srgrimes
3171573Srgrimes		/* despite initial appearances, there is no match here */
3181573Srgrimes		NOTE("false alarm");
3191573Srgrimes		start = m->coldp + 1;	/* recycle starting later */
3201573Srgrimes		assert(start <= stop);
3211573Srgrimes	}
3221573Srgrimes
3231573Srgrimes	/* fill in the details if requested */
3241573Srgrimes	if (nmatch > 0) {
3251573Srgrimes		pmatch[0].rm_so = m->coldp - m->offp;
3261573Srgrimes		pmatch[0].rm_eo = endp - m->offp;
3271573Srgrimes	}
3281573Srgrimes	if (nmatch > 1) {
3291573Srgrimes		assert(m->pmatch != NULL);
3301573Srgrimes		for (i = 1; i < nmatch; i++)
3311573Srgrimes			if (i <= m->g->nsub)
3321573Srgrimes				pmatch[i] = m->pmatch[i];
3331573Srgrimes			else {
3341573Srgrimes				pmatch[i].rm_so = -1;
3351573Srgrimes				pmatch[i].rm_eo = -1;
3361573Srgrimes			}
3371573Srgrimes	}
3381573Srgrimes
3391573Srgrimes	if (m->pmatch != NULL)
3401573Srgrimes		free((char *)m->pmatch);
3411573Srgrimes	if (m->lastpos != NULL)
3421573Srgrimes		free((char *)m->lastpos);
3431573Srgrimes	STATETEARDOWN(m);
3441573Srgrimes	return(0);
3451573Srgrimes}
3461573Srgrimes
3471573Srgrimes/*
3481573Srgrimes - dissect - figure out what matched what, no back references
3491573Srgrimes == static char *dissect(register struct match *m, char *start, \
3501573Srgrimes ==	char *stop, sopno startst, sopno stopst);
3511573Srgrimes */
3521573Srgrimesstatic char *			/* == stop (success) always */
3531573Srgrimesdissect(m, start, stop, startst, stopst)
3541573Srgrimesregister struct match *m;
3551573Srgrimeschar *start;
3561573Srgrimeschar *stop;
3571573Srgrimessopno startst;
3581573Srgrimessopno stopst;
3591573Srgrimes{
3601573Srgrimes	register int i;
3611573Srgrimes	register sopno ss;	/* start sop of current subRE */
3621573Srgrimes	register sopno es;	/* end sop of current subRE */
3631573Srgrimes	register char *sp;	/* start of string matched by it */
3641573Srgrimes	register char *stp;	/* string matched by it cannot pass here */
3651573Srgrimes	register char *rest;	/* start of rest of string */
3661573Srgrimes	register char *tail;	/* string unmatched by rest of RE */
3671573Srgrimes	register sopno ssub;	/* start sop of subsubRE */
3681573Srgrimes	register sopno esub;	/* end sop of subsubRE */
3691573Srgrimes	register char *ssp;	/* start of string matched by subsubRE */
3701573Srgrimes	register char *sep;	/* end of string matched by subsubRE */
3711573Srgrimes	register char *oldssp;	/* previous ssp */
3721573Srgrimes	register char *dp;
3731573Srgrimes
3741573Srgrimes	AT("diss", start, stop, startst, stopst);
3751573Srgrimes	sp = start;
3761573Srgrimes	for (ss = startst; ss < stopst; ss = es) {
3771573Srgrimes		/* identify end of subRE */
3781573Srgrimes		es = ss;
3791573Srgrimes		switch (OP(m->g->strip[es])) {
3801573Srgrimes		case OPLUS_:
3811573Srgrimes		case OQUEST_:
3821573Srgrimes			es += OPND(m->g->strip[es]);
3831573Srgrimes			break;
3841573Srgrimes		case OCH_:
3851573Srgrimes			while (OP(m->g->strip[es]) != O_CH)
3861573Srgrimes				es += OPND(m->g->strip[es]);
3871573Srgrimes			break;
3881573Srgrimes		}
3891573Srgrimes		es++;
3901573Srgrimes
3911573Srgrimes		/* figure out what it matched */
3921573Srgrimes		switch (OP(m->g->strip[ss])) {
3931573Srgrimes		case OEND:
3941573Srgrimes			assert(nope);
3951573Srgrimes			break;
3961573Srgrimes		case OCHAR:
3971573Srgrimes			sp++;
3981573Srgrimes			break;
3991573Srgrimes		case OBOL:
4001573Srgrimes		case OEOL:
4011573Srgrimes		case OBOW:
4021573Srgrimes		case OEOW:
4031573Srgrimes			break;
4041573Srgrimes		case OANY:
4051573Srgrimes		case OANYOF:
4061573Srgrimes			sp++;
4071573Srgrimes			break;
4081573Srgrimes		case OBACK_:
4091573Srgrimes		case O_BACK:
4101573Srgrimes			assert(nope);
4111573Srgrimes			break;
4121573Srgrimes		/* cases where length of match is hard to find */
4131573Srgrimes		case OQUEST_:
4141573Srgrimes			stp = stop;
4151573Srgrimes			for (;;) {
4161573Srgrimes				/* how long could this one be? */
4171573Srgrimes				rest = slow(m, sp, stp, ss, es);
4181573Srgrimes				assert(rest != NULL);	/* it did match */
4191573Srgrimes				/* could the rest match the rest? */
4201573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4211573Srgrimes				if (tail == stop)
4221573Srgrimes					break;		/* yes! */
4231573Srgrimes				/* no -- try a shorter match for this one */
4241573Srgrimes				stp = rest - 1;
4251573Srgrimes				assert(stp >= sp);	/* it did work */
4261573Srgrimes			}
4271573Srgrimes			ssub = ss + 1;
4281573Srgrimes			esub = es - 1;
4291573Srgrimes			/* did innards match? */
4301573Srgrimes			if (slow(m, sp, rest, ssub, esub) != NULL) {
4311573Srgrimes				dp = dissect(m, sp, rest, ssub, esub);
4321573Srgrimes				assert(dp == rest);
4331573Srgrimes			} else		/* no */
4341573Srgrimes				assert(sp == rest);
4351573Srgrimes			sp = rest;
4361573Srgrimes			break;
4371573Srgrimes		case OPLUS_:
4381573Srgrimes			stp = stop;
4391573Srgrimes			for (;;) {
4401573Srgrimes				/* how long could this one be? */
4411573Srgrimes				rest = slow(m, sp, stp, ss, es);
4421573Srgrimes				assert(rest != NULL);	/* it did match */
4431573Srgrimes				/* could the rest match the rest? */
4441573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4451573Srgrimes				if (tail == stop)
4461573Srgrimes					break;		/* yes! */
4471573Srgrimes				/* no -- try a shorter match for this one */
4481573Srgrimes				stp = rest - 1;
4491573Srgrimes				assert(stp >= sp);	/* it did work */
4501573Srgrimes			}
4511573Srgrimes			ssub = ss + 1;
4521573Srgrimes			esub = es - 1;
4531573Srgrimes			ssp = sp;
4541573Srgrimes			oldssp = ssp;
4551573Srgrimes			for (;;) {	/* find last match of innards */
4561573Srgrimes				sep = slow(m, ssp, rest, ssub, esub);
4571573Srgrimes				if (sep == NULL || sep == ssp)
4581573Srgrimes					break;	/* failed or matched null */
4591573Srgrimes				oldssp = ssp;	/* on to next try */
4601573Srgrimes				ssp = sep;
4611573Srgrimes			}
4621573Srgrimes			if (sep == NULL) {
4631573Srgrimes				/* last successful match */
4641573Srgrimes				sep = ssp;
4651573Srgrimes				ssp = oldssp;
4661573Srgrimes			}
4671573Srgrimes			assert(sep == rest);	/* must exhaust substring */
4681573Srgrimes			assert(slow(m, ssp, sep, ssub, esub) == rest);
4691573Srgrimes			dp = dissect(m, ssp, sep, ssub, esub);
4701573Srgrimes			assert(dp == sep);
4711573Srgrimes			sp = rest;
4721573Srgrimes			break;
4731573Srgrimes		case OCH_:
4741573Srgrimes			stp = stop;
4751573Srgrimes			for (;;) {
4761573Srgrimes				/* how long could this one be? */
4771573Srgrimes				rest = slow(m, sp, stp, ss, es);
4781573Srgrimes				assert(rest != NULL);	/* it did match */
4791573Srgrimes				/* could the rest match the rest? */
4801573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4811573Srgrimes				if (tail == stop)
4821573Srgrimes					break;		/* yes! */
4831573Srgrimes				/* no -- try a shorter match for this one */
4841573Srgrimes				stp = rest - 1;
4851573Srgrimes				assert(stp >= sp);	/* it did work */
4861573Srgrimes			}
4871573Srgrimes			ssub = ss + 1;
4881573Srgrimes			esub = ss + OPND(m->g->strip[ss]) - 1;
4891573Srgrimes			assert(OP(m->g->strip[esub]) == OOR1);
4901573Srgrimes			for (;;) {	/* find first matching branch */
4911573Srgrimes				if (slow(m, sp, rest, ssub, esub) == rest)
4921573Srgrimes					break;	/* it matched all of it */
4931573Srgrimes				/* that one missed, try next one */
4941573Srgrimes				assert(OP(m->g->strip[esub]) == OOR1);
4951573Srgrimes				esub++;
4961573Srgrimes				assert(OP(m->g->strip[esub]) == OOR2);
4971573Srgrimes				ssub = esub + 1;
4981573Srgrimes				esub += OPND(m->g->strip[esub]);
4991573Srgrimes				if (OP(m->g->strip[esub]) == OOR2)
5001573Srgrimes					esub--;
5011573Srgrimes				else
5021573Srgrimes					assert(OP(m->g->strip[esub]) == O_CH);
5031573Srgrimes			}
5041573Srgrimes			dp = dissect(m, sp, rest, ssub, esub);
5051573Srgrimes			assert(dp == rest);
5061573Srgrimes			sp = rest;
5071573Srgrimes			break;
5081573Srgrimes		case O_PLUS:
5091573Srgrimes		case O_QUEST:
5101573Srgrimes		case OOR1:
5111573Srgrimes		case OOR2:
5121573Srgrimes		case O_CH:
5131573Srgrimes			assert(nope);
5141573Srgrimes			break;
5151573Srgrimes		case OLPAREN:
5161573Srgrimes			i = OPND(m->g->strip[ss]);
5171573Srgrimes			assert(0 < i && i <= m->g->nsub);
5181573Srgrimes			m->pmatch[i].rm_so = sp - m->offp;
5191573Srgrimes			break;
5201573Srgrimes		case ORPAREN:
5211573Srgrimes			i = OPND(m->g->strip[ss]);
5221573Srgrimes			assert(0 < i && i <= m->g->nsub);
5231573Srgrimes			m->pmatch[i].rm_eo = sp - m->offp;
5241573Srgrimes			break;
5251573Srgrimes		default:		/* uh oh */
5261573Srgrimes			assert(nope);
5271573Srgrimes			break;
5281573Srgrimes		}
5291573Srgrimes	}
5301573Srgrimes
5311573Srgrimes	assert(sp == stop);
5321573Srgrimes	return(sp);
5331573Srgrimes}
5341573Srgrimes
5351573Srgrimes/*
5361573Srgrimes - backref - figure out what matched what, figuring in back references
5371573Srgrimes == static char *backref(register struct match *m, char *start, \
5381573Srgrimes ==	char *stop, sopno startst, sopno stopst, sopno lev);
5391573Srgrimes */
5401573Srgrimesstatic char *			/* == stop (success) or NULL (failure) */
5411573Srgrimesbackref(m, start, stop, startst, stopst, lev)
5421573Srgrimesregister struct match *m;
5431573Srgrimeschar *start;
5441573Srgrimeschar *stop;
5451573Srgrimessopno startst;
5461573Srgrimessopno stopst;
5471573Srgrimessopno lev;			/* PLUS nesting level */
5481573Srgrimes{
5491573Srgrimes	register int i;
5501573Srgrimes	register sopno ss;	/* start sop of current subRE */
5511573Srgrimes	register char *sp;	/* start of string matched by it */
5521573Srgrimes	register sopno ssub;	/* start sop of subsubRE */
5531573Srgrimes	register sopno esub;	/* end sop of subsubRE */
5541573Srgrimes	register char *ssp;	/* start of string matched by subsubRE */
5551573Srgrimes	register char *dp;
5561573Srgrimes	register size_t len;
5571573Srgrimes	register int hard;
5581573Srgrimes	register sop s;
5591573Srgrimes	register regoff_t offsave;
5601573Srgrimes	register cset *cs;
5611573Srgrimes
5621573Srgrimes	AT("back", start, stop, startst, stopst);
5631573Srgrimes	sp = start;
5641573Srgrimes
5651573Srgrimes	/* get as far as we can with easy stuff */
5661573Srgrimes	hard = 0;
5671573Srgrimes	for (ss = startst; !hard && ss < stopst; ss++)
5681573Srgrimes		switch (OP(s = m->g->strip[ss])) {
5691573Srgrimes		case OCHAR:
5701573Srgrimes			if (sp == stop || *sp++ != (char)OPND(s))
5711573Srgrimes				return(NULL);
5721573Srgrimes			break;
5731573Srgrimes		case OANY:
5741573Srgrimes			if (sp == stop)
5751573Srgrimes				return(NULL);
5761573Srgrimes			sp++;
5771573Srgrimes			break;
5781573Srgrimes		case OANYOF:
5791573Srgrimes			cs = &m->g->sets[OPND(s)];
5801573Srgrimes			if (sp == stop || !CHIN(cs, *sp++))
5811573Srgrimes				return(NULL);
5821573Srgrimes			break;
5831573Srgrimes		case OBOL:
5841573Srgrimes			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
5851573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
5861573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
5871573Srgrimes				{ /* yes */ }
5881573Srgrimes			else
5891573Srgrimes				return(NULL);
5901573Srgrimes			break;
5911573Srgrimes		case OEOL:
5921573Srgrimes			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
5931573Srgrimes					(sp < m->endp && *sp == '\n' &&
5941573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
5951573Srgrimes				{ /* yes */ }
5961573Srgrimes			else
5971573Srgrimes				return(NULL);
5981573Srgrimes			break;
5991573Srgrimes		case OBOW:
6001573Srgrimes			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
6011573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
6021573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6031573Srgrimes					(sp > m->beginp &&
6041573Srgrimes							!ISWORD(*(sp-1))) ) &&
6051573Srgrimes					(sp < m->endp && ISWORD(*sp)) )
6061573Srgrimes				{ /* yes */ }
6071573Srgrimes			else
6081573Srgrimes				return(NULL);
6091573Srgrimes			break;
6101573Srgrimes		case OEOW:
6111573Srgrimes			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
6121573Srgrimes					(sp < m->endp && *sp == '\n' &&
6131573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6141573Srgrimes					(sp < m->endp && !ISWORD(*sp)) ) &&
6151573Srgrimes					(sp > m->beginp && ISWORD(*(sp-1))) )
6161573Srgrimes				{ /* yes */ }
6171573Srgrimes			else
6181573Srgrimes				return(NULL);
6191573Srgrimes			break;
6201573Srgrimes		case O_QUEST:
6211573Srgrimes			break;
6221573Srgrimes		case OOR1:	/* matches null but needs to skip */
6231573Srgrimes			ss++;
6241573Srgrimes			s = m->g->strip[ss];
6251573Srgrimes			do {
6261573Srgrimes				assert(OP(s) == OOR2);
6271573Srgrimes				ss += OPND(s);
6281573Srgrimes			} while (OP(s = m->g->strip[ss]) != O_CH);
6291573Srgrimes			/* note that the ss++ gets us past the O_CH */
6301573Srgrimes			break;
6311573Srgrimes		default:	/* have to make a choice */
6321573Srgrimes			hard = 1;
6331573Srgrimes			break;
6341573Srgrimes		}
6351573Srgrimes	if (!hard) {		/* that was it! */
6361573Srgrimes		if (sp != stop)
6371573Srgrimes			return(NULL);
6381573Srgrimes		return(sp);
6391573Srgrimes	}
6401573Srgrimes	ss--;			/* adjust for the for's final increment */
6411573Srgrimes
6421573Srgrimes	/* the hard stuff */
6431573Srgrimes	AT("hard", sp, stop, ss, stopst);
6441573Srgrimes	s = m->g->strip[ss];
6451573Srgrimes	switch (OP(s)) {
6461573Srgrimes	case OBACK_:		/* the vilest depths */
6471573Srgrimes		i = OPND(s);
6481573Srgrimes		assert(0 < i && i <= m->g->nsub);
6491573Srgrimes		if (m->pmatch[i].rm_eo == -1)
6501573Srgrimes			return(NULL);
6511573Srgrimes		assert(m->pmatch[i].rm_so != -1);
6521573Srgrimes		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
6531573Srgrimes		assert(stop - m->beginp >= len);
6541573Srgrimes		if (sp > stop - len)
6551573Srgrimes			return(NULL);	/* not enough left to match */
6561573Srgrimes		ssp = m->offp + m->pmatch[i].rm_so;
6571573Srgrimes		if (memcmp(sp, ssp, len) != 0)
6581573Srgrimes			return(NULL);
6591573Srgrimes		while (m->g->strip[ss] != SOP(O_BACK, i))
6601573Srgrimes			ss++;
6611573Srgrimes		return(backref(m, sp+len, stop, ss+1, stopst, lev));
6621573Srgrimes		break;
6631573Srgrimes	case OQUEST_:		/* to null or not */
6641573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
6651573Srgrimes		if (dp != NULL)
6661573Srgrimes			return(dp);	/* not */
6671573Srgrimes		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
6681573Srgrimes		break;
6691573Srgrimes	case OPLUS_:
6701573Srgrimes		assert(m->lastpos != NULL);
6711573Srgrimes		assert(lev+1 <= m->g->nplus);
6721573Srgrimes		m->lastpos[lev+1] = sp;
6731573Srgrimes		return(backref(m, sp, stop, ss+1, stopst, lev+1));
6741573Srgrimes		break;
6751573Srgrimes	case O_PLUS:
6761573Srgrimes		if (sp == m->lastpos[lev])	/* last pass matched null */
6771573Srgrimes			return(backref(m, sp, stop, ss+1, stopst, lev-1));
6781573Srgrimes		/* try another pass */
6791573Srgrimes		m->lastpos[lev] = sp;
6801573Srgrimes		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
6811573Srgrimes		if (dp == NULL)
6821573Srgrimes			return(backref(m, sp, stop, ss+1, stopst, lev-1));
6831573Srgrimes		else
6841573Srgrimes			return(dp);
6851573Srgrimes		break;
6861573Srgrimes	case OCH_:		/* find the right one, if any */
6871573Srgrimes		ssub = ss + 1;
6881573Srgrimes		esub = ss + OPND(s) - 1;
6891573Srgrimes		assert(OP(m->g->strip[esub]) == OOR1);
6901573Srgrimes		for (;;) {	/* find first matching branch */
6911573Srgrimes			dp = backref(m, sp, stop, ssub, esub, lev);
6921573Srgrimes			if (dp != NULL)
6931573Srgrimes				return(dp);
6941573Srgrimes			/* that one missed, try next one */
6951573Srgrimes			if (OP(m->g->strip[esub]) == O_CH)
6961573Srgrimes				return(NULL);	/* there is none */
6971573Srgrimes			esub++;
6981573Srgrimes			assert(OP(m->g->strip[esub]) == OOR2);
6991573Srgrimes			ssub = esub + 1;
7001573Srgrimes			esub += OPND(m->g->strip[esub]);
7011573Srgrimes			if (OP(m->g->strip[esub]) == OOR2)
7021573Srgrimes				esub--;
7031573Srgrimes			else
7041573Srgrimes				assert(OP(m->g->strip[esub]) == O_CH);
7051573Srgrimes		}
7061573Srgrimes		break;
7071573Srgrimes	case OLPAREN:		/* must undo assignment if rest fails */
7081573Srgrimes		i = OPND(s);
7091573Srgrimes		assert(0 < i && i <= m->g->nsub);
7101573Srgrimes		offsave = m->pmatch[i].rm_so;
7111573Srgrimes		m->pmatch[i].rm_so = sp - m->offp;
7121573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
7131573Srgrimes		if (dp != NULL)
7141573Srgrimes			return(dp);
7151573Srgrimes		m->pmatch[i].rm_so = offsave;
7161573Srgrimes		return(NULL);
7171573Srgrimes		break;
7181573Srgrimes	case ORPAREN:		/* must undo assignment if rest fails */
7191573Srgrimes		i = OPND(s);
7201573Srgrimes		assert(0 < i && i <= m->g->nsub);
7211573Srgrimes		offsave = m->pmatch[i].rm_eo;
7221573Srgrimes		m->pmatch[i].rm_eo = sp - m->offp;
7231573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
7241573Srgrimes		if (dp != NULL)
7251573Srgrimes			return(dp);
7261573Srgrimes		m->pmatch[i].rm_eo = offsave;
7271573Srgrimes		return(NULL);
7281573Srgrimes		break;
7291573Srgrimes	default:		/* uh oh */
7301573Srgrimes		assert(nope);
7311573Srgrimes		break;
7321573Srgrimes	}
7331573Srgrimes
7341573Srgrimes	/* "can't happen" */
7351573Srgrimes	assert(nope);
7361573Srgrimes	/* NOTREACHED */
73711664Sphk	return "shut up gcc";
7381573Srgrimes}
7391573Srgrimes
7401573Srgrimes/*
7411573Srgrimes - fast - step through the string at top speed
7421573Srgrimes == static char *fast(register struct match *m, char *start, \
7431573Srgrimes ==	char *stop, sopno startst, sopno stopst);
7441573Srgrimes */
7451573Srgrimesstatic char *			/* where tentative match ended, or NULL */
7461573Srgrimesfast(m, start, stop, startst, stopst)
7471573Srgrimesregister struct match *m;
7481573Srgrimeschar *start;
7491573Srgrimeschar *stop;
7501573Srgrimessopno startst;
7511573Srgrimessopno stopst;
7521573Srgrimes{
7531573Srgrimes	register states st = m->st;
7541573Srgrimes	register states fresh = m->fresh;
7551573Srgrimes	register states tmp = m->tmp;
7561573Srgrimes	register char *p = start;
7571573Srgrimes	register int c = (start == m->beginp) ? OUT : *(start-1);
7581573Srgrimes	register int lastc;	/* previous c */
7591573Srgrimes	register int flagch;
7601573Srgrimes	register int i;
7611573Srgrimes	register char *coldp;	/* last p after which no match was underway */
7621573Srgrimes
7631573Srgrimes	CLEAR(st);
7641573Srgrimes	SET1(st, startst);
7651573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
7661573Srgrimes	ASSIGN(fresh, st);
7671573Srgrimes	SP("start", st, *p);
7681573Srgrimes	coldp = NULL;
7691573Srgrimes	for (;;) {
7701573Srgrimes		/* next character */
7711573Srgrimes		lastc = c;
7721573Srgrimes		c = (p == m->endp) ? OUT : *p;
7731573Srgrimes		if (EQ(st, fresh))
7741573Srgrimes			coldp = p;
7751573Srgrimes
7761573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
7771573Srgrimes		flagch = '\0';
7781573Srgrimes		i = 0;
7791573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
7801573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
7811573Srgrimes			flagch = BOL;
7821573Srgrimes			i = m->g->nbol;
7831573Srgrimes		}
7841573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
7851573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
7861573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
7871573Srgrimes			i += m->g->neol;
7881573Srgrimes		}
7891573Srgrimes		if (i != 0) {
7901573Srgrimes			for (; i > 0; i--)
7911573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
7921573Srgrimes			SP("boleol", st, c);
7931573Srgrimes		}
7941573Srgrimes
7951573Srgrimes		/* how about a word boundary? */
7961573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
7971573Srgrimes					(c != OUT && ISWORD(c)) ) {
7981573Srgrimes			flagch = BOW;
7991573Srgrimes		}
8001573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
8011573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
8021573Srgrimes			flagch = EOW;
8031573Srgrimes		}
8041573Srgrimes		if (flagch == BOW || flagch == EOW) {
8051573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
8061573Srgrimes			SP("boweow", st, c);
8071573Srgrimes		}
8081573Srgrimes
8091573Srgrimes		/* are we done? */
8101573Srgrimes		if (ISSET(st, stopst) || p == stop)
8111573Srgrimes			break;		/* NOTE BREAK OUT */
8121573Srgrimes
8131573Srgrimes		/* no, we must deal with this character */
8141573Srgrimes		ASSIGN(tmp, st);
8151573Srgrimes		ASSIGN(st, fresh);
8161573Srgrimes		assert(c != OUT);
8171573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
8181573Srgrimes		SP("aft", st, c);
8191573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
8201573Srgrimes		p++;
8211573Srgrimes	}
8221573Srgrimes
8231573Srgrimes	assert(coldp != NULL);
8241573Srgrimes	m->coldp = coldp;
8251573Srgrimes	if (ISSET(st, stopst))
8261573Srgrimes		return(p+1);
8271573Srgrimes	else
8281573Srgrimes		return(NULL);
8291573Srgrimes}
8301573Srgrimes
8311573Srgrimes/*
8321573Srgrimes - slow - step through the string more deliberately
8331573Srgrimes == static char *slow(register struct match *m, char *start, \
8341573Srgrimes ==	char *stop, sopno startst, sopno stopst);
8351573Srgrimes */
8361573Srgrimesstatic char *			/* where it ended */
8371573Srgrimesslow(m, start, stop, startst, stopst)
8381573Srgrimesregister struct match *m;
8391573Srgrimeschar *start;
8401573Srgrimeschar *stop;
8411573Srgrimessopno startst;
8421573Srgrimessopno stopst;
8431573Srgrimes{
8441573Srgrimes	register states st = m->st;
8451573Srgrimes	register states empty = m->empty;
8461573Srgrimes	register states tmp = m->tmp;
8471573Srgrimes	register char *p = start;
8481573Srgrimes	register int c = (start == m->beginp) ? OUT : *(start-1);
8491573Srgrimes	register int lastc;	/* previous c */
8501573Srgrimes	register int flagch;
8511573Srgrimes	register int i;
8521573Srgrimes	register char *matchp;	/* last p at which a match ended */
8531573Srgrimes
8541573Srgrimes	AT("slow", start, stop, startst, stopst);
8551573Srgrimes	CLEAR(st);
8561573Srgrimes	SET1(st, startst);
8571573Srgrimes	SP("sstart", st, *p);
8581573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
8591573Srgrimes	matchp = NULL;
8601573Srgrimes	for (;;) {
8611573Srgrimes		/* next character */
8621573Srgrimes		lastc = c;
8631573Srgrimes		c = (p == m->endp) ? OUT : *p;
8641573Srgrimes
8651573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
8661573Srgrimes		flagch = '\0';
8671573Srgrimes		i = 0;
8681573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
8691573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
8701573Srgrimes			flagch = BOL;
8711573Srgrimes			i = m->g->nbol;
8721573Srgrimes		}
8731573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
8741573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
8751573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
8761573Srgrimes			i += m->g->neol;
8771573Srgrimes		}
8781573Srgrimes		if (i != 0) {
8791573Srgrimes			for (; i > 0; i--)
8801573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
8811573Srgrimes			SP("sboleol", st, c);
8821573Srgrimes		}
8831573Srgrimes
8841573Srgrimes		/* how about a word boundary? */
8851573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
8861573Srgrimes					(c != OUT && ISWORD(c)) ) {
8871573Srgrimes			flagch = BOW;
8881573Srgrimes		}
8891573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
8901573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
8911573Srgrimes			flagch = EOW;
8921573Srgrimes		}
8931573Srgrimes		if (flagch == BOW || flagch == EOW) {
8941573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
8951573Srgrimes			SP("sboweow", st, c);
8961573Srgrimes		}
8971573Srgrimes
8981573Srgrimes		/* are we done? */
8991573Srgrimes		if (ISSET(st, stopst))
9001573Srgrimes			matchp = p;
9011573Srgrimes		if (EQ(st, empty) || p == stop)
9021573Srgrimes			break;		/* NOTE BREAK OUT */
9031573Srgrimes
9041573Srgrimes		/* no, we must deal with this character */
9051573Srgrimes		ASSIGN(tmp, st);
9061573Srgrimes		ASSIGN(st, empty);
9071573Srgrimes		assert(c != OUT);
9081573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
9091573Srgrimes		SP("saft", st, c);
9101573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
9111573Srgrimes		p++;
9121573Srgrimes	}
9131573Srgrimes
9141573Srgrimes	return(matchp);
9151573Srgrimes}
9161573Srgrimes
9171573Srgrimes
9181573Srgrimes/*
9191573Srgrimes - step - map set of states reachable before char to set reachable after
9201573Srgrimes == static states step(register struct re_guts *g, sopno start, sopno stop, \
9211573Srgrimes ==	register states bef, int ch, register states aft);
9221573Srgrimes == #define	BOL	(OUT+1)
9231573Srgrimes == #define	EOL	(BOL+1)
9241573Srgrimes == #define	BOLEOL	(BOL+2)
9251573Srgrimes == #define	NOTHING	(BOL+3)
9261573Srgrimes == #define	BOW	(BOL+4)
9271573Srgrimes == #define	EOW	(BOL+5)
9281573Srgrimes == #define	CODEMAX	(BOL+5)		// highest code used
9291573Srgrimes == #define	NONCHAR(c)	((c) > CHAR_MAX)
9301573Srgrimes == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
9311573Srgrimes */
9321573Srgrimesstatic states
9331573Srgrimesstep(g, start, stop, bef, ch, aft)
9341573Srgrimesregister struct re_guts *g;
9351573Srgrimessopno start;			/* start state within strip */
9361573Srgrimessopno stop;			/* state after stop state within strip */
9371573Srgrimesregister states bef;		/* states reachable before */
9381573Srgrimesint ch;				/* character or NONCHAR code */
9391573Srgrimesregister states aft;		/* states already known reachable after */
9401573Srgrimes{
9411573Srgrimes	register cset *cs;
9421573Srgrimes	register sop s;
9431573Srgrimes	register sopno pc;
9441573Srgrimes	register onestate here;		/* note, macros know this name */
9451573Srgrimes	register sopno look;
9461573Srgrimes	register int i;
9471573Srgrimes
9481573Srgrimes	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
9491573Srgrimes		s = g->strip[pc];
9501573Srgrimes		switch (OP(s)) {
9511573Srgrimes		case OEND:
9521573Srgrimes			assert(pc == stop-1);
9531573Srgrimes			break;
9541573Srgrimes		case OCHAR:
9551573Srgrimes			/* only characters can match */
9561573Srgrimes			assert(!NONCHAR(ch) || ch != (char)OPND(s));
9571573Srgrimes			if (ch == (char)OPND(s))
9581573Srgrimes				FWD(aft, bef, 1);
9591573Srgrimes			break;
9601573Srgrimes		case OBOL:
9611573Srgrimes			if (ch == BOL || ch == BOLEOL)
9621573Srgrimes				FWD(aft, bef, 1);
9631573Srgrimes			break;
9641573Srgrimes		case OEOL:
9651573Srgrimes			if (ch == EOL || ch == BOLEOL)
9661573Srgrimes				FWD(aft, bef, 1);
9671573Srgrimes			break;
9681573Srgrimes		case OBOW:
9691573Srgrimes			if (ch == BOW)
9701573Srgrimes				FWD(aft, bef, 1);
9711573Srgrimes			break;
9721573Srgrimes		case OEOW:
9731573Srgrimes			if (ch == EOW)
9741573Srgrimes				FWD(aft, bef, 1);
9751573Srgrimes			break;
9761573Srgrimes		case OANY:
9771573Srgrimes			if (!NONCHAR(ch))
9781573Srgrimes				FWD(aft, bef, 1);
9791573Srgrimes			break;
9801573Srgrimes		case OANYOF:
9811573Srgrimes			cs = &g->sets[OPND(s)];
9821573Srgrimes			if (!NONCHAR(ch) && CHIN(cs, ch))
9831573Srgrimes				FWD(aft, bef, 1);
9841573Srgrimes			break;
9851573Srgrimes		case OBACK_:		/* ignored here */
9861573Srgrimes		case O_BACK:
9871573Srgrimes			FWD(aft, aft, 1);
9881573Srgrimes			break;
9891573Srgrimes		case OPLUS_:		/* forward, this is just an empty */
9901573Srgrimes			FWD(aft, aft, 1);
9911573Srgrimes			break;
9921573Srgrimes		case O_PLUS:		/* both forward and back */
9931573Srgrimes			FWD(aft, aft, 1);
9941573Srgrimes			i = ISSETBACK(aft, OPND(s));
9951573Srgrimes			BACK(aft, aft, OPND(s));
9961573Srgrimes			if (!i && ISSETBACK(aft, OPND(s))) {
9971573Srgrimes				/* oho, must reconsider loop body */
9981573Srgrimes				pc -= OPND(s) + 1;
9991573Srgrimes				INIT(here, pc);
10001573Srgrimes			}
10011573Srgrimes			break;
10021573Srgrimes		case OQUEST_:		/* two branches, both forward */
10031573Srgrimes			FWD(aft, aft, 1);
10041573Srgrimes			FWD(aft, aft, OPND(s));
10051573Srgrimes			break;
10061573Srgrimes		case O_QUEST:		/* just an empty */
10071573Srgrimes			FWD(aft, aft, 1);
10081573Srgrimes			break;
10091573Srgrimes		case OLPAREN:		/* not significant here */
10101573Srgrimes		case ORPAREN:
10111573Srgrimes			FWD(aft, aft, 1);
10121573Srgrimes			break;
10131573Srgrimes		case OCH_:		/* mark the first two branches */
10141573Srgrimes			FWD(aft, aft, 1);
10151573Srgrimes			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10161573Srgrimes			FWD(aft, aft, OPND(s));
10171573Srgrimes			break;
10181573Srgrimes		case OOR1:		/* done a branch, find the O_CH */
10191573Srgrimes			if (ISSTATEIN(aft, here)) {
10201573Srgrimes				for (look = 1;
10211573Srgrimes						OP(s = g->strip[pc+look]) != O_CH;
10221573Srgrimes						look += OPND(s))
10231573Srgrimes					assert(OP(s) == OOR2);
10241573Srgrimes				FWD(aft, aft, look);
10251573Srgrimes			}
10261573Srgrimes			break;
10271573Srgrimes		case OOR2:		/* propagate OCH_'s marking */
10281573Srgrimes			FWD(aft, aft, 1);
10291573Srgrimes			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
10301573Srgrimes				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10311573Srgrimes				FWD(aft, aft, OPND(s));
10321573Srgrimes			}
10331573Srgrimes			break;
10341573Srgrimes		case O_CH:		/* just empty */
10351573Srgrimes			FWD(aft, aft, 1);
10361573Srgrimes			break;
10371573Srgrimes		default:		/* ooooops... */
10381573Srgrimes			assert(nope);
10391573Srgrimes			break;
10401573Srgrimes		}
10411573Srgrimes	}
10421573Srgrimes
10431573Srgrimes	return(aft);
10441573Srgrimes}
10451573Srgrimes
10461573Srgrimes#ifdef REDEBUG
10471573Srgrimes/*
10481573Srgrimes - print - print a set of states
10491573Srgrimes == #ifdef REDEBUG
10501573Srgrimes == static void print(struct match *m, char *caption, states st, \
10511573Srgrimes ==	int ch, FILE *d);
10521573Srgrimes == #endif
10531573Srgrimes */
10541573Srgrimesstatic void
10551573Srgrimesprint(m, caption, st, ch, d)
10561573Srgrimesstruct match *m;
10571573Srgrimeschar *caption;
10581573Srgrimesstates st;
10591573Srgrimesint ch;
10601573SrgrimesFILE *d;
10611573Srgrimes{
10621573Srgrimes	register struct re_guts *g = m->g;
10631573Srgrimes	register int i;
10641573Srgrimes	register int first = 1;
10651573Srgrimes
10661573Srgrimes	if (!(m->eflags&REG_TRACE))
10671573Srgrimes		return;
10681573Srgrimes
10691573Srgrimes	fprintf(d, "%s", caption);
10701573Srgrimes	if (ch != '\0')
10711573Srgrimes		fprintf(d, " %s", pchar(ch));
10721573Srgrimes	for (i = 0; i < g->nstates; i++)
10731573Srgrimes		if (ISSET(st, i)) {
10741573Srgrimes			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
10751573Srgrimes			first = 0;
10761573Srgrimes		}
10771573Srgrimes	fprintf(d, "\n");
10781573Srgrimes}
10791573Srgrimes
10808870Srgrimes/*
10811573Srgrimes - at - print current situation
10821573Srgrimes == #ifdef REDEBUG
10831573Srgrimes == static void at(struct match *m, char *title, char *start, char *stop, \
10841573Srgrimes ==						sopno startst, sopno stopst);
10851573Srgrimes == #endif
10861573Srgrimes */
10871573Srgrimesstatic void
10881573Srgrimesat(m, title, start, stop, startst, stopst)
10891573Srgrimesstruct match *m;
10901573Srgrimeschar *title;
10911573Srgrimeschar *start;
10921573Srgrimeschar *stop;
10931573Srgrimessopno startst;
10941573Srgrimessopno stopst;
10951573Srgrimes{
10961573Srgrimes	if (!(m->eflags&REG_TRACE))
10971573Srgrimes		return;
10981573Srgrimes
10991573Srgrimes	printf("%s %s-", title, pchar(*start));
11001573Srgrimes	printf("%s ", pchar(*stop));
11011573Srgrimes	printf("%ld-%ld\n", (long)startst, (long)stopst);
11021573Srgrimes}
11031573Srgrimes
11041573Srgrimes#ifndef PCHARDONE
11051573Srgrimes#define	PCHARDONE	/* never again */
11061573Srgrimes/*
11071573Srgrimes - pchar - make a character printable
11081573Srgrimes == #ifdef REDEBUG
11091573Srgrimes == static char *pchar(int ch);
11101573Srgrimes == #endif
11111573Srgrimes *
11121573Srgrimes * Is this identical to regchar() over in debug.c?  Well, yes.  But a
11131573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to
11141573Srgrimes * a matching debug.o, and this is convenient.  It all disappears in
11151573Srgrimes * the non-debug compilation anyway, so it doesn't matter much.
11161573Srgrimes */
11171573Srgrimesstatic char *			/* -> representation */
11181573Srgrimespchar(ch)
11191573Srgrimesint ch;
11201573Srgrimes{
11211573Srgrimes	static char pbuf[10];
11221573Srgrimes
112317508Sache	if (isprint((uch)ch) || ch == ' ')
11241573Srgrimes		sprintf(pbuf, "%c", ch);
11251573Srgrimes	else
11261573Srgrimes		sprintf(pbuf, "\\%o", ch);
11271573Srgrimes	return(pbuf);
11281573Srgrimes}
11291573Srgrimes#endif
11301573Srgrimes#endif
11311573Srgrimes
11321573Srgrimes#undef	matcher
11331573Srgrimes#undef	fast
11341573Srgrimes#undef	slow
11351573Srgrimes#undef	dissect
11361573Srgrimes#undef	backref
11371573Srgrimes#undef	step
11381573Srgrimes#undef	print
11391573Srgrimes#undef	at
11401573Srgrimes#undef	match
1141