engine.c revision 137959
11573Srgrimes/*-
21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
31573Srgrimes * Copyright (c) 1992, 1993, 1994
41573Srgrimes *	The Regents of the University of California.  All rights reserved.
51573Srgrimes *
61573Srgrimes * This code is derived from software contributed to Berkeley by
71573Srgrimes * Henry Spencer.
81573Srgrimes *
91573Srgrimes * Redistribution and use in source and binary forms, with or without
101573Srgrimes * modification, are permitted provided that the following conditions
111573Srgrimes * are met:
121573Srgrimes * 1. Redistributions of source code must retain the above copyright
131573Srgrimes *    notice, this list of conditions and the following disclaimer.
141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151573Srgrimes *    notice, this list of conditions and the following disclaimer in the
161573Srgrimes *    documentation and/or other materials provided with the distribution.
171573Srgrimes * 3. All advertising materials mentioning features or use of this software
181573Srgrimes *    must display the following acknowledgement:
191573Srgrimes *	This product includes software developed by the University of
201573Srgrimes *	California, Berkeley and its contributors.
211573Srgrimes * 4. Neither the name of the University nor the names of its contributors
221573Srgrimes *    may be used to endorse or promote products derived from this software
231573Srgrimes *    without specific prior written permission.
241573Srgrimes *
251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351573Srgrimes * SUCH DAMAGE.
361573Srgrimes *
371573Srgrimes *	@(#)engine.c	8.5 (Berkeley) 3/20/94
381573Srgrimes */
391573Srgrimes
4092986Sobrien#include <sys/cdefs.h>
4192986Sobrien__FBSDID("$FreeBSD: head/lib/libc/regex/engine.c 137959 2004-11-21 03:14:32Z tjr $");
4292986Sobrien
431573Srgrimes/*
441573Srgrimes * The matching engine and friends.  This file is #included by regexec.c
451573Srgrimes * after suitable #defines of a variety of macros used herein, so that
461573Srgrimes * different state representations can be used without duplicating masses
471573Srgrimes * of code.
481573Srgrimes */
491573Srgrimes
501573Srgrimes#ifdef SNAMES
511573Srgrimes#define	matcher	smatcher
521573Srgrimes#define	fast	sfast
531573Srgrimes#define	slow	sslow
541573Srgrimes#define	dissect	sdissect
551573Srgrimes#define	backref	sbackref
561573Srgrimes#define	step	sstep
571573Srgrimes#define	print	sprint
581573Srgrimes#define	at	sat
591573Srgrimes#define	match	smat
601573Srgrimes#endif
611573Srgrimes#ifdef LNAMES
621573Srgrimes#define	matcher	lmatcher
631573Srgrimes#define	fast	lfast
641573Srgrimes#define	slow	lslow
651573Srgrimes#define	dissect	ldissect
661573Srgrimes#define	backref	lbackref
671573Srgrimes#define	step	lstep
681573Srgrimes#define	print	lprint
691573Srgrimes#define	at	lat
701573Srgrimes#define	match	lmat
711573Srgrimes#endif
72132019Stjr#ifdef MNAMES
73132019Stjr#define	matcher	mmatcher
74132019Stjr#define	fast	mfast
75132019Stjr#define	slow	mslow
76132019Stjr#define	dissect	mdissect
77132019Stjr#define	backref	mbackref
78132019Stjr#define	step	mstep
79132019Stjr#define	print	mprint
80132019Stjr#define	at	mat
81132019Stjr#define	match	mmat
82132019Stjr#endif
831573Srgrimes
841573Srgrimes/* another structure passed up and down to avoid zillions of parameters */
851573Srgrimesstruct match {
861573Srgrimes	struct re_guts *g;
871573Srgrimes	int eflags;
881573Srgrimes	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
891573Srgrimes	char *offp;		/* offsets work from here */
901573Srgrimes	char *beginp;		/* start of string -- virtual NUL precedes */
911573Srgrimes	char *endp;		/* end of string -- virtual NUL here */
921573Srgrimes	char *coldp;		/* can be no match starting before here */
931573Srgrimes	char **lastpos;		/* [nplus+1] */
941573Srgrimes	STATEVARS;
951573Srgrimes	states st;		/* current states */
961573Srgrimes	states fresh;		/* states for a fresh start */
971573Srgrimes	states tmp;		/* temporary */
981573Srgrimes	states empty;		/* empty set of states */
99132019Stjr	mbstate_t mbs;		/* multibyte conversion state */
1001573Srgrimes};
1011573Srgrimes
1021573Srgrimes/* ========= begin header generated by ./mkh ========= */
1031573Srgrimes#ifdef __cplusplus
1041573Srgrimesextern "C" {
1051573Srgrimes#endif
1061573Srgrimes
1071573Srgrimes/* === engine.c === */
10892905Sobrienstatic int matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
10992905Sobrienstatic char *dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
11092905Sobrienstatic char *backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
11192905Sobrienstatic char *fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
11292905Sobrienstatic char *slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
113132019Stjrstatic states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
114132019Stjr#define	BOL	(OUT-1)
115132019Stjr#define	EOL	(BOL-1)
116132019Stjr#define	BOLEOL	(BOL-2)
117132019Stjr#define	NOTHING	(BOL-3)
118132019Stjr#define	BOW	(BOL-4)
119132019Stjr#define	EOW	(BOL-5)
120132019Stjr#define	BADCHAR	(BOL-6)
121132019Stjr#define	NONCHAR(c)	((c) <= OUT)
1221573Srgrimes#ifdef REDEBUG
12392905Sobrienstatic void print(struct match *m, char *caption, states st, int ch, FILE *d);
1241573Srgrimes#endif
1251573Srgrimes#ifdef REDEBUG
12692905Sobrienstatic void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
1271573Srgrimes#endif
1281573Srgrimes#ifdef REDEBUG
12992905Sobrienstatic char *pchar(int ch);
1301573Srgrimes#endif
1311573Srgrimes
1321573Srgrimes#ifdef __cplusplus
1331573Srgrimes}
1341573Srgrimes#endif
1351573Srgrimes/* ========= end header generated by ./mkh ========= */
1361573Srgrimes
1371573Srgrimes#ifdef REDEBUG
1381573Srgrimes#define	SP(t, s, c)	print(m, t, s, c, stdout)
1391573Srgrimes#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
1401573Srgrimes#define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
1411573Srgrimes#else
1421573Srgrimes#define	SP(t, s, c)	/* nothing */
1431573Srgrimes#define	AT(t, p1, p2, s1, s2)	/* nothing */
1441573Srgrimes#define	NOTE(s)	/* nothing */
1451573Srgrimes#endif
1461573Srgrimes
1471573Srgrimes/*
1481573Srgrimes - matcher - the actual matching engine
14992889Sobrien == static int matcher(struct re_guts *g, char *string, \
1501573Srgrimes ==	size_t nmatch, regmatch_t pmatch[], int eflags);
1511573Srgrimes */
1521573Srgrimesstatic int			/* 0 success, REG_NOMATCH failure */
1531573Srgrimesmatcher(g, string, nmatch, pmatch, eflags)
15492889Sobrienstruct re_guts *g;
1551573Srgrimeschar *string;
1561573Srgrimessize_t nmatch;
1571573Srgrimesregmatch_t pmatch[];
1581573Srgrimesint eflags;
1591573Srgrimes{
16092889Sobrien	char *endp;
16192889Sobrien	int i;
1621573Srgrimes	struct match mv;
16392889Sobrien	struct match *m = &mv;
16492889Sobrien	char *dp;
16592889Sobrien	const sopno gf = g->firststate+1;	/* +1 for OEND */
16692889Sobrien	const sopno gl = g->laststate;
1671573Srgrimes	char *start;
1681573Srgrimes	char *stop;
16962232Sdcs	/* Boyer-Moore algorithms variables */
17092889Sobrien	char *pp;
17162232Sdcs	int cj, mj;
17292889Sobrien	char *mustfirst;
17392889Sobrien	char *mustlast;
17492889Sobrien	int *matchjump;
17592889Sobrien	int *charjump;
1761573Srgrimes
1771573Srgrimes	/* simplify the situation where possible */
1781573Srgrimes	if (g->cflags&REG_NOSUB)
1791573Srgrimes		nmatch = 0;
1801573Srgrimes	if (eflags&REG_STARTEND) {
1811573Srgrimes		start = string + pmatch[0].rm_so;
1821573Srgrimes		stop = string + pmatch[0].rm_eo;
1831573Srgrimes	} else {
1841573Srgrimes		start = string;
1851573Srgrimes		stop = start + strlen(start);
1861573Srgrimes	}
1871573Srgrimes	if (stop < start)
1881573Srgrimes		return(REG_INVARG);
1891573Srgrimes
1901573Srgrimes	/* prescreening; this does wonders for this rather slow code */
1911573Srgrimes	if (g->must != NULL) {
19262391Sdcs		if (g->charjump != NULL && g->matchjump != NULL) {
19362232Sdcs			mustfirst = g->must;
19462232Sdcs			mustlast = g->must + g->mlen - 1;
19562232Sdcs			charjump = g->charjump;
19662232Sdcs			matchjump = g->matchjump;
19762232Sdcs			pp = mustlast;
19862754Sdcs			for (dp = start+g->mlen-1; dp < stop;) {
19962232Sdcs				/* Fast skip non-matches */
200111010Snectar				while (dp < stop && charjump[(int)*dp])
201111010Snectar					dp += charjump[(int)*dp];
20262232Sdcs
20362754Sdcs				if (dp >= stop)
20462232Sdcs					break;
20562232Sdcs
20662232Sdcs				/* Greedy matcher */
20762232Sdcs				/* We depend on not being used for
20862232Sdcs				 * for strings of length 1
20962232Sdcs				 */
21062754Sdcs				while (*--dp == *--pp && pp != mustfirst);
21162232Sdcs
21262754Sdcs				if (*dp == *pp)
21362232Sdcs					break;
21462232Sdcs
21562232Sdcs				/* Jump to next possible match */
21662232Sdcs				mj = matchjump[pp - mustfirst];
217111010Snectar				cj = charjump[(int)*dp];
21862754Sdcs				dp += (cj < mj ? mj : cj);
21962754Sdcs				pp = mustlast;
22062232Sdcs			}
22162391Sdcs			if (pp != mustfirst)
22262232Sdcs				return(REG_NOMATCH);
22362232Sdcs		} else {
22462232Sdcs			for (dp = start; dp < stop; dp++)
22562232Sdcs				if (*dp == g->must[0] &&
22662232Sdcs				    stop - dp >= g->mlen &&
22762232Sdcs				    memcmp(dp, g->must, (size_t)g->mlen) == 0)
22862232Sdcs					break;
22962232Sdcs			if (dp == stop)		/* we didn't find g->must */
23062232Sdcs				return(REG_NOMATCH);
23162232Sdcs		}
2321573Srgrimes	}
2331573Srgrimes
2341573Srgrimes	/* match struct setup */
2351573Srgrimes	m->g = g;
2361573Srgrimes	m->eflags = eflags;
2371573Srgrimes	m->pmatch = NULL;
2381573Srgrimes	m->lastpos = NULL;
2391573Srgrimes	m->offp = string;
2401573Srgrimes	m->beginp = start;
2411573Srgrimes	m->endp = stop;
2421573Srgrimes	STATESETUP(m, 4);
2431573Srgrimes	SETUP(m->st);
2441573Srgrimes	SETUP(m->fresh);
2451573Srgrimes	SETUP(m->tmp);
2461573Srgrimes	SETUP(m->empty);
2471573Srgrimes	CLEAR(m->empty);
248132019Stjr	ZAPSTATE(&m->mbs);
2491573Srgrimes
25062391Sdcs	/* Adjust start according to moffset, to speed things up */
25162391Sdcs	if (g->moffset > -1)
25262854Sdcs		start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
25362391Sdcs
2541573Srgrimes	/* this loop does only one repetition except for backrefs */
2551573Srgrimes	for (;;) {
2561573Srgrimes		endp = fast(m, start, stop, gf, gl);
2571573Srgrimes		if (endp == NULL) {		/* a miss */
2581573Srgrimes			STATETEARDOWN(m);
2591573Srgrimes			return(REG_NOMATCH);
2601573Srgrimes		}
2611573Srgrimes		if (nmatch == 0 && !g->backrefs)
2621573Srgrimes			break;		/* no further info needed */
2631573Srgrimes
2641573Srgrimes		/* where? */
2651573Srgrimes		assert(m->coldp != NULL);
2661573Srgrimes		for (;;) {
2671573Srgrimes			NOTE("finding start");
2681573Srgrimes			endp = slow(m, m->coldp, stop, gf, gl);
2691573Srgrimes			if (endp != NULL)
2701573Srgrimes				break;
2711573Srgrimes			assert(m->coldp < m->endp);
272132019Stjr			m->coldp += XMBRTOWC(NULL, m->coldp,
273132019Stjr			    m->endp - m->coldp, &m->mbs, 0);
2741573Srgrimes		}
2751573Srgrimes		if (nmatch == 1 && !g->backrefs)
2761573Srgrimes			break;		/* no further info needed */
2771573Srgrimes
2781573Srgrimes		/* oh my, he wants the subexpressions... */
2791573Srgrimes		if (m->pmatch == NULL)
2801573Srgrimes			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
2811573Srgrimes							sizeof(regmatch_t));
2821573Srgrimes		if (m->pmatch == NULL) {
2831573Srgrimes			STATETEARDOWN(m);
2841573Srgrimes			return(REG_ESPACE);
2851573Srgrimes		}
2861573Srgrimes		for (i = 1; i <= m->g->nsub; i++)
2871573Srgrimes			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
2881573Srgrimes		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
2891573Srgrimes			NOTE("dissecting");
2901573Srgrimes			dp = dissect(m, m->coldp, endp, gf, gl);
2911573Srgrimes		} else {
2921573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL)
2931573Srgrimes				m->lastpos = (char **)malloc((g->nplus+1) *
2941573Srgrimes							sizeof(char *));
2951573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL) {
2961573Srgrimes				free(m->pmatch);
2971573Srgrimes				STATETEARDOWN(m);
2981573Srgrimes				return(REG_ESPACE);
2991573Srgrimes			}
3001573Srgrimes			NOTE("backref dissect");
3011573Srgrimes			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
3021573Srgrimes		}
3031573Srgrimes		if (dp != NULL)
3041573Srgrimes			break;
3051573Srgrimes
3061573Srgrimes		/* uh-oh... we couldn't find a subexpression-level match */
3071573Srgrimes		assert(g->backrefs);	/* must be back references doing it */
3081573Srgrimes		assert(g->nplus == 0 || m->lastpos != NULL);
3091573Srgrimes		for (;;) {
3101573Srgrimes			if (dp != NULL || endp <= m->coldp)
3111573Srgrimes				break;		/* defeat */
3121573Srgrimes			NOTE("backoff");
3131573Srgrimes			endp = slow(m, m->coldp, endp-1, gf, gl);
3141573Srgrimes			if (endp == NULL)
3151573Srgrimes				break;		/* defeat */
3161573Srgrimes			/* try it on a shorter possibility */
3171573Srgrimes#ifndef NDEBUG
3181573Srgrimes			for (i = 1; i <= m->g->nsub; i++) {
3191573Srgrimes				assert(m->pmatch[i].rm_so == -1);
3201573Srgrimes				assert(m->pmatch[i].rm_eo == -1);
3211573Srgrimes			}
3221573Srgrimes#endif
3231573Srgrimes			NOTE("backoff dissect");
3241573Srgrimes			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
3251573Srgrimes		}
3261573Srgrimes		assert(dp == NULL || dp == endp);
3271573Srgrimes		if (dp != NULL)		/* found a shorter one */
3281573Srgrimes			break;
3291573Srgrimes
3301573Srgrimes		/* despite initial appearances, there is no match here */
3311573Srgrimes		NOTE("false alarm");
332132019Stjr		/* recycle starting later */
333132019Stjr		start = m->coldp + XMBRTOWC(NULL, m->coldp,
334137959Stjr		    stop - m->coldp, &m->mbs, 0);
3351573Srgrimes		assert(start <= stop);
3361573Srgrimes	}
3371573Srgrimes
3381573Srgrimes	/* fill in the details if requested */
3391573Srgrimes	if (nmatch > 0) {
3401573Srgrimes		pmatch[0].rm_so = m->coldp - m->offp;
3411573Srgrimes		pmatch[0].rm_eo = endp - m->offp;
3421573Srgrimes	}
3431573Srgrimes	if (nmatch > 1) {
3441573Srgrimes		assert(m->pmatch != NULL);
3451573Srgrimes		for (i = 1; i < nmatch; i++)
3461573Srgrimes			if (i <= m->g->nsub)
3471573Srgrimes				pmatch[i] = m->pmatch[i];
3481573Srgrimes			else {
3491573Srgrimes				pmatch[i].rm_so = -1;
3501573Srgrimes				pmatch[i].rm_eo = -1;
3511573Srgrimes			}
3521573Srgrimes	}
3531573Srgrimes
3541573Srgrimes	if (m->pmatch != NULL)
3551573Srgrimes		free((char *)m->pmatch);
3561573Srgrimes	if (m->lastpos != NULL)
3571573Srgrimes		free((char *)m->lastpos);
3581573Srgrimes	STATETEARDOWN(m);
3591573Srgrimes	return(0);
3601573Srgrimes}
3611573Srgrimes
3621573Srgrimes/*
3631573Srgrimes - dissect - figure out what matched what, no back references
36492889Sobrien == static char *dissect(struct match *m, char *start, \
3651573Srgrimes ==	char *stop, sopno startst, sopno stopst);
3661573Srgrimes */
3671573Srgrimesstatic char *			/* == stop (success) always */
3681573Srgrimesdissect(m, start, stop, startst, stopst)
36992889Sobrienstruct match *m;
3701573Srgrimeschar *start;
3711573Srgrimeschar *stop;
3721573Srgrimessopno startst;
3731573Srgrimessopno stopst;
3741573Srgrimes{
37592889Sobrien	int i;
37692889Sobrien	sopno ss;		/* start sop of current subRE */
37792889Sobrien	sopno es;		/* end sop of current subRE */
37892889Sobrien	char *sp;		/* start of string matched by it */
37992889Sobrien	char *stp;		/* string matched by it cannot pass here */
38092889Sobrien	char *rest;		/* start of rest of string */
38192889Sobrien	char *tail;		/* string unmatched by rest of RE */
38292889Sobrien	sopno ssub;		/* start sop of subsubRE */
38392889Sobrien	sopno esub;		/* end sop of subsubRE */
38492889Sobrien	char *ssp;		/* start of string matched by subsubRE */
38592889Sobrien	char *sep;		/* end of string matched by subsubRE */
38692889Sobrien	char *oldssp;		/* previous ssp */
38792889Sobrien	char *dp;
3881573Srgrimes
3891573Srgrimes	AT("diss", start, stop, startst, stopst);
3901573Srgrimes	sp = start;
3911573Srgrimes	for (ss = startst; ss < stopst; ss = es) {
3921573Srgrimes		/* identify end of subRE */
3931573Srgrimes		es = ss;
3941573Srgrimes		switch (OP(m->g->strip[es])) {
3951573Srgrimes		case OPLUS_:
3961573Srgrimes		case OQUEST_:
3971573Srgrimes			es += OPND(m->g->strip[es]);
3981573Srgrimes			break;
3991573Srgrimes		case OCH_:
4001573Srgrimes			while (OP(m->g->strip[es]) != O_CH)
4011573Srgrimes				es += OPND(m->g->strip[es]);
4021573Srgrimes			break;
4031573Srgrimes		}
4041573Srgrimes		es++;
4051573Srgrimes
4061573Srgrimes		/* figure out what it matched */
4071573Srgrimes		switch (OP(m->g->strip[ss])) {
4081573Srgrimes		case OEND:
4091573Srgrimes			assert(nope);
4101573Srgrimes			break;
4111573Srgrimes		case OCHAR:
412132019Stjr			sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
4131573Srgrimes			break;
4141573Srgrimes		case OBOL:
4151573Srgrimes		case OEOL:
4161573Srgrimes		case OBOW:
4171573Srgrimes		case OEOW:
4181573Srgrimes			break;
4191573Srgrimes		case OANY:
4201573Srgrimes		case OANYOF:
421132019Stjr			sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
4221573Srgrimes			break;
4231573Srgrimes		case OBACK_:
4241573Srgrimes		case O_BACK:
4251573Srgrimes			assert(nope);
4261573Srgrimes			break;
4271573Srgrimes		/* cases where length of match is hard to find */
4281573Srgrimes		case OQUEST_:
4291573Srgrimes			stp = stop;
4301573Srgrimes			for (;;) {
4311573Srgrimes				/* how long could this one be? */
4321573Srgrimes				rest = slow(m, sp, stp, ss, es);
4331573Srgrimes				assert(rest != NULL);	/* it did match */
4341573Srgrimes				/* could the rest match the rest? */
4351573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4361573Srgrimes				if (tail == stop)
4371573Srgrimes					break;		/* yes! */
4381573Srgrimes				/* no -- try a shorter match for this one */
4391573Srgrimes				stp = rest - 1;
4401573Srgrimes				assert(stp >= sp);	/* it did work */
4411573Srgrimes			}
4421573Srgrimes			ssub = ss + 1;
4431573Srgrimes			esub = es - 1;
4441573Srgrimes			/* did innards match? */
4451573Srgrimes			if (slow(m, sp, rest, ssub, esub) != NULL) {
4461573Srgrimes				dp = dissect(m, sp, rest, ssub, esub);
4471573Srgrimes				assert(dp == rest);
4481573Srgrimes			} else		/* no */
4491573Srgrimes				assert(sp == rest);
4501573Srgrimes			sp = rest;
4511573Srgrimes			break;
4521573Srgrimes		case OPLUS_:
4531573Srgrimes			stp = stop;
4541573Srgrimes			for (;;) {
4551573Srgrimes				/* how long could this one be? */
4561573Srgrimes				rest = slow(m, sp, stp, ss, es);
4571573Srgrimes				assert(rest != NULL);	/* it did match */
4581573Srgrimes				/* could the rest match the rest? */
4591573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4601573Srgrimes				if (tail == stop)
4611573Srgrimes					break;		/* yes! */
4621573Srgrimes				/* no -- try a shorter match for this one */
4631573Srgrimes				stp = rest - 1;
4641573Srgrimes				assert(stp >= sp);	/* it did work */
4651573Srgrimes			}
4661573Srgrimes			ssub = ss + 1;
4671573Srgrimes			esub = es - 1;
4681573Srgrimes			ssp = sp;
4691573Srgrimes			oldssp = ssp;
4701573Srgrimes			for (;;) {	/* find last match of innards */
4711573Srgrimes				sep = slow(m, ssp, rest, ssub, esub);
4721573Srgrimes				if (sep == NULL || sep == ssp)
4731573Srgrimes					break;	/* failed or matched null */
4741573Srgrimes				oldssp = ssp;	/* on to next try */
4751573Srgrimes				ssp = sep;
4761573Srgrimes			}
4771573Srgrimes			if (sep == NULL) {
4781573Srgrimes				/* last successful match */
4791573Srgrimes				sep = ssp;
4801573Srgrimes				ssp = oldssp;
4811573Srgrimes			}
4821573Srgrimes			assert(sep == rest);	/* must exhaust substring */
4831573Srgrimes			assert(slow(m, ssp, sep, ssub, esub) == rest);
4841573Srgrimes			dp = dissect(m, ssp, sep, ssub, esub);
4851573Srgrimes			assert(dp == sep);
4861573Srgrimes			sp = rest;
4871573Srgrimes			break;
4881573Srgrimes		case OCH_:
4891573Srgrimes			stp = stop;
4901573Srgrimes			for (;;) {
4911573Srgrimes				/* how long could this one be? */
4921573Srgrimes				rest = slow(m, sp, stp, ss, es);
4931573Srgrimes				assert(rest != NULL);	/* it did match */
4941573Srgrimes				/* could the rest match the rest? */
4951573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4961573Srgrimes				if (tail == stop)
4971573Srgrimes					break;		/* yes! */
4981573Srgrimes				/* no -- try a shorter match for this one */
4991573Srgrimes				stp = rest - 1;
5001573Srgrimes				assert(stp >= sp);	/* it did work */
5011573Srgrimes			}
5021573Srgrimes			ssub = ss + 1;
5031573Srgrimes			esub = ss + OPND(m->g->strip[ss]) - 1;
5041573Srgrimes			assert(OP(m->g->strip[esub]) == OOR1);
5051573Srgrimes			for (;;) {	/* find first matching branch */
5061573Srgrimes				if (slow(m, sp, rest, ssub, esub) == rest)
5071573Srgrimes					break;	/* it matched all of it */
5081573Srgrimes				/* that one missed, try next one */
5091573Srgrimes				assert(OP(m->g->strip[esub]) == OOR1);
5101573Srgrimes				esub++;
5111573Srgrimes				assert(OP(m->g->strip[esub]) == OOR2);
5121573Srgrimes				ssub = esub + 1;
5131573Srgrimes				esub += OPND(m->g->strip[esub]);
5141573Srgrimes				if (OP(m->g->strip[esub]) == OOR2)
5151573Srgrimes					esub--;
5161573Srgrimes				else
5171573Srgrimes					assert(OP(m->g->strip[esub]) == O_CH);
5181573Srgrimes			}
5191573Srgrimes			dp = dissect(m, sp, rest, ssub, esub);
5201573Srgrimes			assert(dp == rest);
5211573Srgrimes			sp = rest;
5221573Srgrimes			break;
5231573Srgrimes		case O_PLUS:
5241573Srgrimes		case O_QUEST:
5251573Srgrimes		case OOR1:
5261573Srgrimes		case OOR2:
5271573Srgrimes		case O_CH:
5281573Srgrimes			assert(nope);
5291573Srgrimes			break;
5301573Srgrimes		case OLPAREN:
5311573Srgrimes			i = OPND(m->g->strip[ss]);
5321573Srgrimes			assert(0 < i && i <= m->g->nsub);
5331573Srgrimes			m->pmatch[i].rm_so = sp - m->offp;
5341573Srgrimes			break;
5351573Srgrimes		case ORPAREN:
5361573Srgrimes			i = OPND(m->g->strip[ss]);
5371573Srgrimes			assert(0 < i && i <= m->g->nsub);
5381573Srgrimes			m->pmatch[i].rm_eo = sp - m->offp;
5391573Srgrimes			break;
5401573Srgrimes		default:		/* uh oh */
5411573Srgrimes			assert(nope);
5421573Srgrimes			break;
5431573Srgrimes		}
5441573Srgrimes	}
5451573Srgrimes
5461573Srgrimes	assert(sp == stop);
5471573Srgrimes	return(sp);
5481573Srgrimes}
5491573Srgrimes
5501573Srgrimes/*
5511573Srgrimes - backref - figure out what matched what, figuring in back references
55292889Sobrien == static char *backref(struct match *m, char *start, \
5531573Srgrimes ==	char *stop, sopno startst, sopno stopst, sopno lev);
5541573Srgrimes */
5551573Srgrimesstatic char *			/* == stop (success) or NULL (failure) */
5561573Srgrimesbackref(m, start, stop, startst, stopst, lev)
55792889Sobrienstruct match *m;
5581573Srgrimeschar *start;
5591573Srgrimeschar *stop;
5601573Srgrimessopno startst;
5611573Srgrimessopno stopst;
5621573Srgrimessopno lev;			/* PLUS nesting level */
5631573Srgrimes{
56492889Sobrien	int i;
56592889Sobrien	sopno ss;		/* start sop of current subRE */
56692889Sobrien	char *sp;		/* start of string matched by it */
56792889Sobrien	sopno ssub;		/* start sop of subsubRE */
56892889Sobrien	sopno esub;		/* end sop of subsubRE */
56992889Sobrien	char *ssp;		/* start of string matched by subsubRE */
57092889Sobrien	char *dp;
57192889Sobrien	size_t len;
57292889Sobrien	int hard;
57392889Sobrien	sop s;
57492889Sobrien	regoff_t offsave;
57592889Sobrien	cset *cs;
576132019Stjr	wint_t wc;
5771573Srgrimes
5781573Srgrimes	AT("back", start, stop, startst, stopst);
5791573Srgrimes	sp = start;
5801573Srgrimes
5811573Srgrimes	/* get as far as we can with easy stuff */
5821573Srgrimes	hard = 0;
5831573Srgrimes	for (ss = startst; !hard && ss < stopst; ss++)
5841573Srgrimes		switch (OP(s = m->g->strip[ss])) {
5851573Srgrimes		case OCHAR:
586132019Stjr			if (sp == stop)
5871573Srgrimes				return(NULL);
588132019Stjr			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
589132019Stjr			if (wc != OPND(s))
590132019Stjr				return(NULL);
5911573Srgrimes			break;
5921573Srgrimes		case OANY:
5931573Srgrimes			if (sp == stop)
5941573Srgrimes				return(NULL);
595132019Stjr			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
596132019Stjr			if (wc == BADCHAR)
597132019Stjr				return (NULL);
5981573Srgrimes			break;
5991573Srgrimes		case OANYOF:
600132019Stjr			if (sp == stop)
601132019Stjr				return (NULL);
6021573Srgrimes			cs = &m->g->sets[OPND(s)];
603132019Stjr			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
604132019Stjr			if (wc == BADCHAR || !CHIN(cs, wc))
6051573Srgrimes				return(NULL);
6061573Srgrimes			break;
6071573Srgrimes		case OBOL:
6081573Srgrimes			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
6091573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
6101573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
6111573Srgrimes				{ /* yes */ }
6121573Srgrimes			else
6131573Srgrimes				return(NULL);
6141573Srgrimes			break;
6151573Srgrimes		case OEOL:
6161573Srgrimes			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
6171573Srgrimes					(sp < m->endp && *sp == '\n' &&
6181573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
6191573Srgrimes				{ /* yes */ }
6201573Srgrimes			else
6211573Srgrimes				return(NULL);
6221573Srgrimes			break;
6231573Srgrimes		case OBOW:
6241573Srgrimes			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
6251573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
6261573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6271573Srgrimes					(sp > m->beginp &&
6281573Srgrimes							!ISWORD(*(sp-1))) ) &&
6291573Srgrimes					(sp < m->endp && ISWORD(*sp)) )
6301573Srgrimes				{ /* yes */ }
6311573Srgrimes			else
6321573Srgrimes				return(NULL);
6331573Srgrimes			break;
6341573Srgrimes		case OEOW:
6351573Srgrimes			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
6361573Srgrimes					(sp < m->endp && *sp == '\n' &&
6371573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6381573Srgrimes					(sp < m->endp && !ISWORD(*sp)) ) &&
6391573Srgrimes					(sp > m->beginp && ISWORD(*(sp-1))) )
6401573Srgrimes				{ /* yes */ }
6411573Srgrimes			else
6421573Srgrimes				return(NULL);
6431573Srgrimes			break;
6441573Srgrimes		case O_QUEST:
6451573Srgrimes			break;
6461573Srgrimes		case OOR1:	/* matches null but needs to skip */
6471573Srgrimes			ss++;
6481573Srgrimes			s = m->g->strip[ss];
6491573Srgrimes			do {
6501573Srgrimes				assert(OP(s) == OOR2);
6511573Srgrimes				ss += OPND(s);
6521573Srgrimes			} while (OP(s = m->g->strip[ss]) != O_CH);
6531573Srgrimes			/* note that the ss++ gets us past the O_CH */
6541573Srgrimes			break;
6551573Srgrimes		default:	/* have to make a choice */
6561573Srgrimes			hard = 1;
6571573Srgrimes			break;
6581573Srgrimes		}
6591573Srgrimes	if (!hard) {		/* that was it! */
6601573Srgrimes		if (sp != stop)
6611573Srgrimes			return(NULL);
6621573Srgrimes		return(sp);
6631573Srgrimes	}
6641573Srgrimes	ss--;			/* adjust for the for's final increment */
6651573Srgrimes
6661573Srgrimes	/* the hard stuff */
6671573Srgrimes	AT("hard", sp, stop, ss, stopst);
6681573Srgrimes	s = m->g->strip[ss];
6691573Srgrimes	switch (OP(s)) {
6701573Srgrimes	case OBACK_:		/* the vilest depths */
6711573Srgrimes		i = OPND(s);
6721573Srgrimes		assert(0 < i && i <= m->g->nsub);
6731573Srgrimes		if (m->pmatch[i].rm_eo == -1)
6741573Srgrimes			return(NULL);
6751573Srgrimes		assert(m->pmatch[i].rm_so != -1);
6761573Srgrimes		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
6771573Srgrimes		assert(stop - m->beginp >= len);
6781573Srgrimes		if (sp > stop - len)
6791573Srgrimes			return(NULL);	/* not enough left to match */
6801573Srgrimes		ssp = m->offp + m->pmatch[i].rm_so;
6811573Srgrimes		if (memcmp(sp, ssp, len) != 0)
6821573Srgrimes			return(NULL);
6831573Srgrimes		while (m->g->strip[ss] != SOP(O_BACK, i))
6841573Srgrimes			ss++;
6851573Srgrimes		return(backref(m, sp+len, stop, ss+1, stopst, lev));
6861573Srgrimes		break;
6871573Srgrimes	case OQUEST_:		/* to null or not */
6881573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
6891573Srgrimes		if (dp != NULL)
6901573Srgrimes			return(dp);	/* not */
6911573Srgrimes		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
6921573Srgrimes		break;
6931573Srgrimes	case OPLUS_:
6941573Srgrimes		assert(m->lastpos != NULL);
6951573Srgrimes		assert(lev+1 <= m->g->nplus);
6961573Srgrimes		m->lastpos[lev+1] = sp;
6971573Srgrimes		return(backref(m, sp, stop, ss+1, stopst, lev+1));
6981573Srgrimes		break;
6991573Srgrimes	case O_PLUS:
7001573Srgrimes		if (sp == m->lastpos[lev])	/* last pass matched null */
7011573Srgrimes			return(backref(m, sp, stop, ss+1, stopst, lev-1));
7021573Srgrimes		/* try another pass */
7031573Srgrimes		m->lastpos[lev] = sp;
7041573Srgrimes		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
7051573Srgrimes		if (dp == NULL)
7061573Srgrimes			return(backref(m, sp, stop, ss+1, stopst, lev-1));
7071573Srgrimes		else
7081573Srgrimes			return(dp);
7091573Srgrimes		break;
7101573Srgrimes	case OCH_:		/* find the right one, if any */
7111573Srgrimes		ssub = ss + 1;
7121573Srgrimes		esub = ss + OPND(s) - 1;
7131573Srgrimes		assert(OP(m->g->strip[esub]) == OOR1);
7141573Srgrimes		for (;;) {	/* find first matching branch */
7151573Srgrimes			dp = backref(m, sp, stop, ssub, esub, lev);
7161573Srgrimes			if (dp != NULL)
7171573Srgrimes				return(dp);
7181573Srgrimes			/* that one missed, try next one */
7191573Srgrimes			if (OP(m->g->strip[esub]) == O_CH)
7201573Srgrimes				return(NULL);	/* there is none */
7211573Srgrimes			esub++;
7221573Srgrimes			assert(OP(m->g->strip[esub]) == OOR2);
7231573Srgrimes			ssub = esub + 1;
7241573Srgrimes			esub += OPND(m->g->strip[esub]);
7251573Srgrimes			if (OP(m->g->strip[esub]) == OOR2)
7261573Srgrimes				esub--;
7271573Srgrimes			else
7281573Srgrimes				assert(OP(m->g->strip[esub]) == O_CH);
7291573Srgrimes		}
7301573Srgrimes		break;
7311573Srgrimes	case OLPAREN:		/* must undo assignment if rest fails */
7321573Srgrimes		i = OPND(s);
7331573Srgrimes		assert(0 < i && i <= m->g->nsub);
7341573Srgrimes		offsave = m->pmatch[i].rm_so;
7351573Srgrimes		m->pmatch[i].rm_so = sp - m->offp;
7361573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
7371573Srgrimes		if (dp != NULL)
7381573Srgrimes			return(dp);
7391573Srgrimes		m->pmatch[i].rm_so = offsave;
7401573Srgrimes		return(NULL);
7411573Srgrimes		break;
7421573Srgrimes	case ORPAREN:		/* must undo assignment if rest fails */
7431573Srgrimes		i = OPND(s);
7441573Srgrimes		assert(0 < i && i <= m->g->nsub);
7451573Srgrimes		offsave = m->pmatch[i].rm_eo;
7461573Srgrimes		m->pmatch[i].rm_eo = sp - m->offp;
7471573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
7481573Srgrimes		if (dp != NULL)
7491573Srgrimes			return(dp);
7501573Srgrimes		m->pmatch[i].rm_eo = offsave;
7511573Srgrimes		return(NULL);
7521573Srgrimes		break;
7531573Srgrimes	default:		/* uh oh */
7541573Srgrimes		assert(nope);
7551573Srgrimes		break;
7561573Srgrimes	}
7571573Srgrimes
7581573Srgrimes	/* "can't happen" */
7591573Srgrimes	assert(nope);
7601573Srgrimes	/* NOTREACHED */
76111664Sphk	return "shut up gcc";
7621573Srgrimes}
7631573Srgrimes
7641573Srgrimes/*
7651573Srgrimes - fast - step through the string at top speed
76692889Sobrien == static char *fast(struct match *m, char *start, \
7671573Srgrimes ==	char *stop, sopno startst, sopno stopst);
7681573Srgrimes */
7691573Srgrimesstatic char *			/* where tentative match ended, or NULL */
7701573Srgrimesfast(m, start, stop, startst, stopst)
77192889Sobrienstruct match *m;
7721573Srgrimeschar *start;
7731573Srgrimeschar *stop;
7741573Srgrimessopno startst;
7751573Srgrimessopno stopst;
7761573Srgrimes{
77792889Sobrien	states st = m->st;
77892889Sobrien	states fresh = m->fresh;
77992889Sobrien	states tmp = m->tmp;
78092889Sobrien	char *p = start;
781132019Stjr	wint_t c;
782132019Stjr	wint_t lastc;		/* previous c */
783132019Stjr	wint_t flagch;
78492889Sobrien	int i;
78592889Sobrien	char *coldp;		/* last p after which no match was underway */
786132019Stjr	size_t clen;
7871573Srgrimes
7881573Srgrimes	CLEAR(st);
7891573Srgrimes	SET1(st, startst);
7901573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
7911573Srgrimes	ASSIGN(fresh, st);
7921573Srgrimes	SP("start", st, *p);
7931573Srgrimes	coldp = NULL;
794132019Stjr	if (start == m->beginp)
795132019Stjr		c = OUT;
796132019Stjr	else {
797132019Stjr		/*
798132019Stjr		 * XXX Wrong if the previous character was multi-byte.
799132019Stjr		 * Newline never is (in encodings supported by FreeBSD),
800132019Stjr		 * so this only breaks the ISWORD tests below.
801132019Stjr		 */
802132019Stjr		c = (uch)*(start - 1);
803132019Stjr	}
8041573Srgrimes	for (;;) {
8051573Srgrimes		/* next character */
8061573Srgrimes		lastc = c;
807132019Stjr		if (p == m->endp)
808132019Stjr			c = OUT;
809132019Stjr		else
810137959Stjr			clen = XMBRTOWC(&c, p, stop - p, &m->mbs, BADCHAR);
8111573Srgrimes		if (EQ(st, fresh))
8121573Srgrimes			coldp = p;
8131573Srgrimes
8141573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
8151573Srgrimes		flagch = '\0';
8161573Srgrimes		i = 0;
8171573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
8181573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
8191573Srgrimes			flagch = BOL;
8201573Srgrimes			i = m->g->nbol;
8211573Srgrimes		}
8221573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
8231573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
8241573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
8251573Srgrimes			i += m->g->neol;
8261573Srgrimes		}
8271573Srgrimes		if (i != 0) {
8281573Srgrimes			for (; i > 0; i--)
8291573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
8301573Srgrimes			SP("boleol", st, c);
8311573Srgrimes		}
8321573Srgrimes
8331573Srgrimes		/* how about a word boundary? */
8341573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
8351573Srgrimes					(c != OUT && ISWORD(c)) ) {
8361573Srgrimes			flagch = BOW;
8371573Srgrimes		}
8381573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
8391573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
8401573Srgrimes			flagch = EOW;
8411573Srgrimes		}
8421573Srgrimes		if (flagch == BOW || flagch == EOW) {
8431573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
8441573Srgrimes			SP("boweow", st, c);
8451573Srgrimes		}
8461573Srgrimes
8471573Srgrimes		/* are we done? */
8481573Srgrimes		if (ISSET(st, stopst) || p == stop)
8491573Srgrimes			break;		/* NOTE BREAK OUT */
8501573Srgrimes
8511573Srgrimes		/* no, we must deal with this character */
8521573Srgrimes		ASSIGN(tmp, st);
8531573Srgrimes		ASSIGN(st, fresh);
8541573Srgrimes		assert(c != OUT);
8551573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
8561573Srgrimes		SP("aft", st, c);
8571573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
858132019Stjr		p += clen;
8591573Srgrimes	}
8601573Srgrimes
8611573Srgrimes	assert(coldp != NULL);
8621573Srgrimes	m->coldp = coldp;
8631573Srgrimes	if (ISSET(st, stopst))
864137959Stjr		return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
8651573Srgrimes	else
8661573Srgrimes		return(NULL);
8671573Srgrimes}
8681573Srgrimes
8691573Srgrimes/*
8701573Srgrimes - slow - step through the string more deliberately
87192889Sobrien == static char *slow(struct match *m, char *start, \
8721573Srgrimes ==	char *stop, sopno startst, sopno stopst);
8731573Srgrimes */
8741573Srgrimesstatic char *			/* where it ended */
8751573Srgrimesslow(m, start, stop, startst, stopst)
87692889Sobrienstruct match *m;
8771573Srgrimeschar *start;
8781573Srgrimeschar *stop;
8791573Srgrimessopno startst;
8801573Srgrimessopno stopst;
8811573Srgrimes{
88292889Sobrien	states st = m->st;
88392889Sobrien	states empty = m->empty;
88492889Sobrien	states tmp = m->tmp;
88592889Sobrien	char *p = start;
886132019Stjr	wint_t c;
887132019Stjr	wint_t lastc;		/* previous c */
888132019Stjr	wint_t flagch;
88992889Sobrien	int i;
89092889Sobrien	char *matchp;		/* last p at which a match ended */
891132019Stjr	size_t clen;
8921573Srgrimes
8931573Srgrimes	AT("slow", start, stop, startst, stopst);
8941573Srgrimes	CLEAR(st);
8951573Srgrimes	SET1(st, startst);
8961573Srgrimes	SP("sstart", st, *p);
8971573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
8981573Srgrimes	matchp = NULL;
899132019Stjr	if (start == m->beginp)
900132019Stjr		c = OUT;
901132019Stjr	else {
902132019Stjr		/*
903132019Stjr		 * XXX Wrong if the previous character was multi-byte.
904132019Stjr		 * Newline never is (in encodings supported by FreeBSD),
905132019Stjr		 * so this only breaks the ISWORD tests below.
906132019Stjr		 */
907132019Stjr		c = (uch)*(start - 1);
908132019Stjr	}
9091573Srgrimes	for (;;) {
9101573Srgrimes		/* next character */
9111573Srgrimes		lastc = c;
912132019Stjr		if (p == m->endp) {
913132019Stjr			c = OUT;
914132019Stjr			clen = 0;
915132019Stjr		} else
916137959Stjr			clen = XMBRTOWC(&c, p, stop - p, &m->mbs, BADCHAR);
9171573Srgrimes
9181573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
9191573Srgrimes		flagch = '\0';
9201573Srgrimes		i = 0;
9211573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
9221573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
9231573Srgrimes			flagch = BOL;
9241573Srgrimes			i = m->g->nbol;
9251573Srgrimes		}
9261573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
9271573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
9281573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
9291573Srgrimes			i += m->g->neol;
9301573Srgrimes		}
9311573Srgrimes		if (i != 0) {
9321573Srgrimes			for (; i > 0; i--)
9331573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
9341573Srgrimes			SP("sboleol", st, c);
9351573Srgrimes		}
9361573Srgrimes
9371573Srgrimes		/* how about a word boundary? */
9381573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
9391573Srgrimes					(c != OUT && ISWORD(c)) ) {
9401573Srgrimes			flagch = BOW;
9411573Srgrimes		}
9421573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
9431573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
9441573Srgrimes			flagch = EOW;
9451573Srgrimes		}
9461573Srgrimes		if (flagch == BOW || flagch == EOW) {
9471573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
9481573Srgrimes			SP("sboweow", st, c);
9491573Srgrimes		}
9501573Srgrimes
9511573Srgrimes		/* are we done? */
9521573Srgrimes		if (ISSET(st, stopst))
9531573Srgrimes			matchp = p;
9541573Srgrimes		if (EQ(st, empty) || p == stop)
9551573Srgrimes			break;		/* NOTE BREAK OUT */
9561573Srgrimes
9571573Srgrimes		/* no, we must deal with this character */
9581573Srgrimes		ASSIGN(tmp, st);
9591573Srgrimes		ASSIGN(st, empty);
9601573Srgrimes		assert(c != OUT);
9611573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
9621573Srgrimes		SP("saft", st, c);
9631573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
964132019Stjr		p += clen;
9651573Srgrimes	}
9661573Srgrimes
9671573Srgrimes	return(matchp);
9681573Srgrimes}
9691573Srgrimes
9701573Srgrimes
9711573Srgrimes/*
9721573Srgrimes - step - map set of states reachable before char to set reachable after
97392889Sobrien == static states step(struct re_guts *g, sopno start, sopno stop, \
97492889Sobrien ==	states bef, int ch, states aft);
975132019Stjr == #define	BOL	(OUT-1)
976132019Stjr == #define	EOL	(BOL-1)
977132019Stjr == #define	BOLEOL	(BOL-2)
978132019Stjr == #define	NOTHING	(BOL-3)
979132019Stjr == #define	BOW	(BOL-4)
980132019Stjr == #define	EOW	(BOL-5)
981132019Stjr == #define	BADCHAR	(BOL-6)
982132019Stjr == #define	NONCHAR(c)	((c) <= OUT)
9831573Srgrimes */
9841573Srgrimesstatic states
9851573Srgrimesstep(g, start, stop, bef, ch, aft)
98692889Sobrienstruct re_guts *g;
9871573Srgrimessopno start;			/* start state within strip */
9881573Srgrimessopno stop;			/* state after stop state within strip */
98992889Sobrienstates bef;			/* states reachable before */
990132019Stjrwint_t ch;			/* character or NONCHAR code */
99192889Sobrienstates aft;			/* states already known reachable after */
9921573Srgrimes{
99392889Sobrien	cset *cs;
99492889Sobrien	sop s;
99592889Sobrien	sopno pc;
99692889Sobrien	onestate here;		/* note, macros know this name */
99792889Sobrien	sopno look;
99892889Sobrien	int i;
9991573Srgrimes
10001573Srgrimes	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
10011573Srgrimes		s = g->strip[pc];
10021573Srgrimes		switch (OP(s)) {
10031573Srgrimes		case OEND:
10041573Srgrimes			assert(pc == stop-1);
10051573Srgrimes			break;
10061573Srgrimes		case OCHAR:
10071573Srgrimes			/* only characters can match */
1008132019Stjr			assert(!NONCHAR(ch) || ch != OPND(s));
1009132019Stjr			if (ch == OPND(s))
10101573Srgrimes				FWD(aft, bef, 1);
10111573Srgrimes			break;
10121573Srgrimes		case OBOL:
10131573Srgrimes			if (ch == BOL || ch == BOLEOL)
10141573Srgrimes				FWD(aft, bef, 1);
10151573Srgrimes			break;
10161573Srgrimes		case OEOL:
10171573Srgrimes			if (ch == EOL || ch == BOLEOL)
10181573Srgrimes				FWD(aft, bef, 1);
10191573Srgrimes			break;
10201573Srgrimes		case OBOW:
10211573Srgrimes			if (ch == BOW)
10221573Srgrimes				FWD(aft, bef, 1);
10231573Srgrimes			break;
10241573Srgrimes		case OEOW:
10251573Srgrimes			if (ch == EOW)
10261573Srgrimes				FWD(aft, bef, 1);
10271573Srgrimes			break;
10281573Srgrimes		case OANY:
10291573Srgrimes			if (!NONCHAR(ch))
10301573Srgrimes				FWD(aft, bef, 1);
10311573Srgrimes			break;
10321573Srgrimes		case OANYOF:
10331573Srgrimes			cs = &g->sets[OPND(s)];
10341573Srgrimes			if (!NONCHAR(ch) && CHIN(cs, ch))
10351573Srgrimes				FWD(aft, bef, 1);
10361573Srgrimes			break;
10371573Srgrimes		case OBACK_:		/* ignored here */
10381573Srgrimes		case O_BACK:
10391573Srgrimes			FWD(aft, aft, 1);
10401573Srgrimes			break;
10411573Srgrimes		case OPLUS_:		/* forward, this is just an empty */
10421573Srgrimes			FWD(aft, aft, 1);
10431573Srgrimes			break;
10441573Srgrimes		case O_PLUS:		/* both forward and back */
10451573Srgrimes			FWD(aft, aft, 1);
10461573Srgrimes			i = ISSETBACK(aft, OPND(s));
10471573Srgrimes			BACK(aft, aft, OPND(s));
10481573Srgrimes			if (!i && ISSETBACK(aft, OPND(s))) {
10491573Srgrimes				/* oho, must reconsider loop body */
10501573Srgrimes				pc -= OPND(s) + 1;
10511573Srgrimes				INIT(here, pc);
10521573Srgrimes			}
10531573Srgrimes			break;
10541573Srgrimes		case OQUEST_:		/* two branches, both forward */
10551573Srgrimes			FWD(aft, aft, 1);
10561573Srgrimes			FWD(aft, aft, OPND(s));
10571573Srgrimes			break;
10581573Srgrimes		case O_QUEST:		/* just an empty */
10591573Srgrimes			FWD(aft, aft, 1);
10601573Srgrimes			break;
10611573Srgrimes		case OLPAREN:		/* not significant here */
10621573Srgrimes		case ORPAREN:
10631573Srgrimes			FWD(aft, aft, 1);
10641573Srgrimes			break;
10651573Srgrimes		case OCH_:		/* mark the first two branches */
10661573Srgrimes			FWD(aft, aft, 1);
10671573Srgrimes			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10681573Srgrimes			FWD(aft, aft, OPND(s));
10691573Srgrimes			break;
10701573Srgrimes		case OOR1:		/* done a branch, find the O_CH */
10711573Srgrimes			if (ISSTATEIN(aft, here)) {
10721573Srgrimes				for (look = 1;
10731573Srgrimes						OP(s = g->strip[pc+look]) != O_CH;
10741573Srgrimes						look += OPND(s))
10751573Srgrimes					assert(OP(s) == OOR2);
10761573Srgrimes				FWD(aft, aft, look);
10771573Srgrimes			}
10781573Srgrimes			break;
10791573Srgrimes		case OOR2:		/* propagate OCH_'s marking */
10801573Srgrimes			FWD(aft, aft, 1);
10811573Srgrimes			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
10821573Srgrimes				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10831573Srgrimes				FWD(aft, aft, OPND(s));
10841573Srgrimes			}
10851573Srgrimes			break;
10861573Srgrimes		case O_CH:		/* just empty */
10871573Srgrimes			FWD(aft, aft, 1);
10881573Srgrimes			break;
10891573Srgrimes		default:		/* ooooops... */
10901573Srgrimes			assert(nope);
10911573Srgrimes			break;
10921573Srgrimes		}
10931573Srgrimes	}
10941573Srgrimes
10951573Srgrimes	return(aft);
10961573Srgrimes}
10971573Srgrimes
10981573Srgrimes#ifdef REDEBUG
10991573Srgrimes/*
11001573Srgrimes - print - print a set of states
11011573Srgrimes == #ifdef REDEBUG
11021573Srgrimes == static void print(struct match *m, char *caption, states st, \
11031573Srgrimes ==	int ch, FILE *d);
11041573Srgrimes == #endif
11051573Srgrimes */
11061573Srgrimesstatic void
11071573Srgrimesprint(m, caption, st, ch, d)
11081573Srgrimesstruct match *m;
11091573Srgrimeschar *caption;
11101573Srgrimesstates st;
11111573Srgrimesint ch;
11121573SrgrimesFILE *d;
11131573Srgrimes{
111492889Sobrien	struct re_guts *g = m->g;
111592889Sobrien	int i;
111692889Sobrien	int first = 1;
11171573Srgrimes
11181573Srgrimes	if (!(m->eflags&REG_TRACE))
11191573Srgrimes		return;
11201573Srgrimes
11211573Srgrimes	fprintf(d, "%s", caption);
11221573Srgrimes	if (ch != '\0')
11231573Srgrimes		fprintf(d, " %s", pchar(ch));
11241573Srgrimes	for (i = 0; i < g->nstates; i++)
11251573Srgrimes		if (ISSET(st, i)) {
11261573Srgrimes			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
11271573Srgrimes			first = 0;
11281573Srgrimes		}
11291573Srgrimes	fprintf(d, "\n");
11301573Srgrimes}
11311573Srgrimes
11328870Srgrimes/*
11331573Srgrimes - at - print current situation
11341573Srgrimes == #ifdef REDEBUG
11351573Srgrimes == static void at(struct match *m, char *title, char *start, char *stop, \
11361573Srgrimes ==						sopno startst, sopno stopst);
11371573Srgrimes == #endif
11381573Srgrimes */
11391573Srgrimesstatic void
11401573Srgrimesat(m, title, start, stop, startst, stopst)
11411573Srgrimesstruct match *m;
11421573Srgrimeschar *title;
11431573Srgrimeschar *start;
11441573Srgrimeschar *stop;
11451573Srgrimessopno startst;
11461573Srgrimessopno stopst;
11471573Srgrimes{
11481573Srgrimes	if (!(m->eflags&REG_TRACE))
11491573Srgrimes		return;
11501573Srgrimes
11511573Srgrimes	printf("%s %s-", title, pchar(*start));
11521573Srgrimes	printf("%s ", pchar(*stop));
11531573Srgrimes	printf("%ld-%ld\n", (long)startst, (long)stopst);
11541573Srgrimes}
11551573Srgrimes
11561573Srgrimes#ifndef PCHARDONE
11571573Srgrimes#define	PCHARDONE	/* never again */
11581573Srgrimes/*
11591573Srgrimes - pchar - make a character printable
11601573Srgrimes == #ifdef REDEBUG
11611573Srgrimes == static char *pchar(int ch);
11621573Srgrimes == #endif
11631573Srgrimes *
11641573Srgrimes * Is this identical to regchar() over in debug.c?  Well, yes.  But a
11651573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to
11661573Srgrimes * a matching debug.o, and this is convenient.  It all disappears in
11671573Srgrimes * the non-debug compilation anyway, so it doesn't matter much.
11681573Srgrimes */
11691573Srgrimesstatic char *			/* -> representation */
11701573Srgrimespchar(ch)
11711573Srgrimesint ch;
11721573Srgrimes{
11731573Srgrimes	static char pbuf[10];
11741573Srgrimes
117517508Sache	if (isprint((uch)ch) || ch == ' ')
11761573Srgrimes		sprintf(pbuf, "%c", ch);
11771573Srgrimes	else
11781573Srgrimes		sprintf(pbuf, "\\%o", ch);
11791573Srgrimes	return(pbuf);
11801573Srgrimes}
11811573Srgrimes#endif
11821573Srgrimes#endif
11831573Srgrimes
11841573Srgrimes#undef	matcher
11851573Srgrimes#undef	fast
11861573Srgrimes#undef	slow
11871573Srgrimes#undef	dissect
11881573Srgrimes#undef	backref
11891573Srgrimes#undef	step
11901573Srgrimes#undef	print
11911573Srgrimes#undef	at
11921573Srgrimes#undef	match
1193