11573Srgrimes/*-
21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
31573Srgrimes * Copyright (c) 1992, 1993, 1994
41573Srgrimes *	The Regents of the University of California.  All rights reserved.
51573Srgrimes *
61573Srgrimes * This code is derived from software contributed to Berkeley by
71573Srgrimes * Henry Spencer.
81573Srgrimes *
91573Srgrimes * Redistribution and use in source and binary forms, with or without
101573Srgrimes * modification, are permitted provided that the following conditions
111573Srgrimes * are met:
121573Srgrimes * 1. Redistributions of source code must retain the above copyright
131573Srgrimes *    notice, this list of conditions and the following disclaimer.
141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151573Srgrimes *    notice, this list of conditions and the following disclaimer in the
161573Srgrimes *    documentation and/or other materials provided with the distribution.
171573Srgrimes * 4. Neither the name of the University nor the names of its contributors
181573Srgrimes *    may be used to endorse or promote products derived from this software
191573Srgrimes *    without specific prior written permission.
201573Srgrimes *
211573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311573Srgrimes * SUCH DAMAGE.
321573Srgrimes *
331573Srgrimes *	@(#)engine.c	8.5 (Berkeley) 3/20/94
341573Srgrimes */
351573Srgrimes
3692986Sobrien#include <sys/cdefs.h>
3792986Sobrien__FBSDID("$FreeBSD: releng/10.3/lib/libc/regex/engine.c 265726 2014-05-09 01:20:39Z pfg $");
3892986Sobrien
391573Srgrimes/*
401573Srgrimes * The matching engine and friends.  This file is #included by regexec.c
411573Srgrimes * after suitable #defines of a variety of macros used herein, so that
421573Srgrimes * different state representations can be used without duplicating masses
431573Srgrimes * of code.
441573Srgrimes */
451573Srgrimes
461573Srgrimes#ifdef SNAMES
471573Srgrimes#define	matcher	smatcher
481573Srgrimes#define	fast	sfast
491573Srgrimes#define	slow	sslow
501573Srgrimes#define	dissect	sdissect
511573Srgrimes#define	backref	sbackref
521573Srgrimes#define	step	sstep
531573Srgrimes#define	print	sprint
541573Srgrimes#define	at	sat
551573Srgrimes#define	match	smat
561573Srgrimes#endif
571573Srgrimes#ifdef LNAMES
581573Srgrimes#define	matcher	lmatcher
591573Srgrimes#define	fast	lfast
601573Srgrimes#define	slow	lslow
611573Srgrimes#define	dissect	ldissect
621573Srgrimes#define	backref	lbackref
631573Srgrimes#define	step	lstep
641573Srgrimes#define	print	lprint
651573Srgrimes#define	at	lat
661573Srgrimes#define	match	lmat
671573Srgrimes#endif
68132019Stjr#ifdef MNAMES
69132019Stjr#define	matcher	mmatcher
70132019Stjr#define	fast	mfast
71132019Stjr#define	slow	mslow
72132019Stjr#define	dissect	mdissect
73132019Stjr#define	backref	mbackref
74132019Stjr#define	step	mstep
75132019Stjr#define	print	mprint
76132019Stjr#define	at	mat
77132019Stjr#define	match	mmat
78132019Stjr#endif
791573Srgrimes
801573Srgrimes/* another structure passed up and down to avoid zillions of parameters */
811573Srgrimesstruct match {
821573Srgrimes	struct re_guts *g;
831573Srgrimes	int eflags;
841573Srgrimes	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
85169982Sdelphij	const char *offp;	/* offsets work from here */
86169982Sdelphij	const char *beginp;	/* start of string -- virtual NUL precedes */
87169982Sdelphij	const char *endp;	/* end of string -- virtual NUL here */
88169982Sdelphij	const char *coldp;	/* can be no match starting before here */
89169982Sdelphij	const char **lastpos;	/* [nplus+1] */
901573Srgrimes	STATEVARS;
911573Srgrimes	states st;		/* current states */
921573Srgrimes	states fresh;		/* states for a fresh start */
931573Srgrimes	states tmp;		/* temporary */
941573Srgrimes	states empty;		/* empty set of states */
95132019Stjr	mbstate_t mbs;		/* multibyte conversion state */
961573Srgrimes};
971573Srgrimes
981573Srgrimes/* ========= begin header generated by ./mkh ========= */
991573Srgrimes#ifdef __cplusplus
1001573Srgrimesextern "C" {
1011573Srgrimes#endif
1021573Srgrimes
1031573Srgrimes/* === engine.c === */
104169982Sdelphijstatic int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
105169982Sdelphijstatic const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
106169982Sdelphijstatic const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
107169982Sdelphijstatic const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
108169982Sdelphijstatic const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
109132019Stjrstatic states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
110167222Sdelphij#define MAX_RECURSION	100
111132019Stjr#define	BOL	(OUT-1)
112132019Stjr#define	EOL	(BOL-1)
113132019Stjr#define	BOLEOL	(BOL-2)
114132019Stjr#define	NOTHING	(BOL-3)
115132019Stjr#define	BOW	(BOL-4)
116132019Stjr#define	EOW	(BOL-5)
117132019Stjr#define	BADCHAR	(BOL-6)
118132019Stjr#define	NONCHAR(c)	((c) <= OUT)
1191573Srgrimes#ifdef REDEBUG
120169982Sdelphijstatic void print(struct match *m, const char *caption, states st, int ch, FILE *d);
1211573Srgrimes#endif
1221573Srgrimes#ifdef REDEBUG
123169982Sdelphijstatic void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst);
1241573Srgrimes#endif
1251573Srgrimes#ifdef REDEBUG
126169982Sdelphijstatic const char *pchar(int ch);
1271573Srgrimes#endif
1281573Srgrimes
1291573Srgrimes#ifdef __cplusplus
1301573Srgrimes}
1311573Srgrimes#endif
1321573Srgrimes/* ========= end header generated by ./mkh ========= */
1331573Srgrimes
1341573Srgrimes#ifdef REDEBUG
1351573Srgrimes#define	SP(t, s, c)	print(m, t, s, c, stdout)
1361573Srgrimes#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
1371573Srgrimes#define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
1381573Srgrimes#else
1391573Srgrimes#define	SP(t, s, c)	/* nothing */
1401573Srgrimes#define	AT(t, p1, p2, s1, s2)	/* nothing */
1411573Srgrimes#define	NOTE(s)	/* nothing */
1421573Srgrimes#endif
1431573Srgrimes
1441573Srgrimes/*
1451573Srgrimes - matcher - the actual matching engine
146169982Sdelphij == static int matcher(struct re_guts *g, const char *string, \
1471573Srgrimes ==	size_t nmatch, regmatch_t pmatch[], int eflags);
1481573Srgrimes */
1491573Srgrimesstatic int			/* 0 success, REG_NOMATCH failure */
150169982Sdelphijmatcher(struct re_guts *g,
151169982Sdelphij	const char *string,
152169982Sdelphij	size_t nmatch,
153169982Sdelphij	regmatch_t pmatch[],
154169982Sdelphij	int eflags)
1551573Srgrimes{
156169982Sdelphij	const char *endp;
15792889Sobrien	int i;
1581573Srgrimes	struct match mv;
15992889Sobrien	struct match *m = &mv;
160169982Sdelphij	const char *dp;
16192889Sobrien	const sopno gf = g->firststate+1;	/* +1 for OEND */
16292889Sobrien	const sopno gl = g->laststate;
163169982Sdelphij	const char *start;
164169982Sdelphij	const char *stop;
16562232Sdcs	/* Boyer-Moore algorithms variables */
166169982Sdelphij	const char *pp;
16762232Sdcs	int cj, mj;
168169982Sdelphij	const char *mustfirst;
169169982Sdelphij	const char *mustlast;
17092889Sobrien	int *matchjump;
17192889Sobrien	int *charjump;
1721573Srgrimes
1731573Srgrimes	/* simplify the situation where possible */
1741573Srgrimes	if (g->cflags&REG_NOSUB)
1751573Srgrimes		nmatch = 0;
1761573Srgrimes	if (eflags&REG_STARTEND) {
1771573Srgrimes		start = string + pmatch[0].rm_so;
1781573Srgrimes		stop = string + pmatch[0].rm_eo;
1791573Srgrimes	} else {
1801573Srgrimes		start = string;
1811573Srgrimes		stop = start + strlen(start);
1821573Srgrimes	}
1831573Srgrimes	if (stop < start)
1841573Srgrimes		return(REG_INVARG);
1851573Srgrimes
1861573Srgrimes	/* prescreening; this does wonders for this rather slow code */
1871573Srgrimes	if (g->must != NULL) {
18862391Sdcs		if (g->charjump != NULL && g->matchjump != NULL) {
18962232Sdcs			mustfirst = g->must;
19062232Sdcs			mustlast = g->must + g->mlen - 1;
19162232Sdcs			charjump = g->charjump;
19262232Sdcs			matchjump = g->matchjump;
19362232Sdcs			pp = mustlast;
19462754Sdcs			for (dp = start+g->mlen-1; dp < stop;) {
19562232Sdcs				/* Fast skip non-matches */
196111010Snectar				while (dp < stop && charjump[(int)*dp])
197111010Snectar					dp += charjump[(int)*dp];
19862232Sdcs
19962754Sdcs				if (dp >= stop)
20062232Sdcs					break;
20162232Sdcs
20262232Sdcs				/* Greedy matcher */
20362232Sdcs				/* We depend on not being used for
20462232Sdcs				 * for strings of length 1
20562232Sdcs				 */
20662754Sdcs				while (*--dp == *--pp && pp != mustfirst);
20762232Sdcs
20862754Sdcs				if (*dp == *pp)
20962232Sdcs					break;
21062232Sdcs
21162232Sdcs				/* Jump to next possible match */
21262232Sdcs				mj = matchjump[pp - mustfirst];
213111010Snectar				cj = charjump[(int)*dp];
21462754Sdcs				dp += (cj < mj ? mj : cj);
21562754Sdcs				pp = mustlast;
21662232Sdcs			}
21762391Sdcs			if (pp != mustfirst)
21862232Sdcs				return(REG_NOMATCH);
21962232Sdcs		} else {
22062232Sdcs			for (dp = start; dp < stop; dp++)
22162232Sdcs				if (*dp == g->must[0] &&
22262232Sdcs				    stop - dp >= g->mlen &&
22362232Sdcs				    memcmp(dp, g->must, (size_t)g->mlen) == 0)
22462232Sdcs					break;
22562232Sdcs			if (dp == stop)		/* we didn't find g->must */
22662232Sdcs				return(REG_NOMATCH);
22762232Sdcs		}
2281573Srgrimes	}
2291573Srgrimes
2301573Srgrimes	/* match struct setup */
2311573Srgrimes	m->g = g;
2321573Srgrimes	m->eflags = eflags;
2331573Srgrimes	m->pmatch = NULL;
2341573Srgrimes	m->lastpos = NULL;
2351573Srgrimes	m->offp = string;
2361573Srgrimes	m->beginp = start;
2371573Srgrimes	m->endp = stop;
2381573Srgrimes	STATESETUP(m, 4);
2391573Srgrimes	SETUP(m->st);
2401573Srgrimes	SETUP(m->fresh);
2411573Srgrimes	SETUP(m->tmp);
2421573Srgrimes	SETUP(m->empty);
2431573Srgrimes	CLEAR(m->empty);
244132019Stjr	ZAPSTATE(&m->mbs);
2451573Srgrimes
24662391Sdcs	/* Adjust start according to moffset, to speed things up */
24762391Sdcs	if (g->moffset > -1)
24862854Sdcs		start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
24962391Sdcs
250197245Sdds	SP("mloop", m->st, *start);
251197245Sdds
2521573Srgrimes	/* this loop does only one repetition except for backrefs */
2531573Srgrimes	for (;;) {
2541573Srgrimes		endp = fast(m, start, stop, gf, gl);
2551573Srgrimes		if (endp == NULL) {		/* a miss */
256139437Sdds			if (m->pmatch != NULL)
257139437Sdds				free((char *)m->pmatch);
258139437Sdds			if (m->lastpos != NULL)
259139437Sdds				free((char *)m->lastpos);
2601573Srgrimes			STATETEARDOWN(m);
2611573Srgrimes			return(REG_NOMATCH);
2621573Srgrimes		}
2631573Srgrimes		if (nmatch == 0 && !g->backrefs)
2641573Srgrimes			break;		/* no further info needed */
2651573Srgrimes
2661573Srgrimes		/* where? */
2671573Srgrimes		assert(m->coldp != NULL);
2681573Srgrimes		for (;;) {
2691573Srgrimes			NOTE("finding start");
2701573Srgrimes			endp = slow(m, m->coldp, stop, gf, gl);
2711573Srgrimes			if (endp != NULL)
2721573Srgrimes				break;
2731573Srgrimes			assert(m->coldp < m->endp);
274132019Stjr			m->coldp += XMBRTOWC(NULL, m->coldp,
275132019Stjr			    m->endp - m->coldp, &m->mbs, 0);
2761573Srgrimes		}
2771573Srgrimes		if (nmatch == 1 && !g->backrefs)
2781573Srgrimes			break;		/* no further info needed */
2791573Srgrimes
2801573Srgrimes		/* oh my, he wants the subexpressions... */
2811573Srgrimes		if (m->pmatch == NULL)
2821573Srgrimes			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
2831573Srgrimes							sizeof(regmatch_t));
2841573Srgrimes		if (m->pmatch == NULL) {
2851573Srgrimes			STATETEARDOWN(m);
2861573Srgrimes			return(REG_ESPACE);
2871573Srgrimes		}
2881573Srgrimes		for (i = 1; i <= m->g->nsub; i++)
2891573Srgrimes			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
2901573Srgrimes		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
2911573Srgrimes			NOTE("dissecting");
2921573Srgrimes			dp = dissect(m, m->coldp, endp, gf, gl);
2931573Srgrimes		} else {
2941573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL)
295169982Sdelphij				m->lastpos = malloc((g->nplus+1) *
296169982Sdelphij						sizeof(const char *));
2971573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL) {
2981573Srgrimes				free(m->pmatch);
2991573Srgrimes				STATETEARDOWN(m);
3001573Srgrimes				return(REG_ESPACE);
3011573Srgrimes			}
3021573Srgrimes			NOTE("backref dissect");
303167222Sdelphij			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
3041573Srgrimes		}
3051573Srgrimes		if (dp != NULL)
3061573Srgrimes			break;
3071573Srgrimes
3081573Srgrimes		/* uh-oh... we couldn't find a subexpression-level match */
3091573Srgrimes		assert(g->backrefs);	/* must be back references doing it */
3101573Srgrimes		assert(g->nplus == 0 || m->lastpos != NULL);
3111573Srgrimes		for (;;) {
3121573Srgrimes			if (dp != NULL || endp <= m->coldp)
3131573Srgrimes				break;		/* defeat */
3141573Srgrimes			NOTE("backoff");
3151573Srgrimes			endp = slow(m, m->coldp, endp-1, gf, gl);
3161573Srgrimes			if (endp == NULL)
3171573Srgrimes				break;		/* defeat */
3181573Srgrimes			/* try it on a shorter possibility */
3191573Srgrimes#ifndef NDEBUG
3201573Srgrimes			for (i = 1; i <= m->g->nsub; i++) {
3211573Srgrimes				assert(m->pmatch[i].rm_so == -1);
3221573Srgrimes				assert(m->pmatch[i].rm_eo == -1);
3231573Srgrimes			}
3241573Srgrimes#endif
3251573Srgrimes			NOTE("backoff dissect");
326167222Sdelphij			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
3271573Srgrimes		}
3281573Srgrimes		assert(dp == NULL || dp == endp);
3291573Srgrimes		if (dp != NULL)		/* found a shorter one */
3301573Srgrimes			break;
3311573Srgrimes
3321573Srgrimes		/* despite initial appearances, there is no match here */
3331573Srgrimes		NOTE("false alarm");
334132019Stjr		/* recycle starting later */
335132019Stjr		start = m->coldp + XMBRTOWC(NULL, m->coldp,
336137959Stjr		    stop - m->coldp, &m->mbs, 0);
3371573Srgrimes		assert(start <= stop);
3381573Srgrimes	}
3391573Srgrimes
3401573Srgrimes	/* fill in the details if requested */
3411573Srgrimes	if (nmatch > 0) {
3421573Srgrimes		pmatch[0].rm_so = m->coldp - m->offp;
3431573Srgrimes		pmatch[0].rm_eo = endp - m->offp;
3441573Srgrimes	}
3451573Srgrimes	if (nmatch > 1) {
3461573Srgrimes		assert(m->pmatch != NULL);
3471573Srgrimes		for (i = 1; i < nmatch; i++)
3481573Srgrimes			if (i <= m->g->nsub)
3491573Srgrimes				pmatch[i] = m->pmatch[i];
3501573Srgrimes			else {
3511573Srgrimes				pmatch[i].rm_so = -1;
3521573Srgrimes				pmatch[i].rm_eo = -1;
3531573Srgrimes			}
3541573Srgrimes	}
3551573Srgrimes
3561573Srgrimes	if (m->pmatch != NULL)
3571573Srgrimes		free((char *)m->pmatch);
3581573Srgrimes	if (m->lastpos != NULL)
3591573Srgrimes		free((char *)m->lastpos);
3601573Srgrimes	STATETEARDOWN(m);
3611573Srgrimes	return(0);
3621573Srgrimes}
3631573Srgrimes
3641573Srgrimes/*
3651573Srgrimes - dissect - figure out what matched what, no back references
366169982Sdelphij == static const char *dissect(struct match *m, const char *start, \
367169982Sdelphij ==	const char *stop, sopno startst, sopno stopst);
3681573Srgrimes */
369169982Sdelphijstatic const char *		/* == stop (success) always */
370169982Sdelphijdissect(struct match *m,
371169982Sdelphij	const char *start,
372169982Sdelphij	const char *stop,
373169982Sdelphij	sopno startst,
374169982Sdelphij	sopno stopst)
3751573Srgrimes{
37692889Sobrien	int i;
37792889Sobrien	sopno ss;		/* start sop of current subRE */
37892889Sobrien	sopno es;		/* end sop of current subRE */
379169982Sdelphij	const char *sp;		/* start of string matched by it */
380169982Sdelphij	const char *stp;	/* string matched by it cannot pass here */
381169982Sdelphij	const char *rest;	/* start of rest of string */
382169982Sdelphij	const char *tail;	/* string unmatched by rest of RE */
38392889Sobrien	sopno ssub;		/* start sop of subsubRE */
38492889Sobrien	sopno esub;		/* end sop of subsubRE */
385169982Sdelphij	const char *ssp;	/* start of string matched by subsubRE */
386169982Sdelphij	const char *sep;	/* end of string matched by subsubRE */
387169982Sdelphij	const char *oldssp;	/* previous ssp */
388169982Sdelphij	const char *dp;
3891573Srgrimes
3901573Srgrimes	AT("diss", start, stop, startst, stopst);
3911573Srgrimes	sp = start;
3921573Srgrimes	for (ss = startst; ss < stopst; ss = es) {
3931573Srgrimes		/* identify end of subRE */
3941573Srgrimes		es = ss;
3951573Srgrimes		switch (OP(m->g->strip[es])) {
3961573Srgrimes		case OPLUS_:
3971573Srgrimes		case OQUEST_:
3981573Srgrimes			es += OPND(m->g->strip[es]);
3991573Srgrimes			break;
4001573Srgrimes		case OCH_:
4011573Srgrimes			while (OP(m->g->strip[es]) != O_CH)
4021573Srgrimes				es += OPND(m->g->strip[es]);
4031573Srgrimes			break;
4041573Srgrimes		}
4051573Srgrimes		es++;
4061573Srgrimes
4071573Srgrimes		/* figure out what it matched */
4081573Srgrimes		switch (OP(m->g->strip[ss])) {
4091573Srgrimes		case OEND:
4101573Srgrimes			assert(nope);
4111573Srgrimes			break;
4121573Srgrimes		case OCHAR:
413132019Stjr			sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
4141573Srgrimes			break;
4151573Srgrimes		case OBOL:
4161573Srgrimes		case OEOL:
4171573Srgrimes		case OBOW:
4181573Srgrimes		case OEOW:
4191573Srgrimes			break;
4201573Srgrimes		case OANY:
4211573Srgrimes		case OANYOF:
422132019Stjr			sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
4231573Srgrimes			break;
4241573Srgrimes		case OBACK_:
4251573Srgrimes		case O_BACK:
4261573Srgrimes			assert(nope);
4271573Srgrimes			break;
4281573Srgrimes		/* cases where length of match is hard to find */
4291573Srgrimes		case OQUEST_:
4301573Srgrimes			stp = stop;
4311573Srgrimes			for (;;) {
4321573Srgrimes				/* how long could this one be? */
4331573Srgrimes				rest = slow(m, sp, stp, ss, es);
4341573Srgrimes				assert(rest != NULL);	/* it did match */
4351573Srgrimes				/* could the rest match the rest? */
4361573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4371573Srgrimes				if (tail == stop)
4381573Srgrimes					break;		/* yes! */
4391573Srgrimes				/* no -- try a shorter match for this one */
4401573Srgrimes				stp = rest - 1;
4411573Srgrimes				assert(stp >= sp);	/* it did work */
4421573Srgrimes			}
4431573Srgrimes			ssub = ss + 1;
4441573Srgrimes			esub = es - 1;
4451573Srgrimes			/* did innards match? */
4461573Srgrimes			if (slow(m, sp, rest, ssub, esub) != NULL) {
4471573Srgrimes				dp = dissect(m, sp, rest, ssub, esub);
4481573Srgrimes				assert(dp == rest);
4491573Srgrimes			} else		/* no */
4501573Srgrimes				assert(sp == rest);
4511573Srgrimes			sp = rest;
4521573Srgrimes			break;
4531573Srgrimes		case OPLUS_:
4541573Srgrimes			stp = stop;
4551573Srgrimes			for (;;) {
4561573Srgrimes				/* how long could this one be? */
4571573Srgrimes				rest = slow(m, sp, stp, ss, es);
4581573Srgrimes				assert(rest != NULL);	/* it did match */
4591573Srgrimes				/* could the rest match the rest? */
4601573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4611573Srgrimes				if (tail == stop)
4621573Srgrimes					break;		/* yes! */
4631573Srgrimes				/* no -- try a shorter match for this one */
4641573Srgrimes				stp = rest - 1;
4651573Srgrimes				assert(stp >= sp);	/* it did work */
4661573Srgrimes			}
4671573Srgrimes			ssub = ss + 1;
4681573Srgrimes			esub = es - 1;
4691573Srgrimes			ssp = sp;
4701573Srgrimes			oldssp = ssp;
4711573Srgrimes			for (;;) {	/* find last match of innards */
4721573Srgrimes				sep = slow(m, ssp, rest, ssub, esub);
4731573Srgrimes				if (sep == NULL || sep == ssp)
4741573Srgrimes					break;	/* failed or matched null */
4751573Srgrimes				oldssp = ssp;	/* on to next try */
4761573Srgrimes				ssp = sep;
4771573Srgrimes			}
4781573Srgrimes			if (sep == NULL) {
4791573Srgrimes				/* last successful match */
4801573Srgrimes				sep = ssp;
4811573Srgrimes				ssp = oldssp;
4821573Srgrimes			}
4831573Srgrimes			assert(sep == rest);	/* must exhaust substring */
4841573Srgrimes			assert(slow(m, ssp, sep, ssub, esub) == rest);
4851573Srgrimes			dp = dissect(m, ssp, sep, ssub, esub);
4861573Srgrimes			assert(dp == sep);
4871573Srgrimes			sp = rest;
4881573Srgrimes			break;
4891573Srgrimes		case OCH_:
4901573Srgrimes			stp = stop;
4911573Srgrimes			for (;;) {
4921573Srgrimes				/* how long could this one be? */
4931573Srgrimes				rest = slow(m, sp, stp, ss, es);
4941573Srgrimes				assert(rest != NULL);	/* it did match */
4951573Srgrimes				/* could the rest match the rest? */
4961573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4971573Srgrimes				if (tail == stop)
4981573Srgrimes					break;		/* yes! */
4991573Srgrimes				/* no -- try a shorter match for this one */
5001573Srgrimes				stp = rest - 1;
5011573Srgrimes				assert(stp >= sp);	/* it did work */
5021573Srgrimes			}
5031573Srgrimes			ssub = ss + 1;
5041573Srgrimes			esub = ss + OPND(m->g->strip[ss]) - 1;
5051573Srgrimes			assert(OP(m->g->strip[esub]) == OOR1);
5061573Srgrimes			for (;;) {	/* find first matching branch */
5071573Srgrimes				if (slow(m, sp, rest, ssub, esub) == rest)
5081573Srgrimes					break;	/* it matched all of it */
5091573Srgrimes				/* that one missed, try next one */
5101573Srgrimes				assert(OP(m->g->strip[esub]) == OOR1);
5111573Srgrimes				esub++;
5121573Srgrimes				assert(OP(m->g->strip[esub]) == OOR2);
5131573Srgrimes				ssub = esub + 1;
5141573Srgrimes				esub += OPND(m->g->strip[esub]);
5151573Srgrimes				if (OP(m->g->strip[esub]) == OOR2)
5161573Srgrimes					esub--;
5171573Srgrimes				else
5181573Srgrimes					assert(OP(m->g->strip[esub]) == O_CH);
5191573Srgrimes			}
5201573Srgrimes			dp = dissect(m, sp, rest, ssub, esub);
5211573Srgrimes			assert(dp == rest);
5221573Srgrimes			sp = rest;
5231573Srgrimes			break;
5241573Srgrimes		case O_PLUS:
5251573Srgrimes		case O_QUEST:
5261573Srgrimes		case OOR1:
5271573Srgrimes		case OOR2:
5281573Srgrimes		case O_CH:
5291573Srgrimes			assert(nope);
5301573Srgrimes			break;
5311573Srgrimes		case OLPAREN:
5321573Srgrimes			i = OPND(m->g->strip[ss]);
5331573Srgrimes			assert(0 < i && i <= m->g->nsub);
5341573Srgrimes			m->pmatch[i].rm_so = sp - m->offp;
5351573Srgrimes			break;
5361573Srgrimes		case ORPAREN:
5371573Srgrimes			i = OPND(m->g->strip[ss]);
5381573Srgrimes			assert(0 < i && i <= m->g->nsub);
5391573Srgrimes			m->pmatch[i].rm_eo = sp - m->offp;
5401573Srgrimes			break;
5411573Srgrimes		default:		/* uh oh */
5421573Srgrimes			assert(nope);
5431573Srgrimes			break;
5441573Srgrimes		}
5451573Srgrimes	}
5461573Srgrimes
5471573Srgrimes	assert(sp == stop);
5481573Srgrimes	return(sp);
5491573Srgrimes}
5501573Srgrimes
5511573Srgrimes/*
5521573Srgrimes - backref - figure out what matched what, figuring in back references
553169982Sdelphij == static const char *backref(struct match *m, const char *start, \
554169982Sdelphij ==	const char *stop, sopno startst, sopno stopst, sopno lev);
5551573Srgrimes */
556169982Sdelphijstatic const char *		/* == stop (success) or NULL (failure) */
557169982Sdelphijbackref(struct match *m,
558169982Sdelphij	const char *start,
559169982Sdelphij	const char *stop,
560169982Sdelphij	sopno startst,
561169982Sdelphij	sopno stopst,
562169982Sdelphij	sopno lev,		/* PLUS nesting level */
563169982Sdelphij	int rec)
5641573Srgrimes{
56592889Sobrien	int i;
56692889Sobrien	sopno ss;		/* start sop of current subRE */
567169982Sdelphij	const char *sp;		/* start of string matched by it */
56892889Sobrien	sopno ssub;		/* start sop of subsubRE */
56992889Sobrien	sopno esub;		/* end sop of subsubRE */
570169982Sdelphij	const char *ssp;	/* start of string matched by subsubRE */
571169982Sdelphij	const char *dp;
57292889Sobrien	size_t len;
57392889Sobrien	int hard;
57492889Sobrien	sop s;
57592889Sobrien	regoff_t offsave;
57692889Sobrien	cset *cs;
577132019Stjr	wint_t wc;
5781573Srgrimes
5791573Srgrimes	AT("back", start, stop, startst, stopst);
5801573Srgrimes	sp = start;
5811573Srgrimes
5821573Srgrimes	/* get as far as we can with easy stuff */
5831573Srgrimes	hard = 0;
5841573Srgrimes	for (ss = startst; !hard && ss < stopst; ss++)
5851573Srgrimes		switch (OP(s = m->g->strip[ss])) {
5861573Srgrimes		case OCHAR:
587132019Stjr			if (sp == stop)
5881573Srgrimes				return(NULL);
589132019Stjr			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
590132019Stjr			if (wc != OPND(s))
591132019Stjr				return(NULL);
5921573Srgrimes			break;
5931573Srgrimes		case OANY:
5941573Srgrimes			if (sp == stop)
5951573Srgrimes				return(NULL);
596132019Stjr			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
597132019Stjr			if (wc == BADCHAR)
598132019Stjr				return (NULL);
5991573Srgrimes			break;
6001573Srgrimes		case OANYOF:
601132019Stjr			if (sp == stop)
602132019Stjr				return (NULL);
6031573Srgrimes			cs = &m->g->sets[OPND(s)];
604132019Stjr			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
605132019Stjr			if (wc == BADCHAR || !CHIN(cs, wc))
6061573Srgrimes				return(NULL);
6071573Srgrimes			break;
6081573Srgrimes		case OBOL:
6091573Srgrimes			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
6101573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
6111573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
6121573Srgrimes				{ /* yes */ }
6131573Srgrimes			else
6141573Srgrimes				return(NULL);
6151573Srgrimes			break;
6161573Srgrimes		case OEOL:
6171573Srgrimes			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
6181573Srgrimes					(sp < m->endp && *sp == '\n' &&
6191573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
6201573Srgrimes				{ /* yes */ }
6211573Srgrimes			else
6221573Srgrimes				return(NULL);
6231573Srgrimes			break;
6241573Srgrimes		case OBOW:
6251573Srgrimes			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
6261573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
6271573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6281573Srgrimes					(sp > m->beginp &&
6291573Srgrimes							!ISWORD(*(sp-1))) ) &&
6301573Srgrimes					(sp < m->endp && ISWORD(*sp)) )
6311573Srgrimes				{ /* yes */ }
6321573Srgrimes			else
6331573Srgrimes				return(NULL);
6341573Srgrimes			break;
6351573Srgrimes		case OEOW:
6361573Srgrimes			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
6371573Srgrimes					(sp < m->endp && *sp == '\n' &&
6381573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6391573Srgrimes					(sp < m->endp && !ISWORD(*sp)) ) &&
6401573Srgrimes					(sp > m->beginp && ISWORD(*(sp-1))) )
6411573Srgrimes				{ /* yes */ }
6421573Srgrimes			else
6431573Srgrimes				return(NULL);
6441573Srgrimes			break;
6451573Srgrimes		case O_QUEST:
6461573Srgrimes			break;
6471573Srgrimes		case OOR1:	/* matches null but needs to skip */
6481573Srgrimes			ss++;
6491573Srgrimes			s = m->g->strip[ss];
6501573Srgrimes			do {
6511573Srgrimes				assert(OP(s) == OOR2);
6521573Srgrimes				ss += OPND(s);
6531573Srgrimes			} while (OP(s = m->g->strip[ss]) != O_CH);
6541573Srgrimes			/* note that the ss++ gets us past the O_CH */
6551573Srgrimes			break;
6561573Srgrimes		default:	/* have to make a choice */
6571573Srgrimes			hard = 1;
6581573Srgrimes			break;
6591573Srgrimes		}
6601573Srgrimes	if (!hard) {		/* that was it! */
6611573Srgrimes		if (sp != stop)
6621573Srgrimes			return(NULL);
6631573Srgrimes		return(sp);
6641573Srgrimes	}
6651573Srgrimes	ss--;			/* adjust for the for's final increment */
6661573Srgrimes
6671573Srgrimes	/* the hard stuff */
6681573Srgrimes	AT("hard", sp, stop, ss, stopst);
6691573Srgrimes	s = m->g->strip[ss];
6701573Srgrimes	switch (OP(s)) {
6711573Srgrimes	case OBACK_:		/* the vilest depths */
6721573Srgrimes		i = OPND(s);
6731573Srgrimes		assert(0 < i && i <= m->g->nsub);
6741573Srgrimes		if (m->pmatch[i].rm_eo == -1)
6751573Srgrimes			return(NULL);
6761573Srgrimes		assert(m->pmatch[i].rm_so != -1);
6771573Srgrimes		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
678167222Sdelphij		if (len == 0 && rec++ > MAX_RECURSION)
679167216Sdelphij			return(NULL);
6801573Srgrimes		assert(stop - m->beginp >= len);
6811573Srgrimes		if (sp > stop - len)
6821573Srgrimes			return(NULL);	/* not enough left to match */
6831573Srgrimes		ssp = m->offp + m->pmatch[i].rm_so;
6841573Srgrimes		if (memcmp(sp, ssp, len) != 0)
6851573Srgrimes			return(NULL);
6861573Srgrimes		while (m->g->strip[ss] != SOP(O_BACK, i))
6871573Srgrimes			ss++;
688167222Sdelphij		return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
6891573Srgrimes	case OQUEST_:		/* to null or not */
690167222Sdelphij		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
6911573Srgrimes		if (dp != NULL)
6921573Srgrimes			return(dp);	/* not */
693167222Sdelphij		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
6941573Srgrimes	case OPLUS_:
6951573Srgrimes		assert(m->lastpos != NULL);
6961573Srgrimes		assert(lev+1 <= m->g->nplus);
6971573Srgrimes		m->lastpos[lev+1] = sp;
698167222Sdelphij		return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
6991573Srgrimes	case O_PLUS:
7001573Srgrimes		if (sp == m->lastpos[lev])	/* last pass matched null */
701167222Sdelphij			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
7021573Srgrimes		/* try another pass */
7031573Srgrimes		m->lastpos[lev] = sp;
704167222Sdelphij		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
7051573Srgrimes		if (dp == NULL)
706167222Sdelphij			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
7071573Srgrimes		else
7081573Srgrimes			return(dp);
7091573Srgrimes	case OCH_:		/* find the right one, if any */
7101573Srgrimes		ssub = ss + 1;
7111573Srgrimes		esub = ss + OPND(s) - 1;
7121573Srgrimes		assert(OP(m->g->strip[esub]) == OOR1);
7131573Srgrimes		for (;;) {	/* find first matching branch */
714167222Sdelphij			dp = backref(m, sp, stop, ssub, esub, lev, rec);
7151573Srgrimes			if (dp != NULL)
7161573Srgrimes				return(dp);
7171573Srgrimes			/* that one missed, try next one */
7181573Srgrimes			if (OP(m->g->strip[esub]) == O_CH)
7191573Srgrimes				return(NULL);	/* there is none */
7201573Srgrimes			esub++;
7211573Srgrimes			assert(OP(m->g->strip[esub]) == OOR2);
7221573Srgrimes			ssub = esub + 1;
7231573Srgrimes			esub += OPND(m->g->strip[esub]);
7241573Srgrimes			if (OP(m->g->strip[esub]) == OOR2)
7251573Srgrimes				esub--;
7261573Srgrimes			else
7271573Srgrimes				assert(OP(m->g->strip[esub]) == O_CH);
7281573Srgrimes		}
729265726Spfg		/* NOTREACHED */
7301573Srgrimes		break;
7311573Srgrimes	case OLPAREN:		/* must undo assignment if rest fails */
7321573Srgrimes		i = OPND(s);
7331573Srgrimes		assert(0 < i && i <= m->g->nsub);
7341573Srgrimes		offsave = m->pmatch[i].rm_so;
7351573Srgrimes		m->pmatch[i].rm_so = sp - m->offp;
736167222Sdelphij		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
7371573Srgrimes		if (dp != NULL)
7381573Srgrimes			return(dp);
7391573Srgrimes		m->pmatch[i].rm_so = offsave;
7401573Srgrimes		return(NULL);
7411573Srgrimes	case ORPAREN:		/* must undo assignment if rest fails */
7421573Srgrimes		i = OPND(s);
7431573Srgrimes		assert(0 < i && i <= m->g->nsub);
7441573Srgrimes		offsave = m->pmatch[i].rm_eo;
7451573Srgrimes		m->pmatch[i].rm_eo = sp - m->offp;
746167222Sdelphij		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
7471573Srgrimes		if (dp != NULL)
7481573Srgrimes			return(dp);
7491573Srgrimes		m->pmatch[i].rm_eo = offsave;
7501573Srgrimes		return(NULL);
7511573Srgrimes	default:		/* uh oh */
7521573Srgrimes		assert(nope);
7531573Srgrimes		break;
7541573Srgrimes	}
7551573Srgrimes
7561573Srgrimes	/* "can't happen" */
7571573Srgrimes	assert(nope);
7581573Srgrimes	/* NOTREACHED */
75911664Sphk	return "shut up gcc";
7601573Srgrimes}
7611573Srgrimes
7621573Srgrimes/*
7631573Srgrimes - fast - step through the string at top speed
764169982Sdelphij == static const char *fast(struct match *m, const char *start, \
765169982Sdelphij ==	const char *stop, sopno startst, sopno stopst);
7661573Srgrimes */
767169982Sdelphijstatic const char *		/* where tentative match ended, or NULL */
768169982Sdelphijfast(	struct match *m,
769169982Sdelphij	const char *start,
770169982Sdelphij	const char *stop,
771169982Sdelphij	sopno startst,
772169982Sdelphij	sopno stopst)
7731573Srgrimes{
77492889Sobrien	states st = m->st;
77592889Sobrien	states fresh = m->fresh;
77692889Sobrien	states tmp = m->tmp;
777169982Sdelphij	const char *p = start;
778132019Stjr	wint_t c;
779132019Stjr	wint_t lastc;		/* previous c */
780132019Stjr	wint_t flagch;
78192889Sobrien	int i;
782169982Sdelphij	const char *coldp;	/* last p after which no match was underway */
783132019Stjr	size_t clen;
7841573Srgrimes
7851573Srgrimes	CLEAR(st);
7861573Srgrimes	SET1(st, startst);
787197245Sdds	SP("fast", st, *p);
7881573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
7891573Srgrimes	ASSIGN(fresh, st);
7901573Srgrimes	SP("start", st, *p);
7911573Srgrimes	coldp = NULL;
792132019Stjr	if (start == m->beginp)
793132019Stjr		c = OUT;
794132019Stjr	else {
795132019Stjr		/*
796132019Stjr		 * XXX Wrong if the previous character was multi-byte.
797132019Stjr		 * Newline never is (in encodings supported by FreeBSD),
798132019Stjr		 * so this only breaks the ISWORD tests below.
799132019Stjr		 */
800132019Stjr		c = (uch)*(start - 1);
801132019Stjr	}
8021573Srgrimes	for (;;) {
8031573Srgrimes		/* next character */
8041573Srgrimes		lastc = c;
805149180Stjr		if (p == m->endp) {
806149180Stjr			clen = 0;
807132019Stjr			c = OUT;
808149180Stjr		} else
809149180Stjr			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
8101573Srgrimes		if (EQ(st, fresh))
8111573Srgrimes			coldp = p;
8121573Srgrimes
8131573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
8141573Srgrimes		flagch = '\0';
8151573Srgrimes		i = 0;
8161573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
8171573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
8181573Srgrimes			flagch = BOL;
8191573Srgrimes			i = m->g->nbol;
8201573Srgrimes		}
8211573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
8221573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
8231573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
8241573Srgrimes			i += m->g->neol;
8251573Srgrimes		}
8261573Srgrimes		if (i != 0) {
8271573Srgrimes			for (; i > 0; i--)
8281573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
8291573Srgrimes			SP("boleol", st, c);
8301573Srgrimes		}
8311573Srgrimes
8321573Srgrimes		/* how about a word boundary? */
8331573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
8341573Srgrimes					(c != OUT && ISWORD(c)) ) {
8351573Srgrimes			flagch = BOW;
8361573Srgrimes		}
8371573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
8381573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
8391573Srgrimes			flagch = EOW;
8401573Srgrimes		}
8411573Srgrimes		if (flagch == BOW || flagch == EOW) {
8421573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
8431573Srgrimes			SP("boweow", st, c);
8441573Srgrimes		}
8451573Srgrimes
8461573Srgrimes		/* are we done? */
847149180Stjr		if (ISSET(st, stopst) || p == stop || clen > stop - p)
8481573Srgrimes			break;		/* NOTE BREAK OUT */
8491573Srgrimes
8501573Srgrimes		/* no, we must deal with this character */
8511573Srgrimes		ASSIGN(tmp, st);
8521573Srgrimes		ASSIGN(st, fresh);
8531573Srgrimes		assert(c != OUT);
8541573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
8551573Srgrimes		SP("aft", st, c);
8561573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
857132019Stjr		p += clen;
8581573Srgrimes	}
8591573Srgrimes
8601573Srgrimes	assert(coldp != NULL);
8611573Srgrimes	m->coldp = coldp;
8621573Srgrimes	if (ISSET(st, stopst))
863137959Stjr		return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
8641573Srgrimes	else
8651573Srgrimes		return(NULL);
8661573Srgrimes}
8671573Srgrimes
8681573Srgrimes/*
8691573Srgrimes - slow - step through the string more deliberately
870169982Sdelphij == static const char *slow(struct match *m, const char *start, \
871169982Sdelphij ==	const char *stop, sopno startst, sopno stopst);
8721573Srgrimes */
873169982Sdelphijstatic const char *		/* where it ended */
874169982Sdelphijslow(	struct match *m,
875169982Sdelphij	const char *start,
876169982Sdelphij	const char *stop,
877169982Sdelphij	sopno startst,
878169982Sdelphij	sopno stopst)
8791573Srgrimes{
88092889Sobrien	states st = m->st;
88192889Sobrien	states empty = m->empty;
88292889Sobrien	states tmp = m->tmp;
883169982Sdelphij	const char *p = start;
884132019Stjr	wint_t c;
885132019Stjr	wint_t lastc;		/* previous c */
886132019Stjr	wint_t flagch;
88792889Sobrien	int i;
888169982Sdelphij	const char *matchp;	/* last p at which a match ended */
889132019Stjr	size_t clen;
8901573Srgrimes
8911573Srgrimes	AT("slow", start, stop, startst, stopst);
8921573Srgrimes	CLEAR(st);
8931573Srgrimes	SET1(st, startst);
8941573Srgrimes	SP("sstart", st, *p);
8951573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
8961573Srgrimes	matchp = NULL;
897132019Stjr	if (start == m->beginp)
898132019Stjr		c = OUT;
899132019Stjr	else {
900132019Stjr		/*
901132019Stjr		 * XXX Wrong if the previous character was multi-byte.
902132019Stjr		 * Newline never is (in encodings supported by FreeBSD),
903132019Stjr		 * so this only breaks the ISWORD tests below.
904132019Stjr		 */
905132019Stjr		c = (uch)*(start - 1);
906132019Stjr	}
9071573Srgrimes	for (;;) {
9081573Srgrimes		/* next character */
9091573Srgrimes		lastc = c;
910132019Stjr		if (p == m->endp) {
911132019Stjr			c = OUT;
912132019Stjr			clen = 0;
913132019Stjr		} else
914149180Stjr			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
9151573Srgrimes
9161573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
9171573Srgrimes		flagch = '\0';
9181573Srgrimes		i = 0;
9191573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
9201573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
9211573Srgrimes			flagch = BOL;
9221573Srgrimes			i = m->g->nbol;
9231573Srgrimes		}
9241573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
9251573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
9261573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
9271573Srgrimes			i += m->g->neol;
9281573Srgrimes		}
9291573Srgrimes		if (i != 0) {
9301573Srgrimes			for (; i > 0; i--)
9311573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
9321573Srgrimes			SP("sboleol", st, c);
9331573Srgrimes		}
9341573Srgrimes
9351573Srgrimes		/* how about a word boundary? */
9361573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
9371573Srgrimes					(c != OUT && ISWORD(c)) ) {
9381573Srgrimes			flagch = BOW;
9391573Srgrimes		}
9401573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
9411573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
9421573Srgrimes			flagch = EOW;
9431573Srgrimes		}
9441573Srgrimes		if (flagch == BOW || flagch == EOW) {
9451573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
9461573Srgrimes			SP("sboweow", st, c);
9471573Srgrimes		}
9481573Srgrimes
9491573Srgrimes		/* are we done? */
9501573Srgrimes		if (ISSET(st, stopst))
9511573Srgrimes			matchp = p;
952149180Stjr		if (EQ(st, empty) || p == stop || clen > stop - p)
9531573Srgrimes			break;		/* NOTE BREAK OUT */
9541573Srgrimes
9551573Srgrimes		/* no, we must deal with this character */
9561573Srgrimes		ASSIGN(tmp, st);
9571573Srgrimes		ASSIGN(st, empty);
9581573Srgrimes		assert(c != OUT);
9591573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
9601573Srgrimes		SP("saft", st, c);
9611573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
962132019Stjr		p += clen;
9631573Srgrimes	}
9641573Srgrimes
9651573Srgrimes	return(matchp);
9661573Srgrimes}
9671573Srgrimes
9681573Srgrimes
9691573Srgrimes/*
9701573Srgrimes - step - map set of states reachable before char to set reachable after
97192889Sobrien == static states step(struct re_guts *g, sopno start, sopno stop, \
97292889Sobrien ==	states bef, int ch, states aft);
973132019Stjr == #define	BOL	(OUT-1)
974132019Stjr == #define	EOL	(BOL-1)
975132019Stjr == #define	BOLEOL	(BOL-2)
976132019Stjr == #define	NOTHING	(BOL-3)
977132019Stjr == #define	BOW	(BOL-4)
978132019Stjr == #define	EOW	(BOL-5)
979132019Stjr == #define	BADCHAR	(BOL-6)
980132019Stjr == #define	NONCHAR(c)	((c) <= OUT)
9811573Srgrimes */
9821573Srgrimesstatic states
983169982Sdelphijstep(struct re_guts *g,
984169982Sdelphij	sopno start,		/* start state within strip */
985169982Sdelphij	sopno stop,		/* state after stop state within strip */
986169982Sdelphij	states bef,		/* states reachable before */
987169982Sdelphij	wint_t ch,		/* character or NONCHAR code */
988169982Sdelphij	states aft)		/* states already known reachable after */
9891573Srgrimes{
99092889Sobrien	cset *cs;
99192889Sobrien	sop s;
99292889Sobrien	sopno pc;
99392889Sobrien	onestate here;		/* note, macros know this name */
99492889Sobrien	sopno look;
99592889Sobrien	int i;
9961573Srgrimes
9971573Srgrimes	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
9981573Srgrimes		s = g->strip[pc];
9991573Srgrimes		switch (OP(s)) {
10001573Srgrimes		case OEND:
10011573Srgrimes			assert(pc == stop-1);
10021573Srgrimes			break;
10031573Srgrimes		case OCHAR:
10041573Srgrimes			/* only characters can match */
1005132019Stjr			assert(!NONCHAR(ch) || ch != OPND(s));
1006132019Stjr			if (ch == OPND(s))
10071573Srgrimes				FWD(aft, bef, 1);
10081573Srgrimes			break;
10091573Srgrimes		case OBOL:
10101573Srgrimes			if (ch == BOL || ch == BOLEOL)
10111573Srgrimes				FWD(aft, bef, 1);
10121573Srgrimes			break;
10131573Srgrimes		case OEOL:
10141573Srgrimes			if (ch == EOL || ch == BOLEOL)
10151573Srgrimes				FWD(aft, bef, 1);
10161573Srgrimes			break;
10171573Srgrimes		case OBOW:
10181573Srgrimes			if (ch == BOW)
10191573Srgrimes				FWD(aft, bef, 1);
10201573Srgrimes			break;
10211573Srgrimes		case OEOW:
10221573Srgrimes			if (ch == EOW)
10231573Srgrimes				FWD(aft, bef, 1);
10241573Srgrimes			break;
10251573Srgrimes		case OANY:
10261573Srgrimes			if (!NONCHAR(ch))
10271573Srgrimes				FWD(aft, bef, 1);
10281573Srgrimes			break;
10291573Srgrimes		case OANYOF:
10301573Srgrimes			cs = &g->sets[OPND(s)];
10311573Srgrimes			if (!NONCHAR(ch) && CHIN(cs, ch))
10321573Srgrimes				FWD(aft, bef, 1);
10331573Srgrimes			break;
10341573Srgrimes		case OBACK_:		/* ignored here */
10351573Srgrimes		case O_BACK:
10361573Srgrimes			FWD(aft, aft, 1);
10371573Srgrimes			break;
10381573Srgrimes		case OPLUS_:		/* forward, this is just an empty */
10391573Srgrimes			FWD(aft, aft, 1);
10401573Srgrimes			break;
10411573Srgrimes		case O_PLUS:		/* both forward and back */
10421573Srgrimes			FWD(aft, aft, 1);
10431573Srgrimes			i = ISSETBACK(aft, OPND(s));
10441573Srgrimes			BACK(aft, aft, OPND(s));
10451573Srgrimes			if (!i && ISSETBACK(aft, OPND(s))) {
10461573Srgrimes				/* oho, must reconsider loop body */
10471573Srgrimes				pc -= OPND(s) + 1;
10481573Srgrimes				INIT(here, pc);
10491573Srgrimes			}
10501573Srgrimes			break;
10511573Srgrimes		case OQUEST_:		/* two branches, both forward */
10521573Srgrimes			FWD(aft, aft, 1);
10531573Srgrimes			FWD(aft, aft, OPND(s));
10541573Srgrimes			break;
10551573Srgrimes		case O_QUEST:		/* just an empty */
10561573Srgrimes			FWD(aft, aft, 1);
10571573Srgrimes			break;
10581573Srgrimes		case OLPAREN:		/* not significant here */
10591573Srgrimes		case ORPAREN:
10601573Srgrimes			FWD(aft, aft, 1);
10611573Srgrimes			break;
10621573Srgrimes		case OCH_:		/* mark the first two branches */
10631573Srgrimes			FWD(aft, aft, 1);
10641573Srgrimes			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10651573Srgrimes			FWD(aft, aft, OPND(s));
10661573Srgrimes			break;
10671573Srgrimes		case OOR1:		/* done a branch, find the O_CH */
10681573Srgrimes			if (ISSTATEIN(aft, here)) {
10691573Srgrimes				for (look = 1;
10701573Srgrimes						OP(s = g->strip[pc+look]) != O_CH;
10711573Srgrimes						look += OPND(s))
10721573Srgrimes					assert(OP(s) == OOR2);
1073197246Sdds				FWD(aft, aft, look + 1);
10741573Srgrimes			}
10751573Srgrimes			break;
10761573Srgrimes		case OOR2:		/* propagate OCH_'s marking */
10771573Srgrimes			FWD(aft, aft, 1);
10781573Srgrimes			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
10791573Srgrimes				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10801573Srgrimes				FWD(aft, aft, OPND(s));
10811573Srgrimes			}
10821573Srgrimes			break;
10831573Srgrimes		case O_CH:		/* just empty */
10841573Srgrimes			FWD(aft, aft, 1);
10851573Srgrimes			break;
10861573Srgrimes		default:		/* ooooops... */
10871573Srgrimes			assert(nope);
10881573Srgrimes			break;
10891573Srgrimes		}
10901573Srgrimes	}
10911573Srgrimes
10921573Srgrimes	return(aft);
10931573Srgrimes}
10941573Srgrimes
10951573Srgrimes#ifdef REDEBUG
10961573Srgrimes/*
10971573Srgrimes - print - print a set of states
10981573Srgrimes == #ifdef REDEBUG
1099169982Sdelphij == static void print(struct match *m, const char *caption, states st, \
11001573Srgrimes ==	int ch, FILE *d);
11011573Srgrimes == #endif
11021573Srgrimes */
11031573Srgrimesstatic void
1104169982Sdelphijprint(struct match *m,
1105169982Sdelphij	const char *caption,
1106169982Sdelphij	states st,
1107169982Sdelphij	int ch,
1108169982Sdelphij	FILE *d)
11091573Srgrimes{
111092889Sobrien	struct re_guts *g = m->g;
111192889Sobrien	int i;
111292889Sobrien	int first = 1;
11131573Srgrimes
11141573Srgrimes	if (!(m->eflags&REG_TRACE))
11151573Srgrimes		return;
11161573Srgrimes
11171573Srgrimes	fprintf(d, "%s", caption);
11181573Srgrimes	if (ch != '\0')
11191573Srgrimes		fprintf(d, " %s", pchar(ch));
11201573Srgrimes	for (i = 0; i < g->nstates; i++)
11211573Srgrimes		if (ISSET(st, i)) {
11221573Srgrimes			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
11231573Srgrimes			first = 0;
11241573Srgrimes		}
11251573Srgrimes	fprintf(d, "\n");
11261573Srgrimes}
11271573Srgrimes
11288870Srgrimes/*
11291573Srgrimes - at - print current situation
11301573Srgrimes == #ifdef REDEBUG
1131169982Sdelphij == static void at(struct match *m, const char *title, const char *start, \
1132169982Sdelphij ==			 const char *stop, sopno startst, sopno stopst);
11331573Srgrimes == #endif
11341573Srgrimes */
11351573Srgrimesstatic void
1136169982Sdelphijat(	struct match *m,
1137169982Sdelphij	const char *title,
1138169982Sdelphij	const char *start,
1139169982Sdelphij	const char *stop,
1140169982Sdelphij	sopno startst,
1141169982Sdelphij	sopno stopst)
11421573Srgrimes{
11431573Srgrimes	if (!(m->eflags&REG_TRACE))
11441573Srgrimes		return;
11451573Srgrimes
11461573Srgrimes	printf("%s %s-", title, pchar(*start));
11471573Srgrimes	printf("%s ", pchar(*stop));
11481573Srgrimes	printf("%ld-%ld\n", (long)startst, (long)stopst);
11491573Srgrimes}
11501573Srgrimes
11511573Srgrimes#ifndef PCHARDONE
11521573Srgrimes#define	PCHARDONE	/* never again */
11531573Srgrimes/*
11541573Srgrimes - pchar - make a character printable
11551573Srgrimes == #ifdef REDEBUG
1156169982Sdelphij == static const char *pchar(int ch);
11571573Srgrimes == #endif
11581573Srgrimes *
11591573Srgrimes * Is this identical to regchar() over in debug.c?  Well, yes.  But a
11601573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to
11611573Srgrimes * a matching debug.o, and this is convenient.  It all disappears in
11621573Srgrimes * the non-debug compilation anyway, so it doesn't matter much.
11631573Srgrimes */
1164169982Sdelphijstatic const char *		/* -> representation */
1165169982Sdelphijpchar(int ch)
11661573Srgrimes{
11671573Srgrimes	static char pbuf[10];
11681573Srgrimes
116917508Sache	if (isprint((uch)ch) || ch == ' ')
11701573Srgrimes		sprintf(pbuf, "%c", ch);
11711573Srgrimes	else
11721573Srgrimes		sprintf(pbuf, "\\%o", ch);
11731573Srgrimes	return(pbuf);
11741573Srgrimes}
11751573Srgrimes#endif
11761573Srgrimes#endif
11771573Srgrimes
11781573Srgrimes#undef	matcher
11791573Srgrimes#undef	fast
11801573Srgrimes#undef	slow
11811573Srgrimes#undef	dissect
11821573Srgrimes#undef	backref
11831573Srgrimes#undef	step
11841573Srgrimes#undef	print
11851573Srgrimes#undef	at
11861573Srgrimes#undef	match
1187