engine.c revision 167216
11573Srgrimes/*-
21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
31573Srgrimes * Copyright (c) 1992, 1993, 1994
41573Srgrimes *	The Regents of the University of California.  All rights reserved.
51573Srgrimes *
61573Srgrimes * This code is derived from software contributed to Berkeley by
71573Srgrimes * Henry Spencer.
81573Srgrimes *
91573Srgrimes * Redistribution and use in source and binary forms, with or without
101573Srgrimes * modification, are permitted provided that the following conditions
111573Srgrimes * are met:
121573Srgrimes * 1. Redistributions of source code must retain the above copyright
131573Srgrimes *    notice, this list of conditions and the following disclaimer.
141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151573Srgrimes *    notice, this list of conditions and the following disclaimer in the
161573Srgrimes *    documentation and/or other materials provided with the distribution.
171573Srgrimes * 4. Neither the name of the University nor the names of its contributors
181573Srgrimes *    may be used to endorse or promote products derived from this software
191573Srgrimes *    without specific prior written permission.
201573Srgrimes *
211573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311573Srgrimes * SUCH DAMAGE.
321573Srgrimes *
331573Srgrimes *	@(#)engine.c	8.5 (Berkeley) 3/20/94
341573Srgrimes */
351573Srgrimes
3692986Sobrien#include <sys/cdefs.h>
3792986Sobrien__FBSDID("$FreeBSD: head/lib/libc/regex/engine.c 167216 2007-03-05 03:07:36Z delphij $");
3892986Sobrien
391573Srgrimes/*
401573Srgrimes * The matching engine and friends.  This file is #included by regexec.c
411573Srgrimes * after suitable #defines of a variety of macros used herein, so that
421573Srgrimes * different state representations can be used without duplicating masses
431573Srgrimes * of code.
441573Srgrimes */
451573Srgrimes
461573Srgrimes#ifdef SNAMES
471573Srgrimes#define	matcher	smatcher
481573Srgrimes#define	fast	sfast
491573Srgrimes#define	slow	sslow
501573Srgrimes#define	dissect	sdissect
511573Srgrimes#define	backref	sbackref
521573Srgrimes#define	step	sstep
531573Srgrimes#define	print	sprint
541573Srgrimes#define	at	sat
551573Srgrimes#define	match	smat
561573Srgrimes#endif
571573Srgrimes#ifdef LNAMES
581573Srgrimes#define	matcher	lmatcher
591573Srgrimes#define	fast	lfast
601573Srgrimes#define	slow	lslow
611573Srgrimes#define	dissect	ldissect
621573Srgrimes#define	backref	lbackref
631573Srgrimes#define	step	lstep
641573Srgrimes#define	print	lprint
651573Srgrimes#define	at	lat
661573Srgrimes#define	match	lmat
671573Srgrimes#endif
68132019Stjr#ifdef MNAMES
69132019Stjr#define	matcher	mmatcher
70132019Stjr#define	fast	mfast
71132019Stjr#define	slow	mslow
72132019Stjr#define	dissect	mdissect
73132019Stjr#define	backref	mbackref
74132019Stjr#define	step	mstep
75132019Stjr#define	print	mprint
76132019Stjr#define	at	mat
77132019Stjr#define	match	mmat
78132019Stjr#endif
791573Srgrimes
801573Srgrimes/* another structure passed up and down to avoid zillions of parameters */
811573Srgrimesstruct match {
821573Srgrimes	struct re_guts *g;
831573Srgrimes	int eflags;
841573Srgrimes	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
851573Srgrimes	char *offp;		/* offsets work from here */
861573Srgrimes	char *beginp;		/* start of string -- virtual NUL precedes */
871573Srgrimes	char *endp;		/* end of string -- virtual NUL here */
881573Srgrimes	char *coldp;		/* can be no match starting before here */
891573Srgrimes	char **lastpos;		/* [nplus+1] */
901573Srgrimes	STATEVARS;
911573Srgrimes	states st;		/* current states */
921573Srgrimes	states fresh;		/* states for a fresh start */
931573Srgrimes	states tmp;		/* temporary */
941573Srgrimes	states empty;		/* empty set of states */
95132019Stjr	mbstate_t mbs;		/* multibyte conversion state */
961573Srgrimes};
971573Srgrimes
981573Srgrimes/* ========= begin header generated by ./mkh ========= */
991573Srgrimes#ifdef __cplusplus
1001573Srgrimesextern "C" {
1011573Srgrimes#endif
1021573Srgrimes
1031573Srgrimes/* === engine.c === */
10492905Sobrienstatic int matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
10592905Sobrienstatic char *dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
10692905Sobrienstatic char *backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
10792905Sobrienstatic char *fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
10892905Sobrienstatic char *slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
109132019Stjrstatic states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
110132019Stjr#define	BOL	(OUT-1)
111132019Stjr#define	EOL	(BOL-1)
112132019Stjr#define	BOLEOL	(BOL-2)
113132019Stjr#define	NOTHING	(BOL-3)
114132019Stjr#define	BOW	(BOL-4)
115132019Stjr#define	EOW	(BOL-5)
116132019Stjr#define	BADCHAR	(BOL-6)
117132019Stjr#define	NONCHAR(c)	((c) <= OUT)
1181573Srgrimes#ifdef REDEBUG
11992905Sobrienstatic void print(struct match *m, char *caption, states st, int ch, FILE *d);
1201573Srgrimes#endif
1211573Srgrimes#ifdef REDEBUG
12292905Sobrienstatic void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
1231573Srgrimes#endif
1241573Srgrimes#ifdef REDEBUG
12592905Sobrienstatic char *pchar(int ch);
1261573Srgrimes#endif
1271573Srgrimes
1281573Srgrimes#ifdef __cplusplus
1291573Srgrimes}
1301573Srgrimes#endif
1311573Srgrimes/* ========= end header generated by ./mkh ========= */
1321573Srgrimes
1331573Srgrimes#ifdef REDEBUG
1341573Srgrimes#define	SP(t, s, c)	print(m, t, s, c, stdout)
1351573Srgrimes#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
1361573Srgrimes#define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
1371573Srgrimes#else
1381573Srgrimes#define	SP(t, s, c)	/* nothing */
1391573Srgrimes#define	AT(t, p1, p2, s1, s2)	/* nothing */
1401573Srgrimes#define	NOTE(s)	/* nothing */
1411573Srgrimes#endif
1421573Srgrimes
1431573Srgrimes/*
1441573Srgrimes - matcher - the actual matching engine
14592889Sobrien == static int matcher(struct re_guts *g, char *string, \
1461573Srgrimes ==	size_t nmatch, regmatch_t pmatch[], int eflags);
1471573Srgrimes */
1481573Srgrimesstatic int			/* 0 success, REG_NOMATCH failure */
1491573Srgrimesmatcher(g, string, nmatch, pmatch, eflags)
15092889Sobrienstruct re_guts *g;
1511573Srgrimeschar *string;
1521573Srgrimessize_t nmatch;
1531573Srgrimesregmatch_t pmatch[];
1541573Srgrimesint eflags;
1551573Srgrimes{
15692889Sobrien	char *endp;
15792889Sobrien	int i;
1581573Srgrimes	struct match mv;
15992889Sobrien	struct match *m = &mv;
16092889Sobrien	char *dp;
16192889Sobrien	const sopno gf = g->firststate+1;	/* +1 for OEND */
16292889Sobrien	const sopno gl = g->laststate;
1631573Srgrimes	char *start;
1641573Srgrimes	char *stop;
16562232Sdcs	/* Boyer-Moore algorithms variables */
16692889Sobrien	char *pp;
16762232Sdcs	int cj, mj;
16892889Sobrien	char *mustfirst;
16992889Sobrien	char *mustlast;
17092889Sobrien	int *matchjump;
17192889Sobrien	int *charjump;
1721573Srgrimes
1731573Srgrimes	/* simplify the situation where possible */
1741573Srgrimes	if (g->cflags&REG_NOSUB)
1751573Srgrimes		nmatch = 0;
1761573Srgrimes	if (eflags&REG_STARTEND) {
1771573Srgrimes		start = string + pmatch[0].rm_so;
1781573Srgrimes		stop = string + pmatch[0].rm_eo;
1791573Srgrimes	} else {
1801573Srgrimes		start = string;
1811573Srgrimes		stop = start + strlen(start);
1821573Srgrimes	}
1831573Srgrimes	if (stop < start)
1841573Srgrimes		return(REG_INVARG);
1851573Srgrimes
1861573Srgrimes	/* prescreening; this does wonders for this rather slow code */
1871573Srgrimes	if (g->must != NULL) {
18862391Sdcs		if (g->charjump != NULL && g->matchjump != NULL) {
18962232Sdcs			mustfirst = g->must;
19062232Sdcs			mustlast = g->must + g->mlen - 1;
19162232Sdcs			charjump = g->charjump;
19262232Sdcs			matchjump = g->matchjump;
19362232Sdcs			pp = mustlast;
19462754Sdcs			for (dp = start+g->mlen-1; dp < stop;) {
19562232Sdcs				/* Fast skip non-matches */
196111010Snectar				while (dp < stop && charjump[(int)*dp])
197111010Snectar					dp += charjump[(int)*dp];
19862232Sdcs
19962754Sdcs				if (dp >= stop)
20062232Sdcs					break;
20162232Sdcs
20262232Sdcs				/* Greedy matcher */
20362232Sdcs				/* We depend on not being used for
20462232Sdcs				 * for strings of length 1
20562232Sdcs				 */
20662754Sdcs				while (*--dp == *--pp && pp != mustfirst);
20762232Sdcs
20862754Sdcs				if (*dp == *pp)
20962232Sdcs					break;
21062232Sdcs
21162232Sdcs				/* Jump to next possible match */
21262232Sdcs				mj = matchjump[pp - mustfirst];
213111010Snectar				cj = charjump[(int)*dp];
21462754Sdcs				dp += (cj < mj ? mj : cj);
21562754Sdcs				pp = mustlast;
21662232Sdcs			}
21762391Sdcs			if (pp != mustfirst)
21862232Sdcs				return(REG_NOMATCH);
21962232Sdcs		} else {
22062232Sdcs			for (dp = start; dp < stop; dp++)
22162232Sdcs				if (*dp == g->must[0] &&
22262232Sdcs				    stop - dp >= g->mlen &&
22362232Sdcs				    memcmp(dp, g->must, (size_t)g->mlen) == 0)
22462232Sdcs					break;
22562232Sdcs			if (dp == stop)		/* we didn't find g->must */
22662232Sdcs				return(REG_NOMATCH);
22762232Sdcs		}
2281573Srgrimes	}
2291573Srgrimes
2301573Srgrimes	/* match struct setup */
2311573Srgrimes	m->g = g;
2321573Srgrimes	m->eflags = eflags;
2331573Srgrimes	m->pmatch = NULL;
2341573Srgrimes	m->lastpos = NULL;
2351573Srgrimes	m->offp = string;
2361573Srgrimes	m->beginp = start;
2371573Srgrimes	m->endp = stop;
2381573Srgrimes	STATESETUP(m, 4);
2391573Srgrimes	SETUP(m->st);
2401573Srgrimes	SETUP(m->fresh);
2411573Srgrimes	SETUP(m->tmp);
2421573Srgrimes	SETUP(m->empty);
2431573Srgrimes	CLEAR(m->empty);
244132019Stjr	ZAPSTATE(&m->mbs);
2451573Srgrimes
24662391Sdcs	/* Adjust start according to moffset, to speed things up */
24762391Sdcs	if (g->moffset > -1)
24862854Sdcs		start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
24962391Sdcs
2501573Srgrimes	/* this loop does only one repetition except for backrefs */
2511573Srgrimes	for (;;) {
2521573Srgrimes		endp = fast(m, start, stop, gf, gl);
2531573Srgrimes		if (endp == NULL) {		/* a miss */
254139437Sdds			if (m->pmatch != NULL)
255139437Sdds				free((char *)m->pmatch);
256139437Sdds			if (m->lastpos != NULL)
257139437Sdds				free((char *)m->lastpos);
2581573Srgrimes			STATETEARDOWN(m);
2591573Srgrimes			return(REG_NOMATCH);
2601573Srgrimes		}
2611573Srgrimes		if (nmatch == 0 && !g->backrefs)
2621573Srgrimes			break;		/* no further info needed */
2631573Srgrimes
2641573Srgrimes		/* where? */
2651573Srgrimes		assert(m->coldp != NULL);
2661573Srgrimes		for (;;) {
2671573Srgrimes			NOTE("finding start");
2681573Srgrimes			endp = slow(m, m->coldp, stop, gf, gl);
2691573Srgrimes			if (endp != NULL)
2701573Srgrimes				break;
2711573Srgrimes			assert(m->coldp < m->endp);
272132019Stjr			m->coldp += XMBRTOWC(NULL, m->coldp,
273132019Stjr			    m->endp - m->coldp, &m->mbs, 0);
2741573Srgrimes		}
2751573Srgrimes		if (nmatch == 1 && !g->backrefs)
2761573Srgrimes			break;		/* no further info needed */
2771573Srgrimes
2781573Srgrimes		/* oh my, he wants the subexpressions... */
2791573Srgrimes		if (m->pmatch == NULL)
2801573Srgrimes			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
2811573Srgrimes							sizeof(regmatch_t));
2821573Srgrimes		if (m->pmatch == NULL) {
2831573Srgrimes			STATETEARDOWN(m);
2841573Srgrimes			return(REG_ESPACE);
2851573Srgrimes		}
2861573Srgrimes		for (i = 1; i <= m->g->nsub; i++)
2871573Srgrimes			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
2881573Srgrimes		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
2891573Srgrimes			NOTE("dissecting");
2901573Srgrimes			dp = dissect(m, m->coldp, endp, gf, gl);
2911573Srgrimes		} else {
2921573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL)
2931573Srgrimes				m->lastpos = (char **)malloc((g->nplus+1) *
2941573Srgrimes							sizeof(char *));
2951573Srgrimes			if (g->nplus > 0 && m->lastpos == NULL) {
2961573Srgrimes				free(m->pmatch);
2971573Srgrimes				STATETEARDOWN(m);
2981573Srgrimes				return(REG_ESPACE);
2991573Srgrimes			}
3001573Srgrimes			NOTE("backref dissect");
3011573Srgrimes			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
3021573Srgrimes		}
3031573Srgrimes		if (dp != NULL)
3041573Srgrimes			break;
3051573Srgrimes
3061573Srgrimes		/* uh-oh... we couldn't find a subexpression-level match */
3071573Srgrimes		assert(g->backrefs);	/* must be back references doing it */
3081573Srgrimes		assert(g->nplus == 0 || m->lastpos != NULL);
3091573Srgrimes		for (;;) {
3101573Srgrimes			if (dp != NULL || endp <= m->coldp)
3111573Srgrimes				break;		/* defeat */
3121573Srgrimes			NOTE("backoff");
3131573Srgrimes			endp = slow(m, m->coldp, endp-1, gf, gl);
3141573Srgrimes			if (endp == NULL)
3151573Srgrimes				break;		/* defeat */
3161573Srgrimes			/* try it on a shorter possibility */
3171573Srgrimes#ifndef NDEBUG
3181573Srgrimes			for (i = 1; i <= m->g->nsub; i++) {
3191573Srgrimes				assert(m->pmatch[i].rm_so == -1);
3201573Srgrimes				assert(m->pmatch[i].rm_eo == -1);
3211573Srgrimes			}
3221573Srgrimes#endif
3231573Srgrimes			NOTE("backoff dissect");
3241573Srgrimes			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
3251573Srgrimes		}
3261573Srgrimes		assert(dp == NULL || dp == endp);
3271573Srgrimes		if (dp != NULL)		/* found a shorter one */
3281573Srgrimes			break;
3291573Srgrimes
3301573Srgrimes		/* despite initial appearances, there is no match here */
3311573Srgrimes		NOTE("false alarm");
332132019Stjr		/* recycle starting later */
333132019Stjr		start = m->coldp + XMBRTOWC(NULL, m->coldp,
334137959Stjr		    stop - m->coldp, &m->mbs, 0);
3351573Srgrimes		assert(start <= stop);
3361573Srgrimes	}
3371573Srgrimes
3381573Srgrimes	/* fill in the details if requested */
3391573Srgrimes	if (nmatch > 0) {
3401573Srgrimes		pmatch[0].rm_so = m->coldp - m->offp;
3411573Srgrimes		pmatch[0].rm_eo = endp - m->offp;
3421573Srgrimes	}
3431573Srgrimes	if (nmatch > 1) {
3441573Srgrimes		assert(m->pmatch != NULL);
3451573Srgrimes		for (i = 1; i < nmatch; i++)
3461573Srgrimes			if (i <= m->g->nsub)
3471573Srgrimes				pmatch[i] = m->pmatch[i];
3481573Srgrimes			else {
3491573Srgrimes				pmatch[i].rm_so = -1;
3501573Srgrimes				pmatch[i].rm_eo = -1;
3511573Srgrimes			}
3521573Srgrimes	}
3531573Srgrimes
3541573Srgrimes	if (m->pmatch != NULL)
3551573Srgrimes		free((char *)m->pmatch);
3561573Srgrimes	if (m->lastpos != NULL)
3571573Srgrimes		free((char *)m->lastpos);
3581573Srgrimes	STATETEARDOWN(m);
3591573Srgrimes	return(0);
3601573Srgrimes}
3611573Srgrimes
3621573Srgrimes/*
3631573Srgrimes - dissect - figure out what matched what, no back references
36492889Sobrien == static char *dissect(struct match *m, char *start, \
3651573Srgrimes ==	char *stop, sopno startst, sopno stopst);
3661573Srgrimes */
3671573Srgrimesstatic char *			/* == stop (success) always */
3681573Srgrimesdissect(m, start, stop, startst, stopst)
36992889Sobrienstruct match *m;
3701573Srgrimeschar *start;
3711573Srgrimeschar *stop;
3721573Srgrimessopno startst;
3731573Srgrimessopno stopst;
3741573Srgrimes{
37592889Sobrien	int i;
37692889Sobrien	sopno ss;		/* start sop of current subRE */
37792889Sobrien	sopno es;		/* end sop of current subRE */
37892889Sobrien	char *sp;		/* start of string matched by it */
37992889Sobrien	char *stp;		/* string matched by it cannot pass here */
38092889Sobrien	char *rest;		/* start of rest of string */
38192889Sobrien	char *tail;		/* string unmatched by rest of RE */
38292889Sobrien	sopno ssub;		/* start sop of subsubRE */
38392889Sobrien	sopno esub;		/* end sop of subsubRE */
38492889Sobrien	char *ssp;		/* start of string matched by subsubRE */
38592889Sobrien	char *sep;		/* end of string matched by subsubRE */
38692889Sobrien	char *oldssp;		/* previous ssp */
38792889Sobrien	char *dp;
3881573Srgrimes
3891573Srgrimes	AT("diss", start, stop, startst, stopst);
3901573Srgrimes	sp = start;
3911573Srgrimes	for (ss = startst; ss < stopst; ss = es) {
3921573Srgrimes		/* identify end of subRE */
3931573Srgrimes		es = ss;
3941573Srgrimes		switch (OP(m->g->strip[es])) {
3951573Srgrimes		case OPLUS_:
3961573Srgrimes		case OQUEST_:
3971573Srgrimes			es += OPND(m->g->strip[es]);
3981573Srgrimes			break;
3991573Srgrimes		case OCH_:
4001573Srgrimes			while (OP(m->g->strip[es]) != O_CH)
4011573Srgrimes				es += OPND(m->g->strip[es]);
4021573Srgrimes			break;
4031573Srgrimes		}
4041573Srgrimes		es++;
4051573Srgrimes
4061573Srgrimes		/* figure out what it matched */
4071573Srgrimes		switch (OP(m->g->strip[ss])) {
4081573Srgrimes		case OEND:
4091573Srgrimes			assert(nope);
4101573Srgrimes			break;
4111573Srgrimes		case OCHAR:
412132019Stjr			sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
4131573Srgrimes			break;
4141573Srgrimes		case OBOL:
4151573Srgrimes		case OEOL:
4161573Srgrimes		case OBOW:
4171573Srgrimes		case OEOW:
4181573Srgrimes			break;
4191573Srgrimes		case OANY:
4201573Srgrimes		case OANYOF:
421132019Stjr			sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
4221573Srgrimes			break;
4231573Srgrimes		case OBACK_:
4241573Srgrimes		case O_BACK:
4251573Srgrimes			assert(nope);
4261573Srgrimes			break;
4271573Srgrimes		/* cases where length of match is hard to find */
4281573Srgrimes		case OQUEST_:
4291573Srgrimes			stp = stop;
4301573Srgrimes			for (;;) {
4311573Srgrimes				/* how long could this one be? */
4321573Srgrimes				rest = slow(m, sp, stp, ss, es);
4331573Srgrimes				assert(rest != NULL);	/* it did match */
4341573Srgrimes				/* could the rest match the rest? */
4351573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4361573Srgrimes				if (tail == stop)
4371573Srgrimes					break;		/* yes! */
4381573Srgrimes				/* no -- try a shorter match for this one */
4391573Srgrimes				stp = rest - 1;
4401573Srgrimes				assert(stp >= sp);	/* it did work */
4411573Srgrimes			}
4421573Srgrimes			ssub = ss + 1;
4431573Srgrimes			esub = es - 1;
4441573Srgrimes			/* did innards match? */
4451573Srgrimes			if (slow(m, sp, rest, ssub, esub) != NULL) {
4461573Srgrimes				dp = dissect(m, sp, rest, ssub, esub);
4471573Srgrimes				assert(dp == rest);
4481573Srgrimes			} else		/* no */
4491573Srgrimes				assert(sp == rest);
4501573Srgrimes			sp = rest;
4511573Srgrimes			break;
4521573Srgrimes		case OPLUS_:
4531573Srgrimes			stp = stop;
4541573Srgrimes			for (;;) {
4551573Srgrimes				/* how long could this one be? */
4561573Srgrimes				rest = slow(m, sp, stp, ss, es);
4571573Srgrimes				assert(rest != NULL);	/* it did match */
4581573Srgrimes				/* could the rest match the rest? */
4591573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4601573Srgrimes				if (tail == stop)
4611573Srgrimes					break;		/* yes! */
4621573Srgrimes				/* no -- try a shorter match for this one */
4631573Srgrimes				stp = rest - 1;
4641573Srgrimes				assert(stp >= sp);	/* it did work */
4651573Srgrimes			}
4661573Srgrimes			ssub = ss + 1;
4671573Srgrimes			esub = es - 1;
4681573Srgrimes			ssp = sp;
4691573Srgrimes			oldssp = ssp;
4701573Srgrimes			for (;;) {	/* find last match of innards */
4711573Srgrimes				sep = slow(m, ssp, rest, ssub, esub);
4721573Srgrimes				if (sep == NULL || sep == ssp)
4731573Srgrimes					break;	/* failed or matched null */
4741573Srgrimes				oldssp = ssp;	/* on to next try */
4751573Srgrimes				ssp = sep;
4761573Srgrimes			}
4771573Srgrimes			if (sep == NULL) {
4781573Srgrimes				/* last successful match */
4791573Srgrimes				sep = ssp;
4801573Srgrimes				ssp = oldssp;
4811573Srgrimes			}
4821573Srgrimes			assert(sep == rest);	/* must exhaust substring */
4831573Srgrimes			assert(slow(m, ssp, sep, ssub, esub) == rest);
4841573Srgrimes			dp = dissect(m, ssp, sep, ssub, esub);
4851573Srgrimes			assert(dp == sep);
4861573Srgrimes			sp = rest;
4871573Srgrimes			break;
4881573Srgrimes		case OCH_:
4891573Srgrimes			stp = stop;
4901573Srgrimes			for (;;) {
4911573Srgrimes				/* how long could this one be? */
4921573Srgrimes				rest = slow(m, sp, stp, ss, es);
4931573Srgrimes				assert(rest != NULL);	/* it did match */
4941573Srgrimes				/* could the rest match the rest? */
4951573Srgrimes				tail = slow(m, rest, stop, es, stopst);
4961573Srgrimes				if (tail == stop)
4971573Srgrimes					break;		/* yes! */
4981573Srgrimes				/* no -- try a shorter match for this one */
4991573Srgrimes				stp = rest - 1;
5001573Srgrimes				assert(stp >= sp);	/* it did work */
5011573Srgrimes			}
5021573Srgrimes			ssub = ss + 1;
5031573Srgrimes			esub = ss + OPND(m->g->strip[ss]) - 1;
5041573Srgrimes			assert(OP(m->g->strip[esub]) == OOR1);
5051573Srgrimes			for (;;) {	/* find first matching branch */
5061573Srgrimes				if (slow(m, sp, rest, ssub, esub) == rest)
5071573Srgrimes					break;	/* it matched all of it */
5081573Srgrimes				/* that one missed, try next one */
5091573Srgrimes				assert(OP(m->g->strip[esub]) == OOR1);
5101573Srgrimes				esub++;
5111573Srgrimes				assert(OP(m->g->strip[esub]) == OOR2);
5121573Srgrimes				ssub = esub + 1;
5131573Srgrimes				esub += OPND(m->g->strip[esub]);
5141573Srgrimes				if (OP(m->g->strip[esub]) == OOR2)
5151573Srgrimes					esub--;
5161573Srgrimes				else
5171573Srgrimes					assert(OP(m->g->strip[esub]) == O_CH);
5181573Srgrimes			}
5191573Srgrimes			dp = dissect(m, sp, rest, ssub, esub);
5201573Srgrimes			assert(dp == rest);
5211573Srgrimes			sp = rest;
5221573Srgrimes			break;
5231573Srgrimes		case O_PLUS:
5241573Srgrimes		case O_QUEST:
5251573Srgrimes		case OOR1:
5261573Srgrimes		case OOR2:
5271573Srgrimes		case O_CH:
5281573Srgrimes			assert(nope);
5291573Srgrimes			break;
5301573Srgrimes		case OLPAREN:
5311573Srgrimes			i = OPND(m->g->strip[ss]);
5321573Srgrimes			assert(0 < i && i <= m->g->nsub);
5331573Srgrimes			m->pmatch[i].rm_so = sp - m->offp;
5341573Srgrimes			break;
5351573Srgrimes		case ORPAREN:
5361573Srgrimes			i = OPND(m->g->strip[ss]);
5371573Srgrimes			assert(0 < i && i <= m->g->nsub);
5381573Srgrimes			m->pmatch[i].rm_eo = sp - m->offp;
5391573Srgrimes			break;
5401573Srgrimes		default:		/* uh oh */
5411573Srgrimes			assert(nope);
5421573Srgrimes			break;
5431573Srgrimes		}
5441573Srgrimes	}
5451573Srgrimes
5461573Srgrimes	assert(sp == stop);
5471573Srgrimes	return(sp);
5481573Srgrimes}
5491573Srgrimes
5501573Srgrimes/*
5511573Srgrimes - backref - figure out what matched what, figuring in back references
55292889Sobrien == static char *backref(struct match *m, char *start, \
5531573Srgrimes ==	char *stop, sopno startst, sopno stopst, sopno lev);
5541573Srgrimes */
5551573Srgrimesstatic char *			/* == stop (success) or NULL (failure) */
5561573Srgrimesbackref(m, start, stop, startst, stopst, lev)
55792889Sobrienstruct match *m;
5581573Srgrimeschar *start;
5591573Srgrimeschar *stop;
5601573Srgrimessopno startst;
5611573Srgrimessopno stopst;
5621573Srgrimessopno lev;			/* PLUS nesting level */
5631573Srgrimes{
56492889Sobrien	int i;
56592889Sobrien	sopno ss;		/* start sop of current subRE */
56692889Sobrien	char *sp;		/* start of string matched by it */
56792889Sobrien	sopno ssub;		/* start sop of subsubRE */
56892889Sobrien	sopno esub;		/* end sop of subsubRE */
56992889Sobrien	char *ssp;		/* start of string matched by subsubRE */
57092889Sobrien	char *dp;
57192889Sobrien	size_t len;
57292889Sobrien	int hard;
57392889Sobrien	sop s;
57492889Sobrien	regoff_t offsave;
57592889Sobrien	cset *cs;
576132019Stjr	wint_t wc;
5771573Srgrimes
5781573Srgrimes	AT("back", start, stop, startst, stopst);
5791573Srgrimes	sp = start;
5801573Srgrimes
5811573Srgrimes	/* get as far as we can with easy stuff */
5821573Srgrimes	hard = 0;
5831573Srgrimes	for (ss = startst; !hard && ss < stopst; ss++)
5841573Srgrimes		switch (OP(s = m->g->strip[ss])) {
5851573Srgrimes		case OCHAR:
586132019Stjr			if (sp == stop)
5871573Srgrimes				return(NULL);
588132019Stjr			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
589132019Stjr			if (wc != OPND(s))
590132019Stjr				return(NULL);
5911573Srgrimes			break;
5921573Srgrimes		case OANY:
5931573Srgrimes			if (sp == stop)
5941573Srgrimes				return(NULL);
595132019Stjr			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
596132019Stjr			if (wc == BADCHAR)
597132019Stjr				return (NULL);
5981573Srgrimes			break;
5991573Srgrimes		case OANYOF:
600132019Stjr			if (sp == stop)
601132019Stjr				return (NULL);
6021573Srgrimes			cs = &m->g->sets[OPND(s)];
603132019Stjr			sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
604132019Stjr			if (wc == BADCHAR || !CHIN(cs, wc))
6051573Srgrimes				return(NULL);
6061573Srgrimes			break;
6071573Srgrimes		case OBOL:
6081573Srgrimes			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
6091573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
6101573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
6111573Srgrimes				{ /* yes */ }
6121573Srgrimes			else
6131573Srgrimes				return(NULL);
6141573Srgrimes			break;
6151573Srgrimes		case OEOL:
6161573Srgrimes			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
6171573Srgrimes					(sp < m->endp && *sp == '\n' &&
6181573Srgrimes						(m->g->cflags&REG_NEWLINE)) )
6191573Srgrimes				{ /* yes */ }
6201573Srgrimes			else
6211573Srgrimes				return(NULL);
6221573Srgrimes			break;
6231573Srgrimes		case OBOW:
6241573Srgrimes			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
6251573Srgrimes					(sp < m->endp && *(sp-1) == '\n' &&
6261573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6271573Srgrimes					(sp > m->beginp &&
6281573Srgrimes							!ISWORD(*(sp-1))) ) &&
6291573Srgrimes					(sp < m->endp && ISWORD(*sp)) )
6301573Srgrimes				{ /* yes */ }
6311573Srgrimes			else
6321573Srgrimes				return(NULL);
6331573Srgrimes			break;
6341573Srgrimes		case OEOW:
6351573Srgrimes			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
6361573Srgrimes					(sp < m->endp && *sp == '\n' &&
6371573Srgrimes						(m->g->cflags&REG_NEWLINE)) ||
6381573Srgrimes					(sp < m->endp && !ISWORD(*sp)) ) &&
6391573Srgrimes					(sp > m->beginp && ISWORD(*(sp-1))) )
6401573Srgrimes				{ /* yes */ }
6411573Srgrimes			else
6421573Srgrimes				return(NULL);
6431573Srgrimes			break;
6441573Srgrimes		case O_QUEST:
6451573Srgrimes			break;
6461573Srgrimes		case OOR1:	/* matches null but needs to skip */
6471573Srgrimes			ss++;
6481573Srgrimes			s = m->g->strip[ss];
6491573Srgrimes			do {
6501573Srgrimes				assert(OP(s) == OOR2);
6511573Srgrimes				ss += OPND(s);
6521573Srgrimes			} while (OP(s = m->g->strip[ss]) != O_CH);
6531573Srgrimes			/* note that the ss++ gets us past the O_CH */
6541573Srgrimes			break;
6551573Srgrimes		default:	/* have to make a choice */
6561573Srgrimes			hard = 1;
6571573Srgrimes			break;
6581573Srgrimes		}
6591573Srgrimes	if (!hard) {		/* that was it! */
6601573Srgrimes		if (sp != stop)
6611573Srgrimes			return(NULL);
6621573Srgrimes		return(sp);
6631573Srgrimes	}
6641573Srgrimes	ss--;			/* adjust for the for's final increment */
6651573Srgrimes
6661573Srgrimes	/* the hard stuff */
6671573Srgrimes	AT("hard", sp, stop, ss, stopst);
6681573Srgrimes	s = m->g->strip[ss];
6691573Srgrimes	switch (OP(s)) {
6701573Srgrimes	case OBACK_:		/* the vilest depths */
6711573Srgrimes		i = OPND(s);
6721573Srgrimes		assert(0 < i && i <= m->g->nsub);
6731573Srgrimes		if (m->pmatch[i].rm_eo == -1)
6741573Srgrimes			return(NULL);
6751573Srgrimes		assert(m->pmatch[i].rm_so != -1);
6761573Srgrimes		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
677167216Sdelphij		if (len == 0)
678167216Sdelphij			return(NULL);
6791573Srgrimes		assert(stop - m->beginp >= len);
6801573Srgrimes		if (sp > stop - len)
6811573Srgrimes			return(NULL);	/* not enough left to match */
6821573Srgrimes		ssp = m->offp + m->pmatch[i].rm_so;
6831573Srgrimes		if (memcmp(sp, ssp, len) != 0)
6841573Srgrimes			return(NULL);
6851573Srgrimes		while (m->g->strip[ss] != SOP(O_BACK, i))
6861573Srgrimes			ss++;
6871573Srgrimes		return(backref(m, sp+len, stop, ss+1, stopst, lev));
6881573Srgrimes		break;
6891573Srgrimes	case OQUEST_:		/* to null or not */
6901573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
6911573Srgrimes		if (dp != NULL)
6921573Srgrimes			return(dp);	/* not */
6931573Srgrimes		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
6941573Srgrimes		break;
6951573Srgrimes	case OPLUS_:
6961573Srgrimes		assert(m->lastpos != NULL);
6971573Srgrimes		assert(lev+1 <= m->g->nplus);
6981573Srgrimes		m->lastpos[lev+1] = sp;
6991573Srgrimes		return(backref(m, sp, stop, ss+1, stopst, lev+1));
7001573Srgrimes		break;
7011573Srgrimes	case O_PLUS:
7021573Srgrimes		if (sp == m->lastpos[lev])	/* last pass matched null */
7031573Srgrimes			return(backref(m, sp, stop, ss+1, stopst, lev-1));
7041573Srgrimes		/* try another pass */
7051573Srgrimes		m->lastpos[lev] = sp;
7061573Srgrimes		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
7071573Srgrimes		if (dp == NULL)
7081573Srgrimes			return(backref(m, sp, stop, ss+1, stopst, lev-1));
7091573Srgrimes		else
7101573Srgrimes			return(dp);
7111573Srgrimes		break;
7121573Srgrimes	case OCH_:		/* find the right one, if any */
7131573Srgrimes		ssub = ss + 1;
7141573Srgrimes		esub = ss + OPND(s) - 1;
7151573Srgrimes		assert(OP(m->g->strip[esub]) == OOR1);
7161573Srgrimes		for (;;) {	/* find first matching branch */
7171573Srgrimes			dp = backref(m, sp, stop, ssub, esub, lev);
7181573Srgrimes			if (dp != NULL)
7191573Srgrimes				return(dp);
7201573Srgrimes			/* that one missed, try next one */
7211573Srgrimes			if (OP(m->g->strip[esub]) == O_CH)
7221573Srgrimes				return(NULL);	/* there is none */
7231573Srgrimes			esub++;
7241573Srgrimes			assert(OP(m->g->strip[esub]) == OOR2);
7251573Srgrimes			ssub = esub + 1;
7261573Srgrimes			esub += OPND(m->g->strip[esub]);
7271573Srgrimes			if (OP(m->g->strip[esub]) == OOR2)
7281573Srgrimes				esub--;
7291573Srgrimes			else
7301573Srgrimes				assert(OP(m->g->strip[esub]) == O_CH);
7311573Srgrimes		}
7321573Srgrimes		break;
7331573Srgrimes	case OLPAREN:		/* must undo assignment if rest fails */
7341573Srgrimes		i = OPND(s);
7351573Srgrimes		assert(0 < i && i <= m->g->nsub);
7361573Srgrimes		offsave = m->pmatch[i].rm_so;
7371573Srgrimes		m->pmatch[i].rm_so = sp - m->offp;
7381573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
7391573Srgrimes		if (dp != NULL)
7401573Srgrimes			return(dp);
7411573Srgrimes		m->pmatch[i].rm_so = offsave;
7421573Srgrimes		return(NULL);
7431573Srgrimes		break;
7441573Srgrimes	case ORPAREN:		/* must undo assignment if rest fails */
7451573Srgrimes		i = OPND(s);
7461573Srgrimes		assert(0 < i && i <= m->g->nsub);
7471573Srgrimes		offsave = m->pmatch[i].rm_eo;
7481573Srgrimes		m->pmatch[i].rm_eo = sp - m->offp;
7491573Srgrimes		dp = backref(m, sp, stop, ss+1, stopst, lev);
7501573Srgrimes		if (dp != NULL)
7511573Srgrimes			return(dp);
7521573Srgrimes		m->pmatch[i].rm_eo = offsave;
7531573Srgrimes		return(NULL);
7541573Srgrimes		break;
7551573Srgrimes	default:		/* uh oh */
7561573Srgrimes		assert(nope);
7571573Srgrimes		break;
7581573Srgrimes	}
7591573Srgrimes
7601573Srgrimes	/* "can't happen" */
7611573Srgrimes	assert(nope);
7621573Srgrimes	/* NOTREACHED */
76311664Sphk	return "shut up gcc";
7641573Srgrimes}
7651573Srgrimes
7661573Srgrimes/*
7671573Srgrimes - fast - step through the string at top speed
76892889Sobrien == static char *fast(struct match *m, char *start, \
7691573Srgrimes ==	char *stop, sopno startst, sopno stopst);
7701573Srgrimes */
7711573Srgrimesstatic char *			/* where tentative match ended, or NULL */
7721573Srgrimesfast(m, start, stop, startst, stopst)
77392889Sobrienstruct match *m;
7741573Srgrimeschar *start;
7751573Srgrimeschar *stop;
7761573Srgrimessopno startst;
7771573Srgrimessopno stopst;
7781573Srgrimes{
77992889Sobrien	states st = m->st;
78092889Sobrien	states fresh = m->fresh;
78192889Sobrien	states tmp = m->tmp;
78292889Sobrien	char *p = start;
783132019Stjr	wint_t c;
784132019Stjr	wint_t lastc;		/* previous c */
785132019Stjr	wint_t flagch;
78692889Sobrien	int i;
78792889Sobrien	char *coldp;		/* last p after which no match was underway */
788132019Stjr	size_t clen;
7891573Srgrimes
7901573Srgrimes	CLEAR(st);
7911573Srgrimes	SET1(st, startst);
7921573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
7931573Srgrimes	ASSIGN(fresh, st);
7941573Srgrimes	SP("start", st, *p);
7951573Srgrimes	coldp = NULL;
796132019Stjr	if (start == m->beginp)
797132019Stjr		c = OUT;
798132019Stjr	else {
799132019Stjr		/*
800132019Stjr		 * XXX Wrong if the previous character was multi-byte.
801132019Stjr		 * Newline never is (in encodings supported by FreeBSD),
802132019Stjr		 * so this only breaks the ISWORD tests below.
803132019Stjr		 */
804132019Stjr		c = (uch)*(start - 1);
805132019Stjr	}
8061573Srgrimes	for (;;) {
8071573Srgrimes		/* next character */
8081573Srgrimes		lastc = c;
809149180Stjr		if (p == m->endp) {
810149180Stjr			clen = 0;
811132019Stjr			c = OUT;
812149180Stjr		} else
813149180Stjr			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
8141573Srgrimes		if (EQ(st, fresh))
8151573Srgrimes			coldp = p;
8161573Srgrimes
8171573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
8181573Srgrimes		flagch = '\0';
8191573Srgrimes		i = 0;
8201573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
8211573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
8221573Srgrimes			flagch = BOL;
8231573Srgrimes			i = m->g->nbol;
8241573Srgrimes		}
8251573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
8261573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
8271573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
8281573Srgrimes			i += m->g->neol;
8291573Srgrimes		}
8301573Srgrimes		if (i != 0) {
8311573Srgrimes			for (; i > 0; i--)
8321573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
8331573Srgrimes			SP("boleol", st, c);
8341573Srgrimes		}
8351573Srgrimes
8361573Srgrimes		/* how about a word boundary? */
8371573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
8381573Srgrimes					(c != OUT && ISWORD(c)) ) {
8391573Srgrimes			flagch = BOW;
8401573Srgrimes		}
8411573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
8421573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
8431573Srgrimes			flagch = EOW;
8441573Srgrimes		}
8451573Srgrimes		if (flagch == BOW || flagch == EOW) {
8461573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
8471573Srgrimes			SP("boweow", st, c);
8481573Srgrimes		}
8491573Srgrimes
8501573Srgrimes		/* are we done? */
851149180Stjr		if (ISSET(st, stopst) || p == stop || clen > stop - p)
8521573Srgrimes			break;		/* NOTE BREAK OUT */
8531573Srgrimes
8541573Srgrimes		/* no, we must deal with this character */
8551573Srgrimes		ASSIGN(tmp, st);
8561573Srgrimes		ASSIGN(st, fresh);
8571573Srgrimes		assert(c != OUT);
8581573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
8591573Srgrimes		SP("aft", st, c);
8601573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
861132019Stjr		p += clen;
8621573Srgrimes	}
8631573Srgrimes
8641573Srgrimes	assert(coldp != NULL);
8651573Srgrimes	m->coldp = coldp;
8661573Srgrimes	if (ISSET(st, stopst))
867137959Stjr		return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
8681573Srgrimes	else
8691573Srgrimes		return(NULL);
8701573Srgrimes}
8711573Srgrimes
8721573Srgrimes/*
8731573Srgrimes - slow - step through the string more deliberately
87492889Sobrien == static char *slow(struct match *m, char *start, \
8751573Srgrimes ==	char *stop, sopno startst, sopno stopst);
8761573Srgrimes */
8771573Srgrimesstatic char *			/* where it ended */
8781573Srgrimesslow(m, start, stop, startst, stopst)
87992889Sobrienstruct match *m;
8801573Srgrimeschar *start;
8811573Srgrimeschar *stop;
8821573Srgrimessopno startst;
8831573Srgrimessopno stopst;
8841573Srgrimes{
88592889Sobrien	states st = m->st;
88692889Sobrien	states empty = m->empty;
88792889Sobrien	states tmp = m->tmp;
88892889Sobrien	char *p = start;
889132019Stjr	wint_t c;
890132019Stjr	wint_t lastc;		/* previous c */
891132019Stjr	wint_t flagch;
89292889Sobrien	int i;
89392889Sobrien	char *matchp;		/* last p at which a match ended */
894132019Stjr	size_t clen;
8951573Srgrimes
8961573Srgrimes	AT("slow", start, stop, startst, stopst);
8971573Srgrimes	CLEAR(st);
8981573Srgrimes	SET1(st, startst);
8991573Srgrimes	SP("sstart", st, *p);
9001573Srgrimes	st = step(m->g, startst, stopst, st, NOTHING, st);
9011573Srgrimes	matchp = NULL;
902132019Stjr	if (start == m->beginp)
903132019Stjr		c = OUT;
904132019Stjr	else {
905132019Stjr		/*
906132019Stjr		 * XXX Wrong if the previous character was multi-byte.
907132019Stjr		 * Newline never is (in encodings supported by FreeBSD),
908132019Stjr		 * so this only breaks the ISWORD tests below.
909132019Stjr		 */
910132019Stjr		c = (uch)*(start - 1);
911132019Stjr	}
9121573Srgrimes	for (;;) {
9131573Srgrimes		/* next character */
9141573Srgrimes		lastc = c;
915132019Stjr		if (p == m->endp) {
916132019Stjr			c = OUT;
917132019Stjr			clen = 0;
918132019Stjr		} else
919149180Stjr			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
9201573Srgrimes
9211573Srgrimes		/* is there an EOL and/or BOL between lastc and c? */
9221573Srgrimes		flagch = '\0';
9231573Srgrimes		i = 0;
9241573Srgrimes		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
9251573Srgrimes				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
9261573Srgrimes			flagch = BOL;
9271573Srgrimes			i = m->g->nbol;
9281573Srgrimes		}
9291573Srgrimes		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
9301573Srgrimes				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
9311573Srgrimes			flagch = (flagch == BOL) ? BOLEOL : EOL;
9321573Srgrimes			i += m->g->neol;
9331573Srgrimes		}
9341573Srgrimes		if (i != 0) {
9351573Srgrimes			for (; i > 0; i--)
9361573Srgrimes				st = step(m->g, startst, stopst, st, flagch, st);
9371573Srgrimes			SP("sboleol", st, c);
9381573Srgrimes		}
9391573Srgrimes
9401573Srgrimes		/* how about a word boundary? */
9411573Srgrimes		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
9421573Srgrimes					(c != OUT && ISWORD(c)) ) {
9431573Srgrimes			flagch = BOW;
9441573Srgrimes		}
9451573Srgrimes		if ( (lastc != OUT && ISWORD(lastc)) &&
9461573Srgrimes				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
9471573Srgrimes			flagch = EOW;
9481573Srgrimes		}
9491573Srgrimes		if (flagch == BOW || flagch == EOW) {
9501573Srgrimes			st = step(m->g, startst, stopst, st, flagch, st);
9511573Srgrimes			SP("sboweow", st, c);
9521573Srgrimes		}
9531573Srgrimes
9541573Srgrimes		/* are we done? */
9551573Srgrimes		if (ISSET(st, stopst))
9561573Srgrimes			matchp = p;
957149180Stjr		if (EQ(st, empty) || p == stop || clen > stop - p)
9581573Srgrimes			break;		/* NOTE BREAK OUT */
9591573Srgrimes
9601573Srgrimes		/* no, we must deal with this character */
9611573Srgrimes		ASSIGN(tmp, st);
9621573Srgrimes		ASSIGN(st, empty);
9631573Srgrimes		assert(c != OUT);
9641573Srgrimes		st = step(m->g, startst, stopst, tmp, c, st);
9651573Srgrimes		SP("saft", st, c);
9661573Srgrimes		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
967132019Stjr		p += clen;
9681573Srgrimes	}
9691573Srgrimes
9701573Srgrimes	return(matchp);
9711573Srgrimes}
9721573Srgrimes
9731573Srgrimes
9741573Srgrimes/*
9751573Srgrimes - step - map set of states reachable before char to set reachable after
97692889Sobrien == static states step(struct re_guts *g, sopno start, sopno stop, \
97792889Sobrien ==	states bef, int ch, states aft);
978132019Stjr == #define	BOL	(OUT-1)
979132019Stjr == #define	EOL	(BOL-1)
980132019Stjr == #define	BOLEOL	(BOL-2)
981132019Stjr == #define	NOTHING	(BOL-3)
982132019Stjr == #define	BOW	(BOL-4)
983132019Stjr == #define	EOW	(BOL-5)
984132019Stjr == #define	BADCHAR	(BOL-6)
985132019Stjr == #define	NONCHAR(c)	((c) <= OUT)
9861573Srgrimes */
9871573Srgrimesstatic states
9881573Srgrimesstep(g, start, stop, bef, ch, aft)
98992889Sobrienstruct re_guts *g;
9901573Srgrimessopno start;			/* start state within strip */
9911573Srgrimessopno stop;			/* state after stop state within strip */
99292889Sobrienstates bef;			/* states reachable before */
993132019Stjrwint_t ch;			/* character or NONCHAR code */
99492889Sobrienstates aft;			/* states already known reachable after */
9951573Srgrimes{
99692889Sobrien	cset *cs;
99792889Sobrien	sop s;
99892889Sobrien	sopno pc;
99992889Sobrien	onestate here;		/* note, macros know this name */
100092889Sobrien	sopno look;
100192889Sobrien	int i;
10021573Srgrimes
10031573Srgrimes	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
10041573Srgrimes		s = g->strip[pc];
10051573Srgrimes		switch (OP(s)) {
10061573Srgrimes		case OEND:
10071573Srgrimes			assert(pc == stop-1);
10081573Srgrimes			break;
10091573Srgrimes		case OCHAR:
10101573Srgrimes			/* only characters can match */
1011132019Stjr			assert(!NONCHAR(ch) || ch != OPND(s));
1012132019Stjr			if (ch == OPND(s))
10131573Srgrimes				FWD(aft, bef, 1);
10141573Srgrimes			break;
10151573Srgrimes		case OBOL:
10161573Srgrimes			if (ch == BOL || ch == BOLEOL)
10171573Srgrimes				FWD(aft, bef, 1);
10181573Srgrimes			break;
10191573Srgrimes		case OEOL:
10201573Srgrimes			if (ch == EOL || ch == BOLEOL)
10211573Srgrimes				FWD(aft, bef, 1);
10221573Srgrimes			break;
10231573Srgrimes		case OBOW:
10241573Srgrimes			if (ch == BOW)
10251573Srgrimes				FWD(aft, bef, 1);
10261573Srgrimes			break;
10271573Srgrimes		case OEOW:
10281573Srgrimes			if (ch == EOW)
10291573Srgrimes				FWD(aft, bef, 1);
10301573Srgrimes			break;
10311573Srgrimes		case OANY:
10321573Srgrimes			if (!NONCHAR(ch))
10331573Srgrimes				FWD(aft, bef, 1);
10341573Srgrimes			break;
10351573Srgrimes		case OANYOF:
10361573Srgrimes			cs = &g->sets[OPND(s)];
10371573Srgrimes			if (!NONCHAR(ch) && CHIN(cs, ch))
10381573Srgrimes				FWD(aft, bef, 1);
10391573Srgrimes			break;
10401573Srgrimes		case OBACK_:		/* ignored here */
10411573Srgrimes		case O_BACK:
10421573Srgrimes			FWD(aft, aft, 1);
10431573Srgrimes			break;
10441573Srgrimes		case OPLUS_:		/* forward, this is just an empty */
10451573Srgrimes			FWD(aft, aft, 1);
10461573Srgrimes			break;
10471573Srgrimes		case O_PLUS:		/* both forward and back */
10481573Srgrimes			FWD(aft, aft, 1);
10491573Srgrimes			i = ISSETBACK(aft, OPND(s));
10501573Srgrimes			BACK(aft, aft, OPND(s));
10511573Srgrimes			if (!i && ISSETBACK(aft, OPND(s))) {
10521573Srgrimes				/* oho, must reconsider loop body */
10531573Srgrimes				pc -= OPND(s) + 1;
10541573Srgrimes				INIT(here, pc);
10551573Srgrimes			}
10561573Srgrimes			break;
10571573Srgrimes		case OQUEST_:		/* two branches, both forward */
10581573Srgrimes			FWD(aft, aft, 1);
10591573Srgrimes			FWD(aft, aft, OPND(s));
10601573Srgrimes			break;
10611573Srgrimes		case O_QUEST:		/* just an empty */
10621573Srgrimes			FWD(aft, aft, 1);
10631573Srgrimes			break;
10641573Srgrimes		case OLPAREN:		/* not significant here */
10651573Srgrimes		case ORPAREN:
10661573Srgrimes			FWD(aft, aft, 1);
10671573Srgrimes			break;
10681573Srgrimes		case OCH_:		/* mark the first two branches */
10691573Srgrimes			FWD(aft, aft, 1);
10701573Srgrimes			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10711573Srgrimes			FWD(aft, aft, OPND(s));
10721573Srgrimes			break;
10731573Srgrimes		case OOR1:		/* done a branch, find the O_CH */
10741573Srgrimes			if (ISSTATEIN(aft, here)) {
10751573Srgrimes				for (look = 1;
10761573Srgrimes						OP(s = g->strip[pc+look]) != O_CH;
10771573Srgrimes						look += OPND(s))
10781573Srgrimes					assert(OP(s) == OOR2);
10791573Srgrimes				FWD(aft, aft, look);
10801573Srgrimes			}
10811573Srgrimes			break;
10821573Srgrimes		case OOR2:		/* propagate OCH_'s marking */
10831573Srgrimes			FWD(aft, aft, 1);
10841573Srgrimes			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
10851573Srgrimes				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
10861573Srgrimes				FWD(aft, aft, OPND(s));
10871573Srgrimes			}
10881573Srgrimes			break;
10891573Srgrimes		case O_CH:		/* just empty */
10901573Srgrimes			FWD(aft, aft, 1);
10911573Srgrimes			break;
10921573Srgrimes		default:		/* ooooops... */
10931573Srgrimes			assert(nope);
10941573Srgrimes			break;
10951573Srgrimes		}
10961573Srgrimes	}
10971573Srgrimes
10981573Srgrimes	return(aft);
10991573Srgrimes}
11001573Srgrimes
11011573Srgrimes#ifdef REDEBUG
11021573Srgrimes/*
11031573Srgrimes - print - print a set of states
11041573Srgrimes == #ifdef REDEBUG
11051573Srgrimes == static void print(struct match *m, char *caption, states st, \
11061573Srgrimes ==	int ch, FILE *d);
11071573Srgrimes == #endif
11081573Srgrimes */
11091573Srgrimesstatic void
11101573Srgrimesprint(m, caption, st, ch, d)
11111573Srgrimesstruct match *m;
11121573Srgrimeschar *caption;
11131573Srgrimesstates st;
11141573Srgrimesint ch;
11151573SrgrimesFILE *d;
11161573Srgrimes{
111792889Sobrien	struct re_guts *g = m->g;
111892889Sobrien	int i;
111992889Sobrien	int first = 1;
11201573Srgrimes
11211573Srgrimes	if (!(m->eflags&REG_TRACE))
11221573Srgrimes		return;
11231573Srgrimes
11241573Srgrimes	fprintf(d, "%s", caption);
11251573Srgrimes	if (ch != '\0')
11261573Srgrimes		fprintf(d, " %s", pchar(ch));
11271573Srgrimes	for (i = 0; i < g->nstates; i++)
11281573Srgrimes		if (ISSET(st, i)) {
11291573Srgrimes			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
11301573Srgrimes			first = 0;
11311573Srgrimes		}
11321573Srgrimes	fprintf(d, "\n");
11331573Srgrimes}
11341573Srgrimes
11358870Srgrimes/*
11361573Srgrimes - at - print current situation
11371573Srgrimes == #ifdef REDEBUG
11381573Srgrimes == static void at(struct match *m, char *title, char *start, char *stop, \
11391573Srgrimes ==						sopno startst, sopno stopst);
11401573Srgrimes == #endif
11411573Srgrimes */
11421573Srgrimesstatic void
11431573Srgrimesat(m, title, start, stop, startst, stopst)
11441573Srgrimesstruct match *m;
11451573Srgrimeschar *title;
11461573Srgrimeschar *start;
11471573Srgrimeschar *stop;
11481573Srgrimessopno startst;
11491573Srgrimessopno stopst;
11501573Srgrimes{
11511573Srgrimes	if (!(m->eflags&REG_TRACE))
11521573Srgrimes		return;
11531573Srgrimes
11541573Srgrimes	printf("%s %s-", title, pchar(*start));
11551573Srgrimes	printf("%s ", pchar(*stop));
11561573Srgrimes	printf("%ld-%ld\n", (long)startst, (long)stopst);
11571573Srgrimes}
11581573Srgrimes
11591573Srgrimes#ifndef PCHARDONE
11601573Srgrimes#define	PCHARDONE	/* never again */
11611573Srgrimes/*
11621573Srgrimes - pchar - make a character printable
11631573Srgrimes == #ifdef REDEBUG
11641573Srgrimes == static char *pchar(int ch);
11651573Srgrimes == #endif
11661573Srgrimes *
11671573Srgrimes * Is this identical to regchar() over in debug.c?  Well, yes.  But a
11681573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to
11691573Srgrimes * a matching debug.o, and this is convenient.  It all disappears in
11701573Srgrimes * the non-debug compilation anyway, so it doesn't matter much.
11711573Srgrimes */
11721573Srgrimesstatic char *			/* -> representation */
11731573Srgrimespchar(ch)
11741573Srgrimesint ch;
11751573Srgrimes{
11761573Srgrimes	static char pbuf[10];
11771573Srgrimes
117817508Sache	if (isprint((uch)ch) || ch == ' ')
11791573Srgrimes		sprintf(pbuf, "%c", ch);
11801573Srgrimes	else
11811573Srgrimes		sprintf(pbuf, "\\%o", ch);
11821573Srgrimes	return(pbuf);
11831573Srgrimes}
11841573Srgrimes#endif
11851573Srgrimes#endif
11861573Srgrimes
11871573Srgrimes#undef	matcher
11881573Srgrimes#undef	fast
11891573Srgrimes#undef	slow
11901573Srgrimes#undef	dissect
11911573Srgrimes#undef	backref
11921573Srgrimes#undef	step
11931573Srgrimes#undef	print
11941573Srgrimes#undef	at
11951573Srgrimes#undef	match
1196