engine.c revision 1.2
1/*-
2 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3 * Copyright (c) 1992, 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Henry Spencer.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38#if defined(SNAMES) && defined(LIBC_SCCS) && !defined(lint)
39static char enginercsid[] = "$OpenBSD: engine.c,v 1.2 1996/08/19 08:31:06 tholo Exp $";
40#endif /* SNAMES and LIBC_SCCS and not lint */
41
42/*
43 * The matching engine and friends.  This file is #included by regexec.c
44 * after suitable #defines of a variety of macros used herein, so that
45 * different state representations can be used without duplicating masses
46 * of code.
47 */
48
49#ifdef SNAMES
50#define	matcher	smatcher
51#define	fast	sfast
52#define	slow	sslow
53#define	dissect	sdissect
54#define	backref	sbackref
55#define	step	sstep
56#define	print	sprint
57#define	at	sat
58#define	match	smat
59#endif
60#ifdef LNAMES
61#define	matcher	lmatcher
62#define	fast	lfast
63#define	slow	lslow
64#define	dissect	ldissect
65#define	backref	lbackref
66#define	step	lstep
67#define	print	lprint
68#define	at	lat
69#define	match	lmat
70#endif
71
72/* another structure passed up and down to avoid zillions of parameters */
73struct match {
74	struct re_guts *g;
75	int eflags;
76	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
77	char *offp;		/* offsets work from here */
78	char *beginp;		/* start of string -- virtual NUL precedes */
79	char *endp;		/* end of string -- virtual NUL here */
80	char *coldp;		/* can be no match starting before here */
81	char **lastpos;		/* [nplus+1] */
82	STATEVARS;
83	states st;		/* current states */
84	states fresh;		/* states for a fresh start */
85	states tmp;		/* temporary */
86	states empty;		/* empty set of states */
87};
88
89/* ========= begin header generated by ./mkh ========= */
90#ifdef __cplusplus
91extern "C" {
92#endif
93
94/* === engine.c === */
95static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
96static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
97static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));
98static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
99static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
100static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));
101#define	BOL	(OUT+1)
102#define	EOL	(BOL+1)
103#define	BOLEOL	(BOL+2)
104#define	NOTHING	(BOL+3)
105#define	BOW	(BOL+4)
106#define	EOW	(BOL+5)
107#define	CODEMAX	(BOL+5)		/* highest code used */
108#define	NONCHAR(c)	((c) > CHAR_MAX)
109#define	NNONCHAR	(CODEMAX-CHAR_MAX)
110#ifdef REDEBUG
111static void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
112#endif
113#ifdef REDEBUG
114static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
115#endif
116#ifdef REDEBUG
117static char *pchar __P((int ch));
118#endif
119
120#ifdef __cplusplus
121}
122#endif
123/* ========= end header generated by ./mkh ========= */
124
125#ifdef REDEBUG
126#define	SP(t, s, c)	print(m, t, s, c, stdout)
127#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
128#define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
129#else
130#define	SP(t, s, c)	/* nothing */
131#define	AT(t, p1, p2, s1, s2)	/* nothing */
132#define	NOTE(s)	/* nothing */
133#endif
134
135/*
136 - matcher - the actual matching engine
137 == static int matcher(register struct re_guts *g, char *string, \
138 ==	size_t nmatch, regmatch_t pmatch[], int eflags);
139 */
140static int			/* 0 success, REG_NOMATCH failure */
141matcher(g, string, nmatch, pmatch, eflags)
142register struct re_guts *g;
143char *string;
144size_t nmatch;
145regmatch_t pmatch[];
146int eflags;
147{
148	register char *endp;
149	register int i;
150	struct match mv;
151	register struct match *m = &mv;
152	register char *dp;
153	const register sopno gf = g->firststate+1;	/* +1 for OEND */
154	const register sopno gl = g->laststate;
155	char *start;
156	char *stop;
157
158	/* simplify the situation where possible */
159	if (g->cflags&REG_NOSUB)
160		nmatch = 0;
161	if (eflags&REG_STARTEND) {
162		start = string + pmatch[0].rm_so;
163		stop = string + pmatch[0].rm_eo;
164	} else {
165		start = string;
166		stop = start + strlen(start);
167	}
168	if (stop < start)
169		return(REG_INVARG);
170
171	/* prescreening; this does wonders for this rather slow code */
172	if (g->must != NULL) {
173		for (dp = start; dp < stop; dp++)
174			if (*dp == g->must[0] && stop - dp >= g->mlen &&
175				memcmp(dp, g->must, (size_t)g->mlen) == 0)
176				break;
177		if (dp == stop)		/* we didn't find g->must */
178			return(REG_NOMATCH);
179	}
180
181	/* match struct setup */
182	m->g = g;
183	m->eflags = eflags;
184	m->pmatch = NULL;
185	m->lastpos = NULL;
186	m->offp = string;
187	m->beginp = start;
188	m->endp = stop;
189	STATESETUP(m, 4);
190	SETUP(m->st);
191	SETUP(m->fresh);
192	SETUP(m->tmp);
193	SETUP(m->empty);
194	CLEAR(m->empty);
195
196	/* this loop does only one repetition except for backrefs */
197	for (;;) {
198		endp = fast(m, start, stop, gf, gl);
199		if (endp == NULL) {		/* a miss */
200			STATETEARDOWN(m);
201			return(REG_NOMATCH);
202		}
203		if (nmatch == 0 && !g->backrefs)
204			break;		/* no further info needed */
205
206		/* where? */
207		assert(m->coldp != NULL);
208		for (;;) {
209			NOTE("finding start");
210			endp = slow(m, m->coldp, stop, gf, gl);
211			if (endp != NULL)
212				break;
213			assert(m->coldp < m->endp);
214			m->coldp++;
215		}
216		if (nmatch == 1 && !g->backrefs)
217			break;		/* no further info needed */
218
219		/* oh my, he wants the subexpressions... */
220		if (m->pmatch == NULL)
221			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
222							sizeof(regmatch_t));
223		if (m->pmatch == NULL) {
224			STATETEARDOWN(m);
225			return(REG_ESPACE);
226		}
227		for (i = 1; i <= m->g->nsub; i++)
228			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
229		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
230			NOTE("dissecting");
231			dp = dissect(m, m->coldp, endp, gf, gl);
232		} else {
233			if (g->nplus > 0 && m->lastpos == NULL)
234				m->lastpos = (char **)malloc((g->nplus+1) *
235							sizeof(char *));
236			if (g->nplus > 0 && m->lastpos == NULL) {
237				free(m->pmatch);
238				STATETEARDOWN(m);
239				return(REG_ESPACE);
240			}
241			NOTE("backref dissect");
242			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
243		}
244		if (dp != NULL)
245			break;
246
247		/* uh-oh... we couldn't find a subexpression-level match */
248		assert(g->backrefs);	/* must be back references doing it */
249		assert(g->nplus == 0 || m->lastpos != NULL);
250		for (;;) {
251			if (dp != NULL || endp <= m->coldp)
252				break;		/* defeat */
253			NOTE("backoff");
254			endp = slow(m, m->coldp, endp-1, gf, gl);
255			if (endp == NULL)
256				break;		/* defeat */
257			/* try it on a shorter possibility */
258#ifndef NDEBUG
259			for (i = 1; i <= m->g->nsub; i++) {
260				assert(m->pmatch[i].rm_so == -1);
261				assert(m->pmatch[i].rm_eo == -1);
262			}
263#endif
264			NOTE("backoff dissect");
265			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
266		}
267		assert(dp == NULL || dp == endp);
268		if (dp != NULL)		/* found a shorter one */
269			break;
270
271		/* despite initial appearances, there is no match here */
272		NOTE("false alarm");
273		start = m->coldp + 1;	/* recycle starting later */
274		assert(start <= stop);
275	}
276
277	/* fill in the details if requested */
278	if (nmatch > 0) {
279		pmatch[0].rm_so = m->coldp - m->offp;
280		pmatch[0].rm_eo = endp - m->offp;
281	}
282	if (nmatch > 1) {
283		assert(m->pmatch != NULL);
284		for (i = 1; i < nmatch; i++)
285			if (i <= m->g->nsub)
286				pmatch[i] = m->pmatch[i];
287			else {
288				pmatch[i].rm_so = -1;
289				pmatch[i].rm_eo = -1;
290			}
291	}
292
293	if (m->pmatch != NULL)
294		free((char *)m->pmatch);
295	if (m->lastpos != NULL)
296		free((char *)m->lastpos);
297	STATETEARDOWN(m);
298	return(0);
299}
300
301/*
302 - dissect - figure out what matched what, no back references
303 == static char *dissect(register struct match *m, char *start, \
304 ==	char *stop, sopno startst, sopno stopst);
305 */
306static char *			/* == stop (success) always */
307dissect(m, start, stop, startst, stopst)
308register struct match *m;
309char *start;
310char *stop;
311sopno startst;
312sopno stopst;
313{
314	register int i;
315	register sopno ss;	/* start sop of current subRE */
316	register sopno es;	/* end sop of current subRE */
317	register char *sp;	/* start of string matched by it */
318	register char *stp;	/* string matched by it cannot pass here */
319	register char *rest;	/* start of rest of string */
320	register char *tail;	/* string unmatched by rest of RE */
321	register sopno ssub;	/* start sop of subsubRE */
322	register sopno esub;	/* end sop of subsubRE */
323	register char *ssp;	/* start of string matched by subsubRE */
324	register char *sep;	/* end of string matched by subsubRE */
325	register char *oldssp;	/* previous ssp */
326	register char *dp;
327
328	AT("diss", start, stop, startst, stopst);
329	sp = start;
330	for (ss = startst; ss < stopst; ss = es) {
331		/* identify end of subRE */
332		es = ss;
333		switch (OP(m->g->strip[es])) {
334		case OPLUS_:
335		case OQUEST_:
336			es += OPND(m->g->strip[es]);
337			break;
338		case OCH_:
339			while (OP(m->g->strip[es]) != O_CH)
340				es += OPND(m->g->strip[es]);
341			break;
342		}
343		es++;
344
345		/* figure out what it matched */
346		switch (OP(m->g->strip[ss])) {
347		case OEND:
348			assert(nope);
349			break;
350		case OCHAR:
351			sp++;
352			break;
353		case OBOL:
354		case OEOL:
355		case OBOW:
356		case OEOW:
357			break;
358		case OANY:
359		case OANYOF:
360			sp++;
361			break;
362		case OBACK_:
363		case O_BACK:
364			assert(nope);
365			break;
366		/* cases where length of match is hard to find */
367		case OQUEST_:
368			stp = stop;
369			for (;;) {
370				/* how long could this one be? */
371				rest = slow(m, sp, stp, ss, es);
372				assert(rest != NULL);	/* it did match */
373				/* could the rest match the rest? */
374				tail = slow(m, rest, stop, es, stopst);
375				if (tail == stop)
376					break;		/* yes! */
377				/* no -- try a shorter match for this one */
378				stp = rest - 1;
379				assert(stp >= sp);	/* it did work */
380			}
381			ssub = ss + 1;
382			esub = es - 1;
383			/* did innards match? */
384			if (slow(m, sp, rest, ssub, esub) != NULL) {
385				dp = dissect(m, sp, rest, ssub, esub);
386				assert(dp == rest);
387			} else		/* no */
388				assert(sp == rest);
389			sp = rest;
390			break;
391		case OPLUS_:
392			stp = stop;
393			for (;;) {
394				/* how long could this one be? */
395				rest = slow(m, sp, stp, ss, es);
396				assert(rest != NULL);	/* it did match */
397				/* could the rest match the rest? */
398				tail = slow(m, rest, stop, es, stopst);
399				if (tail == stop)
400					break;		/* yes! */
401				/* no -- try a shorter match for this one */
402				stp = rest - 1;
403				assert(stp >= sp);	/* it did work */
404			}
405			ssub = ss + 1;
406			esub = es - 1;
407			ssp = sp;
408			oldssp = ssp;
409			for (;;) {	/* find last match of innards */
410				sep = slow(m, ssp, rest, ssub, esub);
411				if (sep == NULL || sep == ssp)
412					break;	/* failed or matched null */
413				oldssp = ssp;	/* on to next try */
414				ssp = sep;
415			}
416			if (sep == NULL) {
417				/* last successful match */
418				sep = ssp;
419				ssp = oldssp;
420			}
421			assert(sep == rest);	/* must exhaust substring */
422			assert(slow(m, ssp, sep, ssub, esub) == rest);
423			dp = dissect(m, ssp, sep, ssub, esub);
424			assert(dp == sep);
425			sp = rest;
426			break;
427		case OCH_:
428			stp = stop;
429			for (;;) {
430				/* how long could this one be? */
431				rest = slow(m, sp, stp, ss, es);
432				assert(rest != NULL);	/* it did match */
433				/* could the rest match the rest? */
434				tail = slow(m, rest, stop, es, stopst);
435				if (tail == stop)
436					break;		/* yes! */
437				/* no -- try a shorter match for this one */
438				stp = rest - 1;
439				assert(stp >= sp);	/* it did work */
440			}
441			ssub = ss + 1;
442			esub = ss + OPND(m->g->strip[ss]) - 1;
443			assert(OP(m->g->strip[esub]) == OOR1);
444			for (;;) {	/* find first matching branch */
445				if (slow(m, sp, rest, ssub, esub) == rest)
446					break;	/* it matched all of it */
447				/* that one missed, try next one */
448				assert(OP(m->g->strip[esub]) == OOR1);
449				esub++;
450				assert(OP(m->g->strip[esub]) == OOR2);
451				ssub = esub + 1;
452				esub += OPND(m->g->strip[esub]);
453				if (OP(m->g->strip[esub]) == OOR2)
454					esub--;
455				else
456					assert(OP(m->g->strip[esub]) == O_CH);
457			}
458			dp = dissect(m, sp, rest, ssub, esub);
459			assert(dp == rest);
460			sp = rest;
461			break;
462		case O_PLUS:
463		case O_QUEST:
464		case OOR1:
465		case OOR2:
466		case O_CH:
467			assert(nope);
468			break;
469		case OLPAREN:
470			i = OPND(m->g->strip[ss]);
471			assert(0 < i && i <= m->g->nsub);
472			m->pmatch[i].rm_so = sp - m->offp;
473			break;
474		case ORPAREN:
475			i = OPND(m->g->strip[ss]);
476			assert(0 < i && i <= m->g->nsub);
477			m->pmatch[i].rm_eo = sp - m->offp;
478			break;
479		default:		/* uh oh */
480			assert(nope);
481			break;
482		}
483	}
484
485	assert(sp == stop);
486	return(sp);
487}
488
489/*
490 - backref - figure out what matched what, figuring in back references
491 == static char *backref(register struct match *m, char *start, \
492 ==	char *stop, sopno startst, sopno stopst, sopno lev);
493 */
494static char *			/* == stop (success) or NULL (failure) */
495backref(m, start, stop, startst, stopst, lev)
496register struct match *m;
497char *start;
498char *stop;
499sopno startst;
500sopno stopst;
501sopno lev;			/* PLUS nesting level */
502{
503	register int i;
504	register sopno ss;	/* start sop of current subRE */
505	register char *sp;	/* start of string matched by it */
506	register sopno ssub;	/* start sop of subsubRE */
507	register sopno esub;	/* end sop of subsubRE */
508	register char *ssp;	/* start of string matched by subsubRE */
509	register char *dp;
510	register size_t len;
511	register int hard;
512	register sop s;
513	register regoff_t offsave;
514	register cset *cs;
515
516	AT("back", start, stop, startst, stopst);
517	sp = start;
518
519	/* get as far as we can with easy stuff */
520	hard = 0;
521	for (ss = startst; !hard && ss < stopst; ss++)
522		switch (OP(s = m->g->strip[ss])) {
523		case OCHAR:
524			if (sp == stop || *sp++ != (char)OPND(s))
525				return(NULL);
526			break;
527		case OANY:
528			if (sp == stop)
529				return(NULL);
530			sp++;
531			break;
532		case OANYOF:
533			cs = &m->g->sets[OPND(s)];
534			if (sp == stop || !CHIN(cs, *sp++))
535				return(NULL);
536			break;
537		case OBOL:
538			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
539					(sp < m->endp && *(sp-1) == '\n' &&
540						(m->g->cflags&REG_NEWLINE)) )
541				{ /* yes */ }
542			else
543				return(NULL);
544			break;
545		case OEOL:
546			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
547					(sp < m->endp && *sp == '\n' &&
548						(m->g->cflags&REG_NEWLINE)) )
549				{ /* yes */ }
550			else
551				return(NULL);
552			break;
553		case OBOW:
554			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
555					(sp < m->endp && *(sp-1) == '\n' &&
556						(m->g->cflags&REG_NEWLINE)) ||
557					(sp > m->beginp &&
558							!ISWORD(*(sp-1))) ) &&
559					(sp < m->endp && ISWORD(*sp)) )
560				{ /* yes */ }
561			else
562				return(NULL);
563			break;
564		case OEOW:
565			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
566					(sp < m->endp && *sp == '\n' &&
567						(m->g->cflags&REG_NEWLINE)) ||
568					(sp < m->endp && !ISWORD(*sp)) ) &&
569					(sp > m->beginp && ISWORD(*(sp-1))) )
570				{ /* yes */ }
571			else
572				return(NULL);
573			break;
574		case O_QUEST:
575			break;
576		case OOR1:	/* matches null but needs to skip */
577			ss++;
578			s = m->g->strip[ss];
579			do {
580				assert(OP(s) == OOR2);
581				ss += OPND(s);
582			} while (OP(s = m->g->strip[ss]) != O_CH);
583			/* note that the ss++ gets us past the O_CH */
584			break;
585		default:	/* have to make a choice */
586			hard = 1;
587			break;
588		}
589	if (!hard) {		/* that was it! */
590		if (sp != stop)
591			return(NULL);
592		return(sp);
593	}
594	ss--;			/* adjust for the for's final increment */
595
596	/* the hard stuff */
597	AT("hard", sp, stop, ss, stopst);
598	s = m->g->strip[ss];
599	switch (OP(s)) {
600	case OBACK_:		/* the vilest depths */
601		i = OPND(s);
602		assert(0 < i && i <= m->g->nsub);
603		if (m->pmatch[i].rm_eo == -1)
604			return(NULL);
605		assert(m->pmatch[i].rm_so != -1);
606		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
607		assert(stop - m->beginp >= len);
608		if (sp > stop - len)
609			return(NULL);	/* not enough left to match */
610		ssp = m->offp + m->pmatch[i].rm_so;
611		if (memcmp(sp, ssp, len) != 0)
612			return(NULL);
613		while (m->g->strip[ss] != SOP(O_BACK, i))
614			ss++;
615		return(backref(m, sp+len, stop, ss+1, stopst, lev));
616		break;
617	case OQUEST_:		/* to null or not */
618		dp = backref(m, sp, stop, ss+1, stopst, lev);
619		if (dp != NULL)
620			return(dp);	/* not */
621		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
622		break;
623	case OPLUS_:
624		assert(m->lastpos != NULL);
625		assert(lev+1 <= m->g->nplus);
626		m->lastpos[lev+1] = sp;
627		return(backref(m, sp, stop, ss+1, stopst, lev+1));
628		break;
629	case O_PLUS:
630		if (sp == m->lastpos[lev])	/* last pass matched null */
631			return(backref(m, sp, stop, ss+1, stopst, lev-1));
632		/* try another pass */
633		m->lastpos[lev] = sp;
634		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
635		if (dp == NULL)
636			return(backref(m, sp, stop, ss+1, stopst, lev-1));
637		else
638			return(dp);
639		break;
640	case OCH_:		/* find the right one, if any */
641		ssub = ss + 1;
642		esub = ss + OPND(s) - 1;
643		assert(OP(m->g->strip[esub]) == OOR1);
644		for (;;) {	/* find first matching branch */
645			dp = backref(m, sp, stop, ssub, esub, lev);
646			if (dp != NULL)
647				return(dp);
648			/* that one missed, try next one */
649			if (OP(m->g->strip[esub]) == O_CH)
650				return(NULL);	/* there is none */
651			esub++;
652			assert(OP(m->g->strip[esub]) == OOR2);
653			ssub = esub + 1;
654			esub += OPND(m->g->strip[esub]);
655			if (OP(m->g->strip[esub]) == OOR2)
656				esub--;
657			else
658				assert(OP(m->g->strip[esub]) == O_CH);
659		}
660		break;
661	case OLPAREN:		/* must undo assignment if rest fails */
662		i = OPND(s);
663		assert(0 < i && i <= m->g->nsub);
664		offsave = m->pmatch[i].rm_so;
665		m->pmatch[i].rm_so = sp - m->offp;
666		dp = backref(m, sp, stop, ss+1, stopst, lev);
667		if (dp != NULL)
668			return(dp);
669		m->pmatch[i].rm_so = offsave;
670		return(NULL);
671		break;
672	case ORPAREN:		/* must undo assignment if rest fails */
673		i = OPND(s);
674		assert(0 < i && i <= m->g->nsub);
675		offsave = m->pmatch[i].rm_eo;
676		m->pmatch[i].rm_eo = sp - m->offp;
677		dp = backref(m, sp, stop, ss+1, stopst, lev);
678		if (dp != NULL)
679			return(dp);
680		m->pmatch[i].rm_eo = offsave;
681		return(NULL);
682		break;
683	default:		/* uh oh */
684		assert(nope);
685		break;
686	}
687
688	/* "can't happen" */
689	assert(nope);
690	/* NOTREACHED */
691}
692
693/*
694 - fast - step through the string at top speed
695 == static char *fast(register struct match *m, char *start, \
696 ==	char *stop, sopno startst, sopno stopst);
697 */
698static char *			/* where tentative match ended, or NULL */
699fast(m, start, stop, startst, stopst)
700register struct match *m;
701char *start;
702char *stop;
703sopno startst;
704sopno stopst;
705{
706	register states st = m->st;
707	register states fresh = m->fresh;
708	register states tmp = m->tmp;
709	register char *p = start;
710	register int c = (start == m->beginp) ? OUT : *(start-1);
711	register int lastc;	/* previous c */
712	register int flagch;
713	register int i;
714	register char *coldp;	/* last p after which no match was underway */
715
716	CLEAR(st);
717	SET1(st, startst);
718	st = step(m->g, startst, stopst, st, NOTHING, st);
719	ASSIGN(fresh, st);
720	SP("start", st, *p);
721	coldp = NULL;
722	for (;;) {
723		/* next character */
724		lastc = c;
725		c = (p == m->endp) ? OUT : *p;
726		if (EQ(st, fresh))
727			coldp = p;
728
729		/* is there an EOL and/or BOL between lastc and c? */
730		flagch = '\0';
731		i = 0;
732		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
733				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
734			flagch = BOL;
735			i = m->g->nbol;
736		}
737		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
738				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
739			flagch = (flagch == BOL) ? BOLEOL : EOL;
740			i += m->g->neol;
741		}
742		if (i != 0) {
743			for (; i > 0; i--)
744				st = step(m->g, startst, stopst, st, flagch, st);
745			SP("boleol", st, c);
746		}
747
748		/* how about a word boundary? */
749		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
750					(c != OUT && ISWORD(c)) ) {
751			flagch = BOW;
752		}
753		if ( (lastc != OUT && ISWORD(lastc)) &&
754				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
755			flagch = EOW;
756		}
757		if (flagch == BOW || flagch == EOW) {
758			st = step(m->g, startst, stopst, st, flagch, st);
759			SP("boweow", st, c);
760		}
761
762		/* are we done? */
763		if (ISSET(st, stopst) || p == stop)
764			break;		/* NOTE BREAK OUT */
765
766		/* no, we must deal with this character */
767		ASSIGN(tmp, st);
768		ASSIGN(st, fresh);
769		assert(c != OUT);
770		st = step(m->g, startst, stopst, tmp, c, st);
771		SP("aft", st, c);
772		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
773		p++;
774	}
775
776	assert(coldp != NULL);
777	m->coldp = coldp;
778	if (ISSET(st, stopst))
779		return(p+1);
780	else
781		return(NULL);
782}
783
784/*
785 - slow - step through the string more deliberately
786 == static char *slow(register struct match *m, char *start, \
787 ==	char *stop, sopno startst, sopno stopst);
788 */
789static char *			/* where it ended */
790slow(m, start, stop, startst, stopst)
791register struct match *m;
792char *start;
793char *stop;
794sopno startst;
795sopno stopst;
796{
797	register states st = m->st;
798	register states empty = m->empty;
799	register states tmp = m->tmp;
800	register char *p = start;
801	register int c = (start == m->beginp) ? OUT : *(start-1);
802	register int lastc;	/* previous c */
803	register int flagch;
804	register int i;
805	register char *matchp;	/* last p at which a match ended */
806
807	AT("slow", start, stop, startst, stopst);
808	CLEAR(st);
809	SET1(st, startst);
810	SP("sstart", st, *p);
811	st = step(m->g, startst, stopst, st, NOTHING, st);
812	matchp = NULL;
813	for (;;) {
814		/* next character */
815		lastc = c;
816		c = (p == m->endp) ? OUT : *p;
817
818		/* is there an EOL and/or BOL between lastc and c? */
819		flagch = '\0';
820		i = 0;
821		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
822				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
823			flagch = BOL;
824			i = m->g->nbol;
825		}
826		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
827				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
828			flagch = (flagch == BOL) ? BOLEOL : EOL;
829			i += m->g->neol;
830		}
831		if (i != 0) {
832			for (; i > 0; i--)
833				st = step(m->g, startst, stopst, st, flagch, st);
834			SP("sboleol", st, c);
835		}
836
837		/* how about a word boundary? */
838		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
839					(c != OUT && ISWORD(c)) ) {
840			flagch = BOW;
841		}
842		if ( (lastc != OUT && ISWORD(lastc)) &&
843				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
844			flagch = EOW;
845		}
846		if (flagch == BOW || flagch == EOW) {
847			st = step(m->g, startst, stopst, st, flagch, st);
848			SP("sboweow", st, c);
849		}
850
851		/* are we done? */
852		if (ISSET(st, stopst))
853			matchp = p;
854		if (EQ(st, empty) || p == stop)
855			break;		/* NOTE BREAK OUT */
856
857		/* no, we must deal with this character */
858		ASSIGN(tmp, st);
859		ASSIGN(st, empty);
860		assert(c != OUT);
861		st = step(m->g, startst, stopst, tmp, c, st);
862		SP("saft", st, c);
863		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
864		p++;
865	}
866
867	return(matchp);
868}
869
870
871/*
872 - step - map set of states reachable before char to set reachable after
873 == static states step(register struct re_guts *g, sopno start, sopno stop, \
874 ==	register states bef, int ch, register states aft);
875 == #define	BOL	(OUT+1)
876 == #define	EOL	(BOL+1)
877 == #define	BOLEOL	(BOL+2)
878 == #define	NOTHING	(BOL+3)
879 == #define	BOW	(BOL+4)
880 == #define	EOW	(BOL+5)
881 == #define	CODEMAX	(BOL+5)		// highest code used
882 == #define	NONCHAR(c)	((c) > CHAR_MAX)
883 == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
884 */
885static states
886step(g, start, stop, bef, ch, aft)
887register struct re_guts *g;
888sopno start;			/* start state within strip */
889sopno stop;			/* state after stop state within strip */
890register states bef;		/* states reachable before */
891int ch;				/* character or NONCHAR code */
892register states aft;		/* states already known reachable after */
893{
894	register cset *cs;
895	register sop s;
896	register sopno pc;
897	register onestate here;		/* note, macros know this name */
898	register sopno look;
899	register int i;
900
901	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
902		s = g->strip[pc];
903		switch (OP(s)) {
904		case OEND:
905			assert(pc == stop-1);
906			break;
907		case OCHAR:
908			/* only characters can match */
909			assert(!NONCHAR(ch) || ch != (char)OPND(s));
910			if (ch == (char)OPND(s))
911				FWD(aft, bef, 1);
912			break;
913		case OBOL:
914			if (ch == BOL || ch == BOLEOL)
915				FWD(aft, bef, 1);
916			break;
917		case OEOL:
918			if (ch == EOL || ch == BOLEOL)
919				FWD(aft, bef, 1);
920			break;
921		case OBOW:
922			if (ch == BOW)
923				FWD(aft, bef, 1);
924			break;
925		case OEOW:
926			if (ch == EOW)
927				FWD(aft, bef, 1);
928			break;
929		case OANY:
930			if (!NONCHAR(ch))
931				FWD(aft, bef, 1);
932			break;
933		case OANYOF:
934			cs = &g->sets[OPND(s)];
935			if (!NONCHAR(ch) && CHIN(cs, ch))
936				FWD(aft, bef, 1);
937			break;
938		case OBACK_:		/* ignored here */
939		case O_BACK:
940			FWD(aft, aft, 1);
941			break;
942		case OPLUS_:		/* forward, this is just an empty */
943			FWD(aft, aft, 1);
944			break;
945		case O_PLUS:		/* both forward and back */
946			FWD(aft, aft, 1);
947			i = ISSETBACK(aft, OPND(s));
948			BACK(aft, aft, OPND(s));
949			if (!i && ISSETBACK(aft, OPND(s))) {
950				/* oho, must reconsider loop body */
951				pc -= OPND(s) + 1;
952				INIT(here, pc);
953			}
954			break;
955		case OQUEST_:		/* two branches, both forward */
956			FWD(aft, aft, 1);
957			FWD(aft, aft, OPND(s));
958			break;
959		case O_QUEST:		/* just an empty */
960			FWD(aft, aft, 1);
961			break;
962		case OLPAREN:		/* not significant here */
963		case ORPAREN:
964			FWD(aft, aft, 1);
965			break;
966		case OCH_:		/* mark the first two branches */
967			FWD(aft, aft, 1);
968			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
969			FWD(aft, aft, OPND(s));
970			break;
971		case OOR1:		/* done a branch, find the O_CH */
972			if (ISSTATEIN(aft, here)) {
973				for (look = 1;
974						OP(s = g->strip[pc+look]) != O_CH;
975						look += OPND(s))
976					assert(OP(s) == OOR2);
977				FWD(aft, aft, look);
978			}
979			break;
980		case OOR2:		/* propagate OCH_'s marking */
981			FWD(aft, aft, 1);
982			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
983				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
984				FWD(aft, aft, OPND(s));
985			}
986			break;
987		case O_CH:		/* just empty */
988			FWD(aft, aft, 1);
989			break;
990		default:		/* ooooops... */
991			assert(nope);
992			break;
993		}
994	}
995
996	return(aft);
997}
998
999#ifdef REDEBUG
1000/*
1001 - print - print a set of states
1002 == #ifdef REDEBUG
1003 == static void print(struct match *m, char *caption, states st, \
1004 ==	int ch, FILE *d);
1005 == #endif
1006 */
1007static void
1008print(m, caption, st, ch, d)
1009struct match *m;
1010char *caption;
1011states st;
1012int ch;
1013FILE *d;
1014{
1015	register struct re_guts *g = m->g;
1016	register int i;
1017	register int first = 1;
1018
1019	if (!(m->eflags&REG_TRACE))
1020		return;
1021
1022	fprintf(d, "%s", caption);
1023	if (ch != '\0')
1024		fprintf(d, " %s", pchar(ch));
1025	for (i = 0; i < g->nstates; i++)
1026		if (ISSET(st, i)) {
1027			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1028			first = 0;
1029		}
1030	fprintf(d, "\n");
1031}
1032
1033/*
1034 - at - print current situation
1035 == #ifdef REDEBUG
1036 == static void at(struct match *m, char *title, char *start, char *stop, \
1037 ==						sopno startst, sopno stopst);
1038 == #endif
1039 */
1040static void
1041at(m, title, start, stop, startst, stopst)
1042struct match *m;
1043char *title;
1044char *start;
1045char *stop;
1046sopno startst;
1047sopno stopst;
1048{
1049	if (!(m->eflags&REG_TRACE))
1050		return;
1051
1052	printf("%s %s-", title, pchar(*start));
1053	printf("%s ", pchar(*stop));
1054	printf("%ld-%ld\n", (long)startst, (long)stopst);
1055}
1056
1057#ifndef PCHARDONE
1058#define	PCHARDONE	/* never again */
1059/*
1060 - pchar - make a character printable
1061 == #ifdef REDEBUG
1062 == static char *pchar(int ch);
1063 == #endif
1064 *
1065 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1066 * duplicate here avoids having a debugging-capable regexec.o tied to
1067 * a matching debug.o, and this is convenient.  It all disappears in
1068 * the non-debug compilation anyway, so it doesn't matter much.
1069 */
1070static char *			/* -> representation */
1071pchar(ch)
1072int ch;
1073{
1074	static char pbuf[10];
1075
1076	if (isprint(ch) || ch == ' ')
1077		sprintf(pbuf, "%c", ch);
1078	else
1079		sprintf(pbuf, "\\%o", ch);
1080	return(pbuf);
1081}
1082#endif
1083#endif
1084
1085#undef	matcher
1086#undef	fast
1087#undef	slow
1088#undef	dissect
1089#undef	backref
1090#undef	step
1091#undef	print
1092#undef	at
1093#undef	match
1094