1/*
2 * The matching engine and friends.  This file is #included by regexec.c
3 * after suitable #defines of a variety of macros used herein, so that
4 * different state representations can be used without duplicating masses
5 * of code.
6 */
7
8#ifdef SNAMES
9#define	matcher	smatcher
10#define	fast	sfast
11#define	slow	sslow
12#define	dissect	sdissect
13#define	backref	sbackref
14#define	step	sstep
15#define	print	sprint
16#define	at	sat
17#define	match	smat
18#endif
19#ifdef LNAMES
20#define	matcher	lmatcher
21#define	fast	lfast
22#define	slow	lslow
23#define	dissect	ldissect
24#define	backref	lbackref
25#define	step	lstep
26#define	print	lprint
27#define	at	lat
28#define	match	lmat
29#endif
30
31/* another structure passed up and down to avoid zillions of parameters */
32struct match {
33	struct re_guts *g;
34	int eflags;
35	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
36	char *offp;		/* offsets work from here */
37	char *beginp;		/* start of string -- virtual NUL precedes */
38	char *endp;		/* end of string -- virtual NUL here */
39	char *coldp;		/* can be no match starting before here */
40	char **lastpos;		/* [nplus+1] */
41	STATEVARS;
42	states st;		/* current states */
43	states fresh;		/* states for a fresh start */
44	states tmp;		/* temporary */
45	states empty;		/* empty set of states */
46};
47
48#include "engine.ih"
49
50#ifdef REDEBUG
51#define	SP(t, s, c)	print(m, t, s, c, stdout)
52#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
53#define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
54#else
55#define	SP(t, s, c)	/* nothing */
56#define	AT(t, p1, p2, s1, s2)	/* nothing */
57#define	NOTE(s)	/* nothing */
58#endif
59
60/*
61 - matcher - the actual matching engine
62 == static int matcher(register struct re_guts *g, char *string, \
63 ==	size_t nmatch, regmatch_t pmatch[], int eflags);
64 */
65static int			/* 0 success, REG_NOMATCH failure */
66matcher(g, string, nmatch, pmatch, eflags)
67register struct re_guts *g;
68char *string;
69size_t nmatch;
70regmatch_t pmatch[];
71int eflags;
72{
73	register char *endp;
74	register size_t i;
75	struct match mv;
76	register struct match *m = &mv;
77	register char *dp;
78	const register sopno gf = g->firststate+1;	/* +1 for OEND */
79	const register sopno gl = g->laststate;
80	char *start;
81	char *stop;
82
83	/* simplify the situation where possible */
84	if (g->cflags&REG_NOSUB)
85		nmatch = 0;
86	if (eflags&REG_STARTEND) {
87		start = string + pmatch[0].rm_so;
88		stop = string + pmatch[0].rm_eo;
89	} else {
90		start = string;
91		stop = start + strlen(start);
92	}
93	if (stop < start)
94		return(REG_INVARG);
95
96	/* prescreening; this does wonders for this rather slow code */
97	if (g->must != NULL) {
98		for (dp = start; dp < stop; dp++)
99			if (*dp == g->must[0] && stop - dp >= g->mlen &&
100				memcmp(dp, g->must, (size_t)g->mlen) == 0)
101				break;
102		if (dp == stop)		/* we didn't find g->must */
103			return(REG_NOMATCH);
104	}
105
106	/* match struct setup */
107	m->g = g;
108	m->eflags = eflags;
109	m->pmatch = NULL;
110	m->lastpos = NULL;
111	m->offp = string;
112	m->beginp = start;
113	m->endp = stop;
114	STATESETUP(m, 4);
115	SETUP(m->st);
116	SETUP(m->fresh);
117	SETUP(m->tmp);
118	SETUP(m->empty);
119	CLEAR(m->empty);
120
121	/* this loop does only one repetition except for backrefs */
122	for (;;) {
123		endp = fast(m, start, stop, gf, gl);
124		if (endp == NULL) {		/* a miss */
125			STATETEARDOWN(m);
126			return(REG_NOMATCH);
127		}
128		if (nmatch == 0 && !g->backrefs)
129			break;		/* no further info needed */
130
131		/* where? */
132		assert(m->coldp != NULL);
133		for (;;) {
134			NOTE("finding start");
135			endp = slow(m, m->coldp, stop, gf, gl);
136			if (endp != NULL)
137				break;
138			assert(m->coldp < m->endp);
139			m->coldp++;
140		}
141		if (nmatch == 1 && !g->backrefs)
142			break;		/* no further info needed */
143
144		/* oh my, he wants the subexpressions... */
145		if (m->pmatch == NULL)
146			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
147							sizeof(regmatch_t));
148		if (m->pmatch == NULL) {
149			STATETEARDOWN(m);
150			return(REG_ESPACE);
151		}
152		for (i = 1; i <= m->g->nsub; i++)
153			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
154		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
155			NOTE("dissecting");
156			dp = dissect(m, m->coldp, endp, gf, gl);
157		} else {
158			if (g->nplus > 0 && m->lastpos == NULL)
159				m->lastpos = (char **)malloc((g->nplus+1) *
160							sizeof(char *));
161			if (g->nplus > 0 && m->lastpos == NULL) {
162				free(m->pmatch);
163				STATETEARDOWN(m);
164				return(REG_ESPACE);
165			}
166			NOTE("backref dissect");
167			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
168		}
169		if (dp != NULL)
170			break;
171
172		/* uh-oh... we couldn't find a subexpression-level match */
173		assert(g->backrefs);	/* must be back references doing it */
174		assert(g->nplus == 0 || m->lastpos != NULL);
175		for (;;) {
176			if (dp != NULL || endp <= m->coldp)
177				break;		/* defeat */
178			NOTE("backoff");
179			endp = slow(m, m->coldp, endp-1, gf, gl);
180			if (endp == NULL)
181				break;		/* defeat */
182			/* try it on a shorter possibility */
183#ifndef NDEBUG
184			for (i = 1; i <= m->g->nsub; i++) {
185				assert(m->pmatch[i].rm_so == -1);
186				assert(m->pmatch[i].rm_eo == -1);
187			}
188#endif
189			NOTE("backoff dissect");
190			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
191		}
192		assert(dp == NULL || dp == endp);
193		if (dp != NULL)		/* found a shorter one */
194			break;
195
196		/* despite initial appearances, there is no match here */
197		NOTE("false alarm");
198		start = m->coldp + 1;	/* recycle starting later */
199		assert(start <= stop);
200	}
201
202	/* fill in the details if requested */
203	if (nmatch > 0) {
204		pmatch[0].rm_so = m->coldp - m->offp;
205		pmatch[0].rm_eo = endp - m->offp;
206	}
207	if (nmatch > 1) {
208		assert(m->pmatch != NULL);
209		for (i = 1; i < nmatch; i++)
210			if (i <= m->g->nsub)
211				pmatch[i] = m->pmatch[i];
212			else {
213				pmatch[i].rm_so = -1;
214				pmatch[i].rm_eo = -1;
215			}
216	}
217
218	if (m->pmatch != NULL)
219		free((char *)m->pmatch);
220	if (m->lastpos != NULL)
221		free((char *)m->lastpos);
222	STATETEARDOWN(m);
223	return(0);
224}
225
226/*
227 - dissect - figure out what matched what, no back references
228 == static char *dissect(register struct match *m, char *start, \
229 ==	char *stop, sopno startst, sopno stopst);
230 */
231static char *			/* == stop (success) always */
232dissect(m, start, stop, startst, stopst)
233register struct match *m;
234char *start;
235char *stop;
236sopno startst;
237sopno stopst;
238{
239	register int i;
240	register sopno ss;	/* start sop of current subRE */
241	register sopno es;	/* end sop of current subRE */
242	register char *sp;	/* start of string matched by it */
243	register char *stp;	/* string matched by it cannot pass here */
244	register char *rest;	/* start of rest of string */
245	register char *tail;	/* string unmatched by rest of RE */
246	register sopno ssub;	/* start sop of subsubRE */
247	register sopno esub;	/* end sop of subsubRE */
248	register char *ssp;	/* start of string matched by subsubRE */
249	register char *sep;	/* end of string matched by subsubRE */
250	register char *oldssp;	/* previous ssp */
251	register char *dp;
252
253	AT("diss", start, stop, startst, stopst);
254	sp = start;
255	for (ss = startst; ss < stopst; ss = es) {
256		/* identify end of subRE */
257		es = ss;
258		switch (OP(m->g->strip[es])) {
259		case OPLUS_:
260		case OQUEST_:
261			es += OPND(m->g->strip[es]);
262			break;
263		case OCH_:
264			while (OP(m->g->strip[es]) != O_CH)
265				es += OPND(m->g->strip[es]);
266			break;
267		}
268		es++;
269
270		/* figure out what it matched */
271		switch (OP(m->g->strip[ss])) {
272		case OEND:
273			assert(nope);
274			break;
275		case OCHAR:
276			sp++;
277			break;
278		case OBOL:
279		case OEOL:
280		case OBOW:
281		case OEOW:
282			break;
283		case OANY:
284		case OANYOF:
285			sp++;
286			break;
287		case OBACK_:
288		case O_BACK:
289			assert(nope);
290			break;
291		/* cases where length of match is hard to find */
292		case OQUEST_:
293			stp = stop;
294			for (;;) {
295				/* how long could this one be? */
296				rest = slow(m, sp, stp, ss, es);
297				assert(rest != NULL);	/* it did match */
298				/* could the rest match the rest? */
299				tail = slow(m, rest, stop, es, stopst);
300				if (tail == stop)
301					break;		/* yes! */
302				/* no -- try a shorter match for this one */
303				stp = rest - 1;
304				assert(stp >= sp);	/* it did work */
305			}
306			ssub = ss + 1;
307			esub = es - 1;
308			/* did innards match? */
309			if (slow(m, sp, rest, ssub, esub) != NULL) {
310				dp = dissect(m, sp, rest, ssub, esub);
311				assert(dp == rest);
312			} else		/* no */
313				assert(sp == rest);
314			sp = rest;
315			break;
316		case OPLUS_:
317			stp = stop;
318			for (;;) {
319				/* how long could this one be? */
320				rest = slow(m, sp, stp, ss, es);
321				assert(rest != NULL);	/* it did match */
322				/* could the rest match the rest? */
323				tail = slow(m, rest, stop, es, stopst);
324				if (tail == stop)
325					break;		/* yes! */
326				/* no -- try a shorter match for this one */
327				stp = rest - 1;
328				assert(stp >= sp);	/* it did work */
329			}
330			ssub = ss + 1;
331			esub = es - 1;
332			ssp = sp;
333			oldssp = ssp;
334			for (;;) {	/* find last match of innards */
335				sep = slow(m, ssp, rest, ssub, esub);
336				if (sep == NULL || sep == ssp)
337					break;	/* failed or matched null */
338				oldssp = ssp;	/* on to next try */
339				ssp = sep;
340			}
341			if (sep == NULL) {
342				/* last successful match */
343				sep = ssp;
344				ssp = oldssp;
345			}
346			assert(sep == rest);	/* must exhaust substring */
347			assert(slow(m, ssp, sep, ssub, esub) == rest);
348			dp = dissect(m, ssp, sep, ssub, esub);
349			assert(dp == sep);
350			sp = rest;
351			break;
352		case OCH_:
353			stp = stop;
354			for (;;) {
355				/* how long could this one be? */
356				rest = slow(m, sp, stp, ss, es);
357				assert(rest != NULL);	/* it did match */
358				/* could the rest match the rest? */
359				tail = slow(m, rest, stop, es, stopst);
360				if (tail == stop)
361					break;		/* yes! */
362				/* no -- try a shorter match for this one */
363				stp = rest - 1;
364				assert(stp >= sp);	/* it did work */
365			}
366			ssub = ss + 1;
367			esub = ss + OPND(m->g->strip[ss]) - 1;
368			assert(OP(m->g->strip[esub]) == OOR1);
369			for (;;) {	/* find first matching branch */
370				if (slow(m, sp, rest, ssub, esub) == rest)
371					break;	/* it matched all of it */
372				/* that one missed, try next one */
373				assert(OP(m->g->strip[esub]) == OOR1);
374				esub++;
375				assert(OP(m->g->strip[esub]) == OOR2);
376				ssub = esub + 1;
377				esub += OPND(m->g->strip[esub]);
378				if (OP(m->g->strip[esub]) == OOR2)
379					esub--;
380				else
381					assert(OP(m->g->strip[esub]) == O_CH);
382			}
383			dp = dissect(m, sp, rest, ssub, esub);
384			assert(dp == rest);
385			sp = rest;
386			break;
387		case O_PLUS:
388		case O_QUEST:
389		case OOR1:
390		case OOR2:
391		case O_CH:
392			assert(nope);
393			break;
394		case OLPAREN:
395			i = OPND(m->g->strip[ss]);
396			assert(0 < i && i <= m->g->nsub);
397			m->pmatch[i].rm_so = sp - m->offp;
398			break;
399		case ORPAREN:
400			i = OPND(m->g->strip[ss]);
401			assert(0 < i && i <= m->g->nsub);
402			m->pmatch[i].rm_eo = sp - m->offp;
403			break;
404		default:		/* uh oh */
405			assert(nope);
406			break;
407		}
408	}
409
410	assert(sp == stop);
411	return(sp);
412}
413
414/*
415 - backref - figure out what matched what, figuring in back references
416 == static char *backref(register struct match *m, char *start, \
417 ==	char *stop, sopno startst, sopno stopst, sopno lev);
418 */
419static char *			/* == stop (success) or NULL (failure) */
420backref(m, start, stop, startst, stopst, lev)
421register struct match *m;
422char *start;
423char *stop;
424sopno startst;
425sopno stopst;
426sopno lev;			/* PLUS nesting level */
427{
428	register int i;
429	register sopno ss;	/* start sop of current subRE */
430	register char *sp;	/* start of string matched by it */
431	register sopno ssub;	/* start sop of subsubRE */
432	register sopno esub;	/* end sop of subsubRE */
433	register char *ssp;	/* start of string matched by subsubRE */
434	register char *dp;
435	register size_t len;
436	register int hard;
437	register sop s;
438	register regoff_t offsave;
439	register cset *cs;
440
441	AT("back", start, stop, startst, stopst);
442	sp = start;
443
444	/* get as far as we can with easy stuff */
445	hard = 0;
446	for (ss = startst; !hard && ss < stopst; ss++)
447		switch (OP(s = m->g->strip[ss])) {
448		case OCHAR:
449			if (sp == stop || *sp++ != (char)OPND(s))
450				return(NULL);
451			break;
452		case OANY:
453			if (sp == stop)
454				return(NULL);
455			sp++;
456			break;
457		case OANYOF:
458			cs = &m->g->sets[OPND(s)];
459			if (sp == stop || !CHIN(cs, *sp++))
460				return(NULL);
461			break;
462		case OBOL:
463			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
464					(sp < m->endp && *(sp-1) == '\n' &&
465						(m->g->cflags&REG_NEWLINE)) )
466				{ /* yes */ }
467			else
468				return(NULL);
469			break;
470		case OEOL:
471			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
472					(sp < m->endp && *sp == '\n' &&
473						(m->g->cflags&REG_NEWLINE)) )
474				{ /* yes */ }
475			else
476				return(NULL);
477			break;
478		case OBOW:
479			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
480					(sp < m->endp && *(sp-1) == '\n' &&
481						(m->g->cflags&REG_NEWLINE)) ||
482					(sp > m->beginp &&
483							!ISWORD(*(sp-1))) ) &&
484					(sp < m->endp && ISWORD(*sp)) )
485				{ /* yes */ }
486			else
487				return(NULL);
488			break;
489		case OEOW:
490			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
491					(sp < m->endp && *sp == '\n' &&
492						(m->g->cflags&REG_NEWLINE)) ||
493					(sp < m->endp && !ISWORD(*sp)) ) &&
494					(sp > m->beginp && ISWORD(*(sp-1))) )
495				{ /* yes */ }
496			else
497				return(NULL);
498			break;
499		case O_QUEST:
500			break;
501		case OOR1:	/* matches null but needs to skip */
502			ss++;
503			s = m->g->strip[ss];
504			do {
505				assert(OP(s) == OOR2);
506				ss += OPND(s);
507			} while (OP(s = m->g->strip[ss]) != O_CH);
508			/* note that the ss++ gets us past the O_CH */
509			break;
510		default:	/* have to make a choice */
511			hard = 1;
512			break;
513		}
514	if (!hard) {		/* that was it! */
515		if (sp != stop)
516			return(NULL);
517		return(sp);
518	}
519	ss--;			/* adjust for the for's final increment */
520
521	/* the hard stuff */
522	AT("hard", sp, stop, ss, stopst);
523	s = m->g->strip[ss];
524	switch (OP(s)) {
525	case OBACK_:		/* the vilest depths */
526		i = OPND(s);
527		assert(0 < i && i <= m->g->nsub);
528		if (m->pmatch[i].rm_eo == -1)
529			return(NULL);
530		assert(m->pmatch[i].rm_so != -1);
531		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
532		assert(stop - m->beginp >= len);
533		if (sp > stop - len)
534			return(NULL);	/* not enough left to match */
535		ssp = m->offp + m->pmatch[i].rm_so;
536		if (memcmp(sp, ssp, len) != 0)
537			return(NULL);
538		while (m->g->strip[ss] != SOP(O_BACK, i))
539			ss++;
540		return(backref(m, sp+len, stop, ss+1, stopst, lev));
541		break;
542	case OQUEST_:		/* to null or not */
543		dp = backref(m, sp, stop, ss+1, stopst, lev);
544		if (dp != NULL)
545			return(dp);	/* not */
546		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
547		break;
548	case OPLUS_:
549		assert(m->lastpos != NULL);
550		assert(lev+1 <= m->g->nplus);
551		m->lastpos[lev+1] = sp;
552		return(backref(m, sp, stop, ss+1, stopst, lev+1));
553		break;
554	case O_PLUS:
555		if (sp == m->lastpos[lev])	/* last pass matched null */
556			return(backref(m, sp, stop, ss+1, stopst, lev-1));
557		/* try another pass */
558		m->lastpos[lev] = sp;
559		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
560		if (dp == NULL)
561			return(backref(m, sp, stop, ss+1, stopst, lev-1));
562		else
563			return(dp);
564		break;
565	case OCH_:		/* find the right one, if any */
566		ssub = ss + 1;
567		esub = ss + OPND(s) - 1;
568		assert(OP(m->g->strip[esub]) == OOR1);
569		for (;;) {	/* find first matching branch */
570			dp = backref(m, sp, stop, ssub, esub, lev);
571			if (dp != NULL)
572				return(dp);
573			/* that one missed, try next one */
574			if (OP(m->g->strip[esub]) == O_CH)
575				return(NULL);	/* there is none */
576			esub++;
577			assert(OP(m->g->strip[esub]) == OOR2);
578			ssub = esub + 1;
579			esub += OPND(m->g->strip[esub]);
580			if (OP(m->g->strip[esub]) == OOR2)
581				esub--;
582			else
583				assert(OP(m->g->strip[esub]) == O_CH);
584		}
585		break;
586	case OLPAREN:		/* must undo assignment if rest fails */
587		i = OPND(s);
588		assert(0 < i && i <= m->g->nsub);
589		offsave = m->pmatch[i].rm_so;
590		m->pmatch[i].rm_so = sp - m->offp;
591		dp = backref(m, sp, stop, ss+1, stopst, lev);
592		if (dp != NULL)
593			return(dp);
594		m->pmatch[i].rm_so = offsave;
595		return(NULL);
596		break;
597	case ORPAREN:		/* must undo assignment if rest fails */
598		i = OPND(s);
599		assert(0 < i && i <= m->g->nsub);
600		offsave = m->pmatch[i].rm_eo;
601		m->pmatch[i].rm_eo = sp - m->offp;
602		dp = backref(m, sp, stop, ss+1, stopst, lev);
603		if (dp != NULL)
604			return(dp);
605		m->pmatch[i].rm_eo = offsave;
606		return(NULL);
607		break;
608	default:		/* uh oh */
609		assert(nope);
610		break;
611	}
612
613	/* "can't happen" */
614	assert(nope);
615	/* NOTREACHED */
616	return((char *)NULL);	/* dummy */
617}
618
619/*
620 - fast - step through the string at top speed
621 == static char *fast(register struct match *m, char *start, \
622 ==	char *stop, sopno startst, sopno stopst);
623 */
624static char *			/* where tentative match ended, or NULL */
625fast(m, start, stop, startst, stopst)
626register struct match *m;
627char *start;
628char *stop;
629sopno startst;
630sopno stopst;
631{
632	register states st = m->st;
633	register states fresh = m->fresh;
634	register states tmp = m->tmp;
635	register char *p = start;
636	register int c = (start == m->beginp) ? OUT : *(start-1);
637	register int lastc;	/* previous c */
638	register int flagch;
639	register int i;
640	register char *coldp;	/* last p after which no match was underway */
641
642	CLEAR(st);
643	SET1(st, startst);
644	st = step(m->g, startst, stopst, st, NOTHING, st);
645	ASSIGN(fresh, st);
646	SP("start", st, *p);
647	coldp = NULL;
648	for (;;) {
649		/* next character */
650		lastc = c;
651		c = (p == m->endp) ? OUT : *p;
652		if (EQ(st, fresh))
653			coldp = p;
654
655		/* is there an EOL and/or BOL between lastc and c? */
656		flagch = '\0';
657		i = 0;
658		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
659				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
660			flagch = BOL;
661			i = m->g->nbol;
662		}
663		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
664				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
665			flagch = (flagch == BOL) ? BOLEOL : EOL;
666			i += m->g->neol;
667		}
668		if (i != 0) {
669			for (; i > 0; i--)
670				st = step(m->g, startst, stopst, st, flagch, st);
671			SP("boleol", st, c);
672		}
673
674		/* how about a word boundary? */
675		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
676					(c != OUT && ISWORD(c)) ) {
677			flagch = BOW;
678		}
679		if ( (lastc != OUT && ISWORD(lastc)) &&
680				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
681			flagch = EOW;
682		}
683		if (flagch == BOW || flagch == EOW) {
684			st = step(m->g, startst, stopst, st, flagch, st);
685			SP("boweow", st, c);
686		}
687
688		/* are we done? */
689		if (ISSET(st, stopst) || p == stop)
690			break;		/* NOTE BREAK OUT */
691
692		/* no, we must deal with this character */
693		ASSIGN(tmp, st);
694		ASSIGN(st, fresh);
695		assert(c != OUT);
696		st = step(m->g, startst, stopst, tmp, c, st);
697		SP("aft", st, c);
698		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
699		p++;
700	}
701
702	assert(coldp != NULL);
703	m->coldp = coldp;
704	if (ISSET(st, stopst))
705		return(p+1);
706	else
707		return(NULL);
708}
709
710/*
711 - slow - step through the string more deliberately
712 == static char *slow(register struct match *m, char *start, \
713 ==	char *stop, sopno startst, sopno stopst);
714 */
715static char *			/* where it ended */
716slow(m, start, stop, startst, stopst)
717register struct match *m;
718char *start;
719char *stop;
720sopno startst;
721sopno stopst;
722{
723	register states st = m->st;
724	register states empty = m->empty;
725	register states tmp = m->tmp;
726	register char *p = start;
727	register int c = (start == m->beginp) ? OUT : *(start-1);
728	register int lastc;	/* previous c */
729	register int flagch;
730	register int i;
731	register char *matchp;	/* last p at which a match ended */
732
733	AT("slow", start, stop, startst, stopst);
734	CLEAR(st);
735	SET1(st, startst);
736	SP("sstart", st, *p);
737	st = step(m->g, startst, stopst, st, NOTHING, st);
738	matchp = NULL;
739	for (;;) {
740		/* next character */
741		lastc = c;
742		c = (p == m->endp) ? OUT : *p;
743
744		/* is there an EOL and/or BOL between lastc and c? */
745		flagch = '\0';
746		i = 0;
747		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
748				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
749			flagch = BOL;
750			i = m->g->nbol;
751		}
752		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
753				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
754			flagch = (flagch == BOL) ? BOLEOL : EOL;
755			i += m->g->neol;
756		}
757		if (i != 0) {
758			for (; i > 0; i--)
759				st = step(m->g, startst, stopst, st, flagch, st);
760			SP("sboleol", st, c);
761		}
762
763		/* how about a word boundary? */
764		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
765					(c != OUT && ISWORD(c)) ) {
766			flagch = BOW;
767		}
768		if ( (lastc != OUT && ISWORD(lastc)) &&
769				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
770			flagch = EOW;
771		}
772		if (flagch == BOW || flagch == EOW) {
773			st = step(m->g, startst, stopst, st, flagch, st);
774			SP("sboweow", st, c);
775		}
776
777		/* are we done? */
778		if (ISSET(st, stopst))
779			matchp = p;
780		if (EQ(st, empty) || p == stop)
781			break;		/* NOTE BREAK OUT */
782
783		/* no, we must deal with this character */
784		ASSIGN(tmp, st);
785		ASSIGN(st, empty);
786		assert(c != OUT);
787		st = step(m->g, startst, stopst, tmp, c, st);
788		SP("saft", st, c);
789		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
790		p++;
791	}
792
793	return(matchp);
794}
795
796
797/*
798 - step - map set of states reachable before char to set reachable after
799 == static states step(register struct re_guts *g, sopno start, sopno stop, \
800 ==	register states bef, int ch, register states aft);
801 == #define	BOL	(OUT+1)
802 == #define	EOL	(BOL+1)
803 == #define	BOLEOL	(BOL+2)
804 == #define	NOTHING	(BOL+3)
805 == #define	BOW	(BOL+4)
806 == #define	EOW	(BOL+5)
807 == #define	CODEMAX	(BOL+5)		// highest code used
808 == #define	NONCHAR(c)	((c) > CHAR_MAX)
809 == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
810 */
811static states
812step(g, start, stop, bef, ch, aft)
813register struct re_guts *g;
814sopno start;			/* start state within strip */
815sopno stop;			/* state after stop state within strip */
816register states bef;		/* states reachable before */
817int ch;				/* character or NONCHAR code */
818register states aft;		/* states already known reachable after */
819{
820	register cset *cs;
821	register sop s;
822	register sopno pc;
823	register onestate here;		/* note, macros know this name */
824	register sopno look;
825	register long i;
826
827	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
828		s = g->strip[pc];
829		switch (OP(s)) {
830		case OEND:
831			assert(pc == stop-1);
832			break;
833		case OCHAR:
834			/* only characters can match */
835			assert(!NONCHAR(ch) || ch != (char)OPND(s));
836			if (ch == (char)OPND(s))
837				FWD(aft, bef, 1);
838			break;
839		case OBOL:
840			if (ch == BOL || ch == BOLEOL)
841				FWD(aft, bef, 1);
842			break;
843		case OEOL:
844			if (ch == EOL || ch == BOLEOL)
845				FWD(aft, bef, 1);
846			break;
847		case OBOW:
848			if (ch == BOW)
849				FWD(aft, bef, 1);
850			break;
851		case OEOW:
852			if (ch == EOW)
853				FWD(aft, bef, 1);
854			break;
855		case OANY:
856			if (!NONCHAR(ch))
857				FWD(aft, bef, 1);
858			break;
859		case OANYOF:
860			cs = &g->sets[OPND(s)];
861			if (!NONCHAR(ch) && CHIN(cs, ch))
862				FWD(aft, bef, 1);
863			break;
864		case OBACK_:		/* ignored here */
865		case O_BACK:
866			FWD(aft, aft, 1);
867			break;
868		case OPLUS_:		/* forward, this is just an empty */
869			FWD(aft, aft, 1);
870			break;
871		case O_PLUS:		/* both forward and back */
872			FWD(aft, aft, 1);
873			i = ISSETBACK(aft, OPND(s));
874			BACK(aft, aft, OPND(s));
875			if (!i && ISSETBACK(aft, OPND(s))) {
876				/* oho, must reconsider loop body */
877				pc -= OPND(s) + 1;
878				INIT(here, pc);
879			}
880			break;
881		case OQUEST_:		/* two branches, both forward */
882			FWD(aft, aft, 1);
883			FWD(aft, aft, OPND(s));
884			break;
885		case O_QUEST:		/* just an empty */
886			FWD(aft, aft, 1);
887			break;
888		case OLPAREN:		/* not significant here */
889		case ORPAREN:
890			FWD(aft, aft, 1);
891			break;
892		case OCH_:		/* mark the first two branches */
893			FWD(aft, aft, 1);
894			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
895			FWD(aft, aft, OPND(s));
896			break;
897		case OOR1:		/* done a branch, find the O_CH */
898			if (ISSTATEIN(aft, here)) {
899				for (look = 1;
900						OP(s = g->strip[pc+look]) != O_CH;
901						look += OPND(s))
902					assert(OP(s) == OOR2);
903				FWD(aft, aft, look);
904			}
905			break;
906		case OOR2:		/* propagate OCH_'s marking */
907			FWD(aft, aft, 1);
908			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
909				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
910				FWD(aft, aft, OPND(s));
911			}
912			break;
913		case O_CH:		/* just empty */
914			FWD(aft, aft, 1);
915			break;
916		default:		/* ooooops... */
917			assert(nope);
918			break;
919		}
920	}
921
922	return(aft);
923}
924
925#ifdef REDEBUG
926/*
927 - print - print a set of states
928 == #ifdef REDEBUG
929 == static void print(struct match *m, char *caption, states st, \
930 ==	int ch, FILE *d);
931 == #endif
932 */
933static void
934print(m, caption, st, ch, d)
935struct match *m;
936char *caption;
937states st;
938int ch;
939FILE *d;
940{
941	register struct re_guts *g = m->g;
942	register int i;
943	register int first = 1;
944
945	if (!(m->eflags&REG_TRACE))
946		return;
947
948	fprintf(d, "%s", caption);
949	if (ch != '\0')
950		fprintf(d, " %s", pchar(ch));
951	for (i = 0; i < g->nstates; i++)
952		if (ISSET(st, i)) {
953			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
954			first = 0;
955		}
956	fprintf(d, "\n");
957}
958
959/*
960 - at - print current situation
961 == #ifdef REDEBUG
962 == static void at(struct match *m, char *title, char *start, char *stop, \
963 ==						sopno startst, sopno stopst);
964 == #endif
965 */
966static void
967at(m, title, start, stop, startst, stopst)
968struct match *m;
969char *title;
970char *start;
971char *stop;
972sopno startst;
973sopno stopst;
974{
975	if (!(m->eflags&REG_TRACE))
976		return;
977
978	printf("%s %s-", title, pchar(*start));
979	printf("%s ", pchar(*stop));
980	printf("%ld-%ld\n", (long)startst, (long)stopst);
981}
982
983#ifndef PCHARDONE
984#define	PCHARDONE	/* never again */
985/*
986 - pchar - make a character printable
987 == #ifdef REDEBUG
988 == static char *pchar(int ch);
989 == #endif
990 *
991 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
992 * duplicate here avoids having a debugging-capable regexec.o tied to
993 * a matching debug.o, and this is convenient.  It all disappears in
994 * the non-debug compilation anyway, so it doesn't matter much.
995 */
996static char *			/* -> representation */
997pchar(ch)
998int ch;
999{
1000	static char pbuf[10];
1001
1002	if (isprint(ch) || ch == ' ')
1003		sprintf(pbuf, "%c", ch);
1004	else
1005		sprintf(pbuf, "\\%o", ch);
1006	return(pbuf);
1007}
1008#endif
1009#endif
1010
1011#undef	matcher
1012#undef	fast
1013#undef	slow
1014#undef	dissect
1015#undef	backref
1016#undef	step
1017#undef	print
1018#undef	at
1019#undef	match
1020