1/*
2 * re_*exec and friends - match REs
3 *
4 * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
5 *
6 * Development of this software was funded, in part, by Cray Research Inc.,
7 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8 * Corporation, none of whom are responsible for the results.  The author
9 * thanks all of them.
10 *
11 * Redistribution and use in source and binary forms -- with or without
12 * modification -- are permitted for any purpose, provided that
13 * redistributions in source form retain this entire copyright notice and
14 * indicate the origin and nature of any modifications.
15 *
16 * I'd appreciate being given credit for this package in the documentation
17 * of software which uses it, but that is not a requirement.
18 *
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
22 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 */
31
32#include "regguts.h"
33
34
35
36/* lazy-DFA representation */
37struct arcp {			/* "pointer" to an outarc */
38	struct sset *ss;
39	color co;
40};
41
42struct sset {			/* state set */
43	unsigned *states;	/* pointer to bitvector */
44	unsigned hash;		/* hash of bitvector */
45#		define	HASH(bv, nw)	(((nw) == 1) ? *(bv) : hash(bv, nw))
46#	define	HIT(h,bv,ss,nw)	((ss)->hash == (h) && ((nw) == 1 || \
47		memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
48	int flags;
49#		define	STARTER		01	/* the initial state set */
50#		define	POSTSTATE	02	/* includes the goal state */
51#		define	LOCKED		04	/* locked in cache */
52#		define	NOPROGRESS	010	/* zero-progress state set */
53	struct arcp ins;	/* chain of inarcs pointing here */
54	chr *lastseen;		/* last entered on arrival here */
55	struct sset **outs;	/* outarc vector indexed by color */
56	struct arcp *inchain;	/* chain-pointer vector for outarcs */
57};
58
59struct dfa {
60	int nssets;		/* size of cache */
61	int nssused;		/* how many entries occupied yet */
62	int nstates;		/* number of states */
63	int ncolors;		/* length of outarc and inchain vectors */
64	int wordsper;		/* length of state-set bitvectors */
65	struct sset *ssets;	/* state-set cache */
66	unsigned *statesarea;	/* bitvector storage */
67	unsigned *work;		/* pointer to work area within statesarea */
68	struct sset **outsarea;	/* outarc-vector storage */
69	struct arcp *incarea;	/* inchain storage */
70	struct cnfa *cnfa;
71	struct colormap *cm;
72	chr *lastpost;		/* location of last cache-flushed success */
73	chr *lastnopr;		/* location of last cache-flushed NOPROGRESS */
74	struct sset *search;	/* replacement-search-pointer memory */
75	int cptsmalloced;	/* were the areas individually malloced? */
76	char *mallocarea;	/* self, or master malloced area, or NULL */
77};
78
79#define	WORK	1		/* number of work bitvectors needed */
80
81/* setup for non-malloc allocation for small cases */
82#define	FEWSTATES	20	/* must be less than UBITS */
83#define	FEWCOLORS	15
84struct smalldfa {
85	struct dfa dfa;
86	struct sset ssets[FEWSTATES*2];
87	unsigned statesarea[FEWSTATES*2 + WORK];
88	struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
89	struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
90};
91#define	DOMALLOC	((struct smalldfa *)NULL)	/* force malloc */
92
93
94
95/* internal variables, bundled for easy passing around */
96struct vars {
97	regex_t *re;
98	struct guts *g;
99	int eflags;		/* copies of arguments */
100	size_t nmatch;
101	regmatch_t *pmatch;
102	rm_detail_t *details;
103	chr *start;		/* start of string */
104	chr *stop;		/* just past end of string */
105	int err;		/* error code if any (0 none) */
106	regoff_t *mem;		/* memory vector for backtracking */
107	struct smalldfa dfa1;
108	struct smalldfa dfa2;
109};
110#define	VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */
111#define	ISERR()	VISERR(v)
112#define	VERR(vv,e)	(((vv)->err) ? (vv)->err : ((vv)->err = (e)))
113#define	ERR(e)	VERR(v, e)		/* record an error */
114#define	NOERR()	{if (ISERR()) return v->err;}	/* if error seen, return it */
115#define	OFF(p)	((p) - v->start)
116#define	LOFF(p)	((long)OFF(p))
117
118
119
120/*
121 * forward declarations
122 */
123/* =====^!^===== begin forwards =====^!^===== */
124/* automatically gathered by fwd; do not hand-edit */
125/* === regexec.c === */
126int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
127static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
128static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
129static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **));
130static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));
131static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));
132static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
133static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
134static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
135static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
136static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
137static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
138static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
139static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
140static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
141/* === rege_dfa.c === */
142static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));
143static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));
144static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));
145static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
146static VOID freedfa _ANSI_ARGS_((struct dfa *));
147static unsigned hash _ANSI_ARGS_((unsigned *, int));
148static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
149static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
150static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
151static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
152static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
153/* automatically gathered by fwd; do not hand-edit */
154/* =====^!^===== end forwards =====^!^===== */
155
156
157
158/*
159 - exec - match regular expression
160 ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
161 ^					size_t, regmatch_t [], int);
162 */
163int
164exec(re, string, len, details, nmatch, pmatch, flags)
165regex_t *re;
166CONST chr *string;
167size_t len;
168rm_detail_t *details;
169size_t nmatch;
170regmatch_t pmatch[];
171int flags;
172{
173	struct vars var;
174	register struct vars *v = &var;
175	int st;
176	size_t n;
177	int backref;
178#	define	LOCALMAT	20
179	regmatch_t mat[LOCALMAT];
180#	define	LOCALMEM	40
181	regoff_t mem[LOCALMEM];
182
183	/* sanity checks */
184	if (re == NULL || string == NULL || re->re_magic != REMAGIC)
185		return REG_INVARG;
186	if (re->re_csize != sizeof(chr))
187		return REG_MIXED;
188
189	/* setup */
190	v->re = re;
191	v->g = (struct guts *)re->re_guts;
192	if ((v->g->cflags&REG_EXPECT) && details == NULL)
193		return REG_INVARG;
194	if (v->g->info&REG_UIMPOSSIBLE)
195		return REG_NOMATCH;
196	backref = (v->g->info&REG_UBACKREF) ? 1 : 0;
197	v->eflags = flags;
198	if (v->g->cflags&REG_NOSUB)
199		nmatch = 0;		/* override client */
200	v->nmatch = nmatch;
201	if (backref) {
202		/* need work area */
203		if (v->g->nsub + 1 <= LOCALMAT)
204			v->pmatch = mat;
205		else
206			v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
207							sizeof(regmatch_t));
208		if (v->pmatch == NULL)
209			return REG_ESPACE;
210		v->nmatch = v->g->nsub + 1;
211	} else
212		v->pmatch = pmatch;
213	v->details = details;
214	v->start = (chr *)string;
215	v->stop = (chr *)string + len;
216	v->err = 0;
217	if (backref) {
218		/* need retry memory */
219		assert(v->g->ntree >= 0);
220		n = (size_t)v->g->ntree;
221		if (n <= LOCALMEM)
222			v->mem = mem;
223		else
224			v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
225		if (v->mem == NULL) {
226			if (v->pmatch != pmatch && v->pmatch != mat)
227				FREE(v->pmatch);
228			return REG_ESPACE;
229		}
230	} else
231		v->mem = NULL;
232
233	/* do it */
234	assert(v->g->tree != NULL);
235	if (backref)
236		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
237	else
238		st = find(v, &v->g->tree->cnfa, &v->g->cmap);
239
240	/* copy (portion of) match vector over if necessary */
241	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
242		zapsubs(pmatch, nmatch);
243		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
244		memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
245	}
246
247	/* clean up */
248	if (v->pmatch != pmatch && v->pmatch != mat)
249		FREE(v->pmatch);
250	if (v->mem != NULL && v->mem != mem)
251		FREE(v->mem);
252	return st;
253}
254
255/*
256 - find - find a match for the main NFA (no-complications case)
257 ^ static int find(struct vars *, struct cnfa *, struct colormap *);
258 */
259static int
260find(v, cnfa, cm)
261struct vars *v;
262struct cnfa *cnfa;
263struct colormap *cm;
264{
265	struct dfa *s;
266	struct dfa *d;
267	chr *begin;
268	chr *end = NULL;
269	chr *cold;
270	chr *open;		/* open and close of range of possible starts */
271	chr *close;
272	int hitend;
273	int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
274
275	/* first, a shot with the search RE */
276	s = newdfa(v, &v->g->search, cm, &v->dfa1);
277	assert(!(ISERR() && s != NULL));
278	NOERR();
279	MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
280	cold = NULL;
281	close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);
282	freedfa(s);
283	NOERR();
284	if (v->g->cflags&REG_EXPECT) {
285		assert(v->details != NULL);
286		if (cold != NULL)
287			v->details->rm_extend.rm_so = OFF(cold);
288		else
289			v->details->rm_extend.rm_so = OFF(v->stop);
290		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
291	}
292	if (close == NULL)		/* not found */
293		return REG_NOMATCH;
294	if (v->nmatch == 0)		/* found, don't need exact location */
295		return REG_OKAY;
296
297	/* find starting point and match */
298	assert(cold != NULL);
299	open = cold;
300	cold = NULL;
301	MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
302	d = newdfa(v, cnfa, cm, &v->dfa1);
303	assert(!(ISERR() && d != NULL));
304	NOERR();
305	for (begin = open; begin <= close; begin++) {
306		MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
307		if (shorter)
308			end = shortest(v, d, begin, begin, v->stop,
309							(chr **)NULL, &hitend);
310		else
311			end = longest(v, d, begin, v->stop, &hitend);
312		NOERR();
313		if (hitend && cold == NULL)
314			cold = begin;
315		if (end != NULL)
316			break;		/* NOTE BREAK OUT */
317	}
318	assert(end != NULL);		/* search RE succeeded so loop should */
319	freedfa(d);
320
321	/* and pin down details */
322	assert(v->nmatch > 0);
323	v->pmatch[0].rm_so = OFF(begin);
324	v->pmatch[0].rm_eo = OFF(end);
325	if (v->g->cflags&REG_EXPECT) {
326		if (cold != NULL)
327			v->details->rm_extend.rm_so = OFF(cold);
328		else
329			v->details->rm_extend.rm_so = OFF(v->stop);
330		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
331	}
332	if (v->nmatch == 1)		/* no need for submatches */
333		return REG_OKAY;
334
335	/* submatches */
336	zapsubs(v->pmatch, v->nmatch);
337	return dissect(v, v->g->tree, begin, end);
338}
339
340/*
341 - cfind - find a match for the main NFA (with complications)
342 ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
343 */
344static int
345cfind(v, cnfa, cm)
346struct vars *v;
347struct cnfa *cnfa;
348struct colormap *cm;
349{
350	struct dfa *s;
351	struct dfa *d;
352	chr *cold = NULL; /* silence gcc 4 warning */
353	int ret;
354
355	s = newdfa(v, &v->g->search, cm, &v->dfa1);
356	NOERR();
357	d = newdfa(v, cnfa, cm, &v->dfa2);
358	if (ISERR()) {
359		assert(d == NULL);
360		freedfa(s);
361		return v->err;
362	}
363
364	ret = cfindloop(v, cnfa, cm, d, s, &cold);
365
366	freedfa(d);
367	freedfa(s);
368	NOERR();
369	if (v->g->cflags&REG_EXPECT) {
370		assert(v->details != NULL);
371		if (cold != NULL)
372			v->details->rm_extend.rm_so = OFF(cold);
373		else
374			v->details->rm_extend.rm_so = OFF(v->stop);
375		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
376	}
377	return ret;
378}
379
380/*
381 - cfindloop - the heart of cfind
382 ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
383 ^	struct dfa *, struct dfa *, chr **);
384 */
385static int
386cfindloop(v, cnfa, cm, d, s, coldp)
387struct vars *v;
388struct cnfa *cnfa;
389struct colormap *cm;
390struct dfa *d;
391struct dfa *s;
392chr **coldp;			/* where to put coldstart pointer */
393{
394	chr *begin;
395	chr *end;
396	chr *cold;
397	chr *open;		/* open and close of range of possible starts */
398	chr *close;
399	chr *estart;
400	chr *estop;
401	int er;
402	int shorter = v->g->tree->flags&SHORTER;
403	int hitend;
404
405	assert(d != NULL && s != NULL);
406	cold = NULL;
407	close = v->start;
408	do {
409		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
410		close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);
411		if (close == NULL)
412			break;				/* NOTE BREAK */
413		assert(cold != NULL);
414		open = cold;
415		cold = NULL;
416		MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
417		for (begin = open; begin <= close; begin++) {
418			MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
419			estart = begin;
420			estop = v->stop;
421			for (;;) {
422				if (shorter)
423					end = shortest(v, d, begin, estart,
424						estop, (chr **)NULL, &hitend);
425				else
426					end = longest(v, d, begin, estop,
427								&hitend);
428				if (hitend && cold == NULL)
429					cold = begin;
430				if (end == NULL)
431					break;		/* NOTE BREAK OUT */
432				MDEBUG(("tentative end %ld\n", LOFF(end)));
433				zapsubs(v->pmatch, v->nmatch);
434				zapmem(v, v->g->tree);
435				er = cdissect(v, v->g->tree, begin, end);
436				if (er == REG_OKAY) {
437					if (v->nmatch > 0) {
438						v->pmatch[0].rm_so = OFF(begin);
439						v->pmatch[0].rm_eo = OFF(end);
440					}
441					*coldp = cold;
442					return REG_OKAY;
443				}
444				if (er != REG_NOMATCH) {
445					ERR(er);
446					return er;
447				}
448				if ((shorter) ? end == estop : end == begin) {
449					/* no point in trying again */
450					*coldp = cold;
451					return REG_NOMATCH;
452				}
453				/* go around and try again */
454				if (shorter)
455					estart = end + 1;
456				else
457					estop = end - 1;
458			}
459		}
460	} while (close < v->stop);
461
462	*coldp = cold;
463	return REG_NOMATCH;
464}
465
466/*
467 - zapsubs - initialize the subexpression matches to "no match"
468 ^ static VOID zapsubs(regmatch_t *, size_t);
469 */
470static VOID
471zapsubs(p, n)
472regmatch_t *p;
473size_t n;
474{
475	size_t i;
476
477	for (i = n-1; i > 0; i--) {
478		p[i].rm_so = -1;
479		p[i].rm_eo = -1;
480	}
481}
482
483/*
484 - zapmem - initialize the retry memory of a subtree to zeros
485 ^ static VOID zapmem(struct vars *, struct subre *);
486 */
487static VOID
488zapmem(v, t)
489struct vars *v;
490struct subre *t;
491{
492	if (t == NULL)
493		return;
494
495	assert(v->mem != NULL);
496	v->mem[t->retry] = 0;
497	if (t->op == '(') {
498		assert(t->subno > 0);
499		v->pmatch[t->subno].rm_so = -1;
500		v->pmatch[t->subno].rm_eo = -1;
501	}
502
503	if (t->left != NULL)
504		zapmem(v, t->left);
505	if (t->right != NULL)
506		zapmem(v, t->right);
507}
508
509/*
510 - subset - set any subexpression relevant to a successful subre
511 ^ static VOID subset(struct vars *, struct subre *, chr *, chr *);
512 */
513static VOID
514subset(v, sub, begin, end)
515struct vars *v;
516struct subre *sub;
517chr *begin;
518chr *end;
519{
520	int n = sub->subno;
521
522	assert(n > 0);
523	if ((size_t)n >= v->nmatch)
524		return;
525
526	MDEBUG(("setting %d\n", n));
527	v->pmatch[n].rm_so = OFF(begin);
528	v->pmatch[n].rm_eo = OFF(end);
529}
530
531/*
532 - dissect - determine subexpression matches (uncomplicated case)
533 ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
534 */
535static int			/* regexec return code */
536dissect(v, t, begin, end)
537struct vars *v;
538struct subre *t;
539chr *begin;			/* beginning of relevant substring */
540chr *end;			/* end of same */
541{
542	assert(t != NULL);
543	MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
544
545	switch (t->op) {
546	case '=':		/* terminal node */
547		assert(t->left == NULL && t->right == NULL);
548		return REG_OKAY;	/* no action, parent did the work */
549		break;
550	case '|':		/* alternation */
551		assert(t->left != NULL);
552		return altdissect(v, t, begin, end);
553		break;
554	case 'b':		/* back ref -- shouldn't be calling us! */
555		return REG_ASSERT;
556		break;
557	case '.':		/* concatenation */
558		assert(t->left != NULL && t->right != NULL);
559		return condissect(v, t, begin, end);
560		break;
561	case '(':		/* capturing */
562		assert(t->left != NULL && t->right == NULL);
563		assert(t->subno > 0);
564		subset(v, t, begin, end);
565		return dissect(v, t->left, begin, end);
566		break;
567	default:
568		return REG_ASSERT;
569		break;
570	}
571}
572
573/*
574 - condissect - determine concatenation subexpression matches (uncomplicated)
575 ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
576 */
577static int			/* regexec return code */
578condissect(v, t, begin, end)
579struct vars *v;
580struct subre *t;
581chr *begin;			/* beginning of relevant substring */
582chr *end;			/* end of same */
583{
584	struct dfa *d;
585	struct dfa *d2;
586	chr *mid;
587	int i;
588	int shorter = (t->left->flags&SHORTER) ? 1 : 0;
589	chr *stop = (shorter) ? end : begin;
590
591	assert(t->op == '.');
592	assert(t->left != NULL && t->left->cnfa.nstates > 0);
593	assert(t->right != NULL && t->right->cnfa.nstates > 0);
594
595	d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
596	NOERR();
597	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
598	if (ISERR()) {
599		assert(d2 == NULL);
600		freedfa(d);
601		return v->err;
602	}
603
604	/* pick a tentative midpoint */
605	if (shorter)
606		mid = shortest(v, d, begin, begin, end, (chr **)NULL,
607								(int *)NULL);
608	else
609		mid = longest(v, d, begin, end, (int *)NULL);
610	if (mid == NULL) {
611		freedfa(d);
612		freedfa(d2);
613		return REG_ASSERT;
614	}
615	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
616
617	/* iterate until satisfaction or failure */
618	while (longest(v, d2, mid, end, (int *)NULL) != end) {
619		/* that midpoint didn't work, find a new one */
620		if (mid == stop) {
621			/* all possibilities exhausted! */
622			MDEBUG(("no midpoint!\n"));
623			freedfa(d);
624			freedfa(d2);
625			return REG_ASSERT;
626		}
627		if (shorter)
628			mid = shortest(v, d, begin, mid+1, end, (chr **)NULL,
629								(int *)NULL);
630		else
631			mid = longest(v, d, begin, mid-1, (int *)NULL);
632		if (mid == NULL) {
633			/* failed to find a new one! */
634			MDEBUG(("failed midpoint!\n"));
635			freedfa(d);
636			freedfa(d2);
637			return REG_ASSERT;
638		}
639		MDEBUG(("new midpoint %ld\n", LOFF(mid)));
640	}
641
642	/* satisfaction */
643	MDEBUG(("successful\n"));
644	freedfa(d);
645	freedfa(d2);
646	i = dissect(v, t->left, begin, mid);
647	if (i != REG_OKAY)
648		return i;
649	return dissect(v, t->right, mid, end);
650}
651
652/*
653 - altdissect - determine alternative subexpression matches (uncomplicated)
654 ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
655 */
656static int			/* regexec return code */
657altdissect(v, t, begin, end)
658struct vars *v;
659struct subre *t;
660chr *begin;			/* beginning of relevant substring */
661chr *end;			/* end of same */
662{
663	struct dfa *d;
664	int i;
665
666	assert(t != NULL);
667	assert(t->op == '|');
668
669	for (i = 0; t != NULL; t = t->right, i++) {
670		MDEBUG(("trying %dth\n", i));
671		assert(t->left != NULL && t->left->cnfa.nstates > 0);
672		d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
673		if (ISERR())
674			return v->err;
675		if (longest(v, d, begin, end, (int *)NULL) == end) {
676			MDEBUG(("success\n"));
677			freedfa(d);
678			return dissect(v, t->left, begin, end);
679		}
680		freedfa(d);
681	}
682	return REG_ASSERT;	/* none of them matched?!? */
683}
684
685/*
686 - cdissect - determine subexpression matches (with complications)
687 * The retry memory stores the offset of the trial midpoint from begin,
688 * plus 1 so that 0 uniquely means "clean slate".
689 ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
690 */
691static int			/* regexec return code */
692cdissect(v, t, begin, end)
693struct vars *v;
694struct subre *t;
695chr *begin;			/* beginning of relevant substring */
696chr *end;			/* end of same */
697{
698	int er;
699
700	assert(t != NULL);
701	MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
702
703	switch (t->op) {
704	case '=':		/* terminal node */
705		assert(t->left == NULL && t->right == NULL);
706		return REG_OKAY;	/* no action, parent did the work */
707		break;
708	case '|':		/* alternation */
709		assert(t->left != NULL);
710		return caltdissect(v, t, begin, end);
711		break;
712	case 'b':		/* back ref -- shouldn't be calling us! */
713		assert(t->left == NULL && t->right == NULL);
714		return cbrdissect(v, t, begin, end);
715		break;
716	case '.':		/* concatenation */
717		assert(t->left != NULL && t->right != NULL);
718		return ccondissect(v, t, begin, end);
719		break;
720	case '(':		/* capturing */
721		assert(t->left != NULL && t->right == NULL);
722		assert(t->subno > 0);
723		er = cdissect(v, t->left, begin, end);
724		if (er == REG_OKAY)
725			subset(v, t, begin, end);
726		return er;
727		break;
728	default:
729		return REG_ASSERT;
730		break;
731	}
732}
733
734/*
735 - ccondissect - concatenation subexpression matches (with complications)
736 * The retry memory stores the offset of the trial midpoint from begin,
737 * plus 1 so that 0 uniquely means "clean slate".
738 ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
739 */
740static int			/* regexec return code */
741ccondissect(v, t, begin, end)
742struct vars *v;
743struct subre *t;
744chr *begin;			/* beginning of relevant substring */
745chr *end;			/* end of same */
746{
747	struct dfa *d;
748	struct dfa *d2;
749	chr *mid;
750	int er;
751
752	assert(t->op == '.');
753	assert(t->left != NULL && t->left->cnfa.nstates > 0);
754	assert(t->right != NULL && t->right->cnfa.nstates > 0);
755
756	if (t->left->flags&SHORTER)		/* reverse scan */
757		return crevdissect(v, t, begin, end);
758
759	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
760	if (ISERR())
761		return v->err;
762	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
763	if (ISERR()) {
764		freedfa(d);
765		return v->err;
766	}
767	MDEBUG(("cconcat %d\n", t->retry));
768
769	/* pick a tentative midpoint */
770	if (v->mem[t->retry] == 0) {
771		mid = longest(v, d, begin, end, (int *)NULL);
772		if (mid == NULL) {
773			freedfa(d);
774			freedfa(d2);
775			return REG_NOMATCH;
776		}
777		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
778		v->mem[t->retry] = (mid - begin) + 1;
779	} else {
780		mid = begin + (v->mem[t->retry] - 1);
781		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
782	}
783
784	/* iterate until satisfaction or failure */
785	for (;;) {
786		/* try this midpoint on for size */
787		if (longest(v, d2, mid, end, NULL) == end) {
788			er = cdissect(v, t->left, begin, mid);
789			if (er == REG_OKAY) {
790				er = cdissect(v, t->right, mid, end);
791				if (er == REG_OKAY) {
792					/* satisfaction */
793					MDEBUG(("successful\n"));
794					freedfa(d);
795					freedfa(d2);
796					return REG_OKAY;
797				}
798			}
799			if ((er != REG_OKAY) && (er != REG_NOMATCH)) {
800				freedfa(d);
801				freedfa(d2);
802				return er;
803			}
804		}
805
806		/* that midpoint didn't work, find a new one */
807		if (mid == begin) {
808			/* all possibilities exhausted */
809			MDEBUG(("%d no midpoint\n", t->retry));
810			freedfa(d);
811			freedfa(d2);
812			return REG_NOMATCH;
813		}
814		mid = longest(v, d, begin, mid-1, (int *)NULL);
815		if (mid == NULL) {
816			/* failed to find a new one */
817			MDEBUG(("%d failed midpoint\n", t->retry));
818			freedfa(d);
819			freedfa(d2);
820			return REG_NOMATCH;
821		}
822		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
823		v->mem[t->retry] = (mid - begin) + 1;
824		zapmem(v, t->left);
825		zapmem(v, t->right);
826	}
827}
828
829/*
830 - crevdissect - determine backref shortest-first subexpression matches
831 * The retry memory stores the offset of the trial midpoint from begin,
832 * plus 1 so that 0 uniquely means "clean slate".
833 ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
834 */
835static int			/* regexec return code */
836crevdissect(v, t, begin, end)
837struct vars *v;
838struct subre *t;
839chr *begin;			/* beginning of relevant substring */
840chr *end;			/* end of same */
841{
842	struct dfa *d;
843	struct dfa *d2;
844	chr *mid;
845	int er;
846
847	assert(t->op == '.');
848	assert(t->left != NULL && t->left->cnfa.nstates > 0);
849	assert(t->right != NULL && t->right->cnfa.nstates > 0);
850	assert(t->left->flags&SHORTER);
851
852	/* concatenation -- need to split the substring between parts */
853	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
854	if (ISERR())
855		return v->err;
856	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
857	if (ISERR()) {
858		freedfa(d);
859		return v->err;
860	}
861	MDEBUG(("crev %d\n", t->retry));
862
863	/* pick a tentative midpoint */
864	if (v->mem[t->retry] == 0) {
865		mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);
866		if (mid == NULL) {
867			freedfa(d);
868			freedfa(d2);
869			return REG_NOMATCH;
870		}
871		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
872		v->mem[t->retry] = (mid - begin) + 1;
873	} else {
874		mid = begin + (v->mem[t->retry] - 1);
875		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
876	}
877
878	/* iterate until satisfaction or failure */
879	for (;;) {
880		/* try this midpoint on for size */
881		if (longest(v, d2, mid, end, NULL) == end) {
882			er = cdissect(v, t->left, begin, mid);
883			if (er == REG_OKAY) {
884				er = cdissect(v, t->right, mid, end);
885				if (er == REG_OKAY) {
886					/* satisfaction */
887					MDEBUG(("successful\n"));
888					freedfa(d);
889					freedfa(d2);
890					return REG_OKAY;
891				}
892			}
893			if (er != REG_OKAY && er != REG_NOMATCH) {
894				freedfa(d);
895				freedfa(d2);
896				return er;
897			}
898		}
899
900		/* that midpoint didn't work, find a new one */
901		if (mid == end) {
902			/* all possibilities exhausted */
903			MDEBUG(("%d no midpoint\n", t->retry));
904			freedfa(d);
905			freedfa(d2);
906			return REG_NOMATCH;
907		}
908		mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);
909		if (mid == NULL) {
910			/* failed to find a new one */
911			MDEBUG(("%d failed midpoint\n", t->retry));
912			freedfa(d);
913			freedfa(d2);
914			return REG_NOMATCH;
915		}
916		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
917		v->mem[t->retry] = (mid - begin) + 1;
918		zapmem(v, t->left);
919		zapmem(v, t->right);
920	}
921}
922
923/*
924 - cbrdissect - determine backref subexpression matches
925 ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
926 */
927static int			/* regexec return code */
928cbrdissect(v, t, begin, end)
929struct vars *v;
930struct subre *t;
931chr *begin;			/* beginning of relevant substring */
932chr *end;			/* end of same */
933{
934	int i;
935	int n = t->subno;
936	size_t len;
937	chr *paren;
938	chr *p;
939	chr *stop;
940	int min = t->min;
941	int max = t->max;
942
943	assert(t != NULL);
944	assert(t->op == 'b');
945	assert(n >= 0);
946	assert((size_t)n < v->nmatch);
947
948	MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
949
950	if (v->pmatch[n].rm_so == -1)
951		return REG_NOMATCH;
952	paren = v->start + v->pmatch[n].rm_so;
953	len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
954
955	/* no room to maneuver -- retries are pointless */
956	if (v->mem[t->retry])
957		return REG_NOMATCH;
958	v->mem[t->retry] = 1;
959
960	/* special-case zero-length string */
961	if (len == 0) {
962		if (begin == end)
963			return REG_OKAY;
964		return REG_NOMATCH;
965	}
966
967	/* and too-short string */
968	assert(end >= begin);
969	if ((size_t)(end - begin) < len)
970		return REG_NOMATCH;
971	stop = end - len;
972
973	/* count occurrences */
974	i = 0;
975	for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
976		if ((*v->g->compare)(paren, p, len) != 0)
977				break;
978		i++;
979	}
980	MDEBUG(("cbackref found %d\n", i));
981
982	/* and sort it out */
983	if (p != end)			/* didn't consume all of it */
984		return REG_NOMATCH;
985	if (min <= i && (i <= max || max == INFINITY))
986		return REG_OKAY;
987	return REG_NOMATCH;		/* out of range */
988}
989
990/*
991 - caltdissect - determine alternative subexpression matches (w. complications)
992 ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
993 */
994static int			/* regexec return code */
995caltdissect(v, t, begin, end)
996struct vars *v;
997struct subre *t;
998chr *begin;			/* beginning of relevant substring */
999chr *end;			/* end of same */
1000{
1001	struct dfa *d;
1002	int er;
1003#	define	UNTRIED	0	/* not yet tried at all */
1004#	define	TRYING	1	/* top matched, trying submatches */
1005#	define	TRIED	2	/* top didn't match or submatches exhausted */
1006
1007	if (t == NULL)
1008		return REG_NOMATCH;
1009	assert(t->op == '|');
1010	if (v->mem[t->retry] == TRIED)
1011		return caltdissect(v, t->right, begin, end);
1012
1013	MDEBUG(("calt n%d\n", t->retry));
1014	assert(t->left != NULL);
1015
1016	if (v->mem[t->retry] == UNTRIED) {
1017		d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1018		if (ISERR())
1019			return v->err;
1020		if (longest(v, d, begin, end, (int *)NULL) != end) {
1021			freedfa(d);
1022			v->mem[t->retry] = TRIED;
1023			return caltdissect(v, t->right, begin, end);
1024		}
1025		freedfa(d);
1026		MDEBUG(("calt matched\n"));
1027		v->mem[t->retry] = TRYING;
1028	}
1029
1030	er = cdissect(v, t->left, begin, end);
1031	if (er != REG_NOMATCH)
1032		return er;
1033
1034	v->mem[t->retry] = TRIED;
1035	return caltdissect(v, t->right, begin, end);
1036}
1037
1038
1039
1040#include "rege_dfa.c"
1041