1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2011 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23
24/*
25 * posix regex executor
26 * single sized-record interface
27 */
28
29#include "reglib.h"
30
31#if _AST_REGEX_DEBUG
32
33#define DEBUG_TEST(f,y,n)	((debug&(debug_flag=f))?(y):(n))
34#define DEBUG_CODE(f,y,n)	do if(debug&(f)){y}else{n} while(0)
35#define DEBUG_INIT()		do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
36
37static unsigned long	debug;
38static unsigned long	debug_flag;
39
40static const char*	rexnames[] =
41{
42	"REX_NULL",
43	"REX_ALT",
44	"REX_ALT_CATCH",
45	"REX_BACK",
46	"REX_BEG",
47	"REX_BEG_STR",
48	"REX_BM",
49	"REX_CAT",
50	"REX_CLASS",
51	"REX_COLL_CLASS",
52	"REX_CONJ",
53	"REX_CONJ_LEFT",
54	"REX_CONJ_RIGHT",
55	"REX_DONE",
56	"REX_DOT",
57	"REX_END",
58	"REX_END_STR",
59	"REX_EXEC",
60	"REX_FIN_STR",
61	"REX_GROUP",
62	"REX_GROUP_CATCH",
63	"REX_GROUP_AHEAD",
64	"REX_GROUP_AHEAD_CATCH",
65	"REX_GROUP_AHEAD_NOT",
66	"REX_GROUP_BEHIND",
67	"REX_GROUP_BEHIND_CATCH",
68	"REX_GROUP_BEHIND_NOT",
69	"REX_GROUP_BEHIND_NOT_CATCH",
70	"REX_GROUP_COND",
71	"REX_GROUP_COND_CATCH",
72	"REX_GROUP_CUT",
73	"REX_GROUP_CUT_CATCH",
74	"REX_KMP",
75	"REX_NEG",
76	"REX_NEG_CATCH",
77	"REX_NEST",
78	"REX_ONECHAR",
79	"REX_REP",
80	"REX_REP_CATCH",
81	"REX_STRING",
82	"REX_TRIE",
83	"REX_WBEG",
84	"REX_WEND",
85	"REX_WORD",
86	"REX_WORD_NOT"
87};
88
89static const char* rexname(Rex_t* rex)
90{
91	if (!rex)
92		return "NIL";
93	if (rex->type >= elementsof(rexnames))
94		return "ERROR";
95	return rexnames[rex->type];
96}
97
98#else
99
100#define DEBUG_INIT()
101#define DEBUG_TEST(f,y,n)	(n)
102#define DEBUG_CODE(f,y,n)	do {n} while(0)
103
104#endif
105
106#define BEG_ALT		1	/* beginning of an alt			*/
107#define BEG_ONE		2	/* beginning of one iteration of a rep	*/
108#define BEG_REP		3	/* beginning of a repetition		*/
109#define BEG_SUB		4	/* beginning of a subexpression		*/
110#define END_ANY		5	/* end of any of above			*/
111
112/*
113 * returns from parse()
114 */
115
116#define NONE		0	/* no parse found			*/
117#define GOOD		1	/* some parse was found			*/
118#define CUT		2	/* no match and no backtrack		*/
119#define BEST		3	/* an unbeatable parse was found	*/
120#define BAD		4	/* error ocurred			*/
121
122/*
123 * REG_SHELL_DOT test
124 */
125
126#define LEADING(e,r,s)	(*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
127
128/*
129 * Pos_t is for comparing parses. An entry is made in the
130 * array at the beginning and at the end of each Group_t,
131 * each iteration in a Group_t, and each Binary_t.
132 */
133
134typedef struct
135{
136	unsigned char*	p;		/* where in string		*/
137	size_t		length;		/* length in string		*/
138	short		serial;		/* preorder subpattern number	*/
139	short		be;		/* which end of pair		*/
140} Pos_t;
141
142/* ===== begin library support ===== */
143
144#define vector(t,v,i)	(((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
145
146static Vector_t*
147vecopen(int inc, int siz)
148{
149	Vector_t*	v;
150	Stk_t*		sp;
151
152	if (inc <= 0)
153		inc = 16;
154	if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155		return 0;
156	if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
157	{
158		stkclose(sp);
159		return 0;
160	}
161	v->stk = sp;
162	v->vec = (char*)v + sizeof(Vector_t);
163	v->max = v->inc = inc;
164	v->siz = siz;
165	v->cur = 0;
166	return v;
167}
168
169static void*
170vecseek(Vector_t** p, int index)
171{
172	Vector_t*	v = *p;
173
174	if (index >= v->max)
175	{
176		while ((v->max += v->inc) <= index);
177		if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178			return 0;
179		*p = v;
180		v->vec = (char*)v + sizeof(Vector_t);
181	}
182	return v->vec + index * v->siz;
183}
184
185static void
186vecclose(Vector_t* v)
187{
188	if (v)
189		stkclose(v->stk);
190}
191
192typedef struct
193{
194	Stk_pos_t	pos;
195	char		data[1];
196} Stk_frame_t;
197
198#define stknew(s,p)	((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199#define stkold(s,p)	stkset(s,(p)->base,(p)->offset)
200
201#define stkframe(s)	(*((Stk_frame_t**)stktop(s)-1))
202#define stkdata(s,t)	((t*)stkframe(s)->data)
203#define stkpop(s)	stkold(s,&(stkframe(s)->pos))
204
205static void*
206stkpush(Stk_t* sp, size_t size)
207{
208	Stk_frame_t*	f;
209	Stk_pos_t	p;
210
211	stknew(sp, &p);
212	size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213	if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214		return 0;
215	f->pos = p;
216	stkframe(sp) = f;
217	return f->data;
218}
219
220/* ===== end library support ===== */
221
222/*
223 * Match_frame_t is for saving and restoring match records
224 * around alternate attempts, so that fossils will not be
225 * left in the match array.  These are the only entries in
226 * the match array that are not otherwise guaranteed to
227 * have current data in them when they get used.
228 */
229
230typedef struct
231{
232	size_t			size;
233	regmatch_t*		match;
234	regmatch_t		save[1];
235} Match_frame_t;
236
237#define matchpush(e,x)	((x)->re.group.number?_matchpush(e,x):0)
238#define matchcopy(e,x)	do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); } while (0)
239#define matchpop(e,x)	do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); stkpop(stkstd); } while (0)
240
241#define pospop(e)	(--(e)->pos->cur)
242
243/*
244 * allocate a frame and push a match onto the stack
245 */
246
247static int
248_matchpush(Env_t* env, Rex_t* rex)
249{
250	Match_frame_t*	f;
251	regmatch_t*	m;
252	regmatch_t*	e;
253	regmatch_t*	s;
254	int		num;
255
256	if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
257		num = 0;
258	if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
259	{
260		env->error = REG_ESPACE;
261		return 1;
262	}
263	f->size = num * sizeof(regmatch_t);
264	f->match = m = env->match + rex->re.group.number;
265	e = m + num;
266	s = f->save;
267	while (m < e)
268	{
269		*s++ = *m;
270		*m++ = state.nomatch;
271	}
272	return 0;
273}
274
275/*
276 * allocate a frame and push a pos onto the stack
277 */
278
279static int
280pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
281{
282	Pos_t*	pos;
283
284	if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
285	{
286		env->error = REG_ESPACE;
287		return 1;
288	}
289	pos->serial = rex->serial;
290	pos->p = p;
291	pos->be = be;
292	env->pos->cur++;
293	return 0;
294}
295
296/*
297 * two matches are known to have the same length
298 * os is start of old pos array, ns is start of new,
299 * oend and nend are end+1 pointers to ends of arrays.
300 * oe and ne are ends (not end+1) of subarrays.
301 * returns 1 if new is better, -1 if old, else 0.
302 */
303
304static int
305better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
306{
307	Pos_t*	oe;
308	Pos_t*	ne;
309	int	k;
310	int	n;
311
312	if (env->error)
313		return -1;
314	for (;;)
315	{
316		DEBUG_CODE(0x0080,{sfprintf(sfstdout, "   %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n   %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
317		if (ns >= nend)
318			return DEBUG_TEST(0x8000,(os < oend),(0));
319		if (os >= oend)
320			return DEBUG_TEST(0x8000,(-1),(1));
321		n = os->serial;
322		if (ns->serial > n)
323			return -1;
324		if (n > ns->serial)
325		{
326			env->error = REG_PANIC;
327			return -1;
328		}
329		if (ns->p > os->p)
330			return 1;
331		if (os->p > ns->p)
332			return -1;
333		oe = os;
334		k = 0;
335		for (;;)
336			if ((++oe)->serial == n)
337			{
338				if (oe->be != END_ANY)
339					k++;
340				else if (k-- <= 0)
341					break;
342			}
343		ne = ns;
344		k = 0;
345		for (;;)
346			if ((++ne)->serial == n)
347			{
348				if (ne->be != END_ANY)
349					k++;
350				else if (k-- <= 0)
351					break;
352			}
353		if (ne->p > oe->p)
354			return 1;
355		if (oe->p > ne->p)
356			return -1;
357		if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
358			return k;
359		os = oe + 1;
360		ns = ne + 1;
361	}
362}
363
364#if _AST_REGEX_DEBUG
365
366static void
367showmatch(regmatch_t* p)
368{
369	sfputc(sfstdout, '(');
370	if (p->rm_so < 0)
371		sfputc(sfstdout, '?');
372	else
373		sfprintf(sfstdout, "%d", p->rm_so);
374	sfputc(sfstdout, ',');
375	if (p->rm_eo < 0)
376		sfputc(sfstdout, '?');
377	else
378		sfprintf(sfstdout, "%d", p->rm_eo);
379	sfputc(sfstdout, ')');
380}
381
382static int
383_better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
384{
385	int	i;
386
387	DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n           new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
388	i = better(env, os, ns, oend, nend, 0);
389	DEBUG_TEST(0x0040,(sfprintf(sfstdout, "           %s\n", i <= 0 ? "OLD" : "NEW")),(0));
390	return i;
391}
392
393#define better	_better
394
395#endif
396
397#define follow(e,r,c,s)	((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
398
399static int		parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
400
401static int
402parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
403{
404	int	i;
405	int	r = NONE;
406	Rex_t	catcher;
407
408	DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
409	if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
410	{
411		if (env->stack && pospush(env, rex, s, END_ANY))
412			return BAD;
413		i = follow(env, rex, cont, s);
414		if (env->stack)
415			pospop(env);
416		switch (i)
417		{
418		case BAD:
419			return BAD;
420		case CUT:
421			return CUT;
422		case BEST:
423		case GOOD:
424			return BEST;
425		}
426	}
427	if (n < rex->hi)
428	{
429		catcher.type = REX_REP_CATCH;
430		catcher.serial = rex->serial;
431		catcher.re.rep_catch.ref = rex;
432		catcher.re.rep_catch.cont = cont;
433		catcher.re.rep_catch.beg = s;
434		catcher.re.rep_catch.n = n + 1;
435		catcher.next = rex->next;
436		if (n == 0)
437			rex->re.rep_catch.beg = s;
438		if (env->stack)
439		{
440			if (matchpush(env, rex))
441				return BAD;
442			if (pospush(env, rex, s, BEG_ONE))
443				return BAD;
444DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d   (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
445		}
446		r = parse(env, rex->re.group.expr.rex, &catcher, s);
447		DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
448		if (env->stack)
449		{
450			pospop(env);
451			matchpop(env, rex);
452DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP  %d %d (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
453		}
454		switch (r)
455		{
456		case BAD:
457			return BAD;
458		case BEST:
459			return BEST;
460		case CUT:
461			r = NONE;
462			break;
463		case GOOD:
464			if (rex->flags & REG_MINIMAL)
465				return BEST;
466			r = GOOD;
467			break;
468		}
469	}
470	if (n < rex->lo)
471		return r;
472	if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
473	{
474		if (env->stack && pospush(env, rex, s, END_ANY))
475			return BAD;
476		i = follow(env, rex, cont, s);
477		if (env->stack)
478			pospop(env);
479		switch (i)
480		{
481		case BAD:
482			r = BAD;
483			break;
484		case CUT:
485			r = CUT;
486			break;
487		case BEST:
488			r = BEST;
489			break;
490		case GOOD:
491			r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
492			break;
493		}
494	}
495	return r;
496}
497
498static int
499parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
500{
501	unsigned char*	p;
502	int		r;
503
504	if (p = rex->map)
505	{
506		for (;;)
507		{
508			if (s >= env->end)
509				return NONE;
510			while (x->c != p[*s])
511				if (!(x = x->sib))
512					return NONE;
513			if (x->end)
514				break;
515			x = x->son;
516			s++;
517		}
518	}
519	else
520	{
521		for (;;)
522		{
523			if (s >= env->end)
524				return NONE;
525			while (x->c != *s)
526				if (!(x = x->sib))
527					return NONE;
528			if (x->end)
529				break;
530			x = x->son;
531			s++;
532		}
533	}
534	s++;
535	if (rex->flags & REG_MINIMAL)
536		switch (follow(env, rex, cont, s))
537		{
538		case BAD:
539			return BAD;
540		case CUT:
541			return CUT;
542		case BEST:
543		case GOOD:
544			return BEST;
545		}
546	if (x->son)
547		switch (parsetrie(env, x->son, rex, cont, s))
548		{
549		case BAD:
550			return BAD;
551		case CUT:
552			return CUT;
553		case BEST:
554			return BEST;
555		case GOOD:
556			if (rex->flags & REG_MINIMAL)
557				return BEST;
558			r = GOOD;
559			break;
560		default:
561			r = NONE;
562			break;
563		}
564	else
565		r = NONE;
566	if (!(rex->flags & REG_MINIMAL))
567		switch (follow(env, rex, cont, s))
568		{
569		case BAD:
570			return BAD;
571		case CUT:
572			return CUT;
573		case BEST:
574			return BEST;
575		case GOOD:
576			return GOOD;
577	}
578	return r;
579}
580
581static int
582collelt(register Celt_t* ce, char* key, int c, int x)
583{
584	Ckey_t	elt;
585
586	mbxfrm(elt, key, COLL_KEY_MAX);
587	for (;; ce++)
588	{
589		switch (ce->typ)
590		{
591		case COLL_call:
592			if (!x && (*ce->fun)(c))
593				return 1;
594			continue;
595		case COLL_char:
596			if (!strcmp((char*)ce->beg, (char*)elt))
597				return 1;
598			continue;
599		case COLL_range:
600			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
601				return 1;
602			continue;
603		case COLL_range_lc:
604			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
605				return 1;
606			continue;
607		case COLL_range_uc:
608			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
609				return 1;
610			continue;
611		}
612		break;
613	}
614	return 0;
615}
616
617static int
618collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
619{
620	if (!x)
621	{
622		if (collelt(ce, key, c, x))
623			return 1;
624		if (iswlower(c))
625			c = towupper(c);
626		else if (iswupper(c))
627			c = towlower(c);
628		else
629			return 0;
630		x = mbconv(key, c);
631		key[x] = 0;
632		return collelt(ce, key, c, 0);
633	}
634	while (*nxt)
635	{
636		if (collic(ce, key, nxt + 1, c, x))
637			return 1;
638		if (islower(*nxt))
639			*nxt = toupper(*nxt);
640		else if (isupper(*nxt))
641			*nxt = tolower(*nxt);
642		else
643			return 0;
644		nxt++;
645	}
646	return collelt(ce, key, c, x);
647}
648
649static int
650collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
651{
652	unsigned char*		t;
653	wchar_t			c;
654	int			w;
655	int			r;
656	int			x;
657	int			ic;
658	Ckey_t			key;
659	Ckey_t			elt;
660
661	ic = (rex->flags & REG_ICASE);
662	if ((w = MBSIZE(s)) > 1)
663	{
664		memcpy((char*)key, (char*)s, w);
665		key[w] = 0;
666		t = s;
667		c = mbchar(t);
668#if !_lib_wctype
669		c &= 0xff;
670#endif
671		x = 0;
672	}
673	else
674	{
675		c = s[0];
676		if (ic && isupper(c))
677			c = tolower(c);
678		key[0] = c;
679		key[1] = 0;
680		if (isalpha(c))
681		{
682			x = e - s;
683			if (x > COLL_KEY_MAX)
684				x = COLL_KEY_MAX;
685			while (w < x)
686			{
687				c = s[w];
688				if (!isalpha(c))
689					break;
690				r = mbxfrm(elt, key, COLL_KEY_MAX);
691				if (ic && isupper(c))
692					c = tolower(c);
693				key[w] = c;
694				key[w + 1] = 0;
695				if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
696					break;
697				w++;
698			}
699		}
700		key[w] = 0;
701		c = key[0];
702		x = w - 1;
703	}
704	r = 1;
705	for (;;)
706	{
707		if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
708			break;
709		if (!x)
710		{
711			r = 0;
712			break;
713		}
714		w = x--;
715		key[w] = 0;
716	}
717	*p = s + w;
718	return rex->re.collate.invert ? !r : r;
719}
720
721static unsigned char*
722nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
723{
724	register int	c;
725	register int	cc;
726	unsigned int	n;
727	int		oc;
728
729	if (type[co] & (REX_NEST_literal|REX_NEST_quote))
730	{
731		n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
732		while (s < e)
733		{
734			c = *s++;
735			if (c == co)
736				return s;
737			else if (type[c] & n)
738			{
739				if (s >= e || (type[c] & REX_NEST_terminator))
740					break;
741				s++;
742			}
743		}
744	}
745	else
746	{
747		cc = type[co] >> REX_NEST_SHIFT;
748		oc = type[co] & (REX_NEST_open|REX_NEST_close);
749		n = 1;
750		while (s < e)
751		{
752			c = *s++;
753			switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
754			{
755			case REX_NEST_delimiter:
756			case REX_NEST_terminator:
757				return oc ? 0 : s;
758			case REX_NEST_separator:
759				if (!oc)
760					return s;
761				break;
762			case REX_NEST_escape:
763				if (s >= e)
764					return 0;
765				s++;
766				break;
767			case REX_NEST_open|REX_NEST_close:
768				if (c == cc)
769				{
770					if (!--n)
771						return s;
772				}
773				/*FALLTHROUGH*/
774			case REX_NEST_open:
775				if (c == co)
776				{
777					if (!++n)
778						return 0;
779				}
780				else if (!(s = nestmatch(s, e, type, c)))
781					return 0;
782				break;
783			case REX_NEST_close:
784				if (c != cc)
785					return 0;
786				if (!--n)
787					return s;
788				break;
789			}
790		}
791		return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
792	}
793	return 0;
794}
795
796static int
797parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
798{
799	int		c;
800	int		d;
801	int		i;
802	int		m;
803	int		n;
804	int		r;
805	int*		f;
806	unsigned char*	p;
807	unsigned char*	t;
808	unsigned char*	b;
809	unsigned char*	e;
810	char*		u;
811	regmatch_t*	o;
812	Trie_node_t*	x;
813	Rex_t*		q;
814	Rex_t		catcher;
815	Rex_t		next;
816
817	for (;;)
818	{
819DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
820		switch (rex->type)
821		{
822		case REX_ALT:
823			if (env->stack)
824			{
825				if (matchpush(env, rex))
826					return BAD;
827				if (pospush(env, rex, s, BEG_ALT))
828					return BAD;
829				catcher.type = REX_ALT_CATCH;
830				catcher.serial = rex->serial;
831				catcher.re.alt_catch.cont = cont;
832				catcher.next = rex->next;
833				r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
834				if (r < BEST || (rex->flags & REG_MINIMAL))
835				{
836					matchcopy(env, rex);
837					((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
838					n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
839					if (n != NONE)
840						r = n;
841				}
842				pospop(env);
843				matchpop(env, rex);
844			}
845			else
846			{
847				if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
848					r = parse(env, rex->re.group.expr.binary.right, cont, s);
849				if (r == GOOD)
850					r = BEST;
851			}
852			return r;
853		case REX_ALT_CATCH:
854			if (pospush(env, rex, s, END_ANY))
855				return BAD;
856			r = follow(env, rex, rex->re.alt_catch.cont, s);
857			pospop(env);
858			return r;
859		case REX_BACK:
860			o = &env->match[rex->lo];
861			if (o->rm_so < 0)
862				return NONE;
863			i = o->rm_eo - o->rm_so;
864			e = s + i;
865			if (e > env->end)
866				return NONE;
867			t = env->beg + o->rm_so;
868			if (!(p = rex->map))
869			{
870				while (s < e)
871					if (*s++ != *t++)
872						return NONE;
873			}
874			else if (!mbwide())
875			{
876				while (s < e)
877					if (p[*s++] != p[*t++])
878						return NONE;
879			}
880			else
881			{
882				while (s < e)
883				{
884					c = mbchar(s);
885					d = mbchar(t);
886					if (towupper(c) != towupper(d))
887						return NONE;
888				}
889			}
890			break;
891		case REX_BEG:
892			if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
893				return NONE;
894			break;
895		case REX_CLASS:
896			if (LEADING(env, rex, s))
897				return NONE;
898			n = rex->hi;
899			if (n > env->end - s)
900				n = env->end - s;
901			m = rex->lo;
902			if (m > n)
903				return NONE;
904			r = NONE;
905			if (!(rex->flags & REG_MINIMAL))
906			{
907				for (i = 0; i < n; i++)
908					if (!settst(rex->re.charclass, s[i]))
909					{
910						n = i;
911						break;
912					}
913				for (s += n; n-- >= m; s--)
914					switch (follow(env, rex, cont, s))
915					{
916					case BAD:
917						return BAD;
918					case CUT:
919						return CUT;
920					case BEST:
921						return BEST;
922					case GOOD:
923						r = GOOD;
924						break;
925					}
926			}
927			else
928			{
929				for (e = s + m; s < e; s++)
930					if (!settst(rex->re.charclass, *s))
931						return r;
932				e += n - m;
933				for (;;)
934				{
935					switch (follow(env, rex, cont, s))
936					{
937					case BAD:
938						return BAD;
939					case CUT:
940						return CUT;
941					case BEST:
942					case GOOD:
943						return BEST;
944					}
945					if (s >= e || !settst(rex->re.charclass, *s))
946						break;
947					s++;
948				}
949			}
950			return r;
951		case REX_COLL_CLASS:
952			if (LEADING(env, rex, s))
953				return NONE;
954			n = rex->hi;
955			if (n > env->end - s)
956				n = env->end - s;
957			m = rex->lo;
958			if (m > n)
959				return NONE;
960			r = NONE;
961			e = env->end;
962			if (!(rex->flags & REG_MINIMAL))
963			{
964				if (!(b = (unsigned char*)stkpush(stkstd, n)))
965				{
966					env->error = REG_ESPACE;
967					return BAD;
968				}
969				for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
970				{
971					b[i] = t - s;
972					s = t;
973				}
974				for (; i-- >= rex->lo; s -= b[i])
975					switch (follow(env, rex, cont, s))
976					{
977					case BAD:
978						stkpop(stkstd);
979						return BAD;
980					case CUT:
981						stkpop(stkstd);
982						return CUT;
983					case BEST:
984						stkpop(stkstd);
985						return BEST;
986					case GOOD:
987						r = GOOD;
988						break;
989					}
990				stkpop(stkstd);
991			}
992			else
993			{
994				for (i = 0; i < m && s < e; i++, s = t)
995					if (!collmatch(rex, s, e, &t))
996						return r;
997				while (i++ <= n)
998				{
999					switch (follow(env, rex, cont, s))
1000					{
1001					case BAD:
1002						return BAD;
1003					case CUT:
1004						return CUT;
1005					case BEST:
1006					case GOOD:
1007						return BEST;
1008					}
1009					if (s >= e || !collmatch(rex, s, e, &s))
1010						break;
1011				}
1012			}
1013			return r;
1014		case REX_CONJ:
1015			next.type = REX_CONJ_RIGHT;
1016			next.re.conj_right.cont = cont;
1017			next.next = rex->next;
1018			catcher.type = REX_CONJ_LEFT;
1019			catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1020			catcher.re.conj_left.cont = &next;
1021			catcher.re.conj_left.beg = s;
1022			catcher.next = 0;
1023			return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1024		case REX_CONJ_LEFT:
1025			rex->re.conj_left.cont->re.conj_right.end = s;
1026			cont = rex->re.conj_left.cont;
1027			s = rex->re.conj_left.beg;
1028			rex = rex->re.conj_left.right;
1029			continue;
1030		case REX_CONJ_RIGHT:
1031			if (rex->re.conj_right.end != s)
1032				return NONE;
1033			cont = rex->re.conj_right.cont;
1034			break;
1035		case REX_DONE:
1036			if (!env->stack)
1037				return BEST;
1038			n = s - env->beg;
1039			r = env->nsub;
1040			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1041			if ((i = env->best[0].rm_eo) >= 0)
1042			{
1043				if (rex->flags & REG_MINIMAL)
1044				{
1045					if (n > i)
1046						return GOOD;
1047				}
1048				else
1049				{
1050					if (n < i)
1051						return GOOD;
1052				}
1053				if (n == i && better(env,
1054						     (Pos_t*)env->bestpos->vec,
1055				   		     (Pos_t*)env->pos->vec,
1056				   		     (Pos_t*)env->bestpos->vec+env->bestpos->cur,
1057				   		     (Pos_t*)env->pos->vec+env->pos->cur,
1058						     0) <= 0)
1059					return GOOD;
1060			}
1061			env->best[0].rm_eo = n;
1062			memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1063			n = env->pos->cur;
1064			if (!vector(Pos_t, env->bestpos, n))
1065			{
1066				env->error = REG_ESPACE;
1067				return BAD;
1068			}
1069			env->bestpos->cur = n;
1070			memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1071			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1072			return GOOD;
1073		case REX_DOT:
1074			if (LEADING(env, rex, s))
1075				return NONE;
1076			n = rex->hi;
1077			if (n > env->end - s)
1078				n = env->end - s;
1079			m = rex->lo;
1080			if (m > n)
1081				return NONE;
1082			if ((c = rex->explicit) >= 0 && !mbwide())
1083				for (i = 0; i < n; i++)
1084					if (s[i] == c)
1085					{
1086						n = i;
1087						break;
1088					}
1089			r = NONE;
1090			if (!(rex->flags & REG_MINIMAL))
1091			{
1092				if (!mbwide())
1093				{
1094					for (s += n; n-- >= m; s--)
1095						switch (follow(env, rex, cont, s))
1096						{
1097						case BAD:
1098							return BAD;
1099						case CUT:
1100							return CUT;
1101						case BEST:
1102							return BEST;
1103						case GOOD:
1104							r = GOOD;
1105							break;
1106						}
1107				}
1108				else
1109				{
1110					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1111					{
1112						env->error = REG_ESPACE;
1113						return BAD;
1114					}
1115					e = env->end;
1116					for (i = 0; s < e && i < n && *s != c; i++)
1117						s += b[i] = MBSIZE(s);
1118					for (; i-- >= m; s -= b[i])
1119						switch (follow(env, rex, cont, s))
1120						{
1121						case BAD:
1122							stkpop(stkstd);
1123							return BAD;
1124						case CUT:
1125							stkpop(stkstd);
1126							return CUT;
1127						case BEST:
1128							stkpop(stkstd);
1129							return BEST;
1130						case GOOD:
1131							r = GOOD;
1132							break;
1133						}
1134					stkpop(stkstd);
1135				}
1136			}
1137			else
1138			{
1139				if (!mbwide())
1140				{
1141					e = s + n;
1142					for (s += m; s <= e; s++)
1143						switch (follow(env, rex, cont, s))
1144						{
1145						case BAD:
1146							return BAD;
1147						case CUT:
1148							return CUT;
1149						case BEST:
1150						case GOOD:
1151							return BEST;
1152						}
1153				}
1154				else
1155				{
1156					e = env->end;
1157					for (i = 0; s < e && i < m && *s != c; i++)
1158						s += MBSIZE(s);
1159					if (i >= m)
1160						for (; s <= e && i <= n; s += MBSIZE(s), i++)
1161							switch (follow(env, rex, cont, s))
1162							{
1163							case BAD:
1164								return BAD;
1165							case CUT:
1166								return CUT;
1167							case BEST:
1168							case GOOD:
1169								return BEST;
1170							}
1171				}
1172			}
1173			return r;
1174		case REX_END:
1175			if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1176				return NONE;
1177			break;
1178		case REX_GROUP:
1179DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1180			if (env->stack)
1181			{
1182				if (rex->re.group.number)
1183					env->match[rex->re.group.number].rm_so = s - env->beg;
1184				if (pospush(env, rex, s, BEG_SUB))
1185					return BAD;
1186				catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1187			}
1188			catcher.type = REX_GROUP_CATCH;
1189			catcher.serial = rex->serial;
1190			catcher.re.group_catch.cont = cont;
1191			catcher.next = rex->next;
1192			r = parse(env, rex->re.group.expr.rex, &catcher, s);
1193			if (env->stack)
1194			{
1195				pospop(env);
1196				if (rex->re.group.number)
1197					env->match[rex->re.group.number].rm_so = -1;
1198			}
1199			return r;
1200		case REX_GROUP_CATCH:
1201DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1202			if (env->stack)
1203			{
1204				if (rex->re.group_catch.eo)
1205					*rex->re.group_catch.eo = s - env->beg;
1206				if (pospush(env, rex, s, END_ANY))
1207					return BAD;
1208			}
1209			r = follow(env, rex, rex->re.group_catch.cont, s);
1210			if (env->stack)
1211			{
1212				pospop(env);
1213				if (rex->re.group_catch.eo)
1214					*rex->re.group_catch.eo = -1;
1215			}
1216			return r;
1217		case REX_GROUP_AHEAD:
1218			catcher.type = REX_GROUP_AHEAD_CATCH;
1219			catcher.flags = rex->flags;
1220			catcher.serial = rex->serial;
1221			catcher.re.rep_catch.beg = s;
1222			catcher.re.rep_catch.cont = cont;
1223			catcher.next = rex->next;
1224			return parse(env, rex->re.group.expr.rex, &catcher, s);
1225		case REX_GROUP_AHEAD_CATCH:
1226			return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1227		case REX_GROUP_AHEAD_NOT:
1228			r = parse(env, rex->re.group.expr.rex, NiL, s);
1229			if (r == NONE)
1230				r = follow(env, rex, cont, s);
1231			else if (r != BAD)
1232				r = NONE;
1233			return r;
1234		case REX_GROUP_BEHIND:
1235			if ((s - env->beg) < rex->re.group.size)
1236				return NONE;
1237			catcher.type = REX_GROUP_BEHIND_CATCH;
1238			catcher.flags = rex->flags;
1239			catcher.serial = rex->serial;
1240			catcher.re.behind_catch.beg = s;
1241			catcher.re.behind_catch.end = e = env->end;
1242			catcher.re.behind_catch.cont = cont;
1243			catcher.next = rex->next;
1244			for (t = s - rex->re.group.size; t >= env->beg; t--)
1245			{
1246				env->end = s;
1247				r = parse(env, rex->re.group.expr.rex, &catcher, t);
1248				env->end = e;
1249				if (r != NONE)
1250					return r;
1251			}
1252			return NONE;
1253		case REX_GROUP_BEHIND_CATCH:
1254			if (s != rex->re.behind_catch.beg)
1255				return NONE;
1256			env->end = rex->re.behind_catch.end;
1257			return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1258		case REX_GROUP_BEHIND_NOT:
1259			if ((s - env->beg) < rex->re.group.size)
1260				r = NONE;
1261			else
1262			{
1263				catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1264				catcher.re.neg_catch.beg = s;
1265				catcher.next = 0;
1266				e = env->end;
1267				env->end = s;
1268				for (t = s - rex->re.group.size; t >= env->beg; t--)
1269				{
1270					r = parse(env, rex->re.group.expr.rex, &catcher, t);
1271					if (r != NONE)
1272						break;
1273				}
1274				env->end = e;
1275			}
1276			if (r == NONE)
1277				r = follow(env, rex, cont, s);
1278			else if (r != BAD)
1279				r = NONE;
1280			return r;
1281		case REX_GROUP_BEHIND_NOT_CATCH:
1282			return s == rex->re.neg_catch.beg ? GOOD : NONE;
1283		case REX_GROUP_COND:
1284			if (q = rex->re.group.expr.binary.right)
1285			{
1286				catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1287				catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1288			}
1289			else
1290				catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1291			if (q = rex->re.group.expr.binary.left)
1292			{
1293				catcher.type = REX_GROUP_COND_CATCH;
1294				catcher.flags = rex->flags;
1295				catcher.serial = rex->serial;
1296				catcher.re.cond_catch.yes = 0;
1297				catcher.re.cond_catch.beg = s;
1298				catcher.re.cond_catch.cont = cont;
1299				catcher.next = rex->next;
1300				r = parse(env, q, &catcher, s);
1301				if (r == BAD || catcher.re.cond_catch.yes)
1302					return r;
1303			}
1304			else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1305				r = GOOD;
1306			else
1307				r = NONE;
1308			if (q = catcher.re.cond_catch.next[r != NONE])
1309			{
1310				catcher.type = REX_CAT;
1311				catcher.flags = q->flags;
1312				catcher.serial = q->serial;
1313				catcher.re.group_catch.cont = cont;
1314				catcher.next = rex->next;
1315				return parse(env, q, &catcher, s);
1316			}
1317			return follow(env, rex, cont, s);
1318		case REX_GROUP_COND_CATCH:
1319			rex->re.cond_catch.yes = 1;
1320			catcher.type = REX_CAT;
1321			catcher.flags = rex->flags;
1322			catcher.serial = rex->serial;
1323			catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1324			catcher.next = rex->next;
1325			return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1326		case REX_CAT:
1327			return follow(env, rex, rex->re.group_catch.cont, s);
1328		case REX_GROUP_CUT:
1329			catcher.type = REX_GROUP_CUT_CATCH;
1330			catcher.flags = rex->flags;
1331			catcher.serial = rex->serial;
1332			catcher.re.group_catch.cont = cont;
1333			catcher.next = rex->next;
1334			return parse(env, rex->re.group.expr.rex, &catcher, s);
1335		case REX_GROUP_CUT_CATCH:
1336			switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1337			{
1338			case GOOD:
1339				r = BEST;
1340				break;
1341			case NONE:
1342				r = CUT;
1343				break;
1344			}
1345			return r;
1346		case REX_KMP:
1347			f = rex->re.string.fail;
1348			b = rex->re.string.base;
1349			n = rex->re.string.size;
1350			t = s;
1351			e = env->end;
1352			if (p = rex->map)
1353			{
1354				while (t + n <= e)
1355				{
1356					for (i = -1; t < e; t++)
1357					{
1358						while (i >= 0 && b[i+1] != p[*t])
1359							i = f[i];
1360						if (b[i+1] == p[*t])
1361							i++;
1362						if (i + 1 == n)
1363						{
1364							t++;
1365							if (env->stack)
1366								env->best[0].rm_so = t - s - n;
1367							switch (follow(env, rex, cont, t))
1368							{
1369							case BAD:
1370								return BAD;
1371							case CUT:
1372								return CUT;
1373							case BEST:
1374							case GOOD:
1375								return BEST;
1376							}
1377							t -= n - 1;
1378							break;
1379						}
1380					}
1381				}
1382			}
1383			else
1384			{
1385				while (t + n <= e)
1386				{
1387					for (i = -1; t < e; t++)
1388					{
1389						while (i >= 0 && b[i+1] != *t)
1390							i = f[i];
1391						if (b[i+1] == *t)
1392							i++;
1393						if (i + 1 == n)
1394						{
1395							t++;
1396							if (env->stack)
1397								env->best[0].rm_so = t - s - n;
1398							switch (follow(env, rex, cont, t))
1399							{
1400							case BAD:
1401								return BAD;
1402							case CUT:
1403								return CUT;
1404							case BEST:
1405							case GOOD:
1406								return BEST;
1407							}
1408							t -= n - 1;
1409							break;
1410						}
1411					}
1412				}
1413			}
1414			return NONE;
1415		case REX_NEG:
1416			if (LEADING(env, rex, s))
1417				return NONE;
1418			i = env->end - s;
1419			n = ((i + 7) >> 3) + 1;
1420			catcher.type = REX_NEG_CATCH;
1421			catcher.re.neg_catch.beg = s;
1422			if (!(p = (unsigned char*)stkpush(stkstd, n)))
1423				return BAD;
1424			memset(catcher.re.neg_catch.index = p, 0, n);
1425			catcher.next = rex->next;
1426			if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1427				r = BAD;
1428			else
1429			{
1430				r = NONE;
1431				for (; i >= 0; i--)
1432					if (!bittst(p, i))
1433					{
1434						switch (follow(env, rex, cont, s + i))
1435						{
1436						case BAD:
1437							r = BAD;
1438							break;
1439						case BEST:
1440							r = BEST;
1441							break;
1442						case CUT:
1443							r = CUT;
1444							break;
1445						case GOOD:
1446							r = GOOD;
1447							/*FALLTHROUGH*/
1448						default:
1449							continue;
1450						}
1451						break;
1452					}
1453			}
1454			stkpop(stkstd);
1455			return r;
1456		case REX_NEG_CATCH:
1457			bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1458			return NONE;
1459		case REX_NEST:
1460			if (s >= env->end)
1461				return NONE;
1462			do
1463			{
1464				if ((c = *s++) == rex->re.nest.primary)
1465				{
1466					if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1467						return NONE;
1468					break;
1469				}
1470				if (rex->re.nest.primary >= 0)
1471					return NONE;
1472			    	if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1473					break;
1474			    	if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1475					return NONE;
1476			} while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1477			break;
1478		case REX_NULL:
1479			break;
1480		case REX_ONECHAR:
1481			n = rex->hi;
1482			if (n > env->end - s)
1483				n = env->end - s;
1484			m = rex->lo;
1485			if (m > n)
1486				return NONE;
1487			r = NONE;
1488			c = rex->re.onechar;
1489			if (!(rex->flags & REG_MINIMAL))
1490			{
1491				if (!mbwide())
1492				{
1493					if (p = rex->map)
1494					{
1495						for (i = 0; i < n; i++, s++)
1496							if (p[*s] != c)
1497								break;
1498					}
1499					else
1500					{
1501						for (i = 0; i < n; i++, s++)
1502							if (*s != c)
1503								break;
1504					}
1505					for (; i-- >= m; s--)
1506						switch (follow(env, rex, cont, s))
1507						{
1508						case BAD:
1509							return BAD;
1510						case BEST:
1511							return BEST;
1512						case CUT:
1513							return CUT;
1514						case GOOD:
1515							r = GOOD;
1516							break;
1517						}
1518				}
1519				else
1520				{
1521					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1522					{
1523						env->error = REG_ESPACE;
1524						return BAD;
1525					}
1526					e = env->end;
1527					if (!(rex->flags & REG_ICASE))
1528					{
1529						for (i = 0; s < e && i < n; i++, s = t)
1530						{
1531							t = s;
1532							if (mbchar(t) != c)
1533								break;
1534							b[i] = t - s;
1535						}
1536					}
1537					else
1538					{
1539						for (i = 0; s < e && i < n; i++, s = t)
1540						{
1541							t = s;
1542							if (towupper(mbchar(t)) != c)
1543								break;
1544							b[i] = t - s;
1545						}
1546					}
1547					for (; i-- >= m; s -= b[i])
1548						switch (follow(env, rex, cont, s))
1549						{
1550						case BAD:
1551							stkpop(stkstd);
1552							return BAD;
1553						case BEST:
1554							stkpop(stkstd);
1555							return BEST;
1556						case CUT:
1557							stkpop(stkstd);
1558							return CUT;
1559						case GOOD:
1560							r = GOOD;
1561							break;
1562						}
1563					stkpop(stkstd);
1564				}
1565			}
1566			else
1567			{
1568				if (!mbwide())
1569				{
1570					e = s + m;
1571					if (p = rex->map)
1572					{
1573						for (; s < e; s++)
1574							if (p[*s] != c)
1575								return r;
1576						e += n - m;
1577						for (;;)
1578						{
1579							switch (follow(env, rex, cont, s))
1580							{
1581							case BAD:
1582								return BAD;
1583							case CUT:
1584								return CUT;
1585							case BEST:
1586							case GOOD:
1587								return BEST;
1588							}
1589							if (s >= e || p[*s++] != c)
1590								break;
1591						}
1592					}
1593					else
1594					{
1595						for (; s < e; s++)
1596							if (*s != c)
1597								return r;
1598						e += n - m;
1599						for (;;)
1600						{
1601							switch (follow(env, rex, cont, s))
1602							{
1603							case BAD:
1604								return BAD;
1605							case CUT:
1606								return CUT;
1607							case BEST:
1608							case GOOD:
1609								return BEST;
1610							}
1611							if (s >= e || *s++ != c)
1612								break;
1613						}
1614					}
1615				}
1616				else
1617				{
1618					if (!(rex->flags & REG_ICASE))
1619					{
1620						for (i = 0; i < m && s < e; i++, s = t)
1621						{
1622							t = s;
1623							if (mbchar(t) != c)
1624								return r;
1625						}
1626						while (i++ <= n)
1627						{
1628							switch (follow(env, rex, cont, s))
1629							{
1630							case BAD:
1631								return BAD;
1632							case CUT:
1633								return CUT;
1634							case BEST:
1635							case GOOD:
1636								return BEST;
1637							}
1638							if (s >= e || mbchar(s) != c)
1639								break;
1640						}
1641					}
1642					else
1643					{
1644						for (i = 0; i < m && s < e; i++, s = t)
1645						{
1646							t = s;
1647							if (towupper(mbchar(t)) != c)
1648								return r;
1649						}
1650						while (i++ <= n)
1651						{
1652							switch (follow(env, rex, cont, s))
1653							{
1654							case BAD:
1655								return BAD;
1656							case CUT:
1657								return CUT;
1658							case BEST:
1659							case GOOD:
1660								return BEST;
1661							}
1662							if (s >= e || towupper(mbchar(s)) != c)
1663								break;
1664						}
1665					}
1666				}
1667			}
1668			return r;
1669		case REX_REP:
1670			if (env->stack && pospush(env, rex, s, BEG_REP))
1671				return BAD;
1672			r = parserep(env, rex, cont, s, 0);
1673			if (env->stack)
1674				pospop(env);
1675			return r;
1676		case REX_REP_CATCH:
1677			DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1678			if (env->stack && pospush(env, rex, s, END_ANY))
1679				return BAD;
1680			if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1681			{
1682				/*
1683				 * optional empty iteration
1684				 */
1685
1686DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1687				if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1688					r = NONE;
1689				else if (pospush(env, rex, s, END_ANY))
1690					r = BAD;
1691				else
1692				{
1693					r = follow(env, rex, rex->re.rep_catch.cont, s);
1694					pospop(env);
1695				}
1696			}
1697			else
1698				r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1699			if (env->stack)
1700				pospop(env);
1701			return r;
1702		case REX_STRING:
1703DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1704			if (rex->re.string.size > (env->end - s))
1705				return NONE;
1706			t = rex->re.string.base;
1707			e = t + rex->re.string.size;
1708			if (!(p = rex->map))
1709			{
1710				while (t < e)
1711					if (*s++ != *t++)
1712						return NONE;
1713			}
1714			else if (!mbwide())
1715			{
1716				while (t < e)
1717					if (p[*s++] != *t++)
1718						return NONE;
1719			}
1720			else
1721			{
1722				while (t < e)
1723				{
1724					c = mbchar(s);
1725					d = mbchar(t);
1726					if (towupper(c) != d)
1727						return NONE;
1728				}
1729			}
1730			break;
1731		case REX_TRIE:
1732			if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1733				return NONE;
1734			return parsetrie(env, x, rex, cont, s);
1735		case REX_EXEC:
1736			u = 0;
1737			r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1738			e = (unsigned char*)u;
1739			if (e >= s && e <= env->end)
1740				s = e;
1741			switch (r)
1742			{
1743			case 0:
1744				break;
1745			case REG_NOMATCH:
1746				return NONE;
1747			default:
1748				env->error = r;
1749				return BAD;
1750			}
1751			break;
1752		case REX_WBEG:
1753			if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1754				return NONE;
1755			break;
1756		case REX_WEND:
1757			if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1758				return NONE;
1759			break;
1760		case REX_WORD:
1761			if (s > env->beg && isword(*(s - 1)) == isword(*s))
1762				return NONE;
1763			break;
1764		case REX_WORD_NOT:
1765			if (s == env->beg || isword(*(s - 1)) != isword(*s))
1766				return NONE;
1767			break;
1768		case REX_BEG_STR:
1769			if (s != env->beg)
1770				return NONE;
1771			break;
1772		case REX_END_STR:
1773			for (t = s; t < env->end && *t == '\n'; t++);
1774			if (t < env->end)
1775				return NONE;
1776			break;
1777		case REX_FIN_STR:
1778			if (s < env->end)
1779				return NONE;
1780			break;
1781		}
1782		if (!(rex = rex->next))
1783		{
1784			if (!(rex = cont))
1785				break;
1786			cont = 0;
1787		}
1788	}
1789	return GOOD;
1790}
1791
1792#if _AST_REGEX_DEBUG
1793
1794static void
1795listnode(Rex_t* e, int level)
1796{
1797	int	i;
1798
1799	if (e)
1800	{
1801		do
1802		{
1803			for (i = 0; i < level; i++)
1804				sfprintf(sfstderr, "  ");
1805			sfprintf(sfstderr, "%s\n", rexname(e));
1806			switch (e->type)
1807			{
1808			case REX_ALT:
1809			case REX_CONJ:
1810				listnode(e->re.group.expr.binary.left, level + 1);
1811				listnode(e->re.group.expr.binary.right, level + 1);
1812				break;
1813			case REX_GROUP:
1814			case REX_GROUP_AHEAD:
1815			case REX_GROUP_AHEAD_NOT:
1816			case REX_GROUP_BEHIND:
1817			case REX_GROUP_BEHIND_NOT:
1818			case REX_GROUP_CUT:
1819			case REX_NEG:
1820			case REX_REP:
1821				listnode(e->re.group.expr.rex, level + 1);
1822				break;
1823			}
1824		} while (e = e->next);
1825	}
1826}
1827
1828static int
1829list(Env_t* env, Rex_t* rex)
1830{
1831	sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1832	if (rex)
1833		listnode(rex, 1);
1834	return 0;
1835}
1836
1837#endif
1838
1839/*
1840 * returning REG_BADPAT or REG_ESPACE is not explicitly
1841 * countenanced by the standard
1842 */
1843
1844int
1845regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1846{
1847	register int	n;
1848	register int	i;
1849	int		j;
1850	int		k;
1851	int		m;
1852	int		advance;
1853	Env_t*		env;
1854	Rex_t*		e;
1855
1856	DEBUG_INIT();
1857	DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1858	if (!p || !(env = p->env))
1859		return REG_BADPAT;
1860	if (!s)
1861		return fatal(env->disc, REG_BADPAT, NiL);
1862	if (len < env->min)
1863	{
1864		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1865		return REG_NOMATCH;
1866	}
1867	env->regex = p;
1868	env->beg = (unsigned char*)s;
1869	env->end = env->beg + len;
1870	stknew(stkstd, &env->stk);
1871	env->flags &= ~REG_EXEC;
1872	env->flags |= (flags & REG_EXEC);
1873	advance = 0;
1874	if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1875	{
1876		n = env->nsub;
1877		if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1878		    !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1879		    !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1880		{
1881			k = REG_ESPACE;
1882			goto done;
1883		}
1884		env->pos->cur = env->bestpos->cur = 0;
1885		env->best = &env->match[n + 1];
1886		env->best[0].rm_so = 0;
1887		env->best[0].rm_eo = -1;
1888		for (i = 0; i <= n; i++)
1889			env->match[i] = state.nomatch;
1890		if (flags & REG_ADVANCE)
1891			advance = 1;
1892	}
1893	DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1894	k = REG_NOMATCH;
1895	if ((e = env->rex)->type == REX_BM)
1896	{
1897		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1898		if (len < e->re.bm.right)
1899		{
1900			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1901			goto done;
1902		}
1903		else if (!(flags & REG_LEFT))
1904		{
1905			register unsigned char*	buf = (unsigned char*)s;
1906			register size_t		index = e->re.bm.left + e->re.bm.size;
1907			register size_t		mid = len - e->re.bm.right;
1908			register size_t*	skip = e->re.bm.skip;
1909			register size_t*	fail = e->re.bm.fail;
1910			register Bm_mask_t**	mask = e->re.bm.mask;
1911			Bm_mask_t		m;
1912			size_t			x;
1913
1914			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1915			for (;;)
1916			{
1917				while ((index += skip[buf[index]]) < mid);
1918				if (index < HIT)
1919				{
1920					DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1921					goto done;
1922				}
1923				index -= HIT;
1924				m = mask[n = e->re.bm.size - 1][buf[index]];
1925				do
1926				{
1927					if (!n--)
1928					{
1929						if (e->re.bm.back < 0)
1930							goto possible;
1931						if (advance)
1932						{
1933							i = index - e->re.bm.back;
1934							s += i;
1935							if (env->stack)
1936								env->best[0].rm_so += i;
1937							goto possible;
1938						}
1939						x = index;
1940						if (index < e->re.bm.back)
1941							index = 0;
1942						else
1943							index -= e->re.bm.back;
1944						while (index <= x)
1945						{
1946							if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1947							{
1948								if (env->stack)
1949									env->best[0].rm_so = index;
1950								n = env->nsub;
1951								goto hit;
1952							}
1953							index++;
1954						}
1955						index += e->re.bm.size;
1956						break;
1957					}
1958				} while (m &= mask[n][buf[--index]]);
1959				if ((index += fail[n + 1]) >= len)
1960					goto done;
1961			}
1962		}
1963 possible:
1964		n = env->nsub;
1965		e = e->next;
1966	}
1967	j = env->once || (flags & REG_LEFT);
1968	DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1969	while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1970	{
1971		if (j)
1972			goto done;
1973		i = MBSIZE(s);
1974		s += i;
1975		if ((unsigned char*)s > env->end - env->min)
1976			goto done;
1977		if (env->stack)
1978			env->best[0].rm_so += i;
1979	}
1980	if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1981		goto done;
1982 hit:
1983	if (k = env->error)
1984		goto done;
1985	if (i == CUT)
1986	{
1987		k = env->error = REG_NOMATCH;
1988		goto done;
1989	}
1990	if (!(env->flags & REG_NOSUB))
1991	{
1992		k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1993		for (i = j = m = 0; j < nmatch; i++)
1994			if (!i || !k || (i & 1))
1995			{
1996				if (i > n)
1997					match[j] = state.nomatch;
1998				else
1999					match[m = j] = env->best[i];
2000				j++;
2001			}
2002		if (k)
2003		{
2004			while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2005				m--;
2006			((regex_t*)p)->re_nsub = m;
2007		}
2008	}
2009	k = 0;
2010 done:
2011	stkold(stkstd, &env->stk);
2012	env->stk.base = 0;
2013	if (k > REG_NOMATCH)
2014		fatal(p->env->disc, k, NiL);
2015	return k;
2016}
2017
2018void
2019regfree(regex_t* p)
2020{
2021	Env_t*	env;
2022
2023	if (p && (env = p->env))
2024	{
2025#if _REG_subcomp
2026		if (env->sub)
2027		{
2028			regsubfree(p);
2029			p->re_sub = 0;
2030		}
2031#endif
2032		p->env = 0;
2033		if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2034		{
2035			drop(env->disc, env->rex);
2036			if (env->pos)
2037				vecclose(env->pos);
2038			if (env->bestpos)
2039				vecclose(env->bestpos);
2040			if (env->stk.base)
2041				stkold(stkstd, &env->stk);
2042			alloc(env->disc, env, 0);
2043		}
2044	}
2045}
2046