1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2012 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                 Eclipse Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*          http://www.eclipse.org/org/documents/epl-v10.html           *
11*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23
24/*
25 * posix regex executor
26 * single sized-record interface
27 */
28
29#include "reglib.h"
30
31#if _AST_REGEX_DEBUG
32
33#define DEBUG_TEST(f,y,n)	((debug&(debug_flag=f))?(y):(n))
34#define DEBUG_CODE(f,y,n)	do if(debug&(f)){y}else{n} while(0)
35#define DEBUG_INIT()		do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
36
37static unsigned long	debug;
38static unsigned long	debug_flag;
39
40static const char*	rexnames[] =
41{
42	"REX_NULL",
43	"REX_ALT",
44	"REX_ALT_CATCH",
45	"REX_BACK",
46	"REX_BEG",
47	"REX_BEG_STR",
48	"REX_BM",
49	"REX_CAT",
50	"REX_CLASS",
51	"REX_COLL_CLASS",
52	"REX_CONJ",
53	"REX_CONJ_LEFT",
54	"REX_CONJ_RIGHT",
55	"REX_DONE",
56	"REX_DOT",
57	"REX_END",
58	"REX_END_STR",
59	"REX_EXEC",
60	"REX_FIN_STR",
61	"REX_GROUP",
62	"REX_GROUP_CATCH",
63	"REX_GROUP_AHEAD",
64	"REX_GROUP_AHEAD_CATCH",
65	"REX_GROUP_AHEAD_NOT",
66	"REX_GROUP_BEHIND",
67	"REX_GROUP_BEHIND_CATCH",
68	"REX_GROUP_BEHIND_NOT",
69	"REX_GROUP_BEHIND_NOT_CATCH",
70	"REX_GROUP_COND",
71	"REX_GROUP_COND_CATCH",
72	"REX_GROUP_CUT",
73	"REX_GROUP_CUT_CATCH",
74	"REX_KMP",
75	"REX_NEG",
76	"REX_NEG_CATCH",
77	"REX_NEST",
78	"REX_ONECHAR",
79	"REX_REP",
80	"REX_REP_CATCH",
81	"REX_STRING",
82	"REX_TRIE",
83	"REX_WBEG",
84	"REX_WEND",
85	"REX_WORD",
86	"REX_WORD_NOT"
87};
88
89static const char* rexname(Rex_t* rex)
90{
91	if (!rex)
92		return "NIL";
93	if (rex->type >= elementsof(rexnames))
94		return "ERROR";
95	return rexnames[rex->type];
96}
97
98#else
99
100#define DEBUG_INIT()
101#define DEBUG_TEST(f,y,n)	(n)
102#define DEBUG_CODE(f,y,n)	do {n} while(0)
103
104#endif
105
106#define BEG_ALT		1	/* beginning of an alt			*/
107#define BEG_ONE		2	/* beginning of one iteration of a rep	*/
108#define BEG_REP		3	/* beginning of a repetition		*/
109#define BEG_SUB		4	/* beginning of a subexpression		*/
110#define END_ANY		5	/* end of any of above			*/
111
112/*
113 * returns from parse()
114 */
115
116#define NONE		0	/* no parse found			*/
117#define GOOD		1	/* some parse was found			*/
118#define CUT		2	/* no match and no backtrack		*/
119#define BEST		3	/* an unbeatable parse was found	*/
120#define BAD		4	/* error ocurred			*/
121
122/*
123 * REG_SHELL_DOT test
124 */
125
126#define LEADING(e,r,s)	(*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
127
128/*
129 * Pos_t is for comparing parses. An entry is made in the
130 * array at the beginning and at the end of each Group_t,
131 * each iteration in a Group_t, and each Binary_t.
132 */
133
134typedef struct
135{
136	unsigned char*	p;		/* where in string		*/
137	size_t		length;		/* length in string		*/
138	short		serial;		/* preorder subpattern number	*/
139	short		be;		/* which end of pair		*/
140} Pos_t;
141
142/* ===== begin library support ===== */
143
144#define vector(t,v,i)	(((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
145
146static Vector_t*
147vecopen(int inc, int siz)
148{
149	Vector_t*	v;
150	Stk_t*		sp;
151
152	if (inc <= 0)
153		inc = 16;
154	if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155		return 0;
156	if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
157	{
158		stkclose(sp);
159		return 0;
160	}
161	v->stk = sp;
162	v->vec = (char*)v + sizeof(Vector_t);
163	v->max = v->inc = inc;
164	v->siz = siz;
165	v->cur = 0;
166	return v;
167}
168
169static void*
170vecseek(Vector_t** p, int index)
171{
172	Vector_t*	v = *p;
173
174	if (index >= v->max)
175	{
176		while ((v->max += v->inc) <= index);
177		if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178			return 0;
179		*p = v;
180		v->vec = (char*)v + sizeof(Vector_t);
181	}
182	return v->vec + index * v->siz;
183}
184
185static void
186vecclose(Vector_t* v)
187{
188	if (v)
189		stkclose(v->stk);
190}
191
192typedef struct
193{
194	Stk_pos_t	pos;
195	char		data[1];
196} Stk_frame_t;
197
198#define stknew(s,p)	((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199#define stkold(s,p)	stkset(s,(p)->base,(p)->offset)
200
201#define stkframe(s)	(*((Stk_frame_t**)stktop(s)-1))
202#define stkdata(s,t)	((t*)stkframe(s)->data)
203#define stkpop(s)	stkold(s,&(stkframe(s)->pos))
204
205static void*
206stkpush(Stk_t* sp, size_t size)
207{
208	Stk_frame_t*	f;
209	Stk_pos_t	p;
210
211	stknew(sp, &p);
212	size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213	if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214		return 0;
215	f->pos = p;
216	stkframe(sp) = f;
217	return f->data;
218}
219
220/* ===== end library support ===== */
221
222/*
223 * Match_frame_t is for saving and restoring match records
224 * around alternate attempts, so that fossils will not be
225 * left in the match array.  These are the only entries in
226 * the match array that are not otherwise guaranteed to
227 * have current data in them when they get used.
228 */
229
230typedef struct
231{
232	size_t			size;
233	regmatch_t*		match;
234	regmatch_t		save[1];
235} Match_frame_t;
236
237#define matchpush(e,x)	((x)->re.group.number?_matchpush(e,x):0)
238#define matchcopy(e,x)	do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); } while (0)
239#define matchpop(e,x)	do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); stkpop(stkstd); } while (0)
240
241#define pospop(e)	(--(e)->pos->cur)
242
243/*
244 * allocate a frame and push a match onto the stack
245 */
246
247static int
248_matchpush(Env_t* env, Rex_t* rex)
249{
250	Match_frame_t*	f;
251	regmatch_t*	m;
252	regmatch_t*	e;
253	regmatch_t*	s;
254	int		num;
255
256	if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
257		num = 0;
258	if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
259	{
260		env->error = REG_ESPACE;
261		return 1;
262	}
263	f->size = num * sizeof(regmatch_t);
264	f->match = m = env->match + rex->re.group.number;
265	e = m + num;
266	s = f->save;
267	while (m < e)
268	{
269		*s++ = *m;
270		*m++ = state.nomatch;
271	}
272	return 0;
273}
274
275/*
276 * allocate a frame and push a pos onto the stack
277 */
278
279static int
280pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
281{
282	Pos_t*	pos;
283
284	if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
285	{
286		env->error = REG_ESPACE;
287		return 1;
288	}
289	pos->serial = rex->serial;
290	pos->p = p;
291	pos->be = be;
292	env->pos->cur++;
293	return 0;
294}
295
296/*
297 * two matches are known to have the same length
298 * os is start of old pos array, ns is start of new,
299 * oend and nend are end+1 pointers to ends of arrays.
300 * oe and ne are ends (not end+1) of subarrays.
301 * returns 1 if new is better, -1 if old, else 0.
302 */
303
304static int
305better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
306{
307	Pos_t*	oe;
308	Pos_t*	ne;
309	int	k;
310	int	n;
311
312	if (env->error)
313		return -1;
314	for (;;)
315	{
316		DEBUG_CODE(0x0080,{sfprintf(sfstdout, "   %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n   %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
317		if (ns >= nend)
318			return DEBUG_TEST(0x8000,(os < oend),(0));
319		if (os >= oend)
320			return DEBUG_TEST(0x8000,(-1),(1));
321		n = os->serial;
322		if (ns->serial > n)
323			return -1;
324		if (n > ns->serial)
325		{
326			env->error = REG_PANIC;
327			return -1;
328		}
329		if (ns->p > os->p)
330			return 1;
331		if (os->p > ns->p)
332			return -1;
333		oe = os;
334		k = 0;
335		for (;;)
336			if ((++oe)->serial == n)
337			{
338				if (oe->be != END_ANY)
339					k++;
340				else if (k-- <= 0)
341					break;
342			}
343		ne = ns;
344		k = 0;
345		for (;;)
346			if ((++ne)->serial == n)
347			{
348				if (ne->be != END_ANY)
349					k++;
350				else if (k-- <= 0)
351					break;
352			}
353		if (ne->p > oe->p)
354			return 1;
355		if (oe->p > ne->p)
356			return -1;
357		if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
358			return k;
359		os = oe + 1;
360		ns = ne + 1;
361	}
362}
363
364#if _AST_REGEX_DEBUG
365
366static void
367showmatch(regmatch_t* p)
368{
369	sfputc(sfstdout, '(');
370	if (p->rm_so < 0)
371		sfputc(sfstdout, '?');
372	else
373		sfprintf(sfstdout, "%z", p->rm_so);
374	sfputc(sfstdout, ',');
375	if (p->rm_eo < 0)
376		sfputc(sfstdout, '?');
377	else
378		sfprintf(sfstdout, "%z", p->rm_eo);
379	sfputc(sfstdout, ')');
380}
381
382static int
383_better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
384{
385	int	i;
386
387	DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n           new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
388	i = better(env, os, ns, oend, nend, 0);
389	DEBUG_TEST(0x0040,(sfprintf(sfstdout, "           %s\n", i <= 0 ? "OLD" : "NEW")),(0));
390	return i;
391}
392
393#define better	_better
394
395#endif
396
397#define follow(e,r,c,s)	((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
398
399static int		parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
400
401static int
402parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
403{
404	int	i;
405	int	r = NONE;
406	Rex_t	catcher;
407
408	DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
409	if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
410	{
411		if (env->stack && pospush(env, rex, s, END_ANY))
412			return BAD;
413		i = follow(env, rex, cont, s);
414		if (env->stack)
415			pospop(env);
416		switch (i)
417		{
418		case BAD:
419			return BAD;
420		case CUT:
421			return CUT;
422		case BEST:
423		case GOOD:
424			return BEST;
425		}
426	}
427	if (n < rex->hi)
428	{
429		catcher.type = REX_REP_CATCH;
430		catcher.serial = rex->serial;
431		catcher.re.rep_catch.ref = rex;
432		catcher.re.rep_catch.cont = cont;
433		catcher.re.rep_catch.beg = s;
434		catcher.re.rep_catch.n = n + 1;
435		catcher.next = rex->next;
436		if (n == 0)
437			rex->re.rep_catch.beg = s;
438		if (env->stack)
439		{
440			if (matchpush(env, rex))
441				return BAD;
442			if (pospush(env, rex, s, BEG_ONE))
443				return BAD;
444DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d   (%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)(%z,%z)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
445		}
446		r = parse(env, rex->re.group.expr.rex, &catcher, s);
447		DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
448		if (env->stack)
449		{
450			pospop(env);
451			matchpop(env, rex);
452DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP  %d %d (%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)(%z,%z)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
453		}
454		switch (r)
455		{
456		case BAD:
457			return BAD;
458		case BEST:
459			return BEST;
460		case CUT:
461			r = NONE;
462			break;
463		case GOOD:
464			if (rex->flags & REG_MINIMAL)
465				return BEST;
466			r = GOOD;
467			break;
468		}
469	}
470	if (n < rex->lo)
471		return r;
472	if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
473	{
474		if (env->stack && pospush(env, rex, s, END_ANY))
475			return BAD;
476		i = follow(env, rex, cont, s);
477		if (env->stack)
478			pospop(env);
479		switch (i)
480		{
481		case BAD:
482			r = BAD;
483			break;
484		case CUT:
485			r = CUT;
486			break;
487		case BEST:
488			r = BEST;
489			break;
490		case GOOD:
491			r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
492			break;
493		}
494	}
495	return r;
496}
497
498static int
499parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
500{
501	unsigned char*	p;
502	int		r;
503
504	if (p = rex->map)
505	{
506		for (;;)
507		{
508			if (s >= env->end)
509				return NONE;
510			while (x->c != p[*s])
511				if (!(x = x->sib))
512					return NONE;
513			if (x->end)
514				break;
515			x = x->son;
516			s++;
517		}
518	}
519	else
520	{
521		for (;;)
522		{
523			if (s >= env->end)
524				return NONE;
525			while (x->c != *s)
526				if (!(x = x->sib))
527					return NONE;
528			if (x->end)
529				break;
530			x = x->son;
531			s++;
532		}
533	}
534	s++;
535	if (rex->flags & REG_MINIMAL)
536		switch (follow(env, rex, cont, s))
537		{
538		case BAD:
539			return BAD;
540		case CUT:
541			return CUT;
542		case BEST:
543		case GOOD:
544			return BEST;
545		}
546	if (x->son)
547		switch (parsetrie(env, x->son, rex, cont, s))
548		{
549		case BAD:
550			return BAD;
551		case CUT:
552			return CUT;
553		case BEST:
554			return BEST;
555		case GOOD:
556			if (rex->flags & REG_MINIMAL)
557				return BEST;
558			r = GOOD;
559			break;
560		default:
561			r = NONE;
562			break;
563		}
564	else
565		r = NONE;
566	if (!(rex->flags & REG_MINIMAL))
567		switch (follow(env, rex, cont, s))
568		{
569		case BAD:
570			return BAD;
571		case CUT:
572			return CUT;
573		case BEST:
574			return BEST;
575		case GOOD:
576			return GOOD;
577	}
578	return r;
579}
580
581static int
582collelt(register Celt_t* ce, char* key, int c, int x)
583{
584	Ckey_t	elt;
585
586	mbxfrm(elt, key, COLL_KEY_MAX);
587	for (;; ce++)
588	{
589		switch (ce->typ)
590		{
591		case COLL_call:
592			if (!x && (*ce->fun)(c))
593				return 1;
594			continue;
595		case COLL_char:
596			if (!strcmp((char*)ce->beg, (char*)elt))
597				return 1;
598			continue;
599		case COLL_range:
600			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
601				return 1;
602			continue;
603		case COLL_range_lc:
604			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
605				return 1;
606			continue;
607		case COLL_range_uc:
608			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
609				return 1;
610			continue;
611		}
612		break;
613	}
614	return 0;
615}
616
617static int
618collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
619{
620	if (!x)
621	{
622		if (collelt(ce, key, c, x))
623			return 1;
624		if (iswlower(c))
625			c = towupper(c);
626		else if (iswupper(c))
627			c = towlower(c);
628		else
629			return 0;
630		x = mbconv(key, c);
631		key[x] = 0;
632		return collelt(ce, key, c, 0);
633	}
634	while (*nxt)
635	{
636		if (collic(ce, key, nxt + 1, c, x))
637			return 1;
638		if (islower(*nxt))
639			*nxt = toupper(*nxt);
640		else if (isupper(*nxt))
641			*nxt = tolower(*nxt);
642		else
643			return 0;
644		nxt++;
645	}
646	return collelt(ce, key, c, x);
647}
648
649static int
650collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
651{
652	unsigned char*		t;
653	wchar_t			c;
654	int			w;
655	int			r;
656	int			x;
657	int			ic;
658	Ckey_t			key;
659	Ckey_t			elt;
660
661	ic = (rex->flags & REG_ICASE);
662	if ((w = MBSIZE(s)) > 1)
663	{
664		memcpy((char*)key, (char*)s, w);
665		key[w] = 0;
666		t = s;
667		c = mbchar(t);
668#if !_lib_wctype
669		c &= 0xff;
670#endif
671		x = 0;
672	}
673	else
674	{
675		c = s[0];
676		if (ic && isupper(c))
677			c = tolower(c);
678		key[0] = c;
679		key[1] = 0;
680		if (isalpha(c))
681		{
682			x = e - s;
683			if (x > COLL_KEY_MAX)
684				x = COLL_KEY_MAX;
685			while (w < x)
686			{
687				c = s[w];
688				if (!isalpha(c))
689					break;
690				r = mbxfrm(elt, key, COLL_KEY_MAX);
691				if (ic && isupper(c))
692					c = tolower(c);
693				key[w] = c;
694				key[w + 1] = 0;
695				if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
696					break;
697				w++;
698			}
699		}
700		key[w] = 0;
701		c = key[0];
702		x = w - 1;
703	}
704	r = 1;
705	for (;;)
706	{
707		if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
708			break;
709		if (!x)
710		{
711			r = 0;
712			break;
713		}
714		w = x--;
715		key[w] = 0;
716	}
717	*p = s + w;
718	return rex->re.collate.invert ? !r : r;
719}
720
721static unsigned char*
722nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
723{
724	register int	c;
725	register int	cc;
726	unsigned int	n;
727	int		oc;
728
729	if (type[co] & (REX_NEST_literal|REX_NEST_quote))
730	{
731		n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
732		while (s < e)
733		{
734			c = *s++;
735			if (c == co)
736				return s;
737			else if (type[c] & n)
738			{
739				if (s >= e || (type[c] & REX_NEST_terminator))
740					break;
741				s++;
742			}
743		}
744	}
745	else
746	{
747		cc = type[co] >> REX_NEST_SHIFT;
748		oc = type[co] & (REX_NEST_open|REX_NEST_close);
749		n = 1;
750		while (s < e)
751		{
752			c = *s++;
753			switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
754			{
755			case REX_NEST_delimiter:
756			case REX_NEST_terminator:
757				return oc ? 0 : s;
758			case REX_NEST_separator:
759				if (!oc)
760					return s;
761				break;
762			case REX_NEST_escape:
763				if (s >= e)
764					return 0;
765				s++;
766				break;
767			case REX_NEST_open|REX_NEST_close:
768				if (c == cc)
769				{
770					if (!--n)
771						return s;
772				}
773				/*FALLTHROUGH*/
774			case REX_NEST_open:
775				if (c == co)
776				{
777					if (!++n)
778						return 0;
779				}
780				else if (!(s = nestmatch(s, e, type, c)))
781					return 0;
782				break;
783			case REX_NEST_close:
784				if (c != cc)
785					return 0;
786				if (!--n)
787					return s;
788				break;
789			}
790		}
791		return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
792	}
793	return 0;
794}
795
796static int
797parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
798{
799	int		c;
800	int		d;
801	int		m;
802	int		r;
803	ssize_t		i;
804	ssize_t		n;
805	int*		f;
806	unsigned char*	p;
807	unsigned char*	t;
808	unsigned char*	b;
809	unsigned char*	e;
810	char*		u;
811	regmatch_t*	o;
812	Trie_node_t*	x;
813	Rex_t*		q;
814	Rex_t		catcher;
815	Rex_t		next;
816
817	for (;;)
818	{
819DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
820		switch (rex->type)
821		{
822		case REX_ALT:
823			if (env->stack)
824			{
825				if (matchpush(env, rex))
826					return BAD;
827				if (pospush(env, rex, s, BEG_ALT))
828					return BAD;
829				catcher.type = REX_ALT_CATCH;
830				catcher.serial = rex->serial;
831				catcher.re.alt_catch.cont = cont;
832				catcher.next = rex->next;
833				r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
834				if (r < BEST || (rex->flags & REG_MINIMAL))
835				{
836					matchcopy(env, rex);
837					((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
838					n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
839					if (n != NONE)
840						r = n;
841				}
842				pospop(env);
843				matchpop(env, rex);
844			}
845			else
846			{
847				if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
848					r = parse(env, rex->re.group.expr.binary.right, cont, s);
849				if (r == GOOD)
850					r = BEST;
851			}
852			return r;
853		case REX_ALT_CATCH:
854			if (pospush(env, rex, s, END_ANY))
855				return BAD;
856			r = follow(env, rex, rex->re.alt_catch.cont, s);
857			pospop(env);
858			return r;
859		case REX_BACK:
860			o = &env->match[rex->lo];
861			if (o->rm_so < 0)
862				return NONE;
863			i = o->rm_eo - o->rm_so;
864			e = s + i;
865			if (e > env->end)
866				return NONE;
867			t = env->beg + o->rm_so;
868			if (!(p = rex->map))
869			{
870				while (s < e)
871					if (*s++ != *t++)
872						return NONE;
873			}
874			else if (!mbwide())
875			{
876				while (s < e)
877					if (p[*s++] != p[*t++])
878						return NONE;
879			}
880			else
881			{
882				while (s < e)
883				{
884					c = mbchar(s);
885					d = mbchar(t);
886					if (towupper(c) != towupper(d))
887						return NONE;
888				}
889			}
890			break;
891		case REX_BEG:
892			if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
893				return NONE;
894			break;
895		case REX_CLASS:
896			if (LEADING(env, rex, s))
897				return NONE;
898			n = rex->hi;
899			if (n > env->end - s)
900				n = env->end - s;
901			m = rex->lo;
902			if (m > n)
903				return NONE;
904			r = NONE;
905			if (!(rex->flags & REG_MINIMAL))
906			{
907				for (i = 0; i < n; i++)
908					if (!settst(rex->re.charclass, s[i]))
909					{
910						n = i;
911						break;
912					}
913				for (s += n; n-- >= m; s--)
914					switch (follow(env, rex, cont, s))
915					{
916					case BAD:
917						return BAD;
918					case CUT:
919						return CUT;
920					case BEST:
921						return BEST;
922					case GOOD:
923						r = GOOD;
924						break;
925					}
926			}
927			else
928			{
929				for (e = s + m; s < e; s++)
930					if (!settst(rex->re.charclass, *s))
931						return r;
932				e += n - m;
933				for (;;)
934				{
935					switch (follow(env, rex, cont, s))
936					{
937					case BAD:
938						return BAD;
939					case CUT:
940						return CUT;
941					case BEST:
942					case GOOD:
943						return BEST;
944					}
945					if (s >= e || !settst(rex->re.charclass, *s))
946						break;
947					s++;
948				}
949			}
950			return r;
951		case REX_COLL_CLASS:
952			if (LEADING(env, rex, s))
953				return NONE;
954			n = rex->hi;
955			if (n > env->end - s)
956				n = env->end - s;
957			m = rex->lo;
958			if (m > n)
959				return NONE;
960			r = NONE;
961			e = env->end;
962			if (!(rex->flags & REG_MINIMAL))
963			{
964				if (!(b = (unsigned char*)stkpush(stkstd, n)))
965				{
966					env->error = REG_ESPACE;
967					return BAD;
968				}
969				for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
970				{
971					b[i] = t - s;
972					s = t;
973				}
974				for (; i-- >= rex->lo; s -= b[i])
975					switch (follow(env, rex, cont, s))
976					{
977					case BAD:
978						stkpop(stkstd);
979						return BAD;
980					case CUT:
981						stkpop(stkstd);
982						return CUT;
983					case BEST:
984						stkpop(stkstd);
985						return BEST;
986					case GOOD:
987						r = GOOD;
988						break;
989					}
990				stkpop(stkstd);
991			}
992			else
993			{
994				for (i = 0; i < m && s < e; i++, s = t)
995					if (!collmatch(rex, s, e, &t))
996						return r;
997				while (i++ <= n)
998				{
999					switch (follow(env, rex, cont, s))
1000					{
1001					case BAD:
1002						return BAD;
1003					case CUT:
1004						return CUT;
1005					case BEST:
1006					case GOOD:
1007						return BEST;
1008					}
1009					if (s >= e || !collmatch(rex, s, e, &s))
1010						break;
1011				}
1012			}
1013			return r;
1014		case REX_CONJ:
1015			next.type = REX_CONJ_RIGHT;
1016			next.re.conj_right.cont = cont;
1017			next.next = rex->next;
1018			catcher.type = REX_CONJ_LEFT;
1019			catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1020			catcher.re.conj_left.cont = &next;
1021			catcher.re.conj_left.beg = s;
1022			catcher.next = 0;
1023			return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1024		case REX_CONJ_LEFT:
1025			rex->re.conj_left.cont->re.conj_right.end = s;
1026			cont = rex->re.conj_left.cont;
1027			s = rex->re.conj_left.beg;
1028			rex = rex->re.conj_left.right;
1029			continue;
1030		case REX_CONJ_RIGHT:
1031			if (rex->re.conj_right.end != s)
1032				return NONE;
1033			cont = rex->re.conj_right.cont;
1034			break;
1035		case REX_DONE:
1036			if (!env->stack)
1037				return BEST;
1038			n = s - env->beg;
1039			r = env->nsub;
1040			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%z,%z)(%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1041			if ((i = env->best[0].rm_eo) >= 0)
1042			{
1043				if (rex->flags & REG_MINIMAL)
1044				{
1045					if (n > i)
1046						return GOOD;
1047				}
1048				else
1049				{
1050					if (n < i)
1051						return GOOD;
1052				}
1053				if (n == i && better(env,
1054						     (Pos_t*)env->bestpos->vec,
1055				   		     (Pos_t*)env->pos->vec,
1056				   		     (Pos_t*)env->bestpos->vec+env->bestpos->cur,
1057				   		     (Pos_t*)env->pos->vec+env->pos->cur,
1058						     0) <= 0)
1059					return GOOD;
1060			}
1061			env->best[0].rm_eo = n;
1062			memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1063			n = env->pos->cur;
1064			if (!vector(Pos_t, env->bestpos, n))
1065			{
1066				env->error = REG_ESPACE;
1067				return BAD;
1068			}
1069			env->bestpos->cur = n;
1070			memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1071			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%z,%z)(%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1072			return GOOD;
1073		case REX_DOT:
1074			if (LEADING(env, rex, s))
1075				return NONE;
1076			n = rex->hi;
1077			if (n > env->end - s)
1078				n = env->end - s;
1079			m = rex->lo;
1080			if (m > n)
1081				return NONE;
1082			if ((c = rex->explicit) >= 0 && !mbwide())
1083				for (i = 0; i < n; i++)
1084					if (s[i] == c)
1085					{
1086						n = i;
1087						break;
1088					}
1089			r = NONE;
1090			if (!(rex->flags & REG_MINIMAL))
1091			{
1092				if (!mbwide())
1093				{
1094					for (s += n; n-- >= m; s--)
1095						switch (follow(env, rex, cont, s))
1096						{
1097						case BAD:
1098							return BAD;
1099						case CUT:
1100							return CUT;
1101						case BEST:
1102							return BEST;
1103						case GOOD:
1104							r = GOOD;
1105							break;
1106						}
1107				}
1108				else
1109				{
1110					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1111					{
1112						env->error = REG_ESPACE;
1113						return BAD;
1114					}
1115					e = env->end;
1116					for (i = 0; s < e && i < n && *s != c; i++)
1117						s += b[i] = MBSIZE(s);
1118					for (; i-- >= m; s -= b[i])
1119						switch (follow(env, rex, cont, s))
1120						{
1121						case BAD:
1122							stkpop(stkstd);
1123							return BAD;
1124						case CUT:
1125							stkpop(stkstd);
1126							return CUT;
1127						case BEST:
1128							stkpop(stkstd);
1129							return BEST;
1130						case GOOD:
1131							r = GOOD;
1132							break;
1133						}
1134					stkpop(stkstd);
1135				}
1136			}
1137			else
1138			{
1139				if (!mbwide())
1140				{
1141					e = s + n;
1142					for (s += m; s <= e; s++)
1143						switch (follow(env, rex, cont, s))
1144						{
1145						case BAD:
1146							return BAD;
1147						case CUT:
1148							return CUT;
1149						case BEST:
1150						case GOOD:
1151							return BEST;
1152						}
1153				}
1154				else
1155				{
1156					e = env->end;
1157					for (i = 0; s < e && i < m && *s != c; i++)
1158						s += MBSIZE(s);
1159					if (i >= m)
1160						for (; s <= e && i <= n; s += MBSIZE(s), i++)
1161							switch (follow(env, rex, cont, s))
1162							{
1163							case BAD:
1164								return BAD;
1165							case CUT:
1166								return CUT;
1167							case BEST:
1168							case GOOD:
1169								return BEST;
1170							}
1171				}
1172			}
1173			return r;
1174		case REX_END:
1175			if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1176				return NONE;
1177			break;
1178		case REX_GROUP:
1179DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1180			if (env->stack)
1181			{
1182				if (rex->re.group.number)
1183					env->match[rex->re.group.number].rm_so = s - env->beg;
1184				if (pospush(env, rex, s, BEG_SUB))
1185					return BAD;
1186				catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1187			}
1188			catcher.type = REX_GROUP_CATCH;
1189			catcher.serial = rex->serial;
1190			catcher.re.group_catch.cont = cont;
1191			catcher.next = rex->next;
1192			r = parse(env, rex->re.group.expr.rex, &catcher, s);
1193			if (env->stack)
1194			{
1195				pospop(env);
1196				if (rex->re.group.number)
1197					env->match[rex->re.group.number].rm_so = -1;
1198			}
1199			return r;
1200		case REX_GROUP_CATCH:
1201DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1202			if (env->stack)
1203			{
1204				if (rex->re.group_catch.eo)
1205					*rex->re.group_catch.eo = s - env->beg;
1206				if (pospush(env, rex, s, END_ANY))
1207					return BAD;
1208			}
1209			r = follow(env, rex, rex->re.group_catch.cont, s);
1210			if (env->stack)
1211			{
1212				pospop(env);
1213				if (rex->re.group_catch.eo)
1214					*rex->re.group_catch.eo = -1;
1215			}
1216			return r;
1217		case REX_GROUP_AHEAD:
1218			catcher.type = REX_GROUP_AHEAD_CATCH;
1219			catcher.flags = rex->flags;
1220			catcher.serial = rex->serial;
1221			catcher.re.rep_catch.beg = s;
1222			catcher.re.rep_catch.cont = cont;
1223			catcher.next = rex->next;
1224			return parse(env, rex->re.group.expr.rex, &catcher, s);
1225		case REX_GROUP_AHEAD_CATCH:
1226			return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1227		case REX_GROUP_AHEAD_NOT:
1228			r = parse(env, rex->re.group.expr.rex, NiL, s);
1229			if (r == NONE)
1230				r = follow(env, rex, cont, s);
1231			else if (r != BAD)
1232				r = NONE;
1233			return r;
1234		case REX_GROUP_BEHIND:
1235			if ((s - env->beg) < rex->re.group.size)
1236				return NONE;
1237			catcher.type = REX_GROUP_BEHIND_CATCH;
1238			catcher.flags = rex->flags;
1239			catcher.serial = rex->serial;
1240			catcher.re.behind_catch.beg = s;
1241			catcher.re.behind_catch.end = e = env->end;
1242			catcher.re.behind_catch.cont = cont;
1243			catcher.next = rex->next;
1244			for (t = s - rex->re.group.size; t >= env->beg; t--)
1245			{
1246				env->end = s;
1247				r = parse(env, rex->re.group.expr.rex, &catcher, t);
1248				env->end = e;
1249				if (r != NONE)
1250					return r;
1251			}
1252			return NONE;
1253		case REX_GROUP_BEHIND_CATCH:
1254			if (s != rex->re.behind_catch.beg)
1255				return NONE;
1256			env->end = rex->re.behind_catch.end;
1257			return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1258		case REX_GROUP_BEHIND_NOT:
1259			if ((s - env->beg) < rex->re.group.size)
1260				r = NONE;
1261			else
1262			{
1263				catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1264				catcher.re.neg_catch.beg = s;
1265				catcher.next = 0;
1266				e = env->end;
1267				env->end = s;
1268				for (t = s - rex->re.group.size; t >= env->beg; t--)
1269				{
1270					r = parse(env, rex->re.group.expr.rex, &catcher, t);
1271					if (r != NONE)
1272						break;
1273				}
1274				env->end = e;
1275			}
1276			if (r == NONE)
1277				r = follow(env, rex, cont, s);
1278			else if (r != BAD)
1279				r = NONE;
1280			return r;
1281		case REX_GROUP_BEHIND_NOT_CATCH:
1282			return s == rex->re.neg_catch.beg ? GOOD : NONE;
1283		case REX_GROUP_COND:
1284			if (q = rex->re.group.expr.binary.right)
1285			{
1286				catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1287				catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1288			}
1289			else
1290				catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1291			if (q = rex->re.group.expr.binary.left)
1292			{
1293				catcher.type = REX_GROUP_COND_CATCH;
1294				catcher.flags = rex->flags;
1295				catcher.serial = rex->serial;
1296				catcher.re.cond_catch.yes = 0;
1297				catcher.re.cond_catch.beg = s;
1298				catcher.re.cond_catch.cont = cont;
1299				catcher.next = rex->next;
1300				r = parse(env, q, &catcher, s);
1301				if (r == BAD || catcher.re.cond_catch.yes)
1302					return r;
1303			}
1304			else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1305				r = GOOD;
1306			else
1307				r = NONE;
1308			if (q = catcher.re.cond_catch.next[r != NONE])
1309			{
1310				catcher.type = REX_CAT;
1311				catcher.flags = q->flags;
1312				catcher.serial = q->serial;
1313				catcher.re.group_catch.cont = cont;
1314				catcher.next = rex->next;
1315				return parse(env, q, &catcher, s);
1316			}
1317			return follow(env, rex, cont, s);
1318		case REX_GROUP_COND_CATCH:
1319			rex->re.cond_catch.yes = 1;
1320			catcher.type = REX_CAT;
1321			catcher.flags = rex->flags;
1322			catcher.serial = rex->serial;
1323			catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1324			catcher.next = rex->next;
1325			return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1326		case REX_CAT:
1327			return follow(env, rex, rex->re.group_catch.cont, s);
1328		case REX_GROUP_CUT:
1329			catcher.type = REX_GROUP_CUT_CATCH;
1330			catcher.flags = rex->flags;
1331			catcher.serial = rex->serial;
1332			catcher.re.group_catch.cont = cont;
1333			catcher.next = rex->next;
1334			return parse(env, rex->re.group.expr.rex, &catcher, s);
1335		case REX_GROUP_CUT_CATCH:
1336			switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1337			{
1338			case GOOD:
1339				r = BEST;
1340				break;
1341			case NONE:
1342				r = CUT;
1343				break;
1344			}
1345			return r;
1346		case REX_KMP:
1347			f = rex->re.string.fail;
1348			b = rex->re.string.base;
1349			n = rex->re.string.size;
1350			t = s;
1351			e = env->end;
1352			if (p = rex->map)
1353			{
1354				while (t + n <= e)
1355				{
1356					for (i = -1; t < e; t++)
1357					{
1358						while (i >= 0 && b[i+1] != p[*t])
1359							i = f[i];
1360						if (b[i+1] == p[*t])
1361							i++;
1362						if (i + 1 == n)
1363						{
1364							t++;
1365							if (env->stack)
1366								env->best[0].rm_so = t - s - n;
1367							switch (follow(env, rex, cont, t))
1368							{
1369							case BAD:
1370								return BAD;
1371							case CUT:
1372								return CUT;
1373							case BEST:
1374							case GOOD:
1375								return BEST;
1376							}
1377							t -= n - 1;
1378							break;
1379						}
1380					}
1381				}
1382			}
1383			else
1384			{
1385				while (t + n <= e)
1386				{
1387					for (i = -1; t < e; t++)
1388					{
1389						while (i >= 0 && b[i+1] != *t)
1390							i = f[i];
1391						if (b[i+1] == *t)
1392							i++;
1393						if (i + 1 == n)
1394						{
1395							t++;
1396							if (env->stack)
1397								env->best[0].rm_so = t - s - n;
1398							switch (follow(env, rex, cont, t))
1399							{
1400							case BAD:
1401								return BAD;
1402							case CUT:
1403								return CUT;
1404							case BEST:
1405							case GOOD:
1406								return BEST;
1407							}
1408							t -= n - 1;
1409							break;
1410						}
1411					}
1412				}
1413			}
1414			return NONE;
1415		case REX_NEG:
1416			if (LEADING(env, rex, s))
1417				return NONE;
1418			i = env->end - s;
1419			n = ((i + 7) >> 3) + 1;
1420			catcher.type = REX_NEG_CATCH;
1421			catcher.re.neg_catch.beg = s;
1422			if (!(p = (unsigned char*)stkpush(stkstd, n)))
1423				return BAD;
1424			memset(catcher.re.neg_catch.index = p, 0, n);
1425			catcher.next = rex->next;
1426			if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1427				r = BAD;
1428			else
1429			{
1430				r = NONE;
1431				for (; i >= 0; i--)
1432					if (!bittst(p, i))
1433					{
1434						switch (follow(env, rex, cont, s + i))
1435						{
1436						case BAD:
1437							r = BAD;
1438							break;
1439						case BEST:
1440							r = BEST;
1441							break;
1442						case CUT:
1443							r = CUT;
1444							break;
1445						case GOOD:
1446							r = GOOD;
1447							/*FALLTHROUGH*/
1448						default:
1449							continue;
1450						}
1451						break;
1452					}
1453			}
1454			stkpop(stkstd);
1455			return r;
1456		case REX_NEG_CATCH:
1457			bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1458			return NONE;
1459		case REX_NEST:
1460			if (s >= env->end)
1461				return NONE;
1462			do
1463			{
1464				if ((c = *s++) == rex->re.nest.primary)
1465				{
1466					if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1467						return NONE;
1468					break;
1469				}
1470				if (rex->re.nest.primary >= 0)
1471					return NONE;
1472			    	if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1473					break;
1474			    	if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1475					return NONE;
1476			} while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1477			break;
1478		case REX_NULL:
1479			break;
1480		case REX_ONECHAR:
1481			n = rex->hi;
1482			if (n > env->end - s)
1483				n = env->end - s;
1484			m = rex->lo;
1485			if (m > n)
1486				return NONE;
1487			r = NONE;
1488			c = rex->re.onechar;
1489			if (!(rex->flags & REG_MINIMAL))
1490			{
1491				if (!mbwide())
1492				{
1493					if (p = rex->map)
1494					{
1495						for (i = 0; i < n; i++, s++)
1496							if (p[*s] != c)
1497								break;
1498					}
1499					else
1500					{
1501						for (i = 0; i < n; i++, s++)
1502							if (*s != c)
1503								break;
1504					}
1505					for (; i-- >= m; s--)
1506						switch (follow(env, rex, cont, s))
1507						{
1508						case BAD:
1509							return BAD;
1510						case BEST:
1511							return BEST;
1512						case CUT:
1513							return CUT;
1514						case GOOD:
1515							r = GOOD;
1516							break;
1517						}
1518				}
1519				else
1520				{
1521					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1522					{
1523						env->error = REG_ESPACE;
1524						return BAD;
1525					}
1526					e = env->end;
1527					if (!(rex->flags & REG_ICASE))
1528					{
1529						for (i = 0; s < e && i < n; i++, s = t)
1530						{
1531							t = s;
1532							if (mbchar(t) != c)
1533								break;
1534							b[i] = t - s;
1535						}
1536					}
1537					else
1538					{
1539						for (i = 0; s < e && i < n; i++, s = t)
1540						{
1541							t = s;
1542							if (towupper(mbchar(t)) != c)
1543								break;
1544							b[i] = t - s;
1545						}
1546					}
1547					for (; i-- >= m; s -= b[i])
1548						switch (follow(env, rex, cont, s))
1549						{
1550						case BAD:
1551							stkpop(stkstd);
1552							return BAD;
1553						case BEST:
1554							stkpop(stkstd);
1555							return BEST;
1556						case CUT:
1557							stkpop(stkstd);
1558							return CUT;
1559						case GOOD:
1560							r = GOOD;
1561							break;
1562						}
1563					stkpop(stkstd);
1564				}
1565			}
1566			else
1567			{
1568				if (!mbwide())
1569				{
1570					e = s + m;
1571					if (p = rex->map)
1572					{
1573						for (; s < e; s++)
1574							if (p[*s] != c)
1575								return r;
1576						e += n - m;
1577						for (;;)
1578						{
1579							switch (follow(env, rex, cont, s))
1580							{
1581							case BAD:
1582								return BAD;
1583							case CUT:
1584								return CUT;
1585							case BEST:
1586							case GOOD:
1587								return BEST;
1588							}
1589							if (s >= e || p[*s++] != c)
1590								break;
1591						}
1592					}
1593					else
1594					{
1595						for (; s < e; s++)
1596							if (*s != c)
1597								return r;
1598						e += n - m;
1599						for (;;)
1600						{
1601							switch (follow(env, rex, cont, s))
1602							{
1603							case BAD:
1604								return BAD;
1605							case CUT:
1606								return CUT;
1607							case BEST:
1608							case GOOD:
1609								return BEST;
1610							}
1611							if (s >= e || *s++ != c)
1612								break;
1613						}
1614					}
1615				}
1616				else
1617				{
1618					e = env->end;
1619					if (!(rex->flags & REG_ICASE))
1620					{
1621						for (i = 0; i < m && s < e; i++, s = t)
1622						{
1623							t = s;
1624							if (mbchar(t) != c)
1625								return r;
1626						}
1627						while (i++ <= n)
1628						{
1629							switch (follow(env, rex, cont, s))
1630							{
1631							case BAD:
1632								return BAD;
1633							case CUT:
1634								return CUT;
1635							case BEST:
1636							case GOOD:
1637								return BEST;
1638							}
1639							if (s >= e || mbchar(s) != c)
1640								break;
1641						}
1642					}
1643					else
1644					{
1645						for (i = 0; i < m && s < e; i++, s = t)
1646						{
1647							t = s;
1648							if (towupper(mbchar(t)) != c)
1649								return r;
1650						}
1651						while (i++ <= n)
1652						{
1653							switch (follow(env, rex, cont, s))
1654							{
1655							case BAD:
1656								return BAD;
1657							case CUT:
1658								return CUT;
1659							case BEST:
1660							case GOOD:
1661								return BEST;
1662							}
1663							if (s >= e || towupper(mbchar(s)) != c)
1664								break;
1665						}
1666					}
1667				}
1668			}
1669			return r;
1670		case REX_REP:
1671			if (env->stack && pospush(env, rex, s, BEG_REP))
1672				return BAD;
1673			r = parserep(env, rex, cont, s, 0);
1674			if (env->stack)
1675				pospop(env);
1676			return r;
1677		case REX_REP_CATCH:
1678			DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1679			if (env->stack && pospush(env, rex, s, END_ANY))
1680				return BAD;
1681			if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1682			{
1683				/*
1684				 * optional empty iteration
1685				 */
1686
1687DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1688				if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1689					r = NONE;
1690				else if (pospush(env, rex, s, END_ANY))
1691					r = BAD;
1692				else
1693				{
1694					r = follow(env, rex, rex->re.rep_catch.cont, s);
1695					pospop(env);
1696				}
1697			}
1698			else
1699				r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1700			if (env->stack)
1701				pospop(env);
1702			return r;
1703		case REX_STRING:
1704DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1705			if (rex->re.string.size > (env->end - s))
1706				return NONE;
1707			t = rex->re.string.base;
1708			e = t + rex->re.string.size;
1709			if (!(p = rex->map))
1710			{
1711				while (t < e)
1712					if (*s++ != *t++)
1713						return NONE;
1714			}
1715			else if (!mbwide())
1716			{
1717				while (t < e)
1718					if (p[*s++] != *t++)
1719						return NONE;
1720			}
1721			else
1722			{
1723				while (t < e)
1724				{
1725					c = mbchar(s);
1726					d = mbchar(t);
1727					if (towupper(c) != d)
1728						return NONE;
1729				}
1730			}
1731			break;
1732		case REX_TRIE:
1733			if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1734				return NONE;
1735			return parsetrie(env, x, rex, cont, s);
1736		case REX_EXEC:
1737			u = 0;
1738			r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1739			e = (unsigned char*)u;
1740			if (e >= s && e <= env->end)
1741				s = e;
1742			switch (r)
1743			{
1744			case 0:
1745				break;
1746			case REG_NOMATCH:
1747				return NONE;
1748			default:
1749				env->error = r;
1750				return BAD;
1751			}
1752			break;
1753		case REX_WBEG:
1754			if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1755				return NONE;
1756			break;
1757		case REX_WEND:
1758			if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1759				return NONE;
1760			break;
1761		case REX_WORD:
1762			if (s > env->beg && isword(*(s - 1)) == isword(*s))
1763				return NONE;
1764			break;
1765		case REX_WORD_NOT:
1766			if (s == env->beg || isword(*(s - 1)) != isword(*s))
1767				return NONE;
1768			break;
1769		case REX_BEG_STR:
1770			if (s != env->beg)
1771				return NONE;
1772			break;
1773		case REX_END_STR:
1774			for (t = s; t < env->end && *t == '\n'; t++);
1775			if (t < env->end)
1776				return NONE;
1777			break;
1778		case REX_FIN_STR:
1779			if (s < env->end)
1780				return NONE;
1781			break;
1782		}
1783		if (!(rex = rex->next))
1784		{
1785			if (!(rex = cont))
1786				break;
1787			cont = 0;
1788		}
1789	}
1790	return GOOD;
1791}
1792
1793#if _AST_REGEX_DEBUG
1794
1795static void
1796listnode(Rex_t* e, int level)
1797{
1798	int	i;
1799
1800	if (e)
1801	{
1802		do
1803		{
1804			for (i = 0; i < level; i++)
1805				sfprintf(sfstderr, "  ");
1806			sfprintf(sfstderr, "%s\n", rexname(e));
1807			switch (e->type)
1808			{
1809			case REX_ALT:
1810			case REX_CONJ:
1811				listnode(e->re.group.expr.binary.left, level + 1);
1812				listnode(e->re.group.expr.binary.right, level + 1);
1813				break;
1814			case REX_GROUP:
1815			case REX_GROUP_AHEAD:
1816			case REX_GROUP_AHEAD_NOT:
1817			case REX_GROUP_BEHIND:
1818			case REX_GROUP_BEHIND_NOT:
1819			case REX_GROUP_CUT:
1820			case REX_NEG:
1821			case REX_REP:
1822				listnode(e->re.group.expr.rex, level + 1);
1823				break;
1824			}
1825		} while (e = e->next);
1826	}
1827}
1828
1829static int
1830list(Env_t* env, Rex_t* rex)
1831{
1832	sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1833	if (rex)
1834		listnode(rex, 1);
1835	return 0;
1836}
1837
1838#endif
1839
1840/*
1841 * returning REG_BADPAT or REG_ESPACE is not explicitly
1842 * countenanced by the standard
1843 */
1844
1845int
1846regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1847{
1848	register ssize_t	n;
1849	register int		i;
1850	int			j;
1851	int			k;
1852	int			m;
1853	int			advance;
1854	Env_t*			env;
1855	Rex_t*			e;
1856
1857	DEBUG_INIT();
1858	DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1859	if (!p || !(env = p->env))
1860		return REG_BADPAT;
1861	if (!s)
1862		return fatal(env->disc, REG_BADPAT, NiL);
1863	if (len < env->min)
1864	{
1865		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1866		return REG_NOMATCH;
1867	}
1868	env->regex = p;
1869	env->beg = (unsigned char*)s;
1870	env->end = env->beg + len;
1871	stknew(stkstd, &env->stk);
1872	env->flags &= ~REG_EXEC;
1873	env->flags |= (flags & REG_EXEC);
1874	advance = 0;
1875	if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1876	{
1877		n = env->nsub;
1878		if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1879		    !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1880		    !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1881		{
1882			k = REG_ESPACE;
1883			goto done;
1884		}
1885		env->pos->cur = env->bestpos->cur = 0;
1886		env->best = &env->match[n + 1];
1887		env->best[0].rm_so = 0;
1888		env->best[0].rm_eo = -1;
1889		for (i = 0; i <= n; i++)
1890			env->match[i] = state.nomatch;
1891		if (flags & REG_ADVANCE)
1892			advance = 1;
1893	}
1894	DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1895	k = REG_NOMATCH;
1896	if ((e = env->rex)->type == REX_BM)
1897	{
1898		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1899		if (len < e->re.bm.right)
1900		{
1901			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1902			goto done;
1903		}
1904		else if (!(flags & REG_LEFT))
1905		{
1906			register unsigned char*	buf = (unsigned char*)s;
1907			register size_t		index = e->re.bm.left + e->re.bm.size;
1908			register size_t		mid = len - e->re.bm.right;
1909			register size_t*	skip = e->re.bm.skip;
1910			register size_t*	fail = e->re.bm.fail;
1911			register Bm_mask_t**	mask = e->re.bm.mask;
1912			Bm_mask_t		m;
1913			size_t			x;
1914
1915			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1916			for (;;)
1917			{
1918				while ((index += skip[buf[index]]) < mid);
1919				if (index < HIT)
1920				{
1921					DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1922					goto done;
1923				}
1924				index -= HIT;
1925				m = mask[n = e->re.bm.size - 1][buf[index]];
1926				do
1927				{
1928					if (!n--)
1929					{
1930						if (e->re.bm.back < 0)
1931							goto possible;
1932						if (advance)
1933						{
1934							i = index - e->re.bm.back;
1935							s += i;
1936							if (env->stack)
1937								env->best[0].rm_so += i;
1938							goto possible;
1939						}
1940						x = index;
1941						if (index < e->re.bm.back)
1942							index = 0;
1943						else
1944							index -= e->re.bm.back;
1945						while (index <= x)
1946						{
1947							if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1948							{
1949								if (env->stack)
1950									env->best[0].rm_so = index;
1951								n = env->nsub;
1952								goto hit;
1953							}
1954							index++;
1955						}
1956						index += e->re.bm.size;
1957						break;
1958					}
1959				} while (m &= mask[n][buf[--index]]);
1960				if ((index += fail[n + 1]) >= len)
1961					goto done;
1962			}
1963		}
1964 possible:
1965		n = env->nsub;
1966		e = e->next;
1967	}
1968	j = env->once || (flags & REG_LEFT);
1969	DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1970	while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1971	{
1972		if (j)
1973			goto done;
1974		i = MBSIZE(s);
1975		s += i;
1976		if ((unsigned char*)s > env->end - env->min)
1977			goto done;
1978		if (env->stack)
1979			env->best[0].rm_so += i;
1980	}
1981	if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1982		goto done;
1983 hit:
1984	if (k = env->error)
1985		goto done;
1986	if (i == CUT)
1987	{
1988		k = env->error = REG_NOMATCH;
1989		goto done;
1990	}
1991	if (!(env->flags & REG_NOSUB))
1992	{
1993		k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1994		for (i = j = m = 0; j < nmatch; i++)
1995			if (!i || !k || (i & 1))
1996			{
1997				if (i > n)
1998					match[j] = state.nomatch;
1999				else
2000					match[m = j] = env->best[i];
2001				j++;
2002			}
2003		if (k)
2004		{
2005			while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2006				m--;
2007			((regex_t*)p)->re_nsub = m;
2008		}
2009	}
2010	k = 0;
2011 done:
2012	stkold(stkstd, &env->stk);
2013	env->stk.base = 0;
2014	if (k > REG_NOMATCH)
2015		fatal(p->env->disc, k, NiL);
2016	return k;
2017}
2018
2019void
2020regfree(regex_t* p)
2021{
2022	Env_t*	env;
2023
2024	if (p && (env = p->env))
2025	{
2026#if _REG_subcomp
2027		if (env->sub)
2028		{
2029			regsubfree(p);
2030			p->re_sub = 0;
2031		}
2032#endif
2033		p->env = 0;
2034		if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2035		{
2036			drop(env->disc, env->rex);
2037			if (env->pos)
2038				vecclose(env->pos);
2039			if (env->bestpos)
2040				vecclose(env->bestpos);
2041			if (env->stk.base)
2042				stkold(stkstd, &env->stk);
2043			alloc(env->disc, env, 0);
2044		}
2045	}
2046}
2047
2048/*
2049 * 20120528: regoff_t changed from int to ssize_t
2050 */
2051
2052#if defined(__EXPORT__)
2053#define extern		__EXPORT__
2054#endif
2055
2056#undef	regnexec
2057#if _map_libc
2058#define regnexec	_ast_regnexec
2059#endif
2060
2061extern int
2062regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, oldregmatch_t* oldmatch, regflags_t flags)
2063{
2064	if (oldmatch)
2065	{
2066		regmatch_t*	match;
2067		ssize_t		i;
2068		int		r;
2069
2070		if (!(match = oldof(0, regmatch_t, nmatch, 0)))
2071			return -1;
2072		if (!(r = regnexec_20120528(p, s, len, nmatch, match, flags)))
2073			for (i = 0; i < nmatch; i++)
2074			{
2075				oldmatch[i].rm_so = match[i].rm_so;
2076				oldmatch[i].rm_eo = match[i].rm_eo;
2077			}
2078		free(match);
2079		return r;
2080	}
2081	return regnexec_20120528(p, s, len, 0, NiL, flags);
2082}
2083