1/*    regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below).  Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places.  In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
17/* The names of the functions have been changed from regcomp and
18 * regexec to  pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24#  ifndef PERL_IN_XSUB_RE
25#    define PERL_IN_XSUB_RE
26#  endif
27/* need access to debugger hooks */
28#  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29#    define DEBUGGING
30#  endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
34/* We *really* need to overwrite these symbols: */
35#  define Perl_regexec_flags my_regexec
36#  define Perl_regdump my_regdump
37#  define Perl_regprop my_regprop
38#  define Perl_re_intuit_start my_re_intuit_start
39/* *These* symbols are masked to allow static link. */
40#  define Perl_pregexec my_pregexec
41#  define Perl_reginitcolors my_reginitcolors
42#  define Perl_regclass_swash my_regclass_swash
43
44#  define PERL_NO_GET_CONTEXT
45#endif
46
47/*SUPPRESS 112*/
48/*
49 * pregcomp and pregexec -- regsub and regerror are not used in perl
50 *
51 *	Copyright (c) 1986 by University of Toronto.
52 *	Written by Henry Spencer.  Not derived from licensed software.
53 *
54 *	Permission is granted to anyone to use this software for any
55 *	purpose on any computer system, and to redistribute it freely,
56 *	subject to the following restrictions:
57 *
58 *	1. The author is not responsible for the consequences of use of
59 *		this software, no matter how awful, even if they arise
60 *		from defects in it.
61 *
62 *	2. The origin of this software must not be misrepresented, either
63 *		by explicit claim or by omission.
64 *
65 *	3. Altered versions must be plainly marked as such, and must not
66 *		be misrepresented as being the original software.
67 *
68 ****    Alterations to Henry's code are...
69 ****
70 ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
71 ****    2000, 2001, 2002, 2003, 2004, by Larry Wall and others
72 ****
73 ****    You may distribute under the terms of either the GNU General Public
74 ****    License or the Artistic License, as specified in the README file.
75 *
76 * Beware that some of this code is subtly aware of the way operator
77 * precedence is structured in regular expressions.  Serious changes in
78 * regular-expression syntax might require a total rethink.
79 */
80#include "EXTERN.h"
81#define PERL_IN_REGEXEC_C
82#include "perl.h"
83
84#include "regcomp.h"
85
86#define RF_tainted	1		/* tainted information used? */
87#define RF_warned	2		/* warned about big count? */
88#define RF_evaled	4		/* Did an EVAL with setting? */
89#define RF_utf8		8		/* String contains multibyte chars? */
90#define RF_false	16		/* odd number of nested negatives */
91
92#define UTF ((PL_reg_flags & RF_utf8) != 0)
93
94#define RS_init		1		/* eval environment created */
95#define RS_set		2		/* replsv value is set */
96
97#ifndef STATIC
98#define	STATIC	static
99#endif
100
101#define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
102
103/*
104 * Forwards.
105 */
106
107#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
108#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
109
110#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
111#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
112#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
113#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
114#define HOPc(pos,off) ((char*)HOP(pos,off))
115#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
116
117#define HOPBACK(pos, off) (		\
118    (PL_reg_match_utf8)			\
119	? reghopmaybe((U8*)pos, -off)	\
120    : (pos - off >= PL_bostr)		\
121	? (U8*)(pos - off)		\
122    : (U8*)NULL				\
123)
124#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
125
126#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
127#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
128#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
129#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
130#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
131#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
132
133#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END
134
135/* for use after a quantifier and before an EXACT-like node -- japhy */
136#define JUMPABLE(rn) ( \
137    OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
138    OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
139    OP(rn) == PLUS || OP(rn) == MINMOD || \
140    (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
141)
142
143#define HAS_TEXT(rn) ( \
144    PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
145)
146
147/*
148  Search for mandatory following text node; for lookahead, the text must
149  follow but for lookbehind (rn->flags != 0) we skip to the next step.
150*/
151#define FIND_NEXT_IMPT(rn) STMT_START { \
152    while (JUMPABLE(rn)) \
153	if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
154	    rn = NEXTOPER(NEXTOPER(rn)); \
155	else if (OP(rn) == PLUS) \
156	    rn = NEXTOPER(rn); \
157	else if (OP(rn) == IFMATCH) \
158	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
159	else rn += NEXT_OFF(rn); \
160} STMT_END
161
162static void restore_pos(pTHX_ void *arg);
163
164STATIC CHECKPOINT
165S_regcppush(pTHX_ I32 parenfloor)
166{
167    int retval = PL_savestack_ix;
168#define REGCP_PAREN_ELEMS 4
169    int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
170    int p;
171
172    if (paren_elems_to_push < 0)
173	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
174
175#define REGCP_OTHER_ELEMS 6
176    SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
177    for (p = PL_regsize; p > parenfloor; p--) {
178/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
179	SSPUSHINT(PL_regendp[p]);
180	SSPUSHINT(PL_regstartp[p]);
181	SSPUSHPTR(PL_reg_start_tmp[p]);
182	SSPUSHINT(p);
183    }
184/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
185    SSPUSHINT(PL_regsize);
186    SSPUSHINT(*PL_reglastparen);
187    SSPUSHINT(*PL_reglastcloseparen);
188    SSPUSHPTR(PL_reginput);
189#define REGCP_FRAME_ELEMS 2
190/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
191 * are needed for the regexp context stack bookkeeping. */
192    SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
193    SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
194
195    return retval;
196}
197
198/* These are needed since we do not localize EVAL nodes: */
199#  define REGCP_SET(cp)  DEBUG_r(PerlIO_printf(Perl_debug_log,		\
200			     "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
201			     (IV)PL_savestack_ix)); cp = PL_savestack_ix
202
203#  define REGCP_UNWIND(cp)  DEBUG_r(cp != PL_savestack_ix ?		\
204				PerlIO_printf(Perl_debug_log,		\
205				"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
206				(IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
207
208STATIC char *
209S_regcppop(pTHX)
210{
211    I32 i;
212    U32 paren = 0;
213    char *input;
214    I32 tmps;
215
216    /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
217    i = SSPOPINT;
218    assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
219    i = SSPOPINT; /* Parentheses elements to pop. */
220    input = (char *) SSPOPPTR;
221    *PL_reglastcloseparen = SSPOPINT;
222    *PL_reglastparen = SSPOPINT;
223    PL_regsize = SSPOPINT;
224
225    /* Now restore the parentheses context. */
226    for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
227	 i > 0; i -= REGCP_PAREN_ELEMS) {
228	paren = (U32)SSPOPINT;
229	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
230	PL_regstartp[paren] = SSPOPINT;
231	tmps = SSPOPINT;
232	if (paren <= *PL_reglastparen)
233	    PL_regendp[paren] = tmps;
234	DEBUG_r(
235	    PerlIO_printf(Perl_debug_log,
236			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
237			  (UV)paren, (IV)PL_regstartp[paren],
238			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
239			  (IV)PL_regendp[paren],
240			  (paren > *PL_reglastparen ? "(no)" : ""));
241	);
242    }
243    DEBUG_r(
244	if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
245	    PerlIO_printf(Perl_debug_log,
246			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
247			  (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
248	}
249    );
250#if 1
251    /* It would seem that the similar code in regtry()
252     * already takes care of this, and in fact it is in
253     * a better location to since this code can #if 0-ed out
254     * but the code in regtry() is needed or otherwise tests
255     * requiring null fields (pat.t#187 and split.t#{13,14}
256     * (as of patchlevel 7877)  will fail.  Then again,
257     * this code seems to be necessary or otherwise
258     * building DynaLoader will fail:
259     * "Error: '*' not in typemap in DynaLoader.xs, line 164"
260     * --jhi */
261    for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
262	if ((I32)paren > PL_regsize)
263	    PL_regstartp[paren] = -1;
264	PL_regendp[paren] = -1;
265    }
266#endif
267    return input;
268}
269
270STATIC char *
271S_regcp_set_to(pTHX_ I32 ss)
272{
273    I32 tmp = PL_savestack_ix;
274
275    PL_savestack_ix = ss;
276    regcppop();
277    PL_savestack_ix = tmp;
278    return Nullch;
279}
280
281typedef struct re_cc_state
282{
283    I32 ss;
284    regnode *node;
285    struct re_cc_state *prev;
286    CURCUR *cc;
287    regexp *re;
288} re_cc_state;
289
290#define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
291
292#define TRYPAREN(paren, n, input) {				\
293    if (paren) {						\
294	if (n) {						\
295	    PL_regstartp[paren] = HOPc(input, -1) - PL_bostr;	\
296	    PL_regendp[paren] = input - PL_bostr;		\
297	}							\
298	else							\
299	    PL_regendp[paren] = -1;				\
300    }								\
301    if (regmatch(next))						\
302	sayYES;							\
303    if (paren && n)						\
304	PL_regendp[paren] = -1;					\
305}
306
307
308/*
309 * pregexec and friends
310 */
311
312/*
313 - pregexec - match a regexp against a string
314 */
315I32
316Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
317	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
318/* strend: pointer to null at end of string */
319/* strbeg: real beginning of string */
320/* minend: end of match must be >=minend after stringarg. */
321/* nosave: For optimizations. */
322{
323    return
324	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
325		      nosave ? 0 : REXEC_COPY_STR);
326}
327
328STATIC void
329S_cache_re(pTHX_ regexp *prog)
330{
331    PL_regprecomp = prog->precomp;		/* Needed for FAIL. */
332#ifdef DEBUGGING
333    PL_regprogram = prog->program;
334#endif
335    PL_regnpar = prog->nparens;
336    PL_regdata = prog->data;
337    PL_reg_re = prog;
338}
339
340/*
341 * Need to implement the following flags for reg_anch:
342 *
343 * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
344 * USE_INTUIT_ML
345 * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
346 * INTUIT_AUTORITATIVE_ML
347 * INTUIT_ONCE_NOML		- Intuit can match in one location only.
348 * INTUIT_ONCE_ML
349 *
350 * Another flag for this function: SECOND_TIME (so that float substrs
351 * with giant delta may be not rechecked).
352 */
353
354/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
355
356/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
357   Otherwise, only SvCUR(sv) is used to get strbeg. */
358
359/* XXXX We assume that strpos is strbeg unless sv. */
360
361/* XXXX Some places assume that there is a fixed substring.
362	An update may be needed if optimizer marks as "INTUITable"
363	RExen without fixed substrings.  Similarly, it is assumed that
364	lengths of all the strings are no more than minlen, thus they
365	cannot come from lookahead.
366	(Or minlen should take into account lookahead.) */
367
368/* A failure to find a constant substring means that there is no need to make
369   an expensive call to REx engine, thus we celebrate a failure.  Similarly,
370   finding a substring too deep into the string means that less calls to
371   regtry() should be needed.
372
373   REx compiler's optimizer found 4 possible hints:
374	a) Anchored substring;
375	b) Fixed substring;
376	c) Whether we are anchored (beginning-of-line or \G);
377	d) First node (of those at offset 0) which may distingush positions;
378   We use a)b)d) and multiline-part of c), and try to find a position in the
379   string which does not contradict any of them.
380 */
381
382/* Most of decisions we do here should have been done at compile time.
383   The nodes of the REx which we used for the search should have been
384   deleted from the finite automaton. */
385
386char *
387Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
388		     char *strend, U32 flags, re_scream_pos_data *data)
389{
390    register I32 start_shift = 0;
391    /* Should be nonnegative! */
392    register I32 end_shift   = 0;
393    register char *s;
394    register SV *check;
395    char *strbeg;
396    char *t;
397    int do_utf8 = sv ? SvUTF8(sv) : 0;	/* if no sv we have to assume bytes */
398    I32 ml_anch;
399    register char *other_last = Nullch;	/* other substr checked before this */
400    char *check_at = Nullch;		/* check substr found at this pos */
401#ifdef DEBUGGING
402    char *i_strpos = strpos;
403    SV *dsv = PERL_DEBUG_PAD_ZERO(0);
404#endif
405    RX_MATCH_UTF8_set(prog,do_utf8);
406
407    if (prog->reganch & ROPT_UTF8) {
408	DEBUG_r(PerlIO_printf(Perl_debug_log,
409			      "UTF-8 regex...\n"));
410	PL_reg_flags |= RF_utf8;
411    }
412
413    DEBUG_r({
414	 char *s   = PL_reg_match_utf8 ?
415	                 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
416	                 strpos;
417	 int   len = PL_reg_match_utf8 ?
418	                 strlen(s) : strend - strpos;
419	 if (!PL_colorset)
420	      reginitcolors();
421	 if (PL_reg_match_utf8)
422	     DEBUG_r(PerlIO_printf(Perl_debug_log,
423				   "UTF-8 target...\n"));
424	 PerlIO_printf(Perl_debug_log,
425		       "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
426		       PL_colors[4],PL_colors[5],PL_colors[0],
427		       prog->precomp,
428		       PL_colors[1],
429		       (strlen(prog->precomp) > 60 ? "..." : ""),
430		       PL_colors[0],
431		       (int)(len > 60 ? 60 : len),
432		       s, PL_colors[1],
433		       (len > 60 ? "..." : "")
434	      );
435    });
436
437    /* CHR_DIST() would be more correct here but it makes things slow. */
438    if (prog->minlen > strend - strpos) {
439	DEBUG_r(PerlIO_printf(Perl_debug_log,
440			      "String too short... [re_intuit_start]\n"));
441	goto fail;
442    }
443    strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
444    PL_regeol = strend;
445    if (do_utf8) {
446	if (!prog->check_utf8 && prog->check_substr)
447	    to_utf8_substr(prog);
448	check = prog->check_utf8;
449    } else {
450	if (!prog->check_substr && prog->check_utf8)
451	    to_byte_substr(prog);
452	check = prog->check_substr;
453    }
454   if (check == &PL_sv_undef) {
455	DEBUG_r(PerlIO_printf(Perl_debug_log,
456		"Non-utf string cannot match utf check string\n"));
457	goto fail;
458    }
459    if (prog->reganch & ROPT_ANCH) {	/* Match at beg-of-str or after \n */
460	ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
461		     || ( (prog->reganch & ROPT_ANCH_BOL)
462			  && !PL_multiline ) );	/* Check after \n? */
463
464	if (!ml_anch) {
465	  if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
466				  | ROPT_IMPLICIT)) /* not a real BOL */
467	       /* SvCUR is not set on references: SvRV and SvPVX overlap */
468	       && sv && !SvROK(sv)
469	       && (strpos != strbeg)) {
470	      DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
471	      goto fail;
472	  }
473	  if (prog->check_offset_min == prog->check_offset_max &&
474	      !(prog->reganch & ROPT_CANY_SEEN)) {
475	    /* Substring at constant offset from beg-of-str... */
476	    I32 slen;
477
478	    s = HOP3c(strpos, prog->check_offset_min, strend);
479	    if (SvTAIL(check)) {
480		slen = SvCUR(check);	/* >= 1 */
481
482		if ( strend - s > slen || strend - s < slen - 1
483		     || (strend - s == slen && strend[-1] != '\n')) {
484		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
485		    goto fail_finish;
486		}
487		/* Now should match s[0..slen-2] */
488		slen--;
489		if (slen && (*SvPVX(check) != *s
490			     || (slen > 1
491				 && memNE(SvPVX(check), s, slen)))) {
492		  report_neq:
493		    DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
494		    goto fail_finish;
495		}
496	    }
497	    else if (*SvPVX(check) != *s
498		     || ((slen = SvCUR(check)) > 1
499			 && memNE(SvPVX(check), s, slen)))
500		goto report_neq;
501	    goto success_at_start;
502	  }
503	}
504	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
505	s = strpos;
506	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
507	end_shift = prog->minlen - start_shift -
508	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
509	if (!ml_anch) {
510	    I32 end = prog->check_offset_max + CHR_SVLEN(check)
511					 - (SvTAIL(check) != 0);
512	    I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
513
514	    if (end_shift < eshift)
515		end_shift = eshift;
516	}
517    }
518    else {				/* Can match at random position */
519	ml_anch = 0;
520	s = strpos;
521	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
522	/* Should be nonnegative! */
523	end_shift = prog->minlen - start_shift -
524	    CHR_SVLEN(check) + (SvTAIL(check) != 0);
525    }
526
527#ifdef DEBUGGING	/* 7/99: reports of failure (with the older version) */
528    if (end_shift < 0)
529	Perl_croak(aTHX_ "panic: end_shift");
530#endif
531
532  restart:
533    /* Find a possible match in the region s..strend by looking for
534       the "check" substring in the region corrected by start/end_shift. */
535    if (flags & REXEC_SCREAM) {
536	I32 p = -1;			/* Internal iterator of scream. */
537	I32 *pp = data ? data->scream_pos : &p;
538
539	if (PL_screamfirst[BmRARE(check)] >= 0
540	    || ( BmRARE(check) == '\n'
541		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
542		 && SvTAIL(check) ))
543	    s = screaminstr(sv, check,
544			    start_shift + (s - strbeg), end_shift, pp, 0);
545	else
546	    goto fail_finish;
547	/* we may be pointing at the wrong string */
548	if (s && RX_MATCH_COPIED(prog))
549	    s = strbeg + (s - SvPVX(sv));
550	if (data)
551	    *data->scream_olds = s;
552    }
553    else if (prog->reganch & ROPT_CANY_SEEN)
554	s = fbm_instr((U8*)(s + start_shift),
555		      (U8*)(strend - end_shift),
556		      check, PL_multiline ? FBMrf_MULTILINE : 0);
557    else
558	s = fbm_instr(HOP3(s, start_shift, strend),
559		      HOP3(strend, -end_shift, strbeg),
560		      check, PL_multiline ? FBMrf_MULTILINE : 0);
561
562    /* Update the count-of-usability, remove useless subpatterns,
563	unshift s.  */
564
565    DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
566			  (s ? "Found" : "Did not find"),
567			  (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
568			  PL_colors[0],
569			  (int)(SvCUR(check) - (SvTAIL(check)!=0)),
570			  SvPVX(check),
571			  PL_colors[1], (SvTAIL(check) ? "$" : ""),
572			  (s ? " at offset " : "...\n") ) );
573
574    if (!s)
575	goto fail_finish;
576
577    check_at = s;
578
579    /* Finish the diagnostic message */
580    DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
581
582    /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
583       Start with the other substr.
584       XXXX no SCREAM optimization yet - and a very coarse implementation
585       XXXX /ttx+/ results in anchored=`ttx', floating=`x'.  floating will
586		*always* match.  Probably should be marked during compile...
587       Probably it is right to do no SCREAM here...
588     */
589
590    if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
591	/* Take into account the "other" substring. */
592	/* XXXX May be hopelessly wrong for UTF... */
593	if (!other_last)
594	    other_last = strpos;
595	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
596	  do_other_anchored:
597	    {
598		char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
599		char *s1 = s;
600		SV* must;
601
602		t = s - prog->check_offset_max;
603		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
604		    && (!do_utf8
605			|| ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
606			    && t > strpos)))
607		    /* EMPTY */;
608		else
609		    t = strpos;
610		t = HOP3c(t, prog->anchored_offset, strend);
611		if (t < other_last)	/* These positions already checked */
612		    t = other_last;
613		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
614		if (last < last1)
615		    last1 = last;
616 /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
617		/* On end-of-str: see comment below. */
618		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
619		if (must == &PL_sv_undef) {
620		    s = (char*)NULL;
621		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
622		}
623		else
624		    s = fbm_instr(
625			(unsigned char*)t,
626			HOP3(HOP3(last1, prog->anchored_offset, strend)
627				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
628			must,
629			PL_multiline ? FBMrf_MULTILINE : 0
630		    );
631		DEBUG_r(PerlIO_printf(Perl_debug_log,
632			"%s anchored substr `%s%.*s%s'%s",
633			(s ? "Found" : "Contradicts"),
634			PL_colors[0],
635			  (int)(SvCUR(must)
636			  - (SvTAIL(must)!=0)),
637			  SvPVX(must),
638			  PL_colors[1], (SvTAIL(must) ? "$" : "")));
639		if (!s) {
640		    if (last1 >= last2) {
641			DEBUG_r(PerlIO_printf(Perl_debug_log,
642						", giving up...\n"));
643			goto fail_finish;
644		    }
645		    DEBUG_r(PerlIO_printf(Perl_debug_log,
646			", trying floating at offset %ld...\n",
647			(long)(HOP3c(s1, 1, strend) - i_strpos)));
648		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
649		    s = HOP3c(last, 1, strend);
650		    goto restart;
651		}
652		else {
653		    DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
654			  (long)(s - i_strpos)));
655		    t = HOP3c(s, -prog->anchored_offset, strbeg);
656		    other_last = HOP3c(s, 1, strend);
657		    s = s1;
658		    if (t == strpos)
659			goto try_at_start;
660		    goto try_at_offset;
661		}
662	    }
663	}
664	else {		/* Take into account the floating substring. */
665	    char *last, *last1;
666	    char *s1 = s;
667	    SV* must;
668
669	    t = HOP3c(s, -start_shift, strbeg);
670	    last1 = last =
671		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
672	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
673		last = HOP3c(t, prog->float_max_offset, strend);
674	    s = HOP3c(t, prog->float_min_offset, strend);
675	    if (s < other_last)
676		s = other_last;
677 /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
678	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
679	    /* fbm_instr() takes into account exact value of end-of-str
680	       if the check is SvTAIL(ed).  Since false positives are OK,
681	       and end-of-str is not later than strend we are OK. */
682	    if (must == &PL_sv_undef) {
683		s = (char*)NULL;
684		DEBUG_r(must = prog->float_utf8);	/* for debug message */
685	    }
686	    else
687		s = fbm_instr((unsigned char*)s,
688			      (unsigned char*)last + SvCUR(must)
689				  - (SvTAIL(must)!=0),
690			      must, PL_multiline ? FBMrf_MULTILINE : 0);
691	    DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
692		    (s ? "Found" : "Contradicts"),
693		    PL_colors[0],
694		      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
695		      SvPVX(must),
696		      PL_colors[1], (SvTAIL(must) ? "$" : "")));
697	    if (!s) {
698		if (last1 == last) {
699		    DEBUG_r(PerlIO_printf(Perl_debug_log,
700					    ", giving up...\n"));
701		    goto fail_finish;
702		}
703		DEBUG_r(PerlIO_printf(Perl_debug_log,
704		    ", trying anchored starting at offset %ld...\n",
705		    (long)(s1 + 1 - i_strpos)));
706		other_last = last;
707		s = HOP3c(t, 1, strend);
708		goto restart;
709	    }
710	    else {
711		DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
712		      (long)(s - i_strpos)));
713		other_last = s; /* Fix this later. --Hugo */
714		s = s1;
715		if (t == strpos)
716		    goto try_at_start;
717		goto try_at_offset;
718	    }
719	}
720    }
721
722    t = s - prog->check_offset_max;
723    if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
724        && (!do_utf8
725	    || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
726		 && t > strpos))) {
727	/* Fixed substring is found far enough so that the match
728	   cannot start at strpos. */
729      try_at_offset:
730	if (ml_anch && t[-1] != '\n') {
731	    /* Eventually fbm_*() should handle this, but often
732	       anchored_offset is not 0, so this check will not be wasted. */
733	    /* XXXX In the code below we prefer to look for "^" even in
734	       presence of anchored substrings.  And we search even
735	       beyond the found float position.  These pessimizations
736	       are historical artefacts only.  */
737	  find_anchor:
738	    while (t < strend - prog->minlen) {
739		if (*t == '\n') {
740		    if (t < check_at - prog->check_offset_min) {
741			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
742			    /* Since we moved from the found position,
743			       we definitely contradict the found anchored
744			       substr.  Due to the above check we do not
745			       contradict "check" substr.
746			       Thus we can arrive here only if check substr
747			       is float.  Redo checking for "other"=="fixed".
748			     */
749			    strpos = t + 1;
750			    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
751				PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
752			    goto do_other_anchored;
753			}
754			/* We don't contradict the found floating substring. */
755			/* XXXX Why not check for STCLASS? */
756			s = t + 1;
757			DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
758			    PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
759			goto set_useful;
760		    }
761		    /* Position contradicts check-string */
762		    /* XXXX probably better to look for check-string
763		       than for "\n", so one should lower the limit for t? */
764		    DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
765			PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
766		    other_last = strpos = s = t + 1;
767		    goto restart;
768		}
769		t++;
770	    }
771	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
772			PL_colors[0],PL_colors[1]));
773	    goto fail_finish;
774	}
775	else {
776	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
777			PL_colors[0],PL_colors[1]));
778	}
779	s = t;
780      set_useful:
781	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
782    }
783    else {
784	/* The found string does not prohibit matching at strpos,
785	   - no optimization of calling REx engine can be performed,
786	   unless it was an MBOL and we are not after MBOL,
787	   or a future STCLASS check will fail this. */
788      try_at_start:
789	/* Even in this situation we may use MBOL flag if strpos is offset
790	   wrt the start of the string. */
791	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
792	    && (strpos != strbeg) && strpos[-1] != '\n'
793	    /* May be due to an implicit anchor of m{.*foo}  */
794	    && !(prog->reganch & ROPT_IMPLICIT))
795	{
796	    t = strpos;
797	    goto find_anchor;
798	}
799	DEBUG_r( if (ml_anch)
800	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
801			(long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
802	);
803      success_at_start:
804	if (!(prog->reganch & ROPT_NAUGHTY)	/* XXXX If strpos moved? */
805	    && (do_utf8 ? (
806		prog->check_utf8		/* Could be deleted already */
807		&& --BmUSEFUL(prog->check_utf8) < 0
808		&& (prog->check_utf8 == prog->float_utf8)
809	    ) : (
810		prog->check_substr		/* Could be deleted already */
811		&& --BmUSEFUL(prog->check_substr) < 0
812		&& (prog->check_substr == prog->float_substr)
813	    )))
814	{
815	    /* If flags & SOMETHING - do not do it many times on the same match */
816	    DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
817	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
818	    if (do_utf8 ? prog->check_substr : prog->check_utf8)
819		SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
820	    prog->check_substr = prog->check_utf8 = Nullsv;	/* disable */
821	    prog->float_substr = prog->float_utf8 = Nullsv;	/* clear */
822	    check = Nullsv;			/* abort */
823	    s = strpos;
824	    /* XXXX This is a remnant of the old implementation.  It
825	            looks wasteful, since now INTUIT can use many
826	            other heuristics. */
827	    prog->reganch &= ~RE_USE_INTUIT;
828	}
829	else
830	    s = strpos;
831    }
832
833    /* Last resort... */
834    /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
835    if (prog->regstclass) {
836	/* minlen == 0 is possible if regstclass is \b or \B,
837	   and the fixed substr is ''$.
838	   Since minlen is already taken into account, s+1 is before strend;
839	   accidentally, minlen >= 1 guaranties no false positives at s + 1
840	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
841	   regstclass does not come from lookahead...  */
842	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
843	   This leaves EXACTF only, which is dealt with in find_byclass().  */
844	U8* str = (U8*)STRING(prog->regstclass);
845	int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
846		    ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
847		    : 1);
848	char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
849		? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
850		: (prog->float_substr || prog->float_utf8
851		   ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
852			   cl_l, strend)
853		   : strend);
854	char *startpos = strbeg;
855
856	t = s;
857	cache_re(prog);
858	s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
859	if (!s) {
860#ifdef DEBUGGING
861	    char *what = 0;
862#endif
863	    if (endpos == strend) {
864		DEBUG_r( PerlIO_printf(Perl_debug_log,
865				"Could not match STCLASS...\n") );
866		goto fail;
867	    }
868	    DEBUG_r( PerlIO_printf(Perl_debug_log,
869				   "This position contradicts STCLASS...\n") );
870	    if ((prog->reganch & ROPT_ANCH) && !ml_anch)
871		goto fail;
872	    /* Contradict one of substrings */
873	    if (prog->anchored_substr || prog->anchored_utf8) {
874		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
875		    DEBUG_r( what = "anchored" );
876		  hop_and_restart:
877		    s = HOP3c(t, 1, strend);
878		    if (s + start_shift + end_shift > strend) {
879			/* XXXX Should be taken into account earlier? */
880			DEBUG_r( PerlIO_printf(Perl_debug_log,
881					       "Could not match STCLASS...\n") );
882			goto fail;
883		    }
884		    if (!check)
885			goto giveup;
886		    DEBUG_r( PerlIO_printf(Perl_debug_log,
887				"Looking for %s substr starting at offset %ld...\n",
888				 what, (long)(s + start_shift - i_strpos)) );
889		    goto restart;
890		}
891		/* Have both, check_string is floating */
892		if (t + start_shift >= check_at) /* Contradicts floating=check */
893		    goto retry_floating_check;
894		/* Recheck anchored substring, but not floating... */
895		s = check_at;
896		if (!check)
897		    goto giveup;
898		DEBUG_r( PerlIO_printf(Perl_debug_log,
899			  "Looking for anchored substr starting at offset %ld...\n",
900			  (long)(other_last - i_strpos)) );
901		goto do_other_anchored;
902	    }
903	    /* Another way we could have checked stclass at the
904               current position only: */
905	    if (ml_anch) {
906		s = t = t + 1;
907		if (!check)
908		    goto giveup;
909		DEBUG_r( PerlIO_printf(Perl_debug_log,
910			  "Looking for /%s^%s/m starting at offset %ld...\n",
911			  PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
912		goto try_at_offset;
913	    }
914	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
915		goto fail;
916	    /* Check is floating subtring. */
917	  retry_floating_check:
918	    t = check_at - start_shift;
919	    DEBUG_r( what = "floating" );
920	    goto hop_and_restart;
921	}
922	if (t != s) {
923            DEBUG_r(PerlIO_printf(Perl_debug_log,
924			"By STCLASS: moving %ld --> %ld\n",
925                                  (long)(t - i_strpos), (long)(s - i_strpos))
926                   );
927        }
928        else {
929            DEBUG_r(PerlIO_printf(Perl_debug_log,
930                                  "Does not contradict STCLASS...\n");
931                   );
932        }
933    }
934  giveup:
935    DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
936			  PL_colors[4], (check ? "Guessed" : "Giving up"),
937			  PL_colors[5], (long)(s - i_strpos)) );
938    return s;
939
940  fail_finish:				/* Substring not found */
941    if (prog->check_substr || prog->check_utf8)		/* could be removed already */
942	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
943  fail:
944    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
945			  PL_colors[4],PL_colors[5]));
946    return Nullch;
947}
948
949/* We know what class REx starts with.  Try to find this position... */
950STATIC char *
951S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
952{
953	I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
954	char *m;
955	STRLEN ln;
956	STRLEN lnc;
957	register STRLEN uskip;
958	unsigned int c1;
959	unsigned int c2;
960	char *e;
961	register I32 tmp = 1;	/* Scratch variable? */
962	register bool do_utf8 = PL_reg_match_utf8;
963
964	/* We know what class it must start with. */
965	switch (OP(c)) {
966	case ANYOF:
967	    if (do_utf8) {
968		 while (s + (uskip = UTF8SKIP(s)) <= strend) {
969		      if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
970			  !UTF8_IS_INVARIANT((U8)s[0]) ?
971			  reginclass(c, (U8*)s, 0, do_utf8) :
972			  REGINCLASS(c, (U8*)s)) {
973			   if (tmp && (norun || regtry(prog, s)))
974				goto got_it;
975			   else
976				tmp = doevery;
977		      }
978		      else
979			   tmp = 1;
980		      s += uskip;
981		 }
982	    }
983	    else {
984		 while (s < strend) {
985		      STRLEN skip = 1;
986
987		      if (REGINCLASS(c, (U8*)s) ||
988			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
989			   /* The assignment of 2 is intentional:
990			    * for the folded sharp s, the skip is 2. */
991			   (skip = SHARP_S_SKIP))) {
992			   if (tmp && (norun || regtry(prog, s)))
993				goto got_it;
994			   else
995				tmp = doevery;
996		      }
997		      else
998			   tmp = 1;
999		      s += skip;
1000		 }
1001	    }
1002	    break;
1003	case CANY:
1004	    while (s < strend) {
1005	        if (tmp && (norun || regtry(prog, s)))
1006		    goto got_it;
1007		else
1008		    tmp = doevery;
1009		s++;
1010	    }
1011	    break;
1012	case EXACTF:
1013	    m   = STRING(c);
1014	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1015	    lnc = (I32) ln;	/* length to match in characters */
1016	    if (UTF) {
1017	        STRLEN ulen1, ulen2;
1018		U8 *sm = (U8 *) m;
1019		U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
1020		U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
1021
1022		to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1023		to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1024
1025		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC,
1026				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1027		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
1028				    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1029		lnc = 0;
1030		while (sm < ((U8 *) m + ln)) {
1031		    lnc++;
1032		    sm += UTF8SKIP(sm);
1033		}
1034	    }
1035	    else {
1036		c1 = *(U8*)m;
1037		c2 = PL_fold[c1];
1038	    }
1039	    goto do_exactf;
1040	case EXACTFL:
1041	    m   = STRING(c);
1042	    ln  = STR_LEN(c);
1043	    lnc = (I32) ln;
1044	    c1 = *(U8*)m;
1045	    c2 = PL_fold_locale[c1];
1046	  do_exactf:
1047	    e = HOP3c(strend, -((I32)lnc), s);
1048
1049	    if (norun && e < s)
1050		e = s;			/* Due to minlen logic of intuit() */
1051
1052	    /* The idea in the EXACTF* cases is to first find the
1053	     * first character of the EXACTF* node and then, if
1054	     * necessary, case-insensitively compare the full
1055	     * text of the node.  The c1 and c2 are the first
1056	     * characters (though in Unicode it gets a bit
1057	     * more complicated because there are more cases
1058	     * than just upper and lower: one needs to use
1059	     * the so-called folding case for case-insensitive
1060	     * matching (called "loose matching" in Unicode).
1061	     * ibcmp_utf8() will do just that. */
1062
1063	    if (do_utf8) {
1064	        UV c, f;
1065	        U8 tmpbuf [UTF8_MAXLEN+1];
1066		U8 foldbuf[UTF8_MAXLEN_FOLD+1];
1067		STRLEN len, foldlen;
1068
1069		if (c1 == c2) {
1070		    /* Upper and lower of 1st char are equal -
1071		     * probably not a "letter". */
1072		    while (s <= e) {
1073		        c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1074					   ckWARN(WARN_UTF8) ?
1075					   0 : UTF8_ALLOW_ANY);
1076			if ( c == c1
1077			     && (ln == len ||
1078				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1079					    m, (char **)0, ln, (bool)UTF))
1080			     && (norun || regtry(prog, s)) )
1081			    goto got_it;
1082			else {
1083			     uvchr_to_utf8(tmpbuf, c);
1084			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1085			     if ( f != c
1086				  && (f == c1 || f == c2)
1087				  && (ln == foldlen ||
1088				      !ibcmp_utf8((char *) foldbuf,
1089						  (char **)0, foldlen, do_utf8,
1090						  m,
1091						  (char **)0, ln, (bool)UTF))
1092				  && (norun || regtry(prog, s)) )
1093				  goto got_it;
1094			}
1095			s += len;
1096		    }
1097		}
1098		else {
1099		    while (s <= e) {
1100		      c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1101					   ckWARN(WARN_UTF8) ?
1102					   0 : UTF8_ALLOW_ANY);
1103
1104			/* Handle some of the three Greek sigmas cases.
1105			 * Note that not all the possible combinations
1106			 * are handled here: some of them are handled
1107			 * by the standard folding rules, and some of
1108			 * them (the character class or ANYOF cases)
1109			 * are handled during compiletime in
1110			 * regexec.c:S_regclass(). */
1111			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1112			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1113			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1114
1115			if ( (c == c1 || c == c2)
1116			     && (ln == len ||
1117				 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
1118					    m, (char **)0, ln, (bool)UTF))
1119			     && (norun || regtry(prog, s)) )
1120			    goto got_it;
1121			else {
1122			     uvchr_to_utf8(tmpbuf, c);
1123			     f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1124			     if ( f != c
1125				  && (f == c1 || f == c2)
1126				  && (ln == foldlen ||
1127				      !ibcmp_utf8((char *) foldbuf,
1128						  (char **)0, foldlen, do_utf8,
1129						  m,
1130						  (char **)0, ln, (bool)UTF))
1131				  && (norun || regtry(prog, s)) )
1132				  goto got_it;
1133			}
1134			s += len;
1135		    }
1136		}
1137	    }
1138	    else {
1139		if (c1 == c2)
1140		    while (s <= e) {
1141			if ( *(U8*)s == c1
1142			     && (ln == 1 || !(OP(c) == EXACTF
1143					      ? ibcmp(s, m, ln)
1144					      : ibcmp_locale(s, m, ln)))
1145			     && (norun || regtry(prog, s)) )
1146			    goto got_it;
1147			s++;
1148		    }
1149		else
1150		    while (s <= e) {
1151			if ( (*(U8*)s == c1 || *(U8*)s == c2)
1152			     && (ln == 1 || !(OP(c) == EXACTF
1153					      ? ibcmp(s, m, ln)
1154					      : ibcmp_locale(s, m, ln)))
1155			     && (norun || regtry(prog, s)) )
1156			    goto got_it;
1157			s++;
1158		    }
1159	    }
1160	    break;
1161	case BOUNDL:
1162	    PL_reg_flags |= RF_tainted;
1163	    /* FALL THROUGH */
1164	case BOUND:
1165	    if (do_utf8) {
1166		if (s == PL_bostr)
1167		    tmp = '\n';
1168		else {
1169		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1170
1171		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1172		}
1173		tmp = ((OP(c) == BOUND ?
1174			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1175		LOAD_UTF8_CHARCLASS(alnum,"a");
1176		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1177		    if (tmp == !(OP(c) == BOUND ?
1178				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1179				 isALNUM_LC_utf8((U8*)s)))
1180		    {
1181			tmp = !tmp;
1182			if ((norun || regtry(prog, s)))
1183			    goto got_it;
1184		    }
1185		    s += uskip;
1186		}
1187	    }
1188	    else {
1189		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1190		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1191		while (s < strend) {
1192		    if (tmp ==
1193			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1194			tmp = !tmp;
1195			if ((norun || regtry(prog, s)))
1196			    goto got_it;
1197		    }
1198		    s++;
1199		}
1200	    }
1201	    if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
1202		goto got_it;
1203	    break;
1204	case NBOUNDL:
1205	    PL_reg_flags |= RF_tainted;
1206	    /* FALL THROUGH */
1207	case NBOUND:
1208	    if (do_utf8) {
1209		if (s == PL_bostr)
1210		    tmp = '\n';
1211		else {
1212		    U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1213
1214		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
1215		}
1216		tmp = ((OP(c) == NBOUND ?
1217			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1218		LOAD_UTF8_CHARCLASS(alnum,"a");
1219		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1220		    if (tmp == !(OP(c) == NBOUND ?
1221				 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1222				 isALNUM_LC_utf8((U8*)s)))
1223			tmp = !tmp;
1224		    else if ((norun || regtry(prog, s)))
1225			goto got_it;
1226		    s += uskip;
1227		}
1228	    }
1229	    else {
1230		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1231		tmp = ((OP(c) == NBOUND ?
1232			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1233		while (s < strend) {
1234		    if (tmp ==
1235			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1236			tmp = !tmp;
1237		    else if ((norun || regtry(prog, s)))
1238			goto got_it;
1239		    s++;
1240		}
1241	    }
1242	    if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1243		goto got_it;
1244	    break;
1245	case ALNUM:
1246	    if (do_utf8) {
1247		LOAD_UTF8_CHARCLASS(alnum,"a");
1248		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1249		    if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1250			if (tmp && (norun || regtry(prog, s)))
1251			    goto got_it;
1252			else
1253			    tmp = doevery;
1254		    }
1255		    else
1256			tmp = 1;
1257		    s += uskip;
1258		}
1259	    }
1260	    else {
1261		while (s < strend) {
1262		    if (isALNUM(*s)) {
1263			if (tmp && (norun || regtry(prog, s)))
1264			    goto got_it;
1265			else
1266			    tmp = doevery;
1267		    }
1268		    else
1269			tmp = 1;
1270		    s++;
1271		}
1272	    }
1273	    break;
1274	case ALNUML:
1275	    PL_reg_flags |= RF_tainted;
1276	    if (do_utf8) {
1277		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1278		    if (isALNUM_LC_utf8((U8*)s)) {
1279			if (tmp && (norun || regtry(prog, s)))
1280			    goto got_it;
1281			else
1282			    tmp = doevery;
1283		    }
1284		    else
1285			tmp = 1;
1286		    s += uskip;
1287		}
1288	    }
1289	    else {
1290		while (s < strend) {
1291		    if (isALNUM_LC(*s)) {
1292			if (tmp && (norun || regtry(prog, s)))
1293			    goto got_it;
1294			else
1295			    tmp = doevery;
1296		    }
1297		    else
1298			tmp = 1;
1299		    s++;
1300		}
1301	    }
1302	    break;
1303	case NALNUM:
1304	    if (do_utf8) {
1305		LOAD_UTF8_CHARCLASS(alnum,"a");
1306		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1307		    if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
1308			if (tmp && (norun || regtry(prog, s)))
1309			    goto got_it;
1310			else
1311			    tmp = doevery;
1312		    }
1313		    else
1314			tmp = 1;
1315		    s += uskip;
1316		}
1317	    }
1318	    else {
1319		while (s < strend) {
1320		    if (!isALNUM(*s)) {
1321			if (tmp && (norun || regtry(prog, s)))
1322			    goto got_it;
1323			else
1324			    tmp = doevery;
1325		    }
1326		    else
1327			tmp = 1;
1328		    s++;
1329		}
1330	    }
1331	    break;
1332	case NALNUML:
1333	    PL_reg_flags |= RF_tainted;
1334	    if (do_utf8) {
1335		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1336		    if (!isALNUM_LC_utf8((U8*)s)) {
1337			if (tmp && (norun || regtry(prog, s)))
1338			    goto got_it;
1339			else
1340			    tmp = doevery;
1341		    }
1342		    else
1343			tmp = 1;
1344		    s += uskip;
1345		}
1346	    }
1347	    else {
1348		while (s < strend) {
1349		    if (!isALNUM_LC(*s)) {
1350			if (tmp && (norun || regtry(prog, s)))
1351			    goto got_it;
1352			else
1353			    tmp = doevery;
1354		    }
1355		    else
1356			tmp = 1;
1357		    s++;
1358		}
1359	    }
1360	    break;
1361	case SPACE:
1362	    if (do_utf8) {
1363		LOAD_UTF8_CHARCLASS(space," ");
1364		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1365		    if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
1366			if (tmp && (norun || regtry(prog, s)))
1367			    goto got_it;
1368			else
1369			    tmp = doevery;
1370		    }
1371		    else
1372			tmp = 1;
1373		    s += uskip;
1374		}
1375	    }
1376	    else {
1377		while (s < strend) {
1378		    if (isSPACE(*s)) {
1379			if (tmp && (norun || regtry(prog, s)))
1380			    goto got_it;
1381			else
1382			    tmp = doevery;
1383		    }
1384		    else
1385			tmp = 1;
1386		    s++;
1387		}
1388	    }
1389	    break;
1390	case SPACEL:
1391	    PL_reg_flags |= RF_tainted;
1392	    if (do_utf8) {
1393		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1394		    if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1395			if (tmp && (norun || regtry(prog, s)))
1396			    goto got_it;
1397			else
1398			    tmp = doevery;
1399		    }
1400		    else
1401			tmp = 1;
1402		    s += uskip;
1403		}
1404	    }
1405	    else {
1406		while (s < strend) {
1407		    if (isSPACE_LC(*s)) {
1408			if (tmp && (norun || regtry(prog, s)))
1409			    goto got_it;
1410			else
1411			    tmp = doevery;
1412		    }
1413		    else
1414			tmp = 1;
1415		    s++;
1416		}
1417	    }
1418	    break;
1419	case NSPACE:
1420	    if (do_utf8) {
1421		LOAD_UTF8_CHARCLASS(space," ");
1422		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1423		    if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
1424			if (tmp && (norun || regtry(prog, s)))
1425			    goto got_it;
1426			else
1427			    tmp = doevery;
1428		    }
1429		    else
1430			tmp = 1;
1431		    s += uskip;
1432		}
1433	    }
1434	    else {
1435		while (s < strend) {
1436		    if (!isSPACE(*s)) {
1437			if (tmp && (norun || regtry(prog, s)))
1438			    goto got_it;
1439			else
1440			    tmp = doevery;
1441		    }
1442		    else
1443			tmp = 1;
1444		    s++;
1445		}
1446	    }
1447	    break;
1448	case NSPACEL:
1449	    PL_reg_flags |= RF_tainted;
1450	    if (do_utf8) {
1451		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1452		    if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1453			if (tmp && (norun || regtry(prog, s)))
1454			    goto got_it;
1455			else
1456			    tmp = doevery;
1457		    }
1458		    else
1459			tmp = 1;
1460		    s += uskip;
1461		}
1462	    }
1463	    else {
1464		while (s < strend) {
1465		    if (!isSPACE_LC(*s)) {
1466			if (tmp && (norun || regtry(prog, s)))
1467			    goto got_it;
1468			else
1469			    tmp = doevery;
1470		    }
1471		    else
1472			tmp = 1;
1473		    s++;
1474		}
1475	    }
1476	    break;
1477	case DIGIT:
1478	    if (do_utf8) {
1479		LOAD_UTF8_CHARCLASS(digit,"0");
1480		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1481		    if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1482			if (tmp && (norun || regtry(prog, s)))
1483			    goto got_it;
1484			else
1485			    tmp = doevery;
1486		    }
1487		    else
1488			tmp = 1;
1489		    s += uskip;
1490		}
1491	    }
1492	    else {
1493		while (s < strend) {
1494		    if (isDIGIT(*s)) {
1495			if (tmp && (norun || regtry(prog, s)))
1496			    goto got_it;
1497			else
1498			    tmp = doevery;
1499		    }
1500		    else
1501			tmp = 1;
1502		    s++;
1503		}
1504	    }
1505	    break;
1506	case DIGITL:
1507	    PL_reg_flags |= RF_tainted;
1508	    if (do_utf8) {
1509		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1510		    if (isDIGIT_LC_utf8((U8*)s)) {
1511			if (tmp && (norun || regtry(prog, s)))
1512			    goto got_it;
1513			else
1514			    tmp = doevery;
1515		    }
1516		    else
1517			tmp = 1;
1518		    s += uskip;
1519		}
1520	    }
1521	    else {
1522		while (s < strend) {
1523		    if (isDIGIT_LC(*s)) {
1524			if (tmp && (norun || regtry(prog, s)))
1525			    goto got_it;
1526			else
1527			    tmp = doevery;
1528		    }
1529		    else
1530			tmp = 1;
1531		    s++;
1532		}
1533	    }
1534	    break;
1535	case NDIGIT:
1536	    if (do_utf8) {
1537		LOAD_UTF8_CHARCLASS(digit,"0");
1538		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1539		    if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
1540			if (tmp && (norun || regtry(prog, s)))
1541			    goto got_it;
1542			else
1543			    tmp = doevery;
1544		    }
1545		    else
1546			tmp = 1;
1547		    s += uskip;
1548		}
1549	    }
1550	    else {
1551		while (s < strend) {
1552		    if (!isDIGIT(*s)) {
1553			if (tmp && (norun || regtry(prog, s)))
1554			    goto got_it;
1555			else
1556			    tmp = doevery;
1557		    }
1558		    else
1559			tmp = 1;
1560		    s++;
1561		}
1562	    }
1563	    break;
1564	case NDIGITL:
1565	    PL_reg_flags |= RF_tainted;
1566	    if (do_utf8) {
1567		while (s + (uskip = UTF8SKIP(s)) <= strend) {
1568		    if (!isDIGIT_LC_utf8((U8*)s)) {
1569			if (tmp && (norun || regtry(prog, s)))
1570			    goto got_it;
1571			else
1572			    tmp = doevery;
1573		    }
1574		    else
1575			tmp = 1;
1576		    s += uskip;
1577		}
1578	    }
1579	    else {
1580		while (s < strend) {
1581		    if (!isDIGIT_LC(*s)) {
1582			if (tmp && (norun || regtry(prog, s)))
1583			    goto got_it;
1584			else
1585			    tmp = doevery;
1586		    }
1587		    else
1588			tmp = 1;
1589		    s++;
1590		}
1591	    }
1592	    break;
1593	default:
1594	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1595	    break;
1596	}
1597	return 0;
1598      got_it:
1599	return s;
1600}
1601
1602/*
1603 - regexec_flags - match a regexp against a string
1604 */
1605I32
1606Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1607	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1608/* strend: pointer to null at end of string */
1609/* strbeg: real beginning of string */
1610/* minend: end of match must be >=minend after stringarg. */
1611/* data: May be used for some additional optimizations. */
1612/* nosave: For optimizations. */
1613{
1614    register char *s;
1615    register regnode *c;
1616    register char *startpos = stringarg;
1617    I32 minlen;		/* must match at least this many chars */
1618    I32 dontbother = 0;	/* how many characters not to try at end */
1619    /* I32 start_shift = 0; */		/* Offset of the start to find
1620					 constant substr. */		/* CC */
1621    I32 end_shift = 0;			/* Same for the end. */		/* CC */
1622    I32 scream_pos = -1;		/* Internal iterator of scream. */
1623    char *scream_olds;
1624    SV* oreplsv = GvSV(PL_replgv);
1625    bool do_utf8 = DO_UTF8(sv);
1626#ifdef DEBUGGING
1627    SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1628    SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
1629#endif
1630    RX_MATCH_UTF8_set(prog,do_utf8);
1631
1632    PL_regcc = 0;
1633
1634    cache_re(prog);
1635#ifdef DEBUGGING
1636    PL_regnarrate = DEBUG_r_TEST;
1637#endif
1638
1639    /* Be paranoid... */
1640    if (prog == NULL || startpos == NULL) {
1641	Perl_croak(aTHX_ "NULL regexp parameter");
1642	return 0;
1643    }
1644
1645    minlen = prog->minlen;
1646    if (strend - startpos < minlen) {
1647        DEBUG_r(PerlIO_printf(Perl_debug_log,
1648			      "String too short [regexec_flags]...\n"));
1649	goto phooey;
1650    }
1651
1652    /* Check validity of program. */
1653    if (UCHARAT(prog->program) != REG_MAGIC) {
1654	Perl_croak(aTHX_ "corrupted regexp program");
1655    }
1656
1657    PL_reg_flags = 0;
1658    PL_reg_eval_set = 0;
1659    PL_reg_maxiter = 0;
1660
1661    if (prog->reganch & ROPT_UTF8)
1662	PL_reg_flags |= RF_utf8;
1663
1664    /* Mark beginning of line for ^ and lookbehind. */
1665    PL_regbol = startpos;
1666    PL_bostr  = strbeg;
1667    PL_reg_sv = sv;
1668
1669    /* Mark end of line for $ (and such) */
1670    PL_regeol = strend;
1671
1672    /* see how far we have to get to not match where we matched before */
1673    PL_regtill = startpos+minend;
1674
1675    /* We start without call_cc context.  */
1676    PL_reg_call_cc = 0;
1677
1678    /* If there is a "must appear" string, look for it. */
1679    s = startpos;
1680
1681    if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1682	MAGIC *mg;
1683
1684	if (flags & REXEC_IGNOREPOS)	/* Means: check only at start */
1685	    PL_reg_ganch = startpos;
1686	else if (sv && SvTYPE(sv) >= SVt_PVMG
1687		  && SvMAGIC(sv)
1688		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1689		  && mg->mg_len >= 0) {
1690	    PL_reg_ganch = strbeg + mg->mg_len;	/* Defined pos() */
1691	    if (prog->reganch & ROPT_ANCH_GPOS) {
1692	        if (s > PL_reg_ganch)
1693		    goto phooey;
1694		s = PL_reg_ganch;
1695	    }
1696	}
1697	else				/* pos() not defined */
1698	    PL_reg_ganch = strbeg;
1699    }
1700
1701    if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
1702	re_scream_pos_data d;
1703
1704	d.scream_olds = &scream_olds;
1705	d.scream_pos = &scream_pos;
1706	s = re_intuit_start(prog, sv, s, strend, flags, &d);
1707	if (!s) {
1708	    DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1709	    goto phooey;	/* not present */
1710	}
1711    }
1712
1713    DEBUG_r({
1714	 char *s0   = UTF ?
1715	   pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1716			  UNI_DISPLAY_REGEX) :
1717	   prog->precomp;
1718	 int   len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1719	 char *s1   = do_utf8 ? sv_uni_display(dsv1, sv, 60,
1720					       UNI_DISPLAY_REGEX) : startpos;
1721	 int   len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
1722	 if (!PL_colorset)
1723	     reginitcolors();
1724	 PerlIO_printf(Perl_debug_log,
1725		       "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
1726		       PL_colors[4],PL_colors[5],PL_colors[0],
1727		       len0, len0, s0,
1728		       PL_colors[1],
1729		       len0 > 60 ? "..." : "",
1730		       PL_colors[0],
1731		       (int)(len1 > 60 ? 60 : len1),
1732		       s1, PL_colors[1],
1733		       (len1 > 60 ? "..." : "")
1734	      );
1735    });
1736
1737    /* Simplest case:  anchored match need be tried only once. */
1738    /*  [unless only anchor is BOL and multiline is set] */
1739    if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1740	if (s == startpos && regtry(prog, startpos))
1741	    goto got_it;
1742	else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1743		 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1744	{
1745	    char *end;
1746
1747	    if (minlen)
1748		dontbother = minlen - 1;
1749	    end = HOP3c(strend, -dontbother, strbeg) - 1;
1750	    /* for multiline we only have to try after newlines */
1751	    if (prog->check_substr || prog->check_utf8) {
1752		if (s == startpos)
1753		    goto after_try;
1754		while (1) {
1755		    if (regtry(prog, s))
1756			goto got_it;
1757		  after_try:
1758		    if (s >= end)
1759			goto phooey;
1760		    if (prog->reganch & RE_USE_INTUIT) {
1761			s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1762			if (!s)
1763			    goto phooey;
1764		    }
1765		    else
1766			s++;
1767		}
1768	    } else {
1769		if (s > startpos)
1770		    s--;
1771		while (s < end) {
1772		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
1773			if (regtry(prog, s))
1774			    goto got_it;
1775		    }
1776		}
1777	    }
1778	}
1779	goto phooey;
1780    } else if (prog->reganch & ROPT_ANCH_GPOS) {
1781	if (regtry(prog, PL_reg_ganch))
1782	    goto got_it;
1783	goto phooey;
1784    }
1785
1786    /* Messy cases:  unanchored match. */
1787    if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
1788	/* we have /x+whatever/ */
1789	/* it must be a one character string (XXXX Except UTF?) */
1790	char ch;
1791#ifdef DEBUGGING
1792	int did_match = 0;
1793#endif
1794	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1795	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1796	ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1797
1798	if (do_utf8) {
1799	    while (s < strend) {
1800		if (*s == ch) {
1801		    DEBUG_r( did_match = 1 );
1802		    if (regtry(prog, s)) goto got_it;
1803		    s += UTF8SKIP(s);
1804		    while (s < strend && *s == ch)
1805			s += UTF8SKIP(s);
1806		}
1807		s += UTF8SKIP(s);
1808	    }
1809	}
1810	else {
1811	    while (s < strend) {
1812		if (*s == ch) {
1813		    DEBUG_r( did_match = 1 );
1814		    if (regtry(prog, s)) goto got_it;
1815		    s++;
1816		    while (s < strend && *s == ch)
1817			s++;
1818		}
1819		s++;
1820	    }
1821	}
1822	DEBUG_r(if (!did_match)
1823		PerlIO_printf(Perl_debug_log,
1824                                  "Did not find anchored character...\n")
1825               );
1826    }
1827    /*SUPPRESS 560*/
1828    else if (prog->anchored_substr != Nullsv
1829	      || prog->anchored_utf8 != Nullsv
1830	      || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1831		  && prog->float_max_offset < strend - s)) {
1832	SV *must;
1833	I32 back_max;
1834	I32 back_min;
1835	char *last;
1836	char *last1;		/* Last position checked before */
1837#ifdef DEBUGGING
1838	int did_match = 0;
1839#endif
1840	if (prog->anchored_substr || prog->anchored_utf8) {
1841	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1842		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1843	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1844	    back_max = back_min = prog->anchored_offset;
1845	} else {
1846	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1847		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1848	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1849	    back_max = prog->float_max_offset;
1850	    back_min = prog->float_min_offset;
1851	}
1852	if (must == &PL_sv_undef)
1853	    /* could not downgrade utf8 check substring, so must fail */
1854	    goto phooey;
1855
1856	last = HOP3c(strend,	/* Cannot start after this */
1857			  -(I32)(CHR_SVLEN(must)
1858				 - (SvTAIL(must) != 0) + back_min), strbeg);
1859
1860	if (s > PL_bostr)
1861	    last1 = HOPc(s, -1);
1862	else
1863	    last1 = s - 1;	/* bogus */
1864
1865	/* XXXX check_substr already used to find `s', can optimize if
1866	   check_substr==must. */
1867	scream_pos = -1;
1868	dontbother = end_shift;
1869	strend = HOPc(strend, -dontbother);
1870	while ( (s <= last) &&
1871		((flags & REXEC_SCREAM)
1872		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
1873				    end_shift, &scream_pos, 0))
1874		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
1875				  (unsigned char*)strend, must,
1876				  PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1877	    /* we may be pointing at the wrong string */
1878	    if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1879		s = strbeg + (s - SvPVX(sv));
1880	    DEBUG_r( did_match = 1 );
1881	    if (HOPc(s, -back_max) > last1) {
1882		last1 = HOPc(s, -back_min);
1883		s = HOPc(s, -back_max);
1884	    }
1885	    else {
1886		char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1887
1888		last1 = HOPc(s, -back_min);
1889		s = t;
1890	    }
1891	    if (do_utf8) {
1892		while (s <= last1) {
1893		    if (regtry(prog, s))
1894			goto got_it;
1895		    s += UTF8SKIP(s);
1896		}
1897	    }
1898	    else {
1899		while (s <= last1) {
1900		    if (regtry(prog, s))
1901			goto got_it;
1902		    s++;
1903		}
1904	    }
1905	}
1906	DEBUG_r(if (!did_match)
1907                    PerlIO_printf(Perl_debug_log,
1908                                  "Did not find %s substr `%s%.*s%s'%s...\n",
1909			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
1910			       ? "anchored" : "floating"),
1911			      PL_colors[0],
1912			      (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1913			      SvPVX(must),
1914                                  PL_colors[1], (SvTAIL(must) ? "$" : ""))
1915               );
1916	goto phooey;
1917    }
1918    else if ((c = prog->regstclass)) {
1919	if (minlen) {
1920	    I32 op = (U8)OP(prog->regstclass);
1921	    /* don't bother with what can't match */
1922	    if (PL_regkind[op] != EXACT && op != CANY)
1923	        strend = HOPc(strend, -(minlen - 1));
1924	}
1925	DEBUG_r({
1926	    SV *prop = sv_newmortal();
1927	    char *s0;
1928	    char *s1;
1929	    int len0;
1930	    int len1;
1931
1932	    regprop(prop, c);
1933	    s0 = UTF ?
1934	      pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
1935			     UNI_DISPLAY_REGEX) :
1936	      SvPVX(prop);
1937	    len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1938	    s1 = UTF ?
1939	      sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
1940	    len1 = UTF ? SvCUR(dsv1) : strend - s;
1941	    PerlIO_printf(Perl_debug_log,
1942			  "Matching stclass `%*.*s' against `%*.*s'\n",
1943			  len0, len0, s0,
1944			  len1, len1, s1);
1945	});
1946  	if (find_byclass(prog, c, s, strend, startpos, 0))
1947	    goto got_it;
1948	DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1949    }
1950    else {
1951	dontbother = 0;
1952	if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1953	    /* Trim the end. */
1954	    char *last;
1955	    SV* float_real;
1956
1957	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1958		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1959	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
1960
1961	    if (flags & REXEC_SCREAM) {
1962		last = screaminstr(sv, float_real, s - strbeg,
1963				   end_shift, &scream_pos, 1); /* last one */
1964		if (!last)
1965		    last = scream_olds; /* Only one occurrence. */
1966		/* we may be pointing at the wrong string */
1967		else if (RX_MATCH_COPIED(prog))
1968		    s = strbeg + (s - SvPVX(sv));
1969	    }
1970	    else {
1971		STRLEN len;
1972		char *little = SvPV(float_real, len);
1973
1974		if (SvTAIL(float_real)) {
1975		    if (memEQ(strend - len + 1, little, len - 1))
1976			last = strend - len + 1;
1977		    else if (!PL_multiline)
1978			last = memEQ(strend - len, little, len)
1979			    ? strend - len : Nullch;
1980		    else
1981			goto find_last;
1982		} else {
1983		  find_last:
1984		    if (len)
1985			last = rninstr(s, strend, little, little + len);
1986		    else
1987			last = strend;	/* matching `$' */
1988		}
1989	    }
1990	    if (last == NULL) {
1991		DEBUG_r(PerlIO_printf(Perl_debug_log,
1992				      "%sCan't trim the tail, match fails (should not happen)%s\n",
1993				      PL_colors[4],PL_colors[5]));
1994		goto phooey; /* Should not happen! */
1995	    }
1996	    dontbother = strend - last + prog->float_min_offset;
1997	}
1998	if (minlen && (dontbother < minlen))
1999	    dontbother = minlen - 1;
2000	strend -= dontbother; 		   /* this one's always in bytes! */
2001	/* We don't know much -- general case. */
2002	if (do_utf8) {
2003	    for (;;) {
2004		if (regtry(prog, s))
2005		    goto got_it;
2006		if (s >= strend)
2007		    break;
2008		s += UTF8SKIP(s);
2009	    };
2010	}
2011	else {
2012	    do {
2013		if (regtry(prog, s))
2014		    goto got_it;
2015	    } while (s++ < strend);
2016	}
2017    }
2018
2019    /* Failure. */
2020    goto phooey;
2021
2022got_it:
2023    RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2024
2025    if (PL_reg_eval_set) {
2026	/* Preserve the current value of $^R */
2027	if (oreplsv != GvSV(PL_replgv))
2028	    sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2029						  restored, the value remains
2030						  the same. */
2031	restore_pos(aTHX_ 0);
2032    }
2033
2034    /* make sure $`, $&, $', and $digit will work later */
2035    if ( !(flags & REXEC_NOT_FIRST) ) {
2036	if (RX_MATCH_COPIED(prog)) {
2037	    Safefree(prog->subbeg);
2038	    RX_MATCH_COPIED_off(prog);
2039	}
2040	if (flags & REXEC_COPY_STR) {
2041	    I32 i = PL_regeol - startpos + (stringarg - strbeg);
2042
2043	    s = savepvn(strbeg, i);
2044	    prog->subbeg = s;
2045	    prog->sublen = i;
2046	    RX_MATCH_COPIED_on(prog);
2047	}
2048	else {
2049	    prog->subbeg = strbeg;
2050	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2051	}
2052    }
2053
2054    return 1;
2055
2056phooey:
2057    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2058			  PL_colors[4],PL_colors[5]));
2059    if (PL_reg_eval_set)
2060	restore_pos(aTHX_ 0);
2061    return 0;
2062}
2063
2064/*
2065 - regtry - try match at specific point
2066 */
2067STATIC I32			/* 0 failure, 1 success */
2068S_regtry(pTHX_ regexp *prog, char *startpos)
2069{
2070    register I32 i;
2071    register I32 *sp;
2072    register I32 *ep;
2073    CHECKPOINT lastcp;
2074
2075#ifdef DEBUGGING
2076    PL_regindent = 0;	/* XXXX Not good when matches are reenterable... */
2077#endif
2078    if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2079	MAGIC *mg;
2080
2081	PL_reg_eval_set = RS_init;
2082	DEBUG_r(DEBUG_s(
2083	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2084			  (IV)(PL_stack_sp - PL_stack_base));
2085	    ));
2086	SAVEI32(cxstack[cxstack_ix].blk_oldsp);
2087	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2088	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2089	SAVETMPS;
2090	/* Apparently this is not needed, judging by wantarray. */
2091	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2092	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2093
2094	if (PL_reg_sv) {
2095	    /* Make $_ available to executed code. */
2096	    if (PL_reg_sv != DEFSV) {
2097		/* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
2098		SAVESPTR(DEFSV);
2099		DEFSV = PL_reg_sv;
2100	    }
2101
2102	    if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2103		  && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
2104		/* prepare for quick setting of pos */
2105		sv_magic(PL_reg_sv, (SV*)0,
2106			PERL_MAGIC_regex_global, Nullch, 0);
2107		mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
2108		mg->mg_len = -1;
2109	    }
2110	    PL_reg_magic    = mg;
2111	    PL_reg_oldpos   = mg->mg_len;
2112	    SAVEDESTRUCTOR_X(restore_pos, 0);
2113        }
2114        if (!PL_reg_curpm) {
2115	    Newz(22,PL_reg_curpm, 1, PMOP);
2116#ifdef USE_ITHREADS
2117            {
2118                SV* repointer = newSViv(0);
2119                /* so we know which PL_regex_padav element is PL_reg_curpm */
2120                SvFLAGS(repointer) |= SVf_BREAK;
2121                av_push(PL_regex_padav,repointer);
2122                PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2123                PL_regex_pad = AvARRAY(PL_regex_padav);
2124            }
2125#endif
2126        }
2127	PM_SETRE(PL_reg_curpm, prog);
2128	PL_reg_oldcurpm = PL_curpm;
2129	PL_curpm = PL_reg_curpm;
2130	if (RX_MATCH_COPIED(prog)) {
2131	    /*  Here is a serious problem: we cannot rewrite subbeg,
2132		since it may be needed if this match fails.  Thus
2133		$` inside (?{}) could fail... */
2134	    PL_reg_oldsaved = prog->subbeg;
2135	    PL_reg_oldsavedlen = prog->sublen;
2136	    RX_MATCH_COPIED_off(prog);
2137	}
2138	else
2139	    PL_reg_oldsaved = Nullch;
2140	prog->subbeg = PL_bostr;
2141	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2142    }
2143    prog->startp[0] = startpos - PL_bostr;
2144    PL_reginput = startpos;
2145    PL_regstartp = prog->startp;
2146    PL_regendp = prog->endp;
2147    PL_reglastparen = &prog->lastparen;
2148    PL_reglastcloseparen = &prog->lastcloseparen;
2149    prog->lastparen = 0;
2150    prog->lastcloseparen = 0;
2151    PL_regsize = 0;
2152    DEBUG_r(PL_reg_starttry = startpos);
2153    if (PL_reg_start_tmpl <= prog->nparens) {
2154	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2155        if(PL_reg_start_tmp)
2156            Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2157        else
2158            New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2159    }
2160
2161    /* XXXX What this code is doing here?!!!  There should be no need
2162       to do this again and again, PL_reglastparen should take care of
2163       this!  --ilya*/
2164
2165    /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2166     * Actually, the code in regcppop() (which Ilya may be meaning by
2167     * PL_reglastparen), is not needed at all by the test suite
2168     * (op/regexp, op/pat, op/split), but that code is needed, oddly
2169     * enough, for building DynaLoader, or otherwise this
2170     * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2171     * will happen.  Meanwhile, this code *is* needed for the
2172     * above-mentioned test suite tests to succeed.  The common theme
2173     * on those tests seems to be returning null fields from matches.
2174     * --jhi */
2175#if 1
2176    sp = prog->startp;
2177    ep = prog->endp;
2178    if (prog->nparens) {
2179	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2180	    *++sp = -1;
2181	    *++ep = -1;
2182	}
2183    }
2184#endif
2185    REGCP_SET(lastcp);
2186    if (regmatch(prog->program + 1)) {
2187	prog->endp[0] = PL_reginput - PL_bostr;
2188	return 1;
2189    }
2190    REGCP_UNWIND(lastcp);
2191    return 0;
2192}
2193
2194#define RE_UNWIND_BRANCH	1
2195#define RE_UNWIND_BRANCHJ	2
2196
2197union re_unwind_t;
2198
2199typedef struct {		/* XX: makes sense to enlarge it... */
2200    I32 type;
2201    I32 prev;
2202    CHECKPOINT lastcp;
2203} re_unwind_generic_t;
2204
2205typedef struct {
2206    I32 type;
2207    I32 prev;
2208    CHECKPOINT lastcp;
2209    I32 lastparen;
2210    regnode *next;
2211    char *locinput;
2212    I32 nextchr;
2213#ifdef DEBUGGING
2214    int regindent;
2215#endif
2216} re_unwind_branch_t;
2217
2218typedef union re_unwind_t {
2219    I32 type;
2220    re_unwind_generic_t generic;
2221    re_unwind_branch_t branch;
2222} re_unwind_t;
2223
2224#define sayYES goto yes
2225#define sayNO goto no
2226#define sayNO_ANYOF goto no_anyof
2227#define sayYES_FINAL goto yes_final
2228#define sayYES_LOUD  goto yes_loud
2229#define sayNO_FINAL  goto no_final
2230#define sayNO_SILENT goto do_no
2231#define saySAME(x) if (x) goto yes; else goto no
2232
2233#define REPORT_CODE_OFF 24
2234
2235/*
2236 - regmatch - main matching routine
2237 *
2238 * Conceptually the strategy is simple:  check to see whether the current
2239 * node matches, call self recursively to see whether the rest matches,
2240 * and then act accordingly.  In practice we make some effort to avoid
2241 * recursion, in particular by going through "ordinary" nodes (that don't
2242 * need to know whether the rest of the match failed) by a loop instead of
2243 * by recursion.
2244 */
2245/* [lwall] I've hoisted the register declarations to the outer block in order to
2246 * maybe save a little bit of pushing and popping on the stack.  It also takes
2247 * advantage of machines that use a register save mask on subroutine entry.
2248 */
2249STATIC I32			/* 0 failure, 1 success */
2250S_regmatch(pTHX_ regnode *prog)
2251{
2252    register regnode *scan;	/* Current node. */
2253    regnode *next;		/* Next node. */
2254    regnode *inner;		/* Next node in internal branch. */
2255    register I32 nextchr;	/* renamed nextchr - nextchar colides with
2256				   function of same name */
2257    register I32 n;		/* no or next */
2258    register I32 ln = 0;	/* len or last */
2259    register char *s = Nullch;	/* operand or save */
2260    register char *locinput = PL_reginput;
2261    register I32 c1 = 0, c2 = 0, paren;	/* case fold search, parenth */
2262    int minmod = 0, sw = 0, logical = 0;
2263    I32 unwind = 0;
2264#if 0
2265    I32 firstcp = PL_savestack_ix;
2266#endif
2267    register bool do_utf8 = PL_reg_match_utf8;
2268#ifdef DEBUGGING
2269    SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2270    SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2271    SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2272#endif
2273
2274#ifdef DEBUGGING
2275    PL_regindent++;
2276#endif
2277
2278    /* Note that nextchr is a byte even in UTF */
2279    nextchr = UCHARAT(locinput);
2280    scan = prog;
2281    while (scan != NULL) {
2282
2283        DEBUG_r( {
2284	    SV *prop = sv_newmortal();
2285	    int docolor = *PL_colors[0];
2286	    int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2287	    int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
2288	    /* The part of the string before starttry has one color
2289	       (pref0_len chars), between starttry and current
2290	       position another one (pref_len - pref0_len chars),
2291	       after the current position the third one.
2292	       We assume that pref0_len <= pref_len, otherwise we
2293	       decrease pref0_len.  */
2294	    int pref_len = (locinput - PL_bostr) > (5 + taill) - l
2295		? (5 + taill) - l : locinput - PL_bostr;
2296	    int pref0_len;
2297
2298	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2299		pref_len++;
2300	    pref0_len = pref_len  - (locinput - PL_reg_starttry);
2301	    if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2302		l = ( PL_regeol - locinput > (5 + taill) - pref_len
2303		      ? (5 + taill) - pref_len : PL_regeol - locinput);
2304	    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2305		l--;
2306	    if (pref0_len < 0)
2307		pref0_len = 0;
2308	    if (pref0_len > pref_len)
2309		pref0_len = pref_len;
2310	    regprop(prop, scan);
2311	    {
2312	      char *s0 =
2313		do_utf8 && OP(scan) != CANY ?
2314		pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2315			       pref0_len, 60, UNI_DISPLAY_REGEX) :
2316		locinput - pref_len;
2317	      int len0 = do_utf8 ? strlen(s0) : pref0_len;
2318	      char *s1 = do_utf8 && OP(scan) != CANY ?
2319		pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2320			       pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2321		locinput - pref_len + pref0_len;
2322	      int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2323	      char *s2 = do_utf8 && OP(scan) != CANY ?
2324		pv_uni_display(dsv2, (U8*)locinput,
2325			       PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2326		locinput;
2327	      int len2 = do_utf8 ? strlen(s2) : l;
2328	      PerlIO_printf(Perl_debug_log,
2329			    "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2330			    (IV)(locinput - PL_bostr),
2331			    PL_colors[4],
2332			    len0, s0,
2333			    PL_colors[5],
2334			    PL_colors[2],
2335			    len1, s1,
2336			    PL_colors[3],
2337			    (docolor ? "" : "> <"),
2338			    PL_colors[0],
2339			    len2, s2,
2340			    PL_colors[1],
2341			    15 - l - pref_len + 1,
2342			    "",
2343			    (IV)(scan - PL_regprogram), PL_regindent*2, "",
2344			    SvPVX(prop));
2345	    }
2346	});
2347
2348	next = scan + NEXT_OFF(scan);
2349	if (next == scan)
2350	    next = NULL;
2351
2352	switch (OP(scan)) {
2353	case BOL:
2354	    if (locinput == PL_bostr || (PL_multiline &&
2355		(nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2356	    {
2357		/* regtill = regbol; */
2358		break;
2359	    }
2360	    sayNO;
2361	case MBOL:
2362	    if (locinput == PL_bostr ||
2363		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2364	    {
2365		break;
2366	    }
2367	    sayNO;
2368	case SBOL:
2369	    if (locinput == PL_bostr)
2370		break;
2371	    sayNO;
2372	case GPOS:
2373	    if (locinput == PL_reg_ganch)
2374		break;
2375	    sayNO;
2376	case EOL:
2377	    if (PL_multiline)
2378		goto meol;
2379	    else
2380		goto seol;
2381	case MEOL:
2382	  meol:
2383	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2384		sayNO;
2385	    break;
2386	case SEOL:
2387	  seol:
2388	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2389		sayNO;
2390	    if (PL_regeol - locinput > 1)
2391		sayNO;
2392	    break;
2393	case EOS:
2394	    if (PL_regeol != locinput)
2395		sayNO;
2396	    break;
2397	case SANY:
2398	    if (!nextchr && locinput >= PL_regeol)
2399		sayNO;
2400 	    if (do_utf8) {
2401	        locinput += PL_utf8skip[nextchr];
2402		if (locinput > PL_regeol)
2403 		    sayNO;
2404 		nextchr = UCHARAT(locinput);
2405 	    }
2406 	    else
2407 		nextchr = UCHARAT(++locinput);
2408	    break;
2409	case CANY:
2410	    if (!nextchr && locinput >= PL_regeol)
2411		sayNO;
2412	    nextchr = UCHARAT(++locinput);
2413	    break;
2414	case REG_ANY:
2415	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2416		sayNO;
2417	    if (do_utf8) {
2418		locinput += PL_utf8skip[nextchr];
2419		if (locinput > PL_regeol)
2420		    sayNO;
2421		nextchr = UCHARAT(locinput);
2422	    }
2423	    else
2424		nextchr = UCHARAT(++locinput);
2425	    break;
2426	case EXACT:
2427	    s = STRING(scan);
2428	    ln = STR_LEN(scan);
2429	    if (do_utf8 != UTF) {
2430		/* The target and the pattern have differing utf8ness. */
2431		char *l = locinput;
2432		char *e = s + ln;
2433		STRLEN ulen;
2434
2435		if (do_utf8) {
2436		    /* The target is utf8, the pattern is not utf8. */
2437		    while (s < e) {
2438			if (l >= PL_regeol)
2439			     sayNO;
2440			if (NATIVE_TO_UNI(*(U8*)s) !=
2441			    utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
2442					   ckWARN(WARN_UTF8) ?
2443					   0 : UTF8_ALLOW_ANY))
2444			     sayNO;
2445			l += ulen;
2446			s ++;
2447		    }
2448		}
2449		else {
2450		    /* The target is not utf8, the pattern is utf8. */
2451		    while (s < e) {
2452			if (l >= PL_regeol)
2453			    sayNO;
2454			if (NATIVE_TO_UNI(*((U8*)l)) !=
2455			    utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
2456					   ckWARN(WARN_UTF8) ?
2457					   0 : UTF8_ALLOW_ANY))
2458			    sayNO;
2459			s += ulen;
2460			l ++;
2461		    }
2462		}
2463		locinput = l;
2464		nextchr = UCHARAT(locinput);
2465		break;
2466	    }
2467	    /* The target and the pattern have the same utf8ness. */
2468	    /* Inline the first character, for speed. */
2469	    if (UCHARAT(s) != nextchr)
2470		sayNO;
2471	    if (PL_regeol - locinput < ln)
2472		sayNO;
2473	    if (ln > 1 && memNE(s, locinput, ln))
2474		sayNO;
2475	    locinput += ln;
2476	    nextchr = UCHARAT(locinput);
2477	    break;
2478	case EXACTFL:
2479	    PL_reg_flags |= RF_tainted;
2480	    /* FALL THROUGH */
2481	case EXACTF:
2482	    s = STRING(scan);
2483	    ln = STR_LEN(scan);
2484
2485	    if (do_utf8 || UTF) {
2486	      /* Either target or the pattern are utf8. */
2487		char *l = locinput;
2488		char *e = PL_regeol;
2489
2490		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
2491			       l, &e, 0,  do_utf8)) {
2492		     /* One more case for the sharp s:
2493		      * pack("U0U*", 0xDF) =~ /ss/i,
2494		      * the 0xC3 0x9F are the UTF-8
2495		      * byte sequence for the U+00DF. */
2496		     if (!(do_utf8 &&
2497			   toLOWER(s[0]) == 's' &&
2498			   ln >= 2 &&
2499			   toLOWER(s[1]) == 's' &&
2500			   (U8)l[0] == 0xC3 &&
2501			   e - l >= 2 &&
2502			   (U8)l[1] == 0x9F))
2503			  sayNO;
2504		}
2505		locinput = e;
2506		nextchr = UCHARAT(locinput);
2507		break;
2508	    }
2509
2510	    /* Neither the target and the pattern are utf8. */
2511
2512	    /* Inline the first character, for speed. */
2513	    if (UCHARAT(s) != nextchr &&
2514		UCHARAT(s) != ((OP(scan) == EXACTF)
2515			       ? PL_fold : PL_fold_locale)[nextchr])
2516		sayNO;
2517	    if (PL_regeol - locinput < ln)
2518		sayNO;
2519	    if (ln > 1 && (OP(scan) == EXACTF
2520			   ? ibcmp(s, locinput, ln)
2521			   : ibcmp_locale(s, locinput, ln)))
2522		sayNO;
2523	    locinput += ln;
2524	    nextchr = UCHARAT(locinput);
2525	    break;
2526	case ANYOF:
2527	    if (do_utf8) {
2528	        STRLEN inclasslen = PL_regeol - locinput;
2529
2530	        if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
2531		    sayNO_ANYOF;
2532		if (locinput >= PL_regeol)
2533		    sayNO;
2534		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
2535		nextchr = UCHARAT(locinput);
2536		break;
2537	    }
2538	    else {
2539		if (nextchr < 0)
2540		    nextchr = UCHARAT(locinput);
2541		if (!REGINCLASS(scan, (U8*)locinput))
2542		    sayNO_ANYOF;
2543		if (!nextchr && locinput >= PL_regeol)
2544		    sayNO;
2545		nextchr = UCHARAT(++locinput);
2546		break;
2547	    }
2548	no_anyof:
2549	    /* If we might have the case of the German sharp s
2550	     * in a casefolding Unicode character class. */
2551
2552	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2553		 locinput += SHARP_S_SKIP;
2554		 nextchr = UCHARAT(locinput);
2555	    }
2556	    else
2557		 sayNO;
2558	    break;
2559	case ALNUML:
2560	    PL_reg_flags |= RF_tainted;
2561	    /* FALL THROUGH */
2562	case ALNUM:
2563	    if (!nextchr)
2564		sayNO;
2565	    if (do_utf8) {
2566		LOAD_UTF8_CHARCLASS(alnum,"a");
2567		if (!(OP(scan) == ALNUM
2568		      ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2569		      : isALNUM_LC_utf8((U8*)locinput)))
2570		{
2571		    sayNO;
2572		}
2573		locinput += PL_utf8skip[nextchr];
2574		nextchr = UCHARAT(locinput);
2575		break;
2576	    }
2577	    if (!(OP(scan) == ALNUM
2578		  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2579		sayNO;
2580	    nextchr = UCHARAT(++locinput);
2581	    break;
2582	case NALNUML:
2583	    PL_reg_flags |= RF_tainted;
2584	    /* FALL THROUGH */
2585	case NALNUM:
2586	    if (!nextchr && locinput >= PL_regeol)
2587		sayNO;
2588	    if (do_utf8) {
2589		LOAD_UTF8_CHARCLASS(alnum,"a");
2590		if (OP(scan) == NALNUM
2591		    ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
2592		    : isALNUM_LC_utf8((U8*)locinput))
2593		{
2594		    sayNO;
2595		}
2596		locinput += PL_utf8skip[nextchr];
2597		nextchr = UCHARAT(locinput);
2598		break;
2599	    }
2600	    if (OP(scan) == NALNUM
2601		? isALNUM(nextchr) : isALNUM_LC(nextchr))
2602		sayNO;
2603	    nextchr = UCHARAT(++locinput);
2604	    break;
2605	case BOUNDL:
2606	case NBOUNDL:
2607	    PL_reg_flags |= RF_tainted;
2608	    /* FALL THROUGH */
2609	case BOUND:
2610	case NBOUND:
2611	    /* was last char in word? */
2612	    if (do_utf8) {
2613		if (locinput == PL_bostr)
2614		    ln = '\n';
2615		else {
2616		    U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
2617
2618		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
2619		}
2620		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2621		    ln = isALNUM_uni(ln);
2622		    LOAD_UTF8_CHARCLASS(alnum,"a");
2623		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
2624		}
2625		else {
2626		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
2627		    n = isALNUM_LC_utf8((U8*)locinput);
2628		}
2629	    }
2630	    else {
2631		ln = (locinput != PL_bostr) ?
2632		    UCHARAT(locinput - 1) : '\n';
2633		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2634		    ln = isALNUM(ln);
2635		    n = isALNUM(nextchr);
2636		}
2637		else {
2638		    ln = isALNUM_LC(ln);
2639		    n = isALNUM_LC(nextchr);
2640		}
2641	    }
2642	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2643				    OP(scan) == BOUNDL))
2644		    sayNO;
2645	    break;
2646	case SPACEL:
2647	    PL_reg_flags |= RF_tainted;
2648	    /* FALL THROUGH */
2649	case SPACE:
2650	    if (!nextchr)
2651		sayNO;
2652	    if (do_utf8) {
2653		if (UTF8_IS_CONTINUED(nextchr)) {
2654		    LOAD_UTF8_CHARCLASS(space," ");
2655		    if (!(OP(scan) == SPACE
2656			  ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2657			  : isSPACE_LC_utf8((U8*)locinput)))
2658		    {
2659			sayNO;
2660		    }
2661		    locinput += PL_utf8skip[nextchr];
2662		    nextchr = UCHARAT(locinput);
2663		    break;
2664		}
2665		if (!(OP(scan) == SPACE
2666		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2667		    sayNO;
2668		nextchr = UCHARAT(++locinput);
2669	    }
2670	    else {
2671		if (!(OP(scan) == SPACE
2672		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2673		    sayNO;
2674		nextchr = UCHARAT(++locinput);
2675	    }
2676	    break;
2677	case NSPACEL:
2678	    PL_reg_flags |= RF_tainted;
2679	    /* FALL THROUGH */
2680	case NSPACE:
2681	    if (!nextchr && locinput >= PL_regeol)
2682		sayNO;
2683	    if (do_utf8) {
2684		LOAD_UTF8_CHARCLASS(space," ");
2685		if (OP(scan) == NSPACE
2686		    ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
2687		    : isSPACE_LC_utf8((U8*)locinput))
2688		{
2689		    sayNO;
2690		}
2691		locinput += PL_utf8skip[nextchr];
2692		nextchr = UCHARAT(locinput);
2693		break;
2694	    }
2695	    if (OP(scan) == NSPACE
2696		? isSPACE(nextchr) : isSPACE_LC(nextchr))
2697		sayNO;
2698	    nextchr = UCHARAT(++locinput);
2699	    break;
2700	case DIGITL:
2701	    PL_reg_flags |= RF_tainted;
2702	    /* FALL THROUGH */
2703	case DIGIT:
2704	    if (!nextchr)
2705		sayNO;
2706	    if (do_utf8) {
2707		LOAD_UTF8_CHARCLASS(digit,"0");
2708		if (!(OP(scan) == DIGIT
2709		      ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2710		      : isDIGIT_LC_utf8((U8*)locinput)))
2711		{
2712		    sayNO;
2713		}
2714		locinput += PL_utf8skip[nextchr];
2715		nextchr = UCHARAT(locinput);
2716		break;
2717	    }
2718	    if (!(OP(scan) == DIGIT
2719		  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2720		sayNO;
2721	    nextchr = UCHARAT(++locinput);
2722	    break;
2723	case NDIGITL:
2724	    PL_reg_flags |= RF_tainted;
2725	    /* FALL THROUGH */
2726	case NDIGIT:
2727	    if (!nextchr && locinput >= PL_regeol)
2728		sayNO;
2729	    if (do_utf8) {
2730		LOAD_UTF8_CHARCLASS(digit,"0");
2731		if (OP(scan) == NDIGIT
2732		    ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
2733		    : isDIGIT_LC_utf8((U8*)locinput))
2734		{
2735		    sayNO;
2736		}
2737		locinput += PL_utf8skip[nextchr];
2738		nextchr = UCHARAT(locinput);
2739		break;
2740	    }
2741	    if (OP(scan) == NDIGIT
2742		? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2743		sayNO;
2744	    nextchr = UCHARAT(++locinput);
2745	    break;
2746	case CLUMP:
2747	    if (locinput >= PL_regeol)
2748		sayNO;
2749	    if  (do_utf8) {
2750		LOAD_UTF8_CHARCLASS(mark,"~");
2751		if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2752		    sayNO;
2753		locinput += PL_utf8skip[nextchr];
2754		while (locinput < PL_regeol &&
2755		       swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2756		    locinput += UTF8SKIP(locinput);
2757		if (locinput > PL_regeol)
2758		    sayNO;
2759	    }
2760	    else
2761	       locinput++;
2762	    nextchr = UCHARAT(locinput);
2763	    break;
2764	case REFFL:
2765	    PL_reg_flags |= RF_tainted;
2766	    /* FALL THROUGH */
2767        case REF:
2768	case REFF:
2769	    n = ARG(scan);  /* which paren pair */
2770	    ln = PL_regstartp[n];
2771	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2772	    if ((I32)*PL_reglastparen < n || ln == -1)
2773		sayNO;			/* Do not match unless seen CLOSEn. */
2774	    if (ln == PL_regendp[n])
2775		break;
2776
2777	    s = PL_bostr + ln;
2778	    if (do_utf8 && OP(scan) != REF) {	/* REF can do byte comparison */
2779		char *l = locinput;
2780		char *e = PL_bostr + PL_regendp[n];
2781		/*
2782		 * Note that we can't do the "other character" lookup trick as
2783		 * in the 8-bit case (no pun intended) because in Unicode we
2784		 * have to map both upper and title case to lower case.
2785		 */
2786		if (OP(scan) == REFF) {
2787		    STRLEN ulen1, ulen2;
2788		    U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2789		    U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
2790		    while (s < e) {
2791			if (l >= PL_regeol)
2792			    sayNO;
2793			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2794			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2795			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
2796			    sayNO;
2797			s += ulen1;
2798			l += ulen2;
2799		    }
2800		}
2801		locinput = l;
2802		nextchr = UCHARAT(locinput);
2803		break;
2804	    }
2805
2806	    /* Inline the first character, for speed. */
2807	    if (UCHARAT(s) != nextchr &&
2808		(OP(scan) == REF ||
2809		 (UCHARAT(s) != ((OP(scan) == REFF
2810				  ? PL_fold : PL_fold_locale)[nextchr]))))
2811		sayNO;
2812	    ln = PL_regendp[n] - ln;
2813	    if (locinput + ln > PL_regeol)
2814		sayNO;
2815	    if (ln > 1 && (OP(scan) == REF
2816			   ? memNE(s, locinput, ln)
2817			   : (OP(scan) == REFF
2818			      ? ibcmp(s, locinput, ln)
2819			      : ibcmp_locale(s, locinput, ln))))
2820		sayNO;
2821	    locinput += ln;
2822	    nextchr = UCHARAT(locinput);
2823	    break;
2824
2825	case NOTHING:
2826	case TAIL:
2827	    break;
2828	case BACK:
2829	    break;
2830	case EVAL:
2831	{
2832	    dSP;
2833	    OP_4tree *oop = PL_op;
2834	    COP *ocurcop = PL_curcop;
2835	    PAD *old_comppad;
2836	    SV *ret;
2837	    struct regexp *oreg = PL_reg_re;
2838
2839	    n = ARG(scan);
2840	    PL_op = (OP_4tree*)PL_regdata->data[n];
2841	    DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2842	    PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
2843	    PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2844
2845	    {
2846		SV **before = SP;
2847		CALLRUNOPS(aTHX);			/* Scalar context. */
2848		SPAGAIN;
2849		if (SP == before)
2850		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
2851		else {
2852		    ret = POPs;
2853		    PUTBACK;
2854		}
2855	    }
2856
2857	    PL_op = oop;
2858	    PAD_RESTORE_LOCAL(old_comppad);
2859	    PL_curcop = ocurcop;
2860	    if (logical) {
2861		if (logical == 2) {	/* Postponed subexpression. */
2862		    regexp *re;
2863		    MAGIC *mg = Null(MAGIC*);
2864		    re_cc_state state;
2865		    CHECKPOINT cp, lastcp;
2866                    int toggleutf;
2867		    register SV *sv;
2868
2869		    if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
2870			mg = mg_find(sv, PERL_MAGIC_qr);
2871		    else if (SvSMAGICAL(ret)) {
2872			if (SvGMAGICAL(ret))
2873			    sv_unmagic(ret, PERL_MAGIC_qr);
2874			else
2875			    mg = mg_find(ret, PERL_MAGIC_qr);
2876		    }
2877
2878		    if (mg) {
2879			re = (regexp *)mg->mg_obj;
2880			(void)ReREFCNT_inc(re);
2881		    }
2882		    else {
2883			STRLEN len;
2884			char *t = SvPV(ret, len);
2885			PMOP pm;
2886			char *oprecomp = PL_regprecomp;
2887			I32 osize = PL_regsize;
2888			I32 onpar = PL_regnpar;
2889
2890			Zero(&pm, 1, PMOP);
2891                        if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
2892			re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2893			if (!(SvFLAGS(ret)
2894			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
2895				| SVs_GMG)))
2896			    sv_magic(ret,(SV*)ReREFCNT_inc(re),
2897					PERL_MAGIC_qr,0,0);
2898			PL_regprecomp = oprecomp;
2899			PL_regsize = osize;
2900			PL_regnpar = onpar;
2901		    }
2902		    DEBUG_r(
2903			PerlIO_printf(Perl_debug_log,
2904				      "Entering embedded `%s%.60s%s%s'\n",
2905				      PL_colors[0],
2906				      re->precomp,
2907				      PL_colors[1],
2908				      (strlen(re->precomp) > 60 ? "..." : ""))
2909			);
2910		    state.node = next;
2911		    state.prev = PL_reg_call_cc;
2912		    state.cc = PL_regcc;
2913		    state.re = PL_reg_re;
2914
2915		    PL_regcc = 0;
2916
2917		    cp = regcppush(0);	/* Save *all* the positions. */
2918		    REGCP_SET(lastcp);
2919		    cache_re(re);
2920		    state.ss = PL_savestack_ix;
2921		    *PL_reglastparen = 0;
2922		    *PL_reglastcloseparen = 0;
2923		    PL_reg_call_cc = &state;
2924		    PL_reginput = locinput;
2925		    toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2926				((re->reganch & ROPT_UTF8) != 0);
2927		    if (toggleutf) PL_reg_flags ^= RF_utf8;
2928
2929		    /* XXXX This is too dramatic a measure... */
2930		    PL_reg_maxiter = 0;
2931
2932		    if (regmatch(re->program + 1)) {
2933			/* Even though we succeeded, we need to restore
2934			   global variables, since we may be wrapped inside
2935			   SUSPEND, thus the match may be not finished yet. */
2936
2937			/* XXXX Do this only if SUSPENDed? */
2938			PL_reg_call_cc = state.prev;
2939			PL_regcc = state.cc;
2940			PL_reg_re = state.re;
2941			cache_re(PL_reg_re);
2942			if (toggleutf) PL_reg_flags ^= RF_utf8;
2943
2944			/* XXXX This is too dramatic a measure... */
2945			PL_reg_maxiter = 0;
2946
2947			/* These are needed even if not SUSPEND. */
2948			ReREFCNT_dec(re);
2949			regcpblow(cp);
2950			sayYES;
2951		    }
2952		    ReREFCNT_dec(re);
2953		    REGCP_UNWIND(lastcp);
2954		    regcppop();
2955		    PL_reg_call_cc = state.prev;
2956		    PL_regcc = state.cc;
2957		    PL_reg_re = state.re;
2958		    cache_re(PL_reg_re);
2959		    if (toggleutf) PL_reg_flags ^= RF_utf8;
2960
2961		    /* XXXX This is too dramatic a measure... */
2962		    PL_reg_maxiter = 0;
2963
2964		    logical = 0;
2965		    sayNO;
2966		}
2967		sw = SvTRUE(ret);
2968		logical = 0;
2969	    }
2970	    else {
2971		sv_setsv(save_scalar(PL_replgv), ret);
2972		cache_re(oreg);
2973	    }
2974	    break;
2975	}
2976	case OPEN:
2977	    n = ARG(scan);  /* which paren pair */
2978	    PL_reg_start_tmp[n] = locinput;
2979	    if (n > PL_regsize)
2980		PL_regsize = n;
2981	    break;
2982	case CLOSE:
2983	    n = ARG(scan);  /* which paren pair */
2984	    PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2985	    PL_regendp[n] = locinput - PL_bostr;
2986	    if (n > (I32)*PL_reglastparen)
2987		*PL_reglastparen = n;
2988	    *PL_reglastcloseparen = n;
2989	    break;
2990	case GROUPP:
2991	    n = ARG(scan);  /* which paren pair */
2992	    sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
2993	    break;
2994	case IFTHEN:
2995	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
2996	    if (sw)
2997		next = NEXTOPER(NEXTOPER(scan));
2998	    else {
2999		next = scan + ARG(scan);
3000		if (OP(next) == IFTHEN) /* Fake one. */
3001		    next = NEXTOPER(NEXTOPER(next));
3002	    }
3003	    break;
3004	case LOGICAL:
3005	    logical = scan->flags;
3006	    break;
3007/*******************************************************************
3008 PL_regcc contains infoblock about the innermost (...)* loop, and
3009 a pointer to the next outer infoblock.
3010
3011 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3012
3013   1) After matching X, regnode for CURLYX is processed;
3014
3015   2) This regnode creates infoblock on the stack, and calls
3016      regmatch() recursively with the starting point at WHILEM node;
3017
3018   3) Each hit of WHILEM node tries to match A and Z (in the order
3019      depending on the current iteration, min/max of {min,max} and
3020      greediness).  The information about where are nodes for "A"
3021      and "Z" is read from the infoblock, as is info on how many times "A"
3022      was already matched, and greediness.
3023
3024   4) After A matches, the same WHILEM node is hit again.
3025
3026   5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3027      of the same pair.  Thus when WHILEM tries to match Z, it temporarily
3028      resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3029      as in (Y(A)*Z)*.  If Z matches, the automaton will hit the WHILEM node
3030      of the external loop.
3031
3032 Currently present infoblocks form a tree with a stem formed by PL_curcc
3033 and whatever it mentions via ->next, and additional attached trees
3034 corresponding to temporarily unset infoblocks as in "5" above.
3035
3036 In the following picture infoblocks for outer loop of
3037 (Y(A)*?Z)*?T are denoted O, for inner I.  NULL starting block
3038 is denoted by x.  The matched string is YAAZYAZT.  Temporarily postponed
3039 infoblocks are drawn below the "reset" infoblock.
3040
3041 In fact in the picture below we do not show failed matches for Z and T
3042 by WHILEM blocks.  [We illustrate minimal matches, since for them it is
3043 more obvious *why* one needs to *temporary* unset infoblocks.]
3044
3045  Matched	REx position	InfoBlocks	Comment
3046  		(Y(A)*?Z)*?T	x
3047  		Y(A)*?Z)*?T	x <- O
3048  Y		(A)*?Z)*?T	x <- O
3049  Y		A)*?Z)*?T	x <- O <- I
3050  YA		)*?Z)*?T	x <- O <- I
3051  YA		A)*?Z)*?T	x <- O <- I
3052  YAA		)*?Z)*?T	x <- O <- I
3053  YAA		Z)*?T		x <- O		# Temporary unset I
3054				     I
3055
3056  YAAZ		Y(A)*?Z)*?T	x <- O
3057				     I
3058
3059  YAAZY		(A)*?Z)*?T	x <- O
3060				     I
3061
3062  YAAZY		A)*?Z)*?T	x <- O <- I
3063				     I
3064
3065  YAAZYA	)*?Z)*?T	x <- O <- I
3066				     I
3067
3068  YAAZYA	Z)*?T		x <- O		# Temporary unset I
3069				     I,I
3070
3071  YAAZYAZ	)*?T		x <- O
3072				     I,I
3073
3074  YAAZYAZ	T		x		# Temporary unset O
3075				O
3076				I,I
3077
3078  YAAZYAZT			x
3079				O
3080				I,I
3081 *******************************************************************/
3082	case CURLYX: {
3083		CURCUR cc;
3084		CHECKPOINT cp = PL_savestack_ix;
3085		/* No need to save/restore up to this paren */
3086		I32 parenfloor = scan->flags;
3087
3088		if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3089		    next += ARG(next);
3090		cc.oldcc = PL_regcc;
3091		PL_regcc = &cc;
3092		/* XXXX Probably it is better to teach regpush to support
3093		   parenfloor > PL_regsize... */
3094		if (parenfloor > (I32)*PL_reglastparen)
3095		    parenfloor = *PL_reglastparen; /* Pessimization... */
3096		cc.parenfloor = parenfloor;
3097		cc.cur = -1;
3098		cc.min = ARG1(scan);
3099		cc.max  = ARG2(scan);
3100		cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3101		cc.next = next;
3102		cc.minmod = minmod;
3103		cc.lastloc = 0;
3104		PL_reginput = locinput;
3105		n = regmatch(PREVOPER(next));	/* start on the WHILEM */
3106		regcpblow(cp);
3107		PL_regcc = cc.oldcc;
3108		saySAME(n);
3109	    }
3110	    /* NOT REACHED */
3111	case WHILEM: {
3112		/*
3113		 * This is really hard to understand, because after we match
3114		 * what we're trying to match, we must make sure the rest of
3115		 * the REx is going to match for sure, and to do that we have
3116		 * to go back UP the parse tree by recursing ever deeper.  And
3117		 * if it fails, we have to reset our parent's current state
3118		 * that we can try again after backing off.
3119		 */
3120
3121		CHECKPOINT cp, lastcp;
3122		CURCUR* cc = PL_regcc;
3123		char *lastloc = cc->lastloc; /* Detection of 0-len. */
3124
3125		n = cc->cur + 1;	/* how many we know we matched */
3126		PL_reginput = locinput;
3127
3128		DEBUG_r(
3129		    PerlIO_printf(Perl_debug_log,
3130				  "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
3131				  REPORT_CODE_OFF+PL_regindent*2, "",
3132				  (long)n, (long)cc->min,
3133				  (long)cc->max, PTR2UV(cc))
3134		    );
3135
3136		/* If degenerate scan matches "", assume scan done. */
3137
3138		if (locinput == cc->lastloc && n >= cc->min) {
3139		    PL_regcc = cc->oldcc;
3140		    if (PL_regcc)
3141			ln = PL_regcc->cur;
3142		    DEBUG_r(
3143			PerlIO_printf(Perl_debug_log,
3144			   "%*s  empty match detected, try continuation...\n",
3145			   REPORT_CODE_OFF+PL_regindent*2, "")
3146			);
3147		    if (regmatch(cc->next))
3148			sayYES;
3149		    if (PL_regcc)
3150			PL_regcc->cur = ln;
3151		    PL_regcc = cc;
3152		    sayNO;
3153		}
3154
3155		/* First just match a string of min scans. */
3156
3157		if (n < cc->min) {
3158		    cc->cur = n;
3159		    cc->lastloc = locinput;
3160		    if (regmatch(cc->scan))
3161			sayYES;
3162		    cc->cur = n - 1;
3163		    cc->lastloc = lastloc;
3164		    sayNO;
3165		}
3166
3167		if (scan->flags) {
3168		    /* Check whether we already were at this position.
3169			Postpone detection until we know the match is not
3170			*that* much linear. */
3171		if (!PL_reg_maxiter) {
3172		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3173		    PL_reg_leftiter = PL_reg_maxiter;
3174		}
3175		if (PL_reg_leftiter-- == 0) {
3176		    I32 size = (PL_reg_maxiter + 7)/8;
3177		    if (PL_reg_poscache) {
3178			if ((I32)PL_reg_poscache_size < size) {
3179			    Renew(PL_reg_poscache, size, char);
3180			    PL_reg_poscache_size = size;
3181			}
3182			Zero(PL_reg_poscache, size, char);
3183		    }
3184		    else {
3185			PL_reg_poscache_size = size;
3186			Newz(29, PL_reg_poscache, size, char);
3187		    }
3188		    DEBUG_r(
3189			PerlIO_printf(Perl_debug_log,
3190	      "%sDetected a super-linear match, switching on caching%s...\n",
3191				      PL_colors[4], PL_colors[5])
3192			);
3193		}
3194		if (PL_reg_leftiter < 0) {
3195		    I32 o = locinput - PL_bostr, b;
3196
3197		    o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3198		    b = o % 8;
3199		    o /= 8;
3200		    if (PL_reg_poscache[o] & (1<<b)) {
3201		    DEBUG_r(
3202			PerlIO_printf(Perl_debug_log,
3203				      "%*s  already tried at this position...\n",
3204				      REPORT_CODE_OFF+PL_regindent*2, "")
3205			);
3206			if (PL_reg_flags & RF_false)
3207			    sayYES;
3208			else
3209			    sayNO_SILENT;
3210		    }
3211		    PL_reg_poscache[o] |= (1<<b);
3212		}
3213		}
3214
3215		/* Prefer next over scan for minimal matching. */
3216
3217		if (cc->minmod) {
3218		    PL_regcc = cc->oldcc;
3219		    if (PL_regcc)
3220			ln = PL_regcc->cur;
3221		    cp = regcppush(cc->parenfloor);
3222		    REGCP_SET(lastcp);
3223		    if (regmatch(cc->next)) {
3224			regcpblow(cp);
3225			sayYES;	/* All done. */
3226		    }
3227		    REGCP_UNWIND(lastcp);
3228		    regcppop();
3229		    if (PL_regcc)
3230			PL_regcc->cur = ln;
3231		    PL_regcc = cc;
3232
3233		    if (n >= cc->max) {	/* Maximum greed exceeded? */
3234			if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3235			    && !(PL_reg_flags & RF_warned)) {
3236			    PL_reg_flags |= RF_warned;
3237			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3238				 "Complex regular subexpression recursion",
3239				 REG_INFTY - 1);
3240			}
3241			sayNO;
3242		    }
3243
3244		    DEBUG_r(
3245			PerlIO_printf(Perl_debug_log,
3246				      "%*s  trying longer...\n",
3247				      REPORT_CODE_OFF+PL_regindent*2, "")
3248			);
3249		    /* Try scanning more and see if it helps. */
3250		    PL_reginput = locinput;
3251		    cc->cur = n;
3252		    cc->lastloc = locinput;
3253		    cp = regcppush(cc->parenfloor);
3254		    REGCP_SET(lastcp);
3255		    if (regmatch(cc->scan)) {
3256			regcpblow(cp);
3257			sayYES;
3258		    }
3259		    REGCP_UNWIND(lastcp);
3260		    regcppop();
3261		    cc->cur = n - 1;
3262		    cc->lastloc = lastloc;
3263		    sayNO;
3264		}
3265
3266		/* Prefer scan over next for maximal matching. */
3267
3268		if (n < cc->max) {	/* More greed allowed? */
3269		    cp = regcppush(cc->parenfloor);
3270		    cc->cur = n;
3271		    cc->lastloc = locinput;
3272		    REGCP_SET(lastcp);
3273		    if (regmatch(cc->scan)) {
3274			regcpblow(cp);
3275			sayYES;
3276		    }
3277		    REGCP_UNWIND(lastcp);
3278		    regcppop();		/* Restore some previous $<digit>s? */
3279		    PL_reginput = locinput;
3280		    DEBUG_r(
3281			PerlIO_printf(Perl_debug_log,
3282				      "%*s  failed, try continuation...\n",
3283				      REPORT_CODE_OFF+PL_regindent*2, "")
3284			);
3285		}
3286		if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3287			&& !(PL_reg_flags & RF_warned)) {
3288		    PL_reg_flags |= RF_warned;
3289		    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
3290			 "Complex regular subexpression recursion",
3291			 REG_INFTY - 1);
3292		}
3293
3294		/* Failed deeper matches of scan, so see if this one works. */
3295		PL_regcc = cc->oldcc;
3296		if (PL_regcc)
3297		    ln = PL_regcc->cur;
3298		if (regmatch(cc->next))
3299		    sayYES;
3300		if (PL_regcc)
3301		    PL_regcc->cur = ln;
3302		PL_regcc = cc;
3303		cc->cur = n - 1;
3304		cc->lastloc = lastloc;
3305		sayNO;
3306	    }
3307	    /* NOT REACHED */
3308	case BRANCHJ:
3309	    next = scan + ARG(scan);
3310	    if (next == scan)
3311		next = NULL;
3312	    inner = NEXTOPER(NEXTOPER(scan));
3313	    goto do_branch;
3314	case BRANCH:
3315	    inner = NEXTOPER(scan);
3316	  do_branch:
3317	    {
3318		c1 = OP(scan);
3319		if (OP(next) != c1)	/* No choice. */
3320		    next = inner;	/* Avoid recursion. */
3321		else {
3322		    I32 lastparen = *PL_reglastparen;
3323		    I32 unwind1;
3324		    re_unwind_branch_t *uw;
3325
3326		    /* Put unwinding data on stack */
3327		    unwind1 = SSNEWt(1,re_unwind_branch_t);
3328		    uw = SSPTRt(unwind1,re_unwind_branch_t);
3329		    uw->prev = unwind;
3330		    unwind = unwind1;
3331		    uw->type = ((c1 == BRANCH)
3332				? RE_UNWIND_BRANCH
3333				: RE_UNWIND_BRANCHJ);
3334		    uw->lastparen = lastparen;
3335		    uw->next = next;
3336		    uw->locinput = locinput;
3337		    uw->nextchr = nextchr;
3338#ifdef DEBUGGING
3339		    uw->regindent = ++PL_regindent;
3340#endif
3341
3342		    REGCP_SET(uw->lastcp);
3343
3344		    /* Now go into the first branch */
3345		    next = inner;
3346		}
3347	    }
3348	    break;
3349	case MINMOD:
3350	    minmod = 1;
3351	    break;
3352	case CURLYM:
3353	{
3354	    I32 l = 0;
3355	    CHECKPOINT lastcp;
3356
3357	    /* We suppose that the next guy does not need
3358	       backtracking: in particular, it is of constant length,
3359	       and has no parenths to influence future backrefs. */
3360	    ln = ARG1(scan);  /* min to match */
3361	    n  = ARG2(scan);  /* max to match */
3362	    paren = scan->flags;
3363	    if (paren) {
3364		if (paren > PL_regsize)
3365		    PL_regsize = paren;
3366		if (paren > (I32)*PL_reglastparen)
3367		    *PL_reglastparen = paren;
3368	    }
3369	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3370	    if (paren)
3371		scan += NEXT_OFF(scan); /* Skip former OPEN. */
3372	    PL_reginput = locinput;
3373	    if (minmod) {
3374		minmod = 0;
3375		if (ln && regrepeat_hard(scan, ln, &l) < ln)
3376		    sayNO;
3377		/* if we matched something zero-length we don't need to
3378		   backtrack - capturing parens are already defined, so
3379		   the caveat in the maximal case doesn't apply
3380
3381		   XXXX if ln == 0, we can redo this check first time
3382		   through the following loop
3383		*/
3384		if (ln && l == 0)
3385		    n = ln;	/* don't backtrack */
3386		locinput = PL_reginput;
3387		if (HAS_TEXT(next) || JUMPABLE(next)) {
3388		    regnode *text_node = next;
3389
3390		    if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3391
3392		    if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3393		    else {
3394			if (PL_regkind[(U8)OP(text_node)] == REF) {
3395			    c1 = c2 = -1000;
3396			    goto assume_ok_MM;
3397			}
3398			else { c1 = (U8)*STRING(text_node); }
3399			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3400			    c2 = PL_fold[c1];
3401			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3402			    c2 = PL_fold_locale[c1];
3403			else
3404			    c2 = c1;
3405		    }
3406		}
3407		else
3408		    c1 = c2 = -1000;
3409	    assume_ok_MM:
3410		REGCP_SET(lastcp);
3411		/* This may be improved if l == 0.  */
3412		while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3413		    /* If it could work, try it. */
3414		    if (c1 == -1000 ||
3415			UCHARAT(PL_reginput) == c1 ||
3416			UCHARAT(PL_reginput) == c2)
3417		    {
3418			if (paren) {
3419			    if (ln) {
3420				PL_regstartp[paren] =
3421				    HOPc(PL_reginput, -l) - PL_bostr;
3422				PL_regendp[paren] = PL_reginput - PL_bostr;
3423			    }
3424			    else
3425				PL_regendp[paren] = -1;
3426			}
3427			if (regmatch(next))
3428			    sayYES;
3429			REGCP_UNWIND(lastcp);
3430		    }
3431		    /* Couldn't or didn't -- move forward. */
3432		    PL_reginput = locinput;
3433		    if (regrepeat_hard(scan, 1, &l)) {
3434			ln++;
3435			locinput = PL_reginput;
3436		    }
3437		    else
3438			sayNO;
3439		}
3440	    }
3441	    else {
3442		n = regrepeat_hard(scan, n, &l);
3443		/* if we matched something zero-length we don't need to
3444		   backtrack, unless the minimum count is zero and we
3445		   are capturing the result - in that case the capture
3446		   being defined or not may affect later execution
3447		*/
3448		if (n != 0 && l == 0 && !(paren && ln == 0))
3449		    ln = n;	/* don't backtrack */
3450		locinput = PL_reginput;
3451		DEBUG_r(
3452		    PerlIO_printf(Perl_debug_log,
3453				  "%*s  matched %"IVdf" times, len=%"IVdf"...\n",
3454				  (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3455				  (IV) n, (IV)l)
3456		    );
3457		if (n >= ln) {
3458		    if (HAS_TEXT(next) || JUMPABLE(next)) {
3459			regnode *text_node = next;
3460
3461			if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3462
3463			if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3464			else {
3465			    if (PL_regkind[(U8)OP(text_node)] == REF) {
3466				c1 = c2 = -1000;
3467				goto assume_ok_REG;
3468			    }
3469			    else { c1 = (U8)*STRING(text_node); }
3470
3471			    if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3472				c2 = PL_fold[c1];
3473			    else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3474				c2 = PL_fold_locale[c1];
3475			    else
3476				c2 = c1;
3477			}
3478		    }
3479		    else
3480			c1 = c2 = -1000;
3481		}
3482	    assume_ok_REG:
3483		REGCP_SET(lastcp);
3484		while (n >= ln) {
3485		    /* If it could work, try it. */
3486		    if (c1 == -1000 ||
3487			UCHARAT(PL_reginput) == c1 ||
3488			UCHARAT(PL_reginput) == c2)
3489		    {
3490			DEBUG_r(
3491				PerlIO_printf(Perl_debug_log,
3492					      "%*s  trying tail with n=%"IVdf"...\n",
3493					      (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3494			    );
3495			if (paren) {
3496			    if (n) {
3497				PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3498				PL_regendp[paren] = PL_reginput - PL_bostr;
3499			    }
3500			    else
3501				PL_regendp[paren] = -1;
3502			}
3503			if (regmatch(next))
3504			    sayYES;
3505			REGCP_UNWIND(lastcp);
3506		    }
3507		    /* Couldn't or didn't -- back up. */
3508		    n--;
3509		    locinput = HOPc(locinput, -l);
3510		    PL_reginput = locinput;
3511		}
3512	    }
3513	    sayNO;
3514	    break;
3515	}
3516	case CURLYN:
3517	    paren = scan->flags;	/* Which paren to set */
3518	    if (paren > PL_regsize)
3519		PL_regsize = paren;
3520	    if (paren > (I32)*PL_reglastparen)
3521		*PL_reglastparen = paren;
3522	    ln = ARG1(scan);  /* min to match */
3523	    n  = ARG2(scan);  /* max to match */
3524            scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3525	    goto repeat;
3526	case CURLY:
3527	    paren = 0;
3528	    ln = ARG1(scan);  /* min to match */
3529	    n  = ARG2(scan);  /* max to match */
3530	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3531	    goto repeat;
3532	case STAR:
3533	    ln = 0;
3534	    n = REG_INFTY;
3535	    scan = NEXTOPER(scan);
3536	    paren = 0;
3537	    goto repeat;
3538	case PLUS:
3539	    ln = 1;
3540	    n = REG_INFTY;
3541	    scan = NEXTOPER(scan);
3542	    paren = 0;
3543	  repeat:
3544	    /*
3545	    * Lookahead to avoid useless match attempts
3546	    * when we know what character comes next.
3547	    */
3548
3549	    /*
3550	    * Used to only do .*x and .*?x, but now it allows
3551	    * for )'s, ('s and (?{ ... })'s to be in the way
3552	    * of the quantifier and the EXACT-like node.  -- japhy
3553	    */
3554
3555	    if (HAS_TEXT(next) || JUMPABLE(next)) {
3556		U8 *s;
3557		regnode *text_node = next;
3558
3559		if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3560
3561		if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
3562		else {
3563		    if (PL_regkind[(U8)OP(text_node)] == REF) {
3564			c1 = c2 = -1000;
3565			goto assume_ok_easy;
3566		    }
3567		    else { s = (U8*)STRING(text_node); }
3568
3569		    if (!UTF) {
3570			c2 = c1 = *s;
3571			if (OP(text_node) == EXACTF || OP(text_node) == REFF)
3572			    c2 = PL_fold[c1];
3573			else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3574			    c2 = PL_fold_locale[c1];
3575		    }
3576		    else { /* UTF */
3577			if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
3578			     STRLEN ulen1, ulen2;
3579			     U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3580			     U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
3581
3582			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3583			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3584
3585			     c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
3586						 ckWARN(WARN_UTF8) ?
3587						 0 : UTF8_ALLOW_ANY);
3588			     c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
3589						 ckWARN(WARN_UTF8) ?
3590						 0 : UTF8_ALLOW_ANY);
3591			}
3592			else {
3593			    c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
3594						     ckWARN(WARN_UTF8) ?
3595						     0 : UTF8_ALLOW_ANY);
3596			}
3597		    }
3598		}
3599	    }
3600	    else
3601		c1 = c2 = -1000;
3602	assume_ok_easy:
3603	    PL_reginput = locinput;
3604	    if (minmod) {
3605		CHECKPOINT lastcp;
3606		minmod = 0;
3607		if (ln && regrepeat(scan, ln) < ln)
3608		    sayNO;
3609		locinput = PL_reginput;
3610		REGCP_SET(lastcp);
3611		if (c1 != -1000) {
3612		    char *e; /* Should not check after this */
3613		    char *old = locinput;
3614		    int count = 0;
3615
3616		    if  (n == REG_INFTY) {
3617			e = PL_regeol - 1;
3618			if (do_utf8)
3619			    while (UTF8_IS_CONTINUATION(*(U8*)e))
3620				e--;
3621		    }
3622		    else if (do_utf8) {
3623			int m = n - ln;
3624			for (e = locinput;
3625			     m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3626			    e += UTF8SKIP(e);
3627		    }
3628		    else {
3629			e = locinput + n - ln;
3630			if (e >= PL_regeol)
3631			    e = PL_regeol - 1;
3632		    }
3633		    while (1) {
3634			/* Find place 'next' could work */
3635			if (!do_utf8) {
3636			    if (c1 == c2) {
3637				while (locinput <= e &&
3638				       UCHARAT(locinput) != c1)
3639				    locinput++;
3640			    } else {
3641				while (locinput <= e
3642				       && UCHARAT(locinput) != c1
3643				       && UCHARAT(locinput) != c2)
3644				    locinput++;
3645			    }
3646			    count = locinput - old;
3647			}
3648			else {
3649			    STRLEN len;
3650			    if (c1 == c2) {
3651				/* count initialised to
3652				 * utf8_distance(old, locinput) */
3653				while (locinput <= e &&
3654				       utf8n_to_uvchr((U8*)locinput,
3655						      UTF8_MAXLEN, &len,
3656						      ckWARN(WARN_UTF8) ?
3657						      0 : UTF8_ALLOW_ANY) != (UV)c1) {
3658				    locinput += len;
3659				    count++;
3660				}
3661			    } else {
3662				/* count initialised to
3663				 * utf8_distance(old, locinput) */
3664				while (locinput <= e) {
3665				    UV c = utf8n_to_uvchr((U8*)locinput,
3666							  UTF8_MAXLEN, &len,
3667							  ckWARN(WARN_UTF8) ?
3668							  0 : UTF8_ALLOW_ANY);
3669				    if (c == (UV)c1 || c == (UV)c2)
3670					break;
3671				    locinput += len;
3672				    count++;
3673				}
3674			    }
3675			}
3676			if (locinput > e)
3677			    sayNO;
3678			/* PL_reginput == old now */
3679			if (locinput != old) {
3680			    ln = 1;	/* Did some */
3681			    if (regrepeat(scan, count) < count)
3682				sayNO;
3683			}
3684			/* PL_reginput == locinput now */
3685			TRYPAREN(paren, ln, locinput);
3686			PL_reginput = locinput;	/* Could be reset... */
3687			REGCP_UNWIND(lastcp);
3688			/* Couldn't or didn't -- move forward. */
3689			old = locinput;
3690			if (do_utf8)
3691			    locinput += UTF8SKIP(locinput);
3692			else
3693			    locinput++;
3694			count = 1;
3695		    }
3696		}
3697		else
3698		while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3699		    UV c;
3700		    if (c1 != -1000) {
3701			if (do_utf8)
3702			    c = utf8n_to_uvchr((U8*)PL_reginput,
3703					       UTF8_MAXLEN, 0,
3704					       ckWARN(WARN_UTF8) ?
3705					       0 : UTF8_ALLOW_ANY);
3706			else
3707			    c = UCHARAT(PL_reginput);
3708			/* If it could work, try it. */
3709		        if (c == (UV)c1 || c == (UV)c2)
3710		        {
3711			    TRYPAREN(paren, ln, PL_reginput);
3712			    REGCP_UNWIND(lastcp);
3713		        }
3714		    }
3715		    /* If it could work, try it. */
3716		    else if (c1 == -1000)
3717		    {
3718			TRYPAREN(paren, ln, PL_reginput);
3719			REGCP_UNWIND(lastcp);
3720		    }
3721		    /* Couldn't or didn't -- move forward. */
3722		    PL_reginput = locinput;
3723		    if (regrepeat(scan, 1)) {
3724			ln++;
3725			locinput = PL_reginput;
3726		    }
3727		    else
3728			sayNO;
3729		}
3730	    }
3731	    else {
3732		CHECKPOINT lastcp;
3733		n = regrepeat(scan, n);
3734		locinput = PL_reginput;
3735		if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3736		    ((!PL_multiline && OP(next) != MEOL) ||
3737			OP(next) == SEOL || OP(next) == EOS))
3738		{
3739		    ln = n;			/* why back off? */
3740		    /* ...because $ and \Z can match before *and* after
3741		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
3742		       We should back off by one in this case. */
3743		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3744			ln--;
3745		}
3746		REGCP_SET(lastcp);
3747		if (paren) {
3748		    UV c = 0;
3749		    while (n >= ln) {
3750			if (c1 != -1000) {
3751			    if (do_utf8)
3752				c = utf8n_to_uvchr((U8*)PL_reginput,
3753						   UTF8_MAXLEN, 0,
3754						   ckWARN(WARN_UTF8) ?
3755						   0 : UTF8_ALLOW_ANY);
3756			    else
3757				c = UCHARAT(PL_reginput);
3758			}
3759			/* If it could work, try it. */
3760			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3761			    {
3762				TRYPAREN(paren, n, PL_reginput);
3763				REGCP_UNWIND(lastcp);
3764			    }
3765			/* Couldn't or didn't -- back up. */
3766			n--;
3767			PL_reginput = locinput = HOPc(locinput, -1);
3768		    }
3769		}
3770		else {
3771		    UV c = 0;
3772		    while (n >= ln) {
3773			if (c1 != -1000) {
3774			    if (do_utf8)
3775				c = utf8n_to_uvchr((U8*)PL_reginput,
3776						   UTF8_MAXLEN, 0,
3777						   ckWARN(WARN_UTF8) ?
3778						   0 : UTF8_ALLOW_ANY);
3779			    else
3780				c = UCHARAT(PL_reginput);
3781			}
3782			/* If it could work, try it. */
3783			if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
3784			    {
3785				TRYPAREN(paren, n, PL_reginput);
3786				REGCP_UNWIND(lastcp);
3787			    }
3788			/* Couldn't or didn't -- back up. */
3789			n--;
3790			PL_reginput = locinput = HOPc(locinput, -1);
3791		    }
3792		}
3793	    }
3794	    sayNO;
3795	    break;
3796	case END:
3797	    if (PL_reg_call_cc) {
3798		re_cc_state *cur_call_cc = PL_reg_call_cc;
3799		CURCUR *cctmp = PL_regcc;
3800		regexp *re = PL_reg_re;
3801		CHECKPOINT cp, lastcp;
3802
3803		cp = regcppush(0);	/* Save *all* the positions. */
3804		REGCP_SET(lastcp);
3805		regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3806						    the caller. */
3807		PL_reginput = locinput;	/* Make position available to
3808					   the callcc. */
3809		cache_re(PL_reg_call_cc->re);
3810		PL_regcc = PL_reg_call_cc->cc;
3811		PL_reg_call_cc = PL_reg_call_cc->prev;
3812		if (regmatch(cur_call_cc->node)) {
3813		    PL_reg_call_cc = cur_call_cc;
3814		    regcpblow(cp);
3815		    sayYES;
3816		}
3817		REGCP_UNWIND(lastcp);
3818		regcppop();
3819		PL_reg_call_cc = cur_call_cc;
3820		PL_regcc = cctmp;
3821		PL_reg_re = re;
3822		cache_re(re);
3823
3824		DEBUG_r(
3825		    PerlIO_printf(Perl_debug_log,
3826				  "%*s  continuation failed...\n",
3827				  REPORT_CODE_OFF+PL_regindent*2, "")
3828		    );
3829		sayNO_SILENT;
3830	    }
3831	    if (locinput < PL_regtill) {
3832		DEBUG_r(PerlIO_printf(Perl_debug_log,
3833				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3834				      PL_colors[4],
3835				      (long)(locinput - PL_reg_starttry),
3836				      (long)(PL_regtill - PL_reg_starttry),
3837				      PL_colors[5]));
3838		sayNO_FINAL;		/* Cannot match: too short. */
3839	    }
3840	    PL_reginput = locinput;	/* put where regtry can find it */
3841	    sayYES_FINAL;		/* Success! */
3842	case SUCCEED:
3843	    PL_reginput = locinput;	/* put where regtry can find it */
3844	    sayYES_LOUD;		/* Success! */
3845	case SUSPEND:
3846	    n = 1;
3847	    PL_reginput = locinput;
3848	    goto do_ifmatch;
3849	case UNLESSM:
3850	    n = 0;
3851	    if (scan->flags) {
3852		s = HOPBACKc(locinput, scan->flags);
3853		if (!s)
3854		    goto say_yes;
3855		PL_reginput = s;
3856	    }
3857	    else
3858		PL_reginput = locinput;
3859	    PL_reg_flags ^= RF_false;
3860	    goto do_ifmatch;
3861	case IFMATCH:
3862	    n = 1;
3863	    if (scan->flags) {
3864		s = HOPBACKc(locinput, scan->flags);
3865		if (!s)
3866		    goto say_no;
3867		PL_reginput = s;
3868	    }
3869	    else
3870		PL_reginput = locinput;
3871
3872	  do_ifmatch:
3873	    inner = NEXTOPER(NEXTOPER(scan));
3874	    if (regmatch(inner) != n) {
3875		if (n == 0)
3876		    PL_reg_flags ^= RF_false;
3877	      say_no:
3878		if (logical) {
3879		    logical = 0;
3880		    sw = 0;
3881		    goto do_longjump;
3882		}
3883		else
3884		    sayNO;
3885	    }
3886	    if (n == 0)
3887		PL_reg_flags ^= RF_false;
3888	  say_yes:
3889	    if (logical) {
3890		logical = 0;
3891		sw = 1;
3892	    }
3893	    if (OP(scan) == SUSPEND) {
3894		locinput = PL_reginput;
3895		nextchr = UCHARAT(locinput);
3896	    }
3897	    /* FALL THROUGH. */
3898	case LONGJMP:
3899	  do_longjump:
3900	    next = scan + ARG(scan);
3901	    if (next == scan)
3902		next = NULL;
3903	    break;
3904	default:
3905	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3906			  PTR2UV(scan), OP(scan));
3907	    Perl_croak(aTHX_ "regexp memory corruption");
3908	}
3909      reenter:
3910	scan = next;
3911    }
3912
3913    /*
3914    * We get here only if there's trouble -- normally "case END" is
3915    * the terminating point.
3916    */
3917    Perl_croak(aTHX_ "corrupted regexp pointers");
3918    /*NOTREACHED*/
3919    sayNO;
3920
3921yes_loud:
3922    DEBUG_r(
3923	PerlIO_printf(Perl_debug_log,
3924		      "%*s  %scould match...%s\n",
3925		      REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3926	);
3927    goto yes;
3928yes_final:
3929    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3930			  PL_colors[4],PL_colors[5]));
3931yes:
3932#ifdef DEBUGGING
3933    PL_regindent--;
3934#endif
3935
3936#if 0					/* Breaks $^R */
3937    if (unwind)
3938	regcpblow(firstcp);
3939#endif
3940    return 1;
3941
3942no:
3943    DEBUG_r(
3944	PerlIO_printf(Perl_debug_log,
3945		      "%*s  %sfailed...%s\n",
3946		      REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3947	);
3948    goto do_no;
3949no_final:
3950do_no:
3951    if (unwind) {
3952	re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3953
3954	switch (uw->type) {
3955	case RE_UNWIND_BRANCH:
3956	case RE_UNWIND_BRANCHJ:
3957	{
3958	    re_unwind_branch_t *uwb = &(uw->branch);
3959	    I32 lastparen = uwb->lastparen;
3960
3961	    REGCP_UNWIND(uwb->lastcp);
3962	    for (n = *PL_reglastparen; n > lastparen; n--)
3963		PL_regendp[n] = -1;
3964	    *PL_reglastparen = n;
3965	    scan = next = uwb->next;
3966	    if ( !scan ||
3967		 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3968			      ? BRANCH : BRANCHJ) ) {		/* Failure */
3969		unwind = uwb->prev;
3970#ifdef DEBUGGING
3971		PL_regindent--;
3972#endif
3973		goto do_no;
3974	    }
3975	    /* Have more choice yet.  Reuse the same uwb.  */
3976	    /*SUPPRESS 560*/
3977	    if ((n = (uwb->type == RE_UNWIND_BRANCH
3978		      ? NEXT_OFF(next) : ARG(next))))
3979		next += n;
3980	    else
3981		next = NULL;	/* XXXX Needn't unwinding in this case... */
3982	    uwb->next = next;
3983	    next = NEXTOPER(scan);
3984	    if (uwb->type == RE_UNWIND_BRANCHJ)
3985		next = NEXTOPER(next);
3986	    locinput = uwb->locinput;
3987	    nextchr = uwb->nextchr;
3988#ifdef DEBUGGING
3989	    PL_regindent = uwb->regindent;
3990#endif
3991
3992	    goto reenter;
3993	}
3994	/* NOT REACHED */
3995	default:
3996	    Perl_croak(aTHX_ "regexp unwind memory corruption");
3997	}
3998	/* NOT REACHED */
3999    }
4000#ifdef DEBUGGING
4001    PL_regindent--;
4002#endif
4003    return 0;
4004}
4005
4006/*
4007 - regrepeat - repeatedly match something simple, report how many
4008 */
4009/*
4010 * [This routine now assumes that it will only match on things of length 1.
4011 * That was true before, but now we assume scan - reginput is the count,
4012 * rather than incrementing count on every character.  [Er, except utf8.]]
4013 */
4014STATIC I32
4015S_regrepeat(pTHX_ regnode *p, I32 max)
4016{
4017    register char *scan;
4018    register I32 c;
4019    register char *loceol = PL_regeol;
4020    register I32 hardcount = 0;
4021    register bool do_utf8 = PL_reg_match_utf8;
4022
4023    scan = PL_reginput;
4024    if (max == REG_INFTY)
4025	max = I32_MAX;
4026    else if (max < loceol - scan)
4027      loceol = scan + max;
4028    switch (OP(p)) {
4029    case REG_ANY:
4030	if (do_utf8) {
4031	    loceol = PL_regeol;
4032	    while (scan < loceol && hardcount < max && *scan != '\n') {
4033		scan += UTF8SKIP(scan);
4034		hardcount++;
4035	    }
4036	} else {
4037	    while (scan < loceol && *scan != '\n')
4038		scan++;
4039	}
4040	break;
4041    case SANY:
4042        if (do_utf8) {
4043	    loceol = PL_regeol;
4044	    while (scan < loceol && hardcount < max) {
4045	        scan += UTF8SKIP(scan);
4046		hardcount++;
4047	    }
4048	}
4049	else
4050	    scan = loceol;
4051	break;
4052    case CANY:
4053	scan = loceol;
4054	break;
4055    case EXACT:		/* length of string is 1 */
4056	c = (U8)*STRING(p);
4057	while (scan < loceol && UCHARAT(scan) == c)
4058	    scan++;
4059	break;
4060    case EXACTF:	/* length of string is 1 */
4061	c = (U8)*STRING(p);
4062	while (scan < loceol &&
4063	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4064	    scan++;
4065	break;
4066    case EXACTFL:	/* length of string is 1 */
4067	PL_reg_flags |= RF_tainted;
4068	c = (U8)*STRING(p);
4069	while (scan < loceol &&
4070	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4071	    scan++;
4072	break;
4073    case ANYOF:
4074	if (do_utf8) {
4075	    loceol = PL_regeol;
4076	    while (hardcount < max && scan < loceol &&
4077		   reginclass(p, (U8*)scan, 0, do_utf8)) {
4078		scan += UTF8SKIP(scan);
4079		hardcount++;
4080	    }
4081	} else {
4082	    while (scan < loceol && REGINCLASS(p, (U8*)scan))
4083		scan++;
4084	}
4085	break;
4086    case ALNUM:
4087	if (do_utf8) {
4088	    loceol = PL_regeol;
4089	    LOAD_UTF8_CHARCLASS(alnum,"a");
4090	    while (hardcount < max && scan < loceol &&
4091		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4092		scan += UTF8SKIP(scan);
4093		hardcount++;
4094	    }
4095	} else {
4096	    while (scan < loceol && isALNUM(*scan))
4097		scan++;
4098	}
4099	break;
4100    case ALNUML:
4101	PL_reg_flags |= RF_tainted;
4102	if (do_utf8) {
4103	    loceol = PL_regeol;
4104	    while (hardcount < max && scan < loceol &&
4105		   isALNUM_LC_utf8((U8*)scan)) {
4106		scan += UTF8SKIP(scan);
4107		hardcount++;
4108	    }
4109	} else {
4110	    while (scan < loceol && isALNUM_LC(*scan))
4111		scan++;
4112	}
4113	break;
4114    case NALNUM:
4115	if (do_utf8) {
4116	    loceol = PL_regeol;
4117	    LOAD_UTF8_CHARCLASS(alnum,"a");
4118	    while (hardcount < max && scan < loceol &&
4119		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
4120		scan += UTF8SKIP(scan);
4121		hardcount++;
4122	    }
4123	} else {
4124	    while (scan < loceol && !isALNUM(*scan))
4125		scan++;
4126	}
4127	break;
4128    case NALNUML:
4129	PL_reg_flags |= RF_tainted;
4130	if (do_utf8) {
4131	    loceol = PL_regeol;
4132	    while (hardcount < max && scan < loceol &&
4133		   !isALNUM_LC_utf8((U8*)scan)) {
4134		scan += UTF8SKIP(scan);
4135		hardcount++;
4136	    }
4137	} else {
4138	    while (scan < loceol && !isALNUM_LC(*scan))
4139		scan++;
4140	}
4141	break;
4142    case SPACE:
4143	if (do_utf8) {
4144	    loceol = PL_regeol;
4145	    LOAD_UTF8_CHARCLASS(space," ");
4146	    while (hardcount < max && scan < loceol &&
4147		   (*scan == ' ' ||
4148		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4149		scan += UTF8SKIP(scan);
4150		hardcount++;
4151	    }
4152	} else {
4153	    while (scan < loceol && isSPACE(*scan))
4154		scan++;
4155	}
4156	break;
4157    case SPACEL:
4158	PL_reg_flags |= RF_tainted;
4159	if (do_utf8) {
4160	    loceol = PL_regeol;
4161	    while (hardcount < max && scan < loceol &&
4162		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4163		scan += UTF8SKIP(scan);
4164		hardcount++;
4165	    }
4166	} else {
4167	    while (scan < loceol && isSPACE_LC(*scan))
4168		scan++;
4169	}
4170	break;
4171    case NSPACE:
4172	if (do_utf8) {
4173	    loceol = PL_regeol;
4174	    LOAD_UTF8_CHARCLASS(space," ");
4175	    while (hardcount < max && scan < loceol &&
4176		   !(*scan == ' ' ||
4177		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
4178		scan += UTF8SKIP(scan);
4179		hardcount++;
4180	    }
4181	} else {
4182	    while (scan < loceol && !isSPACE(*scan))
4183		scan++;
4184	    break;
4185	}
4186    case NSPACEL:
4187	PL_reg_flags |= RF_tainted;
4188	if (do_utf8) {
4189	    loceol = PL_regeol;
4190	    while (hardcount < max && scan < loceol &&
4191		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4192		scan += UTF8SKIP(scan);
4193		hardcount++;
4194	    }
4195	} else {
4196	    while (scan < loceol && !isSPACE_LC(*scan))
4197		scan++;
4198	}
4199	break;
4200    case DIGIT:
4201	if (do_utf8) {
4202	    loceol = PL_regeol;
4203	    LOAD_UTF8_CHARCLASS(digit,"0");
4204	    while (hardcount < max && scan < loceol &&
4205		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4206		scan += UTF8SKIP(scan);
4207		hardcount++;
4208	    }
4209	} else {
4210	    while (scan < loceol && isDIGIT(*scan))
4211		scan++;
4212	}
4213	break;
4214    case NDIGIT:
4215	if (do_utf8) {
4216	    loceol = PL_regeol;
4217	    LOAD_UTF8_CHARCLASS(digit,"0");
4218	    while (hardcount < max && scan < loceol &&
4219		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
4220		scan += UTF8SKIP(scan);
4221		hardcount++;
4222	    }
4223	} else {
4224	    while (scan < loceol && !isDIGIT(*scan))
4225		scan++;
4226	}
4227	break;
4228    default:		/* Called on something of 0 width. */
4229	break;		/* So match right here or not at all. */
4230    }
4231
4232    if (hardcount)
4233	c = hardcount;
4234    else
4235	c = scan - PL_reginput;
4236    PL_reginput = scan;
4237
4238    DEBUG_r(
4239	{
4240		SV *prop = sv_newmortal();
4241
4242		regprop(prop, p);
4243		PerlIO_printf(Perl_debug_log,
4244			      "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
4245			      REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
4246	});
4247
4248    return(c);
4249}
4250
4251/*
4252 - regrepeat_hard - repeatedly match something, report total lenth and length
4253 *
4254 * The repeater is supposed to have constant length.
4255 */
4256
4257STATIC I32
4258S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4259{
4260    register char *scan = Nullch;
4261    register char *start;
4262    register char *loceol = PL_regeol;
4263    I32 l = 0;
4264    I32 count = 0, res = 1;
4265
4266    if (!max)
4267	return 0;
4268
4269    start = PL_reginput;
4270    if (PL_reg_match_utf8) {
4271	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4272	    if (!count++) {
4273		l = 0;
4274		while (start < PL_reginput) {
4275		    l++;
4276		    start += UTF8SKIP(start);
4277		}
4278		*lp = l;
4279		if (l == 0)
4280		    return max;
4281	    }
4282	    if (count == max)
4283		return count;
4284	}
4285    }
4286    else {
4287	while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4288	    if (!count++) {
4289		*lp = l = PL_reginput - start;
4290		if (max != REG_INFTY && l*max < loceol - scan)
4291		    loceol = scan + l*max;
4292		if (l == 0)
4293		    return max;
4294	    }
4295	}
4296    }
4297    if (!res)
4298	PL_reginput = scan;
4299
4300    return count;
4301}
4302
4303/*
4304- regclass_swash - prepare the utf8 swash
4305*/
4306
4307SV *
4308Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
4309{
4310    SV *sw  = NULL;
4311    SV *si  = NULL;
4312    SV *alt = NULL;
4313
4314    if (PL_regdata && PL_regdata->count) {
4315	U32 n = ARG(node);
4316
4317	if (PL_regdata->what[n] == 's') {
4318	    SV *rv = (SV*)PL_regdata->data[n];
4319	    AV *av = (AV*)SvRV((SV*)rv);
4320	    SV **ary = AvARRAY(av);
4321	    SV **a, **b;
4322
4323	    /* See the end of regcomp.c:S_reglass() for
4324	     * documentation of these array elements. */
4325
4326	    si = *ary;
4327	    a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
4328	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4329
4330	    if (a)
4331		sw = *a;
4332	    else if (si && doinit) {
4333		sw = swash_init("utf8", "", si, 1, 0);
4334		(void)av_store(av, 1, sw);
4335	    }
4336	    if (b)
4337	        alt = *b;
4338	}
4339    }
4340
4341    if (listsvp)
4342	*listsvp = si;
4343    if (altsvp)
4344	*altsvp  = alt;
4345
4346    return sw;
4347}
4348
4349/*
4350 - reginclass - determine if a character falls into a character class
4351
4352  The n is the ANYOF regnode, the p is the target string, lenp
4353  is pointer to the maximum length of how far to go in the p
4354  (if the lenp is zero, UTF8SKIP(p) is used),
4355  do_utf8 tells whether the target string is in UTF-8.
4356
4357 */
4358
4359STATIC bool
4360S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
4361{
4362    char flags = ANYOF_FLAGS(n);
4363    bool match = FALSE;
4364    UV c = *p;
4365    STRLEN len = 0;
4366    STRLEN plen;
4367
4368    if (do_utf8 && !UTF8_IS_INVARIANT(c))
4369	 c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
4370			    ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
4371
4372    plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
4373    if (do_utf8 || (flags & ANYOF_UNICODE)) {
4374        if (lenp)
4375	    *lenp = 0;
4376	if (do_utf8 && !ANYOF_RUNTIME(n)) {
4377	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4378		match = TRUE;
4379	}
4380	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
4381	    match = TRUE;
4382	if (!match) {
4383	    AV *av;
4384	    SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
4385
4386	    if (sw) {
4387		if (swash_fetch(sw, p, do_utf8))
4388		    match = TRUE;
4389		else if (flags & ANYOF_FOLD) {
4390		    if (!match && lenp && av) {
4391		        I32 i;
4392
4393			for (i = 0; i <= av_len(av); i++) {
4394			    SV* sv = *av_fetch(av, i, FALSE);
4395			    STRLEN len;
4396			    char *s = SvPV(sv, len);
4397
4398			    if (len <= plen && memEQ(s, (char*)p, len)) {
4399			        *lenp = len;
4400				match = TRUE;
4401				break;
4402			    }
4403			}
4404		    }
4405		    if (!match) {
4406		        U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4407			STRLEN tmplen;
4408
4409		        to_utf8_fold(p, tmpbuf, &tmplen);
4410			if (swash_fetch(sw, tmpbuf, do_utf8))
4411			    match = TRUE;
4412		    }
4413		}
4414	    }
4415	}
4416	if (match && lenp && *lenp == 0)
4417	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
4418    }
4419    if (!match && c < 256) {
4420	if (ANYOF_BITMAP_TEST(n, c))
4421	    match = TRUE;
4422	else if (flags & ANYOF_FOLD) {
4423	    U8 f;
4424
4425	    if (flags & ANYOF_LOCALE) {
4426		PL_reg_flags |= RF_tainted;
4427		f = PL_fold_locale[c];
4428	    }
4429	    else
4430		f = PL_fold[c];
4431	    if (f != c && ANYOF_BITMAP_TEST(n, f))
4432		match = TRUE;
4433	}
4434
4435	if (!match && (flags & ANYOF_CLASS)) {
4436	    PL_reg_flags |= RF_tainted;
4437	    if (
4438		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
4439		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
4440		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
4441		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
4442		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
4443		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
4444		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
4445		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4446		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
4447		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
4448		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
4449		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
4450		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
4451		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
4452		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
4453		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
4454		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
4455		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
4456		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
4457		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
4458		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
4459		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
4460		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
4461		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
4462		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
4463		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
4464		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
4465		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
4466		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
4467		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
4468		) /* How's that for a conditional? */
4469	    {
4470		match = TRUE;
4471	    }
4472	}
4473    }
4474
4475    return (flags & ANYOF_INVERT) ? !match : match;
4476}
4477
4478STATIC U8 *
4479S_reghop(pTHX_ U8 *s, I32 off)
4480{
4481    return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4482}
4483
4484STATIC U8 *
4485S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
4486{
4487    if (off >= 0) {
4488	while (off-- && s < lim) {
4489	    /* XXX could check well-formedness here */
4490	    s += UTF8SKIP(s);
4491	}
4492    }
4493    else {
4494	while (off++) {
4495	    if (s > lim) {
4496		s--;
4497		if (UTF8_IS_CONTINUED(*s)) {
4498		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4499			s--;
4500		}
4501		/* XXX could check well-formedness here */
4502	    }
4503	}
4504    }
4505    return s;
4506}
4507
4508STATIC U8 *
4509S_reghopmaybe(pTHX_ U8 *s, I32 off)
4510{
4511    return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4512}
4513
4514STATIC U8 *
4515S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
4516{
4517    if (off >= 0) {
4518	while (off-- && s < lim) {
4519	    /* XXX could check well-formedness here */
4520	    s += UTF8SKIP(s);
4521	}
4522	if (off >= 0)
4523	    return 0;
4524    }
4525    else {
4526	while (off++) {
4527	    if (s > lim) {
4528		s--;
4529		if (UTF8_IS_CONTINUED(*s)) {
4530		    while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
4531			s--;
4532		}
4533		/* XXX could check well-formedness here */
4534	    }
4535	    else
4536		break;
4537	}
4538	if (off <= 0)
4539	    return 0;
4540    }
4541    return s;
4542}
4543
4544static void
4545restore_pos(pTHX_ void *arg)
4546{
4547    if (PL_reg_eval_set) {
4548	if (PL_reg_oldsaved) {
4549	    PL_reg_re->subbeg = PL_reg_oldsaved;
4550	    PL_reg_re->sublen = PL_reg_oldsavedlen;
4551	    RX_MATCH_COPIED_on(PL_reg_re);
4552	}
4553	PL_reg_magic->mg_len = PL_reg_oldpos;
4554	PL_reg_eval_set = 0;
4555	PL_curpm = PL_reg_oldcurpm;
4556    }
4557}
4558
4559STATIC void
4560S_to_utf8_substr(pTHX_ register regexp *prog)
4561{
4562    SV* sv;
4563    if (prog->float_substr && !prog->float_utf8) {
4564	prog->float_utf8 = sv = NEWSV(117, 0);
4565	SvSetSV(sv, prog->float_substr);
4566	sv_utf8_upgrade(sv);
4567	if (SvTAIL(prog->float_substr))
4568	    SvTAIL_on(sv);
4569	if (prog->float_substr == prog->check_substr)
4570	    prog->check_utf8 = sv;
4571    }
4572    if (prog->anchored_substr && !prog->anchored_utf8) {
4573	prog->anchored_utf8 = sv = NEWSV(118, 0);
4574	SvSetSV(sv, prog->anchored_substr);
4575	sv_utf8_upgrade(sv);
4576	if (SvTAIL(prog->anchored_substr))
4577	    SvTAIL_on(sv);
4578	if (prog->anchored_substr == prog->check_substr)
4579	    prog->check_utf8 = sv;
4580    }
4581}
4582
4583STATIC void
4584S_to_byte_substr(pTHX_ register regexp *prog)
4585{
4586    SV* sv;
4587    if (prog->float_utf8 && !prog->float_substr) {
4588	prog->float_substr = sv = NEWSV(117, 0);
4589	SvSetSV(sv, prog->float_utf8);
4590	if (sv_utf8_downgrade(sv, TRUE)) {
4591	    if (SvTAIL(prog->float_utf8))
4592		SvTAIL_on(sv);
4593	} else {
4594	    SvREFCNT_dec(sv);
4595	    prog->float_substr = sv = &PL_sv_undef;
4596	}
4597	if (prog->float_utf8 == prog->check_utf8)
4598	    prog->check_substr = sv;
4599    }
4600    if (prog->anchored_utf8 && !prog->anchored_substr) {
4601	prog->anchored_substr = sv = NEWSV(118, 0);
4602	SvSetSV(sv, prog->anchored_utf8);
4603	if (sv_utf8_downgrade(sv, TRUE)) {
4604	    if (SvTAIL(prog->anchored_utf8))
4605		SvTAIL_on(sv);
4606	} else {
4607	    SvREFCNT_dec(sv);
4608	    prog->anchored_substr = sv = &PL_sv_undef;
4609	}
4610	if (prog->anchored_utf8 == prog->check_utf8)
4611	    prog->check_substr = sv;
4612    }
4613}
4614