regexec.c revision 1.14
1/*    regexec.c
2 */
3
4/*
5 * 	One Ring to rule them all, One Ring to find them
6 &
7 *     [p.v of _The Lord of the Rings_, opening poem]
8 *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
9 *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
10 */
11
12/* This file contains functions for executing a regular expression.  See
13 * also regcomp.c which funnily enough, contains functions for compiling
14 * a regular expression.
15 *
16 * This file is also copied at build time to ext/re/re_exec.c, where
17 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
18 * This causes the main functions to be compiled under new names and with
19 * debugging support added, which makes "use re 'debug'" work.
20 */
21
22/* NOTE: this is derived from Henry Spencer's regexp code, and should not
23 * confused with the original package (see point 3 below).  Thanks, Henry!
24 */
25
26/* Additional note: this code is very heavily munged from Henry's version
27 * in places.  In some spots I've traded clarity for efficiency, so don't
28 * blame Henry for some of the lack of readability.
29 */
30
31/* The names of the functions have been changed from regcomp and
32 * regexec to  pregcomp and pregexec in order to avoid conflicts
33 * with the POSIX routines of the same names.
34*/
35
36#ifdef PERL_EXT_RE_BUILD
37#include "re_top.h"
38#endif
39
40/*
41 * pregcomp and pregexec -- regsub and regerror are not used in perl
42 *
43 *	Copyright (c) 1986 by University of Toronto.
44 *	Written by Henry Spencer.  Not derived from licensed software.
45 *
46 *	Permission is granted to anyone to use this software for any
47 *	purpose on any computer system, and to redistribute it freely,
48 *	subject to the following restrictions:
49 *
50 *	1. The author is not responsible for the consequences of use of
51 *		this software, no matter how awful, even if they arise
52 *		from defects in it.
53 *
54 *	2. The origin of this software must not be misrepresented, either
55 *		by explicit claim or by omission.
56 *
57 *	3. Altered versions must be plainly marked as such, and must not
58 *		be misrepresented as being the original software.
59 *
60 ****    Alterations to Henry's code are...
61 ****
62 ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
63 ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
64 ****    by Larry Wall and others
65 ****
66 ****    You may distribute under the terms of either the GNU General Public
67 ****    License or the Artistic License, as specified in the README file.
68 *
69 * Beware that some of this code is subtly aware of the way operator
70 * precedence is structured in regular expressions.  Serious changes in
71 * regular-expression syntax might require a total rethink.
72 */
73#include "EXTERN.h"
74#define PERL_IN_REGEXEC_C
75#include "perl.h"
76
77#ifdef PERL_IN_XSUB_RE
78#  include "re_comp.h"
79#else
80#  include "regcomp.h"
81#endif
82
83#define RF_tainted	1		/* tainted information used? */
84#define RF_warned	2		/* warned about big count? */
85
86#define RF_utf8		8		/* Pattern contains multibyte chars? */
87
88#define UTF ((PL_reg_flags & RF_utf8) != 0)
89
90#define RS_init		1		/* eval environment created */
91#define RS_set		2		/* replsv value is set */
92
93#ifndef STATIC
94#define	STATIC	static
95#endif
96
97#define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
98
99/*
100 * Forwards.
101 */
102
103#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
105
106#define HOPc(pos,off) \
107	(char *)(PL_reg_match_utf8 \
108	    ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
109	    : (U8*)(pos + off))
110#define HOPBACKc(pos, off) \
111	(char*)(PL_reg_match_utf8\
112	    ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
113	    : (pos - off >= PL_bostr)		\
114		? (U8*)pos - off		\
115		: NULL)
116
117#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
118#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
119
120/* these are unrolled below in the CCC_TRY_XXX defined */
121#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
122    if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
123
124/* Doesn't do an assert to verify that is correct */
125#define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
126    if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)" "); LEAVE; } } STMT_END
127
128#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
129#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
130#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
131
132#define LOAD_UTF8_CHARCLASS_GCB()  /* Grapheme cluster boundaries */        \
133	LOAD_UTF8_CHARCLASS(X_begin, " ");                                  \
134	LOAD_UTF8_CHARCLASS(X_non_hangul, "A");                             \
135	/* These are utf8 constants, and not utf-ebcdic constants, so the   \
136	    * assert should likely and hopefully fail on an EBCDIC machine */ \
137	LOAD_UTF8_CHARCLASS(X_extend, "\xcc\x80"); /* U+0300 */             \
138									    \
139	/* No asserts are done for these, in case called on an early        \
140	    * Unicode version in which they map to nothing */               \
141	LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* U+0E40 "\xe0\xb9\x80" */ \
142	LOAD_UTF8_CHARCLASS_NO_CHECK(X_L);	    /* U+1100 "\xe1\x84\x80" */ \
143	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV);     /* U+AC00 "\xea\xb0\x80" */ \
144	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LVT);    /* U+AC01 "\xea\xb0\x81" */ \
145	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV_LVT_V);/* U+AC01 "\xea\xb0\x81" */\
146	LOAD_UTF8_CHARCLASS_NO_CHECK(X_T);      /* U+11A8 "\xe1\x86\xa8" */ \
147	LOAD_UTF8_CHARCLASS_NO_CHECK(X_V)       /* U+1160 "\xe1\x85\xa0" */
148
149/*
150   We dont use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
151   so that it is possible to override the option here without having to
152   rebuild the entire core. as we are required to do if we change regcomp.h
153   which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
154*/
155#if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
156#define BROKEN_UNICODE_CHARCLASS_MAPPINGS
157#endif
158
159#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
160#define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS_ALNUM()
161#define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS_SPACE()
162#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS_DIGIT()
163#define RE_utf8_perl_word   PL_utf8_alnum
164#define RE_utf8_perl_space  PL_utf8_space
165#define RE_utf8_posix_digit PL_utf8_digit
166#define perl_word  alnum
167#define perl_space space
168#define posix_digit digit
169#else
170#define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS(perl_word,"a")
171#define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS(perl_space," ")
172#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS(posix_digit,"0")
173#define RE_utf8_perl_word   PL_utf8_perl_word
174#define RE_utf8_perl_space  PL_utf8_perl_space
175#define RE_utf8_posix_digit PL_utf8_posix_digit
176#endif
177
178
179#define CCC_TRY_AFF(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                          \
180        case NAMEL:                                                              \
181            PL_reg_flags |= RF_tainted;                                                 \
182            /* FALL THROUGH */                                                          \
183        case NAME:                                                                     \
184            if (!nextchr)                                                               \
185                sayNO;                                                                  \
186            if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
187                if (!CAT2(PL_utf8_,CLASS)) {                                            \
188                    bool ok;                                                            \
189                    ENTER;                                                              \
190                    save_re_context();                                                  \
191                    ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
192                    assert(ok);                                                         \
193                    LEAVE;                                                              \
194                }                                                                       \
195                if (!(OP(scan) == NAME                                                  \
196                    ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)   \
197                    : LCFUNC_utf8((U8*)locinput)))                                      \
198                {                                                                       \
199                    sayNO;                                                              \
200                }                                                                       \
201                locinput += PL_utf8skip[nextchr];                                       \
202                nextchr = UCHARAT(locinput);                                            \
203                break;                                                                  \
204            }                                                                           \
205            if (!(OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                  \
206                sayNO;                                                                  \
207            nextchr = UCHARAT(++locinput);                                              \
208            break
209
210#define CCC_TRY_NEG(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                        \
211        case NAMEL:                                                              \
212            PL_reg_flags |= RF_tainted;                                                 \
213            /* FALL THROUGH */                                                          \
214        case NAME :                                                                     \
215            if (!nextchr && locinput >= PL_regeol)                                      \
216                sayNO;                                                                  \
217            if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
218                if (!CAT2(PL_utf8_,CLASS)) {                                            \
219                    bool ok;                                                            \
220                    ENTER;                                                              \
221                    save_re_context();                                                  \
222                    ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
223                    assert(ok);                                                         \
224                    LEAVE;                                                              \
225                }                                                                       \
226                if ((OP(scan) == NAME                                                  \
227                    ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)    \
228                    : LCFUNC_utf8((U8*)locinput)))                                      \
229                {                                                                       \
230                    sayNO;                                                              \
231                }                                                                       \
232                locinput += PL_utf8skip[nextchr];                                       \
233                nextchr = UCHARAT(locinput);                                            \
234                break;                                                                  \
235            }                                                                           \
236            if ((OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                   \
237                sayNO;                                                                  \
238            nextchr = UCHARAT(++locinput);                                              \
239            break
240
241
242
243
244
245/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
246
247/* for use after a quantifier and before an EXACT-like node -- japhy */
248/* it would be nice to rework regcomp.sym to generate this stuff. sigh */
249#define JUMPABLE(rn) (      \
250    OP(rn) == OPEN ||       \
251    (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
252    OP(rn) == EVAL ||   \
253    OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
254    OP(rn) == PLUS || OP(rn) == MINMOD || \
255    OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
256    (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
257)
258#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
259
260#define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
261
262#if 0
263/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
264   we don't need this definition. */
265#define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
266#define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
267#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
268
269#else
270/* ... so we use this as its faster. */
271#define IS_TEXT(rn)   ( OP(rn)==EXACT   )
272#define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
273#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
274
275#endif
276
277/*
278  Search for mandatory following text node; for lookahead, the text must
279  follow but for lookbehind (rn->flags != 0) we skip to the next step.
280*/
281#define FIND_NEXT_IMPT(rn) STMT_START { \
282    while (JUMPABLE(rn)) { \
283	const OPCODE type = OP(rn); \
284	if (type == SUSPEND || PL_regkind[type] == CURLY) \
285	    rn = NEXTOPER(NEXTOPER(rn)); \
286	else if (type == PLUS) \
287	    rn = NEXTOPER(rn); \
288	else if (type == IFMATCH) \
289	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
290	else rn += NEXT_OFF(rn); \
291    } \
292} STMT_END
293
294
295static void restore_pos(pTHX_ void *arg);
296
297STATIC CHECKPOINT
298S_regcppush(pTHX_ I32 parenfloor)
299{
300    dVAR;
301    const int retval = PL_savestack_ix;
302#define REGCP_PAREN_ELEMS 4
303    const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
304    int p;
305    GET_RE_DEBUG_FLAGS_DECL;
306
307    if (paren_elems_to_push < 0)
308	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
309
310#define REGCP_OTHER_ELEMS 7
311    SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
312
313    for (p = PL_regsize; p > parenfloor; p--) {
314/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
315	SSPUSHINT(PL_regoffs[p].end);
316	SSPUSHINT(PL_regoffs[p].start);
317	SSPUSHPTR(PL_reg_start_tmp[p]);
318	SSPUSHINT(p);
319	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
320	  "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
321		      (UV)p, (IV)PL_regoffs[p].start,
322		      (IV)(PL_reg_start_tmp[p] - PL_bostr),
323		      (IV)PL_regoffs[p].end
324	));
325    }
326/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
327    SSPUSHPTR(PL_regoffs);
328    SSPUSHINT(PL_regsize);
329    SSPUSHINT(*PL_reglastparen);
330    SSPUSHINT(*PL_reglastcloseparen);
331    SSPUSHPTR(PL_reginput);
332#define REGCP_FRAME_ELEMS 2
333/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
334 * are needed for the regexp context stack bookkeeping. */
335    SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
336    SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
337
338    return retval;
339}
340
341/* These are needed since we do not localize EVAL nodes: */
342#define REGCP_SET(cp)                                           \
343    DEBUG_STATE_r(                                              \
344            PerlIO_printf(Perl_debug_log,		        \
345	        "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
346	        (IV)PL_savestack_ix));                          \
347    cp = PL_savestack_ix
348
349#define REGCP_UNWIND(cp)                                        \
350    DEBUG_STATE_r(                                              \
351        if (cp != PL_savestack_ix) 		                \
352    	    PerlIO_printf(Perl_debug_log,		        \
353		"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
354	        (IV)(cp), (IV)PL_savestack_ix));                \
355    regcpblow(cp)
356
357STATIC char *
358S_regcppop(pTHX_ const regexp *rex)
359{
360    dVAR;
361    U32 i;
362    char *input;
363    GET_RE_DEBUG_FLAGS_DECL;
364
365    PERL_ARGS_ASSERT_REGCPPOP;
366
367    /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
368    i = SSPOPINT;
369    assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
370    i = SSPOPINT; /* Parentheses elements to pop. */
371    input = (char *) SSPOPPTR;
372    *PL_reglastcloseparen = SSPOPINT;
373    *PL_reglastparen = SSPOPINT;
374    PL_regsize = SSPOPINT;
375    PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
376
377
378    /* Now restore the parentheses context. */
379    for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
380	 i > 0; i -= REGCP_PAREN_ELEMS) {
381	I32 tmps;
382	U32 paren = (U32)SSPOPINT;
383	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
384	PL_regoffs[paren].start = SSPOPINT;
385	tmps = SSPOPINT;
386	if (paren <= *PL_reglastparen)
387	    PL_regoffs[paren].end = tmps;
388	DEBUG_BUFFERS_r(
389	    PerlIO_printf(Perl_debug_log,
390			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
391			  (UV)paren, (IV)PL_regoffs[paren].start,
392			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
393			  (IV)PL_regoffs[paren].end,
394			  (paren > *PL_reglastparen ? "(no)" : ""));
395	);
396    }
397    DEBUG_BUFFERS_r(
398	if (*PL_reglastparen + 1 <= rex->nparens) {
399	    PerlIO_printf(Perl_debug_log,
400			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
401			  (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
402	}
403    );
404#if 1
405    /* It would seem that the similar code in regtry()
406     * already takes care of this, and in fact it is in
407     * a better location to since this code can #if 0-ed out
408     * but the code in regtry() is needed or otherwise tests
409     * requiring null fields (pat.t#187 and split.t#{13,14}
410     * (as of patchlevel 7877)  will fail.  Then again,
411     * this code seems to be necessary or otherwise
412     * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
413     * --jhi updated by dapm */
414    for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
415	if (i > PL_regsize)
416	    PL_regoffs[i].start = -1;
417	PL_regoffs[i].end = -1;
418    }
419#endif
420    return input;
421}
422
423#define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
424
425/*
426 * pregexec and friends
427 */
428
429#ifndef PERL_IN_XSUB_RE
430/*
431 - pregexec - match a regexp against a string
432 */
433I32
434Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
435	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
436/* strend: pointer to null at end of string */
437/* strbeg: real beginning of string */
438/* minend: end of match must be >=minend after stringarg. */
439/* nosave: For optimizations. */
440{
441    PERL_ARGS_ASSERT_PREGEXEC;
442
443    return
444	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
445		      nosave ? 0 : REXEC_COPY_STR);
446}
447#endif
448
449/*
450 * Need to implement the following flags for reg_anch:
451 *
452 * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
453 * USE_INTUIT_ML
454 * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
455 * INTUIT_AUTORITATIVE_ML
456 * INTUIT_ONCE_NOML		- Intuit can match in one location only.
457 * INTUIT_ONCE_ML
458 *
459 * Another flag for this function: SECOND_TIME (so that float substrs
460 * with giant delta may be not rechecked).
461 */
462
463/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
464
465/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
466   Otherwise, only SvCUR(sv) is used to get strbeg. */
467
468/* XXXX We assume that strpos is strbeg unless sv. */
469
470/* XXXX Some places assume that there is a fixed substring.
471	An update may be needed if optimizer marks as "INTUITable"
472	RExen without fixed substrings.  Similarly, it is assumed that
473	lengths of all the strings are no more than minlen, thus they
474	cannot come from lookahead.
475	(Or minlen should take into account lookahead.)
476  NOTE: Some of this comment is not correct. minlen does now take account
477  of lookahead/behind. Further research is required. -- demerphq
478
479*/
480
481/* A failure to find a constant substring means that there is no need to make
482   an expensive call to REx engine, thus we celebrate a failure.  Similarly,
483   finding a substring too deep into the string means that less calls to
484   regtry() should be needed.
485
486   REx compiler's optimizer found 4 possible hints:
487	a) Anchored substring;
488	b) Fixed substring;
489	c) Whether we are anchored (beginning-of-line or \G);
490	d) First node (of those at offset 0) which may distingush positions;
491   We use a)b)d) and multiline-part of c), and try to find a position in the
492   string which does not contradict any of them.
493 */
494
495/* Most of decisions we do here should have been done at compile time.
496   The nodes of the REx which we used for the search should have been
497   deleted from the finite automaton. */
498
499char *
500Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
501		     char *strend, const U32 flags, re_scream_pos_data *data)
502{
503    dVAR;
504    struct regexp *const prog = (struct regexp *)SvANY(rx);
505    register I32 start_shift = 0;
506    /* Should be nonnegative! */
507    register I32 end_shift   = 0;
508    register char *s;
509    register SV *check;
510    char *strbeg;
511    char *t;
512    const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
513    I32 ml_anch;
514    register char *other_last = NULL;	/* other substr checked before this */
515    char *check_at = NULL;		/* check substr found at this pos */
516    const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
517    RXi_GET_DECL(prog,progi);
518#ifdef DEBUGGING
519    const char * const i_strpos = strpos;
520#endif
521    GET_RE_DEBUG_FLAGS_DECL;
522
523    PERL_ARGS_ASSERT_RE_INTUIT_START;
524
525    RX_MATCH_UTF8_set(rx,do_utf8);
526
527    if (RX_UTF8(rx)) {
528	PL_reg_flags |= RF_utf8;
529    }
530    DEBUG_EXECUTE_r(
531        debug_start_match(rx, do_utf8, strpos, strend,
532            sv ? "Guessing start of match in sv for"
533               : "Guessing start of match in string for");
534	      );
535
536    /* CHR_DIST() would be more correct here but it makes things slow. */
537    if (prog->minlen > strend - strpos) {
538	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
539			      "String too short... [re_intuit_start]\n"));
540	goto fail;
541    }
542
543    strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
544    PL_regeol = strend;
545    if (do_utf8) {
546	if (!prog->check_utf8 && prog->check_substr)
547	    to_utf8_substr(prog);
548	check = prog->check_utf8;
549    } else {
550	if (!prog->check_substr && prog->check_utf8)
551	    to_byte_substr(prog);
552	check = prog->check_substr;
553    }
554    if (check == &PL_sv_undef) {
555	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
556		"Non-utf8 string cannot match utf8 check string\n"));
557	goto fail;
558    }
559    if (prog->extflags & RXf_ANCH) {	/* Match at beg-of-str or after \n */
560	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
561		     || ( (prog->extflags & RXf_ANCH_BOL)
562			  && !multiline ) );	/* Check after \n? */
563
564	if (!ml_anch) {
565	  if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
566		&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
567	       /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
568	       && sv && !SvROK(sv)
569	       && (strpos != strbeg)) {
570	      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
571	      goto fail;
572	  }
573	  if (prog->check_offset_min == prog->check_offset_max &&
574	      !(prog->extflags & RXf_CANY_SEEN)) {
575	    /* Substring at constant offset from beg-of-str... */
576	    I32 slen;
577
578	    s = HOP3c(strpos, prog->check_offset_min, strend);
579
580	    if (SvTAIL(check)) {
581		slen = SvCUR(check);	/* >= 1 */
582
583		if ( strend - s > slen || strend - s < slen - 1
584		     || (strend - s == slen && strend[-1] != '\n')) {
585		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
586		    goto fail_finish;
587		}
588		/* Now should match s[0..slen-2] */
589		slen--;
590		if (slen && (*SvPVX_const(check) != *s
591			     || (slen > 1
592				 && memNE(SvPVX_const(check), s, slen)))) {
593		  report_neq:
594		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
595		    goto fail_finish;
596		}
597	    }
598	    else if (*SvPVX_const(check) != *s
599		     || ((slen = SvCUR(check)) > 1
600			 && memNE(SvPVX_const(check), s, slen)))
601		goto report_neq;
602	    check_at = s;
603	    goto success_at_start;
604	  }
605	}
606	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
607	s = strpos;
608	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
609	end_shift = prog->check_end_shift;
610
611	if (!ml_anch) {
612	    const I32 end = prog->check_offset_max + CHR_SVLEN(check)
613					 - (SvTAIL(check) != 0);
614	    const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
615
616	    if (end_shift < eshift)
617		end_shift = eshift;
618	}
619    }
620    else {				/* Can match at random position */
621	ml_anch = 0;
622	s = strpos;
623	start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
624	end_shift = prog->check_end_shift;
625
626	/* end shift should be non negative here */
627    }
628
629#ifdef QDEBUGGING	/* 7/99: reports of failure (with the older version) */
630    if (end_shift < 0)
631	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
632		   (IV)end_shift, RX_PRECOMP(prog));
633#endif
634
635  restart:
636    /* Find a possible match in the region s..strend by looking for
637       the "check" substring in the region corrected by start/end_shift. */
638
639    {
640        I32 srch_start_shift = start_shift;
641        I32 srch_end_shift = end_shift;
642        if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
643	    srch_end_shift -= ((strbeg - s) - srch_start_shift);
644	    srch_start_shift = strbeg - s;
645	}
646    DEBUG_OPTIMISE_MORE_r({
647        PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
648            (IV)prog->check_offset_min,
649            (IV)srch_start_shift,
650            (IV)srch_end_shift,
651            (IV)prog->check_end_shift);
652    });
653
654    if (flags & REXEC_SCREAM) {
655	I32 p = -1;			/* Internal iterator of scream. */
656	I32 * const pp = data ? data->scream_pos : &p;
657
658	if (PL_screamfirst[BmRARE(check)] >= 0
659	    || ( BmRARE(check) == '\n'
660		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
661		 && SvTAIL(check) ))
662	    s = screaminstr(sv, check,
663			    srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
664	else
665	    goto fail_finish;
666	/* we may be pointing at the wrong string */
667	if (s && RXp_MATCH_COPIED(prog))
668	    s = strbeg + (s - SvPVX_const(sv));
669	if (data)
670	    *data->scream_olds = s;
671    }
672    else {
673        U8* start_point;
674        U8* end_point;
675        if (prog->extflags & RXf_CANY_SEEN) {
676            start_point= (U8*)(s + srch_start_shift);
677            end_point= (U8*)(strend - srch_end_shift);
678        } else {
679	    start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
680            end_point= HOP3(strend, -srch_end_shift, strbeg);
681	}
682	DEBUG_OPTIMISE_MORE_r({
683            PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
684                (int)(end_point - start_point),
685                (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
686                start_point);
687        });
688
689	s = fbm_instr( start_point, end_point,
690		      check, multiline ? FBMrf_MULTILINE : 0);
691    }
692    }
693    /* Update the count-of-usability, remove useless subpatterns,
694	unshift s.  */
695
696    DEBUG_EXECUTE_r({
697        RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
698            SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
699        PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
700			  (s ? "Found" : "Did not find"),
701	    (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
702	        ? "anchored" : "floating"),
703	    quoted,
704	    RE_SV_TAIL(check),
705	    (s ? " at offset " : "...\n") );
706    });
707
708    if (!s)
709	goto fail_finish;
710    /* Finish the diagnostic message */
711    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
712
713    /* XXX dmq: first branch is for positive lookbehind...
714       Our check string is offset from the beginning of the pattern.
715       So we need to do any stclass tests offset forward from that
716       point. I think. :-(
717     */
718
719
720
721    check_at=s;
722
723
724    /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
725       Start with the other substr.
726       XXXX no SCREAM optimization yet - and a very coarse implementation
727       XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
728		*always* match.  Probably should be marked during compile...
729       Probably it is right to do no SCREAM here...
730     */
731
732    if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
733                : (prog->float_substr && prog->anchored_substr))
734    {
735	/* Take into account the "other" substring. */
736	/* XXXX May be hopelessly wrong for UTF... */
737	if (!other_last)
738	    other_last = strpos;
739	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
740	  do_other_anchored:
741	    {
742		char * const last = HOP3c(s, -start_shift, strbeg);
743		char *last1, *last2;
744		char * const saved_s = s;
745		SV* must;
746
747		t = s - prog->check_offset_max;
748		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
749		    && (!do_utf8
750			|| ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
751			    && t > strpos)))
752		    NOOP;
753		else
754		    t = strpos;
755		t = HOP3c(t, prog->anchored_offset, strend);
756		if (t < other_last)	/* These positions already checked */
757		    t = other_last;
758		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
759		if (last < last1)
760		    last1 = last;
761                /* XXXX It is not documented what units *_offsets are in.
762                   We assume bytes, but this is clearly wrong.
763                   Meaning this code needs to be carefully reviewed for errors.
764                   dmq.
765                  */
766
767		/* On end-of-str: see comment below. */
768		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
769		if (must == &PL_sv_undef) {
770		    s = (char*)NULL;
771		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
772		}
773		else
774		    s = fbm_instr(
775			(unsigned char*)t,
776			HOP3(HOP3(last1, prog->anchored_offset, strend)
777				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
778			must,
779			multiline ? FBMrf_MULTILINE : 0
780		    );
781                DEBUG_EXECUTE_r({
782                    RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
783                        SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
784                    PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
785			(s ? "Found" : "Contradicts"),
786                        quoted, RE_SV_TAIL(must));
787                });
788
789
790		if (!s) {
791		    if (last1 >= last2) {
792			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
793						", giving up...\n"));
794			goto fail_finish;
795		    }
796		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
797			", trying floating at offset %ld...\n",
798			(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
799		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
800		    s = HOP3c(last, 1, strend);
801		    goto restart;
802		}
803		else {
804		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
805			  (long)(s - i_strpos)));
806		    t = HOP3c(s, -prog->anchored_offset, strbeg);
807		    other_last = HOP3c(s, 1, strend);
808		    s = saved_s;
809		    if (t == strpos)
810			goto try_at_start;
811		    goto try_at_offset;
812		}
813	    }
814	}
815	else {		/* Take into account the floating substring. */
816	    char *last, *last1;
817	    char * const saved_s = s;
818	    SV* must;
819
820	    t = HOP3c(s, -start_shift, strbeg);
821	    last1 = last =
822		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
823	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
824		last = HOP3c(t, prog->float_max_offset, strend);
825	    s = HOP3c(t, prog->float_min_offset, strend);
826	    if (s < other_last)
827		s = other_last;
828 /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
829	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
830	    /* fbm_instr() takes into account exact value of end-of-str
831	       if the check is SvTAIL(ed).  Since false positives are OK,
832	       and end-of-str is not later than strend we are OK. */
833	    if (must == &PL_sv_undef) {
834		s = (char*)NULL;
835		DEBUG_r(must = prog->float_utf8);	/* for debug message */
836	    }
837	    else
838		s = fbm_instr((unsigned char*)s,
839			      (unsigned char*)last + SvCUR(must)
840				  - (SvTAIL(must)!=0),
841			      must, multiline ? FBMrf_MULTILINE : 0);
842	    DEBUG_EXECUTE_r({
843	        RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
844	            SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
845	        PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
846		    (s ? "Found" : "Contradicts"),
847		    quoted, RE_SV_TAIL(must));
848            });
849	    if (!s) {
850		if (last1 == last) {
851		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
852					    ", giving up...\n"));
853		    goto fail_finish;
854		}
855		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
856		    ", trying anchored starting at offset %ld...\n",
857		    (long)(saved_s + 1 - i_strpos)));
858		other_last = last;
859		s = HOP3c(t, 1, strend);
860		goto restart;
861	    }
862	    else {
863		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
864		      (long)(s - i_strpos)));
865		other_last = s; /* Fix this later. --Hugo */
866		s = saved_s;
867		if (t == strpos)
868		    goto try_at_start;
869		goto try_at_offset;
870	    }
871	}
872    }
873
874
875    t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
876
877    DEBUG_OPTIMISE_MORE_r(
878        PerlIO_printf(Perl_debug_log,
879            "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
880            (IV)prog->check_offset_min,
881            (IV)prog->check_offset_max,
882            (IV)(s-strpos),
883            (IV)(t-strpos),
884            (IV)(t-s),
885            (IV)(strend-strpos)
886        )
887    );
888
889    if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
890        && (!do_utf8
891	    || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
892		 && t > strpos)))
893    {
894	/* Fixed substring is found far enough so that the match
895	   cannot start at strpos. */
896      try_at_offset:
897	if (ml_anch && t[-1] != '\n') {
898	    /* Eventually fbm_*() should handle this, but often
899	       anchored_offset is not 0, so this check will not be wasted. */
900	    /* XXXX In the code below we prefer to look for "^" even in
901	       presence of anchored substrings.  And we search even
902	       beyond the found float position.  These pessimizations
903	       are historical artefacts only.  */
904	  find_anchor:
905	    while (t < strend - prog->minlen) {
906		if (*t == '\n') {
907		    if (t < check_at - prog->check_offset_min) {
908			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
909			    /* Since we moved from the found position,
910			       we definitely contradict the found anchored
911			       substr.  Due to the above check we do not
912			       contradict "check" substr.
913			       Thus we can arrive here only if check substr
914			       is float.  Redo checking for "other"=="fixed".
915			     */
916			    strpos = t + 1;
917			    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
918				PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
919			    goto do_other_anchored;
920			}
921			/* We don't contradict the found floating substring. */
922			/* XXXX Why not check for STCLASS? */
923			s = t + 1;
924			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
925			    PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
926			goto set_useful;
927		    }
928		    /* Position contradicts check-string */
929		    /* XXXX probably better to look for check-string
930		       than for "\n", so one should lower the limit for t? */
931		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
932			PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
933		    other_last = strpos = s = t + 1;
934		    goto restart;
935		}
936		t++;
937	    }
938	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
939			PL_colors[0], PL_colors[1]));
940	    goto fail_finish;
941	}
942	else {
943	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
944			PL_colors[0], PL_colors[1]));
945	}
946	s = t;
947      set_useful:
948	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
949    }
950    else {
951	/* The found string does not prohibit matching at strpos,
952	   - no optimization of calling REx engine can be performed,
953	   unless it was an MBOL and we are not after MBOL,
954	   or a future STCLASS check will fail this. */
955      try_at_start:
956	/* Even in this situation we may use MBOL flag if strpos is offset
957	   wrt the start of the string. */
958	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
959	    && (strpos != strbeg) && strpos[-1] != '\n'
960	    /* May be due to an implicit anchor of m{.*foo}  */
961	    && !(prog->intflags & PREGf_IMPLICIT))
962	{
963	    t = strpos;
964	    goto find_anchor;
965	}
966	DEBUG_EXECUTE_r( if (ml_anch)
967	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
968			  (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
969	);
970      success_at_start:
971	if (!(prog->intflags & PREGf_NAUGHTY)	/* XXXX If strpos moved? */
972	    && (do_utf8 ? (
973		prog->check_utf8		/* Could be deleted already */
974		&& --BmUSEFUL(prog->check_utf8) < 0
975		&& (prog->check_utf8 == prog->float_utf8)
976	    ) : (
977		prog->check_substr		/* Could be deleted already */
978		&& --BmUSEFUL(prog->check_substr) < 0
979		&& (prog->check_substr == prog->float_substr)
980	    )))
981	{
982	    /* If flags & SOMETHING - do not do it many times on the same match */
983	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
984	    /* XXX Does the destruction order has to change with do_utf8? */
985	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
986	    SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
987	    prog->check_substr = prog->check_utf8 = NULL;	/* disable */
988	    prog->float_substr = prog->float_utf8 = NULL;	/* clear */
989	    check = NULL;			/* abort */
990	    s = strpos;
991	    /* XXXX This is a remnant of the old implementation.  It
992	            looks wasteful, since now INTUIT can use many
993	            other heuristics. */
994	    prog->extflags &= ~RXf_USE_INTUIT;
995	}
996	else
997	    s = strpos;
998    }
999
1000    /* Last resort... */
1001    /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
1002    /* trie stclasses are too expensive to use here, we are better off to
1003       leave it to regmatch itself */
1004    if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
1005	/* minlen == 0 is possible if regstclass is \b or \B,
1006	   and the fixed substr is ''$.
1007	   Since minlen is already taken into account, s+1 is before strend;
1008	   accidentally, minlen >= 1 guaranties no false positives at s + 1
1009	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
1010	   regstclass does not come from lookahead...  */
1011	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
1012	   This leaves EXACTF only, which is dealt with in find_byclass().  */
1013        const U8* const str = (U8*)STRING(progi->regstclass);
1014        const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
1015		    ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
1016		    : 1);
1017	char * endpos;
1018	if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1019            endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
1020        else if (prog->float_substr || prog->float_utf8)
1021	    endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
1022        else
1023            endpos= strend;
1024
1025        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
1026				      (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
1027
1028	t = s;
1029        s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
1030	if (!s) {
1031#ifdef DEBUGGING
1032	    const char *what = NULL;
1033#endif
1034	    if (endpos == strend) {
1035		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1036				"Could not match STCLASS...\n") );
1037		goto fail;
1038	    }
1039	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1040				   "This position contradicts STCLASS...\n") );
1041	    if ((prog->extflags & RXf_ANCH) && !ml_anch)
1042		goto fail;
1043	    /* Contradict one of substrings */
1044	    if (prog->anchored_substr || prog->anchored_utf8) {
1045		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
1046		    DEBUG_EXECUTE_r( what = "anchored" );
1047		  hop_and_restart:
1048		    s = HOP3c(t, 1, strend);
1049		    if (s + start_shift + end_shift > strend) {
1050			/* XXXX Should be taken into account earlier? */
1051			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1052					       "Could not match STCLASS...\n") );
1053			goto fail;
1054		    }
1055		    if (!check)
1056			goto giveup;
1057		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1058				"Looking for %s substr starting at offset %ld...\n",
1059				 what, (long)(s + start_shift - i_strpos)) );
1060		    goto restart;
1061		}
1062		/* Have both, check_string is floating */
1063		if (t + start_shift >= check_at) /* Contradicts floating=check */
1064		    goto retry_floating_check;
1065		/* Recheck anchored substring, but not floating... */
1066		s = check_at;
1067		if (!check)
1068		    goto giveup;
1069		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1070			  "Looking for anchored substr starting at offset %ld...\n",
1071			  (long)(other_last - i_strpos)) );
1072		goto do_other_anchored;
1073	    }
1074	    /* Another way we could have checked stclass at the
1075               current position only: */
1076	    if (ml_anch) {
1077		s = t = t + 1;
1078		if (!check)
1079		    goto giveup;
1080		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1081			  "Looking for /%s^%s/m starting at offset %ld...\n",
1082			  PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
1083		goto try_at_offset;
1084	    }
1085	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
1086		goto fail;
1087	    /* Check is floating subtring. */
1088	  retry_floating_check:
1089	    t = check_at - start_shift;
1090	    DEBUG_EXECUTE_r( what = "floating" );
1091	    goto hop_and_restart;
1092	}
1093	if (t != s) {
1094            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1095			"By STCLASS: moving %ld --> %ld\n",
1096                                  (long)(t - i_strpos), (long)(s - i_strpos))
1097                   );
1098        }
1099        else {
1100            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1101                                  "Does not contradict STCLASS...\n");
1102                   );
1103        }
1104    }
1105  giveup:
1106    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
1107			  PL_colors[4], (check ? "Guessed" : "Giving up"),
1108			  PL_colors[5], (long)(s - i_strpos)) );
1109    return s;
1110
1111  fail_finish:				/* Substring not found */
1112    if (prog->check_substr || prog->check_utf8)		/* could be removed already */
1113	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
1114  fail:
1115    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
1116			  PL_colors[4], PL_colors[5]));
1117    return NULL;
1118}
1119
1120#define DECL_TRIE_TYPE(scan) \
1121    const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1122		    trie_type = (scan->flags != EXACT) \
1123		              ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1124                              : (do_utf8 ? trie_utf8 : trie_plain)
1125
1126#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1127uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1128    switch (trie_type) {                                                    \
1129    case trie_utf8_fold:                                                    \
1130	if ( foldlen>0 ) {                                                  \
1131	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1132	    foldlen -= len;                                                 \
1133	    uscan += len;                                                   \
1134	    len=0;                                                          \
1135	} else {                                                            \
1136	    uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1137	    uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1138	    foldlen -= UNISKIP( uvc );                                      \
1139	    uscan = foldbuf + UNISKIP( uvc );                               \
1140	}                                                                   \
1141	break;                                                              \
1142    case trie_latin_utf8_fold:                                              \
1143	if ( foldlen>0 ) {                                                  \
1144	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1145	    foldlen -= len;                                                 \
1146	    uscan += len;                                                   \
1147	    len=0;                                                          \
1148	} else {                                                            \
1149	    len = 1;                                                        \
1150	    uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1151	    foldlen -= UNISKIP( uvc );                                      \
1152	    uscan = foldbuf + UNISKIP( uvc );                               \
1153	}                                                                   \
1154	break;                                                              \
1155    case trie_utf8:                                                         \
1156	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1157	break;                                                              \
1158    case trie_plain:                                                        \
1159	uvc = (UV)*uc;                                                      \
1160	len = 1;                                                            \
1161    }                                                                       \
1162    if (uvc < 256) {                                                        \
1163	charid = trie->charmap[ uvc ];                                      \
1164    }                                                                       \
1165    else {                                                                  \
1166	charid = 0;                                                         \
1167	if (widecharmap) {                                                  \
1168	    SV** const svpp = hv_fetch(widecharmap,                         \
1169			(char*)&uvc, sizeof(UV), 0);                        \
1170	    if (svpp)                                                       \
1171		charid = (U16)SvIV(*svpp);                                  \
1172	}                                                                   \
1173    }                                                                       \
1174} STMT_END
1175
1176#define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1177{                                                      \
1178    char *my_strend= (char *)strend;                   \
1179    if ( (CoNd)                                        \
1180	 && (ln == len ||                              \
1181	     !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1182			m, NULL, ln, (bool)UTF))       \
1183	 && (!reginfo || regtry(reginfo, &s)) )        \
1184	goto got_it;                                   \
1185    else {                                             \
1186	 U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1187	 uvchr_to_utf8(tmpbuf, c);                     \
1188	 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1189	 if ( f != c                                   \
1190	      && (f == c1 || f == c2)                  \
1191	      && (ln == len ||                         \
1192	        !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1193			      m, NULL, ln, (bool)UTF)) \
1194	      && (!reginfo || regtry(reginfo, &s)) )   \
1195	      goto got_it;                             \
1196    }                                                  \
1197}                                                      \
1198s += len
1199
1200#define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1201STMT_START {                                              \
1202    while (s <= e) {                                      \
1203	if ( (CoNd)                                       \
1204	     && (ln == 1 || !(OP(c) == EXACTF             \
1205			      ? ibcmp(s, m, ln)           \
1206			      : ibcmp_locale(s, m, ln)))  \
1207	     && (!reginfo || regtry(reginfo, &s)) )        \
1208	    goto got_it;                                  \
1209	s++;                                              \
1210    }                                                     \
1211} STMT_END
1212
1213#define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1214STMT_START {                                          \
1215    while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1216	CoDe                                          \
1217	s += uskip;                                   \
1218    }                                                 \
1219} STMT_END
1220
1221#define REXEC_FBC_SCAN(CoDe)                          \
1222STMT_START {                                          \
1223    while (s < strend) {                              \
1224	CoDe                                          \
1225	s++;                                          \
1226    }                                                 \
1227} STMT_END
1228
1229#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1230REXEC_FBC_UTF8_SCAN(                                  \
1231    if (CoNd) {                                       \
1232	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1233	    goto got_it;                              \
1234	else                                          \
1235	    tmp = doevery;                            \
1236    }                                                 \
1237    else                                              \
1238	tmp = 1;                                      \
1239)
1240
1241#define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1242REXEC_FBC_SCAN(                                       \
1243    if (CoNd) {                                       \
1244	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1245	    goto got_it;                              \
1246	else                                          \
1247	    tmp = doevery;                            \
1248    }                                                 \
1249    else                                              \
1250	tmp = 1;                                      \
1251)
1252
1253#define REXEC_FBC_TRYIT               \
1254if ((!reginfo || regtry(reginfo, &s))) \
1255    goto got_it
1256
1257#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1258    if (do_utf8) {                                             \
1259	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1260    }                                                          \
1261    else {                                                     \
1262	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1263    }                                                          \
1264    break
1265
1266#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1267    if (do_utf8) {                                             \
1268	UtFpReLoAd;                                            \
1269	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1270    }                                                          \
1271    else {                                                     \
1272	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1273    }                                                          \
1274    break
1275
1276#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1277    PL_reg_flags |= RF_tainted;                                \
1278    if (do_utf8) {                                             \
1279	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1280    }                                                          \
1281    else {                                                     \
1282	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1283    }                                                          \
1284    break
1285
1286#define DUMP_EXEC_POS(li,s,doutf8) \
1287    dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1288
1289/* We know what class REx starts with.  Try to find this position... */
1290/* if reginfo is NULL, its a dryrun */
1291/* annoyingly all the vars in this routine have different names from their counterparts
1292   in regmatch. /grrr */
1293
1294STATIC char *
1295S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1296    const char *strend, regmatch_info *reginfo)
1297{
1298	dVAR;
1299	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1300	char *m;
1301	STRLEN ln;
1302	STRLEN lnc;
1303	register STRLEN uskip;
1304	unsigned int c1;
1305	unsigned int c2;
1306	char *e;
1307	register I32 tmp = 1;	/* Scratch variable? */
1308	register const bool do_utf8 = PL_reg_match_utf8;
1309        RXi_GET_DECL(prog,progi);
1310
1311	PERL_ARGS_ASSERT_FIND_BYCLASS;
1312
1313	/* We know what class it must start with. */
1314	switch (OP(c)) {
1315	case ANYOF:
1316	    if (do_utf8) {
1317		 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1318			  !UTF8_IS_INVARIANT((U8)s[0]) ?
1319			  reginclass(prog, c, (U8*)s, 0, do_utf8) :
1320			  REGINCLASS(prog, c, (U8*)s));
1321	    }
1322	    else {
1323		 while (s < strend) {
1324		      STRLEN skip = 1;
1325
1326		      if (REGINCLASS(prog, c, (U8*)s) ||
1327			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1328			   /* The assignment of 2 is intentional:
1329			    * for the folded sharp s, the skip is 2. */
1330			   (skip = SHARP_S_SKIP))) {
1331			   if (tmp && (!reginfo || regtry(reginfo, &s)))
1332				goto got_it;
1333			   else
1334				tmp = doevery;
1335		      }
1336		      else
1337			   tmp = 1;
1338		      s += skip;
1339		 }
1340	    }
1341	    break;
1342	case CANY:
1343	    REXEC_FBC_SCAN(
1344	        if (tmp && (!reginfo || regtry(reginfo, &s)))
1345		    goto got_it;
1346		else
1347		    tmp = doevery;
1348	    );
1349	    break;
1350	case EXACTF:
1351	    m   = STRING(c);
1352	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1353	    lnc = (I32) ln;	/* length to match in characters */
1354	    if (UTF) {
1355	        STRLEN ulen1, ulen2;
1356		U8 *sm = (U8 *) m;
1357		U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1358		U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1359		/* used by commented-out code below */
1360		/*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1361
1362                /* XXX: Since the node will be case folded at compile
1363                   time this logic is a little odd, although im not
1364                   sure that its actually wrong. --dmq */
1365
1366		c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1367		c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1368
1369		/* XXX: This is kinda strange. to_utf8_XYZ returns the
1370                   codepoint of the first character in the converted
1371                   form, yet originally we did the extra step.
1372                   No tests fail by commenting this code out however
1373                   so Ive left it out. -- dmq.
1374
1375		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1376				    0, uniflags);
1377		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1378				    0, uniflags);
1379                */
1380
1381		lnc = 0;
1382		while (sm < ((U8 *) m + ln)) {
1383		    lnc++;
1384		    sm += UTF8SKIP(sm);
1385		}
1386	    }
1387	    else {
1388		c1 = *(U8*)m;
1389		c2 = PL_fold[c1];
1390	    }
1391	    goto do_exactf;
1392	case EXACTFL:
1393	    m   = STRING(c);
1394	    ln  = STR_LEN(c);
1395	    lnc = (I32) ln;
1396	    c1 = *(U8*)m;
1397	    c2 = PL_fold_locale[c1];
1398	  do_exactf:
1399	    e = HOP3c(strend, -((I32)lnc), s);
1400
1401	    if (!reginfo && e < s)
1402		e = s;			/* Due to minlen logic of intuit() */
1403
1404	    /* The idea in the EXACTF* cases is to first find the
1405	     * first character of the EXACTF* node and then, if
1406	     * necessary, case-insensitively compare the full
1407	     * text of the node.  The c1 and c2 are the first
1408	     * characters (though in Unicode it gets a bit
1409	     * more complicated because there are more cases
1410	     * than just upper and lower: one needs to use
1411	     * the so-called folding case for case-insensitive
1412	     * matching (called "loose matching" in Unicode).
1413	     * ibcmp_utf8() will do just that. */
1414
1415	    if (do_utf8 || UTF) {
1416	        UV c, f;
1417	        U8 tmpbuf [UTF8_MAXBYTES+1];
1418		STRLEN len = 1;
1419		STRLEN foldlen;
1420		const U32 uniflags = UTF8_ALLOW_DEFAULT;
1421		if (c1 == c2) {
1422		    /* Upper and lower of 1st char are equal -
1423		     * probably not a "letter". */
1424		    while (s <= e) {
1425		        if (do_utf8) {
1426		            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1427					   uniflags);
1428                        } else {
1429                            c = *((U8*)s);
1430                        }
1431			REXEC_FBC_EXACTISH_CHECK(c == c1);
1432		    }
1433		}
1434		else {
1435		    while (s <= e) {
1436		        if (do_utf8) {
1437		            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1438					   uniflags);
1439                        } else {
1440                            c = *((U8*)s);
1441                        }
1442
1443			/* Handle some of the three Greek sigmas cases.
1444			 * Note that not all the possible combinations
1445			 * are handled here: some of them are handled
1446			 * by the standard folding rules, and some of
1447			 * them (the character class or ANYOF cases)
1448			 * are handled during compiletime in
1449			 * regexec.c:S_regclass(). */
1450			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1451			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1452			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1453
1454			REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1455		    }
1456		}
1457	    }
1458	    else {
1459	        /* Neither pattern nor string are UTF8 */
1460		if (c1 == c2)
1461		    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1462		else
1463		    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1464	    }
1465	    break;
1466	case BOUNDL:
1467	    PL_reg_flags |= RF_tainted;
1468	    /* FALL THROUGH */
1469	case BOUND:
1470	    if (do_utf8) {
1471		if (s == PL_bostr)
1472		    tmp = '\n';
1473		else {
1474		    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1475		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1476		}
1477		tmp = ((OP(c) == BOUND ?
1478			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1479		LOAD_UTF8_CHARCLASS_ALNUM();
1480		REXEC_FBC_UTF8_SCAN(
1481		    if (tmp == !(OP(c) == BOUND ?
1482				 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1483				 isALNUM_LC_utf8((U8*)s)))
1484		    {
1485			tmp = !tmp;
1486			REXEC_FBC_TRYIT;
1487		}
1488		);
1489	    }
1490	    else {
1491		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1492		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1493		REXEC_FBC_SCAN(
1494		    if (tmp ==
1495			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1496			tmp = !tmp;
1497			REXEC_FBC_TRYIT;
1498		}
1499		);
1500	    }
1501	    if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1502		goto got_it;
1503	    break;
1504	case NBOUNDL:
1505	    PL_reg_flags |= RF_tainted;
1506	    /* FALL THROUGH */
1507	case NBOUND:
1508	    if (do_utf8) {
1509		if (s == PL_bostr)
1510		    tmp = '\n';
1511		else {
1512		    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1513		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1514		}
1515		tmp = ((OP(c) == NBOUND ?
1516			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1517		LOAD_UTF8_CHARCLASS_ALNUM();
1518		REXEC_FBC_UTF8_SCAN(
1519		    if (tmp == !(OP(c) == NBOUND ?
1520				 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1521				 isALNUM_LC_utf8((U8*)s)))
1522			tmp = !tmp;
1523		    else REXEC_FBC_TRYIT;
1524		);
1525	    }
1526	    else {
1527		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1528		tmp = ((OP(c) == NBOUND ?
1529			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1530		REXEC_FBC_SCAN(
1531		    if (tmp ==
1532			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1533			tmp = !tmp;
1534		    else REXEC_FBC_TRYIT;
1535		);
1536	    }
1537	    if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1538		goto got_it;
1539	    break;
1540	case ALNUM:
1541	    REXEC_FBC_CSCAN_PRELOAD(
1542		LOAD_UTF8_CHARCLASS_PERL_WORD(),
1543		swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1544		isALNUM(*s)
1545	    );
1546	case ALNUML:
1547	    REXEC_FBC_CSCAN_TAINT(
1548		isALNUM_LC_utf8((U8*)s),
1549		isALNUM_LC(*s)
1550	    );
1551	case NALNUM:
1552	    REXEC_FBC_CSCAN_PRELOAD(
1553		LOAD_UTF8_CHARCLASS_PERL_WORD(),
1554		!swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1555		!isALNUM(*s)
1556	    );
1557	case NALNUML:
1558	    REXEC_FBC_CSCAN_TAINT(
1559		!isALNUM_LC_utf8((U8*)s),
1560		!isALNUM_LC(*s)
1561	    );
1562	case SPACE:
1563	    REXEC_FBC_CSCAN_PRELOAD(
1564		LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1565		*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8),
1566		isSPACE(*s)
1567	    );
1568	case SPACEL:
1569	    REXEC_FBC_CSCAN_TAINT(
1570		*s == ' ' || isSPACE_LC_utf8((U8*)s),
1571		isSPACE_LC(*s)
1572	    );
1573	case NSPACE:
1574	    REXEC_FBC_CSCAN_PRELOAD(
1575		LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1576		!(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8)),
1577		!isSPACE(*s)
1578	    );
1579	case NSPACEL:
1580	    REXEC_FBC_CSCAN_TAINT(
1581		!(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1582		!isSPACE_LC(*s)
1583	    );
1584	case DIGIT:
1585	    REXEC_FBC_CSCAN_PRELOAD(
1586		LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1587		swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1588		isDIGIT(*s)
1589	    );
1590	case DIGITL:
1591	    REXEC_FBC_CSCAN_TAINT(
1592		isDIGIT_LC_utf8((U8*)s),
1593		isDIGIT_LC(*s)
1594	    );
1595	case NDIGIT:
1596	    REXEC_FBC_CSCAN_PRELOAD(
1597		LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1598		!swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1599		!isDIGIT(*s)
1600	    );
1601	case NDIGITL:
1602	    REXEC_FBC_CSCAN_TAINT(
1603		!isDIGIT_LC_utf8((U8*)s),
1604		!isDIGIT_LC(*s)
1605	    );
1606	case LNBREAK:
1607	    REXEC_FBC_CSCAN(
1608		is_LNBREAK_utf8(s),
1609		is_LNBREAK_latin1(s)
1610	    );
1611	case VERTWS:
1612	    REXEC_FBC_CSCAN(
1613		is_VERTWS_utf8(s),
1614		is_VERTWS_latin1(s)
1615	    );
1616	case NVERTWS:
1617	    REXEC_FBC_CSCAN(
1618		!is_VERTWS_utf8(s),
1619		!is_VERTWS_latin1(s)
1620	    );
1621	case HORIZWS:
1622	    REXEC_FBC_CSCAN(
1623		is_HORIZWS_utf8(s),
1624		is_HORIZWS_latin1(s)
1625	    );
1626	case NHORIZWS:
1627	    REXEC_FBC_CSCAN(
1628		!is_HORIZWS_utf8(s),
1629		!is_HORIZWS_latin1(s)
1630	    );
1631	case AHOCORASICKC:
1632	case AHOCORASICK:
1633	    {
1634	        DECL_TRIE_TYPE(c);
1635                /* what trie are we using right now */
1636        	reg_ac_data *aho
1637        	    = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1638        	reg_trie_data *trie
1639		    = (reg_trie_data*)progi->data->data[ aho->trie ];
1640		HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1641
1642		const char *last_start = strend - trie->minlen;
1643#ifdef DEBUGGING
1644		const char *real_start = s;
1645#endif
1646		STRLEN maxlen = trie->maxlen;
1647		SV *sv_points;
1648		U8 **points; /* map of where we were in the input string
1649		                when reading a given char. For ASCII this
1650		                is unnecessary overhead as the relationship
1651		                is always 1:1, but for Unicode, especially
1652		                case folded Unicode this is not true. */
1653		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1654		U8 *bitmap=NULL;
1655
1656
1657                GET_RE_DEBUG_FLAGS_DECL;
1658
1659                /* We can't just allocate points here. We need to wrap it in
1660                 * an SV so it gets freed properly if there is a croak while
1661                 * running the match */
1662                ENTER;
1663	        SAVETMPS;
1664                sv_points=newSV(maxlen * sizeof(U8 *));
1665                SvCUR_set(sv_points,
1666                    maxlen * sizeof(U8 *));
1667                SvPOK_on(sv_points);
1668                sv_2mortal(sv_points);
1669                points=(U8**)SvPV_nolen(sv_points );
1670                if ( trie_type != trie_utf8_fold
1671                     && (trie->bitmap || OP(c)==AHOCORASICKC) )
1672                {
1673                    if (trie->bitmap)
1674                        bitmap=(U8*)trie->bitmap;
1675                    else
1676                        bitmap=(U8*)ANYOF_BITMAP(c);
1677                }
1678                /* this is the Aho-Corasick algorithm modified a touch
1679                   to include special handling for long "unknown char"
1680                   sequences. The basic idea being that we use AC as long
1681                   as we are dealing with a possible matching char, when
1682                   we encounter an unknown char (and we have not encountered
1683                   an accepting state) we scan forward until we find a legal
1684                   starting char.
1685                   AC matching is basically that of trie matching, except
1686                   that when we encounter a failing transition, we fall back
1687                   to the current states "fail state", and try the current char
1688                   again, a process we repeat until we reach the root state,
1689                   state 1, or a legal transition. If we fail on the root state
1690                   then we can either terminate if we have reached an accepting
1691                   state previously, or restart the entire process from the beginning
1692                   if we have not.
1693
1694                 */
1695                while (s <= last_start) {
1696                    const U32 uniflags = UTF8_ALLOW_DEFAULT;
1697                    U8 *uc = (U8*)s;
1698                    U16 charid = 0;
1699                    U32 base = 1;
1700                    U32 state = 1;
1701                    UV uvc = 0;
1702                    STRLEN len = 0;
1703                    STRLEN foldlen = 0;
1704                    U8 *uscan = (U8*)NULL;
1705                    U8 *leftmost = NULL;
1706#ifdef DEBUGGING
1707                    U32 accepted_word= 0;
1708#endif
1709                    U32 pointpos = 0;
1710
1711                    while ( state && uc <= (U8*)strend ) {
1712                        int failed=0;
1713                        U32 word = aho->states[ state ].wordnum;
1714
1715                        if( state==1 ) {
1716                            if ( bitmap ) {
1717                                DEBUG_TRIE_EXECUTE_r(
1718                                    if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1719                                        dump_exec_pos( (char *)uc, c, strend, real_start,
1720                                            (char *)uc, do_utf8 );
1721                                        PerlIO_printf( Perl_debug_log,
1722                                            " Scanning for legal start char...\n");
1723                                    }
1724                                );
1725                                while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1726                                    uc++;
1727                                }
1728                                s= (char *)uc;
1729                            }
1730                            if (uc >(U8*)last_start) break;
1731                        }
1732
1733                        if ( word ) {
1734                            U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1735                            if (!leftmost || lpos < leftmost) {
1736                                DEBUG_r(accepted_word=word);
1737                                leftmost= lpos;
1738                            }
1739                            if (base==0) break;
1740
1741                        }
1742                        points[pointpos++ % maxlen]= uc;
1743			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1744					     uscan, len, uvc, charid, foldlen,
1745					     foldbuf, uniflags);
1746                        DEBUG_TRIE_EXECUTE_r({
1747                            dump_exec_pos( (char *)uc, c, strend, real_start,
1748                                s,   do_utf8 );
1749                            PerlIO_printf(Perl_debug_log,
1750                                " Charid:%3u CP:%4"UVxf" ",
1751                                 charid, uvc);
1752                        });
1753
1754                        do {
1755#ifdef DEBUGGING
1756                            word = aho->states[ state ].wordnum;
1757#endif
1758                            base = aho->states[ state ].trans.base;
1759
1760                            DEBUG_TRIE_EXECUTE_r({
1761                                if (failed)
1762                                    dump_exec_pos( (char *)uc, c, strend, real_start,
1763                                        s,   do_utf8 );
1764                                PerlIO_printf( Perl_debug_log,
1765                                    "%sState: %4"UVxf", word=%"UVxf,
1766                                    failed ? " Fail transition to " : "",
1767                                    (UV)state, (UV)word);
1768                            });
1769                            if ( base ) {
1770                                U32 tmp;
1771                                if (charid &&
1772                                     (base + charid > trie->uniquecharcount )
1773                                     && (base + charid - 1 - trie->uniquecharcount
1774                                            < trie->lasttrans)
1775                                     && trie->trans[base + charid - 1 -
1776                                            trie->uniquecharcount].check == state
1777                                     && (tmp=trie->trans[base + charid - 1 -
1778                                        trie->uniquecharcount ].next))
1779                                {
1780                                    DEBUG_TRIE_EXECUTE_r(
1781                                        PerlIO_printf( Perl_debug_log," - legal\n"));
1782                                    state = tmp;
1783                                    break;
1784                                }
1785                                else {
1786                                    DEBUG_TRIE_EXECUTE_r(
1787                                        PerlIO_printf( Perl_debug_log," - fail\n"));
1788                                    failed = 1;
1789                                    state = aho->fail[state];
1790                                }
1791                            }
1792                            else {
1793                                /* we must be accepting here */
1794                                DEBUG_TRIE_EXECUTE_r(
1795                                        PerlIO_printf( Perl_debug_log," - accepting\n"));
1796                                failed = 1;
1797                                break;
1798                            }
1799                        } while(state);
1800                        uc += len;
1801                        if (failed) {
1802                            if (leftmost)
1803                                break;
1804                            if (!state) state = 1;
1805                        }
1806                    }
1807                    if ( aho->states[ state ].wordnum ) {
1808                        U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1809                        if (!leftmost || lpos < leftmost) {
1810                            DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1811                            leftmost = lpos;
1812                        }
1813                    }
1814                    if (leftmost) {
1815                        s = (char*)leftmost;
1816                        DEBUG_TRIE_EXECUTE_r({
1817                            PerlIO_printf(
1818                                Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1819                                (UV)accepted_word, (IV)(s - real_start)
1820                            );
1821                        });
1822                        if (!reginfo || regtry(reginfo, &s)) {
1823                            FREETMPS;
1824		            LEAVE;
1825                            goto got_it;
1826                        }
1827                        s = HOPc(s,1);
1828                        DEBUG_TRIE_EXECUTE_r({
1829                            PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1830                        });
1831                    } else {
1832                        DEBUG_TRIE_EXECUTE_r(
1833                            PerlIO_printf( Perl_debug_log,"No match.\n"));
1834                        break;
1835                    }
1836                }
1837                FREETMPS;
1838                LEAVE;
1839	    }
1840	    break;
1841	default:
1842	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1843	    break;
1844	}
1845	return 0;
1846      got_it:
1847	return s;
1848}
1849
1850
1851/*
1852 - regexec_flags - match a regexp against a string
1853 */
1854I32
1855Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1856	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1857/* strend: pointer to null at end of string */
1858/* strbeg: real beginning of string */
1859/* minend: end of match must be >=minend after stringarg. */
1860/* data: May be used for some additional optimizations.
1861         Currently its only used, with a U32 cast, for transmitting
1862         the ganch offset when doing a /g match. This will change */
1863/* nosave: For optimizations. */
1864{
1865    dVAR;
1866    struct regexp *const prog = (struct regexp *)SvANY(rx);
1867    /*register*/ char *s;
1868    register regnode *c;
1869    /*register*/ char *startpos = stringarg;
1870    I32 minlen;		/* must match at least this many chars */
1871    I32 dontbother = 0;	/* how many characters not to try at end */
1872    I32 end_shift = 0;			/* Same for the end. */		/* CC */
1873    I32 scream_pos = -1;		/* Internal iterator of scream. */
1874    char *scream_olds = NULL;
1875    const bool do_utf8 = (bool)DO_UTF8(sv);
1876    I32 multiline;
1877    RXi_GET_DECL(prog,progi);
1878    regmatch_info reginfo;  /* create some info to pass to regtry etc */
1879    regexp_paren_pair *swap = NULL;
1880    GET_RE_DEBUG_FLAGS_DECL;
1881
1882    PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1883    PERL_UNUSED_ARG(data);
1884
1885    /* Be paranoid... */
1886    if (prog == NULL || startpos == NULL) {
1887	Perl_croak(aTHX_ "NULL regexp parameter");
1888	return 0;
1889    }
1890
1891    multiline = prog->extflags & RXf_PMf_MULTILINE;
1892    reginfo.prog = rx;	 /* Yes, sorry that this is confusing.  */
1893
1894    RX_MATCH_UTF8_set(rx, do_utf8);
1895    DEBUG_EXECUTE_r(
1896        debug_start_match(rx, do_utf8, startpos, strend,
1897        "Matching");
1898    );
1899
1900    minlen = prog->minlen;
1901
1902    if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1903        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1904			      "String too short [regexec_flags]...\n"));
1905	goto phooey;
1906    }
1907
1908
1909    /* Check validity of program. */
1910    if (UCHARAT(progi->program) != REG_MAGIC) {
1911	Perl_croak(aTHX_ "corrupted regexp program");
1912    }
1913
1914    PL_reg_flags = 0;
1915    PL_reg_eval_set = 0;
1916    PL_reg_maxiter = 0;
1917
1918    if (RX_UTF8(rx))
1919	PL_reg_flags |= RF_utf8;
1920
1921    /* Mark beginning of line for ^ and lookbehind. */
1922    reginfo.bol = startpos; /* XXX not used ??? */
1923    PL_bostr  = strbeg;
1924    reginfo.sv = sv;
1925
1926    /* Mark end of line for $ (and such) */
1927    PL_regeol = strend;
1928
1929    /* see how far we have to get to not match where we matched before */
1930    reginfo.till = startpos+minend;
1931
1932    /* If there is a "must appear" string, look for it. */
1933    s = startpos;
1934
1935    if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1936	MAGIC *mg;
1937	if (flags & REXEC_IGNOREPOS){	/* Means: check only at start */
1938	    reginfo.ganch = startpos + prog->gofs;
1939	    DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1940	      "GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
1941	} else if (sv && SvTYPE(sv) >= SVt_PVMG
1942		  && SvMAGIC(sv)
1943		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1944		  && mg->mg_len >= 0) {
1945	    reginfo.ganch = strbeg + mg->mg_len;	/* Defined pos() */
1946	    DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1947		"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
1948
1949	    if (prog->extflags & RXf_ANCH_GPOS) {
1950	        if (s > reginfo.ganch)
1951		    goto phooey;
1952		s = reginfo.ganch - prog->gofs;
1953	        DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1954		     "GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
1955		if (s < strbeg)
1956		    goto phooey;
1957	    }
1958	}
1959	else if (data) {
1960	    reginfo.ganch = strbeg + PTR2UV(data);
1961            DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1962		 "GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
1963
1964	} else {				/* pos() not defined */
1965	    reginfo.ganch = strbeg;
1966            DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1967		 "GPOS: reginfo.ganch = strbeg\n"));
1968	}
1969    }
1970    if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1971        /* We have to be careful. If the previous successful match
1972           was from this regex we don't want a subsequent partially
1973           successful match to clobber the old results.
1974           So when we detect this possibility we add a swap buffer
1975           to the re, and switch the buffer each match. If we fail
1976           we switch it back, otherwise we leave it swapped.
1977        */
1978        swap = prog->offs;
1979        /* do we need a save destructor here for eval dies? */
1980        Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
1981    }
1982    if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1983	re_scream_pos_data d;
1984
1985	d.scream_olds = &scream_olds;
1986	d.scream_pos = &scream_pos;
1987	s = re_intuit_start(rx, sv, s, strend, flags, &d);
1988	if (!s) {
1989	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1990	    goto phooey;	/* not present */
1991	}
1992    }
1993
1994
1995
1996    /* Simplest case:  anchored match need be tried only once. */
1997    /*  [unless only anchor is BOL and multiline is set] */
1998    if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1999	if (s == startpos && regtry(&reginfo, &startpos))
2000	    goto got_it;
2001	else if (multiline || (prog->intflags & PREGf_IMPLICIT)
2002		 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
2003	{
2004	    char *end;
2005
2006	    if (minlen)
2007		dontbother = minlen - 1;
2008	    end = HOP3c(strend, -dontbother, strbeg) - 1;
2009	    /* for multiline we only have to try after newlines */
2010	    if (prog->check_substr || prog->check_utf8) {
2011                /* because of the goto we can not easily reuse the macros for bifurcating the
2012                   unicode/non-unicode match modes here like we do elsewhere - demerphq */
2013                if (do_utf8) {
2014                    if (s == startpos)
2015                        goto after_try_utf8;
2016                    while (1) {
2017                        if (regtry(&reginfo, &s)) {
2018                            goto got_it;
2019                        }
2020                      after_try_utf8:
2021                        if (s > end) {
2022                            goto phooey;
2023                        }
2024                        if (prog->extflags & RXf_USE_INTUIT) {
2025                            s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
2026                            if (!s) {
2027                                goto phooey;
2028                            }
2029                        }
2030                        else {
2031                            s += UTF8SKIP(s);
2032                        }
2033                    }
2034                } /* end search for check string in unicode */
2035                else {
2036                    if (s == startpos) {
2037                        goto after_try_latin;
2038                    }
2039                    while (1) {
2040                        if (regtry(&reginfo, &s)) {
2041                            goto got_it;
2042                        }
2043                      after_try_latin:
2044                        if (s > end) {
2045                            goto phooey;
2046                        }
2047                        if (prog->extflags & RXf_USE_INTUIT) {
2048                            s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
2049                            if (!s) {
2050                                goto phooey;
2051                            }
2052                        }
2053                        else {
2054                            s++;
2055                        }
2056                    }
2057                } /* end search for check string in latin*/
2058	    } /* end search for check string */
2059	    else { /* search for newline */
2060		if (s > startpos) {
2061                    /*XXX: The s-- is almost definitely wrong here under unicode - demeprhq*/
2062		    s--;
2063		}
2064                /* We can use a more efficient search as newlines are the same in unicode as they are in latin */
2065		while (s < end) {
2066		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
2067			if (regtry(&reginfo, &s))
2068			    goto got_it;
2069		    }
2070		}
2071	    } /* end search for newline */
2072	} /* end anchored/multiline check string search */
2073	goto phooey;
2074    } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
2075    {
2076        /* the warning about reginfo.ganch being used without intialization
2077           is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
2078           and we only enter this block when the same bit is set. */
2079        char *tmp_s = reginfo.ganch - prog->gofs;
2080
2081	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
2082	    goto got_it;
2083	goto phooey;
2084    }
2085
2086    /* Messy cases:  unanchored match. */
2087    if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
2088	/* we have /x+whatever/ */
2089	/* it must be a one character string (XXXX Except UTF?) */
2090	char ch;
2091#ifdef DEBUGGING
2092	int did_match = 0;
2093#endif
2094	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2095	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2096	ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
2097
2098	if (do_utf8) {
2099	    REXEC_FBC_SCAN(
2100		if (*s == ch) {
2101		    DEBUG_EXECUTE_r( did_match = 1 );
2102		    if (regtry(&reginfo, &s)) goto got_it;
2103		    s += UTF8SKIP(s);
2104		    while (s < strend && *s == ch)
2105			s += UTF8SKIP(s);
2106		}
2107	    );
2108	}
2109	else {
2110	    REXEC_FBC_SCAN(
2111		if (*s == ch) {
2112		    DEBUG_EXECUTE_r( did_match = 1 );
2113		    if (regtry(&reginfo, &s)) goto got_it;
2114		    s++;
2115		    while (s < strend && *s == ch)
2116			s++;
2117		}
2118	    );
2119	}
2120	DEBUG_EXECUTE_r(if (!did_match)
2121		PerlIO_printf(Perl_debug_log,
2122                                  "Did not find anchored character...\n")
2123               );
2124    }
2125    else if (prog->anchored_substr != NULL
2126	      || prog->anchored_utf8 != NULL
2127	      || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
2128		  && prog->float_max_offset < strend - s)) {
2129	SV *must;
2130	I32 back_max;
2131	I32 back_min;
2132	char *last;
2133	char *last1;		/* Last position checked before */
2134#ifdef DEBUGGING
2135	int did_match = 0;
2136#endif
2137	if (prog->anchored_substr || prog->anchored_utf8) {
2138	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2139		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2140	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
2141	    back_max = back_min = prog->anchored_offset;
2142	} else {
2143	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2144		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2145	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
2146	    back_max = prog->float_max_offset;
2147	    back_min = prog->float_min_offset;
2148	}
2149
2150
2151	if (must == &PL_sv_undef)
2152	    /* could not downgrade utf8 check substring, so must fail */
2153	    goto phooey;
2154
2155        if (back_min<0) {
2156	    last = strend;
2157	} else {
2158            last = HOP3c(strend,	/* Cannot start after this */
2159        	  -(I32)(CHR_SVLEN(must)
2160        		 - (SvTAIL(must) != 0) + back_min), strbeg);
2161        }
2162	if (s > PL_bostr)
2163	    last1 = HOPc(s, -1);
2164	else
2165	    last1 = s - 1;	/* bogus */
2166
2167	/* XXXX check_substr already used to find "s", can optimize if
2168	   check_substr==must. */
2169	scream_pos = -1;
2170	dontbother = end_shift;
2171	strend = HOPc(strend, -dontbother);
2172	while ( (s <= last) &&
2173		((flags & REXEC_SCREAM)
2174		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2175				    end_shift, &scream_pos, 0))
2176		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2177				  (unsigned char*)strend, must,
2178				  multiline ? FBMrf_MULTILINE : 0))) ) {
2179	    /* we may be pointing at the wrong string */
2180	    if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2181		s = strbeg + (s - SvPVX_const(sv));
2182	    DEBUG_EXECUTE_r( did_match = 1 );
2183	    if (HOPc(s, -back_max) > last1) {
2184		last1 = HOPc(s, -back_min);
2185		s = HOPc(s, -back_max);
2186	    }
2187	    else {
2188		char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2189
2190		last1 = HOPc(s, -back_min);
2191		s = t;
2192	    }
2193	    if (do_utf8) {
2194		while (s <= last1) {
2195		    if (regtry(&reginfo, &s))
2196			goto got_it;
2197		    s += UTF8SKIP(s);
2198		}
2199	    }
2200	    else {
2201		while (s <= last1) {
2202		    if (regtry(&reginfo, &s))
2203			goto got_it;
2204		    s++;
2205		}
2206	    }
2207	}
2208	DEBUG_EXECUTE_r(if (!did_match) {
2209            RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2210                SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2211            PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2212			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
2213			       ? "anchored" : "floating"),
2214                quoted, RE_SV_TAIL(must));
2215        });
2216	goto phooey;
2217    }
2218    else if ( (c = progi->regstclass) ) {
2219	if (minlen) {
2220	    const OPCODE op = OP(progi->regstclass);
2221	    /* don't bother with what can't match */
2222	    if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2223	        strend = HOPc(strend, -(minlen - 1));
2224	}
2225	DEBUG_EXECUTE_r({
2226	    SV * const prop = sv_newmortal();
2227	    regprop(prog, prop, c);
2228	    {
2229		RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2230		    s,strend-s,60);
2231		PerlIO_printf(Perl_debug_log,
2232		    "Matching stclass %.*s against %s (%d chars)\n",
2233		    (int)SvCUR(prop), SvPVX_const(prop),
2234		     quoted, (int)(strend - s));
2235	    }
2236	});
2237        if (find_byclass(prog, c, s, strend, &reginfo))
2238	    goto got_it;
2239	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2240    }
2241    else {
2242	dontbother = 0;
2243	if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2244	    /* Trim the end. */
2245	    char *last;
2246	    SV* float_real;
2247
2248	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2249		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2250	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2251
2252	    if (flags & REXEC_SCREAM) {
2253		last = screaminstr(sv, float_real, s - strbeg,
2254				   end_shift, &scream_pos, 1); /* last one */
2255		if (!last)
2256		    last = scream_olds; /* Only one occurrence. */
2257		/* we may be pointing at the wrong string */
2258		else if (RXp_MATCH_COPIED(prog))
2259		    s = strbeg + (s - SvPVX_const(sv));
2260	    }
2261	    else {
2262		STRLEN len;
2263                const char * const little = SvPV_const(float_real, len);
2264
2265		if (SvTAIL(float_real)) {
2266		    if (memEQ(strend - len + 1, little, len - 1))
2267			last = strend - len + 1;
2268		    else if (!multiline)
2269			last = memEQ(strend - len, little, len)
2270			    ? strend - len : NULL;
2271		    else
2272			goto find_last;
2273		} else {
2274		  find_last:
2275		    if (len)
2276			last = rninstr(s, strend, little, little + len);
2277		    else
2278			last = strend;	/* matching "$" */
2279		}
2280	    }
2281	    if (last == NULL) {
2282		DEBUG_EXECUTE_r(
2283		    PerlIO_printf(Perl_debug_log,
2284			"%sCan't trim the tail, match fails (should not happen)%s\n",
2285	                PL_colors[4], PL_colors[5]));
2286		goto phooey; /* Should not happen! */
2287	    }
2288	    dontbother = strend - last + prog->float_min_offset;
2289	}
2290	if (minlen && (dontbother < minlen))
2291	    dontbother = minlen - 1;
2292	strend -= dontbother; 		   /* this one's always in bytes! */
2293	/* We don't know much -- general case. */
2294	if (do_utf8) {
2295	    for (;;) {
2296		if (regtry(&reginfo, &s))
2297		    goto got_it;
2298		if (s >= strend)
2299		    break;
2300		s += UTF8SKIP(s);
2301	    };
2302	}
2303	else {
2304	    do {
2305		if (regtry(&reginfo, &s))
2306		    goto got_it;
2307	    } while (s++ < strend);
2308	}
2309    }
2310
2311    /* Failure. */
2312    goto phooey;
2313
2314got_it:
2315    Safefree(swap);
2316    RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2317
2318    if (PL_reg_eval_set)
2319	restore_pos(aTHX_ prog);
2320    if (RXp_PAREN_NAMES(prog))
2321        (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2322
2323    /* make sure $`, $&, $', and $digit will work later */
2324    if ( !(flags & REXEC_NOT_FIRST) ) {
2325	RX_MATCH_COPY_FREE(rx);
2326	if (flags & REXEC_COPY_STR) {
2327	    const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2328#ifdef PERL_OLD_COPY_ON_WRITE
2329	    if ((SvIsCOW(sv)
2330		 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2331		if (DEBUG_C_TEST) {
2332		    PerlIO_printf(Perl_debug_log,
2333				  "Copy on write: regexp capture, type %d\n",
2334				  (int) SvTYPE(sv));
2335		}
2336		prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2337		prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2338		assert (SvPOKp(prog->saved_copy));
2339	    } else
2340#endif
2341	    {
2342		RX_MATCH_COPIED_on(rx);
2343		s = savepvn(strbeg, i);
2344		prog->subbeg = s;
2345	    }
2346	    prog->sublen = i;
2347	}
2348	else {
2349	    prog->subbeg = strbeg;
2350	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2351	}
2352    }
2353
2354    return 1;
2355
2356phooey:
2357    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2358			  PL_colors[4], PL_colors[5]));
2359    if (PL_reg_eval_set)
2360	restore_pos(aTHX_ prog);
2361    if (swap) {
2362        /* we failed :-( roll it back */
2363        Safefree(prog->offs);
2364        prog->offs = swap;
2365    }
2366
2367    return 0;
2368}
2369
2370
2371/*
2372 - regtry - try match at specific point
2373 */
2374STATIC I32			/* 0 failure, 1 success */
2375S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2376{
2377    dVAR;
2378    CHECKPOINT lastcp;
2379    REGEXP *const rx = reginfo->prog;
2380    regexp *const prog = (struct regexp *)SvANY(rx);
2381    RXi_GET_DECL(prog,progi);
2382    GET_RE_DEBUG_FLAGS_DECL;
2383
2384    PERL_ARGS_ASSERT_REGTRY;
2385
2386    reginfo->cutpoint=NULL;
2387
2388    if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2389	MAGIC *mg;
2390
2391	PL_reg_eval_set = RS_init;
2392	DEBUG_EXECUTE_r(DEBUG_s(
2393	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2394			  (IV)(PL_stack_sp - PL_stack_base));
2395	    ));
2396	SAVESTACK_CXPOS();
2397	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2398	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2399	SAVETMPS;
2400	/* Apparently this is not needed, judging by wantarray. */
2401	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2402	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2403
2404	if (reginfo->sv) {
2405	    /* Make $_ available to executed code. */
2406	    if (reginfo->sv != DEFSV) {
2407		SAVE_DEFSV;
2408		DEFSV_set(reginfo->sv);
2409	    }
2410
2411	    if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2412		  && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2413		/* prepare for quick setting of pos */
2414#ifdef PERL_OLD_COPY_ON_WRITE
2415		if (SvIsCOW(reginfo->sv))
2416		    sv_force_normal_flags(reginfo->sv, 0);
2417#endif
2418		mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2419				 &PL_vtbl_mglob, NULL, 0);
2420		mg->mg_len = -1;
2421	    }
2422	    PL_reg_magic    = mg;
2423	    PL_reg_oldpos   = mg->mg_len;
2424	    SAVEDESTRUCTOR_X(restore_pos, prog);
2425        }
2426        if (!PL_reg_curpm) {
2427	    Newxz(PL_reg_curpm, 1, PMOP);
2428#ifdef USE_ITHREADS
2429            {
2430		SV* const repointer = &PL_sv_undef;
2431                /* this regexp is also owned by the new PL_reg_curpm, which
2432		   will try to free it.  */
2433                av_push(PL_regex_padav, repointer);
2434                PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2435                PL_regex_pad = AvARRAY(PL_regex_padav);
2436            }
2437#endif
2438        }
2439#ifdef USE_ITHREADS
2440	/* It seems that non-ithreads works both with and without this code.
2441	   So for efficiency reasons it seems best not to have the code
2442	   compiled when it is not needed.  */
2443	/* This is safe against NULLs: */
2444	ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2445	/* PM_reg_curpm owns a reference to this regexp.  */
2446	ReREFCNT_inc(rx);
2447#endif
2448	PM_SETRE(PL_reg_curpm, rx);
2449	PL_reg_oldcurpm = PL_curpm;
2450	PL_curpm = PL_reg_curpm;
2451	if (RXp_MATCH_COPIED(prog)) {
2452	    /*  Here is a serious problem: we cannot rewrite subbeg,
2453		since it may be needed if this match fails.  Thus
2454		$` inside (?{}) could fail... */
2455	    PL_reg_oldsaved = prog->subbeg;
2456	    PL_reg_oldsavedlen = prog->sublen;
2457#ifdef PERL_OLD_COPY_ON_WRITE
2458	    PL_nrs = prog->saved_copy;
2459#endif
2460	    RXp_MATCH_COPIED_off(prog);
2461	}
2462	else
2463	    PL_reg_oldsaved = NULL;
2464	prog->subbeg = PL_bostr;
2465	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2466    }
2467    DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2468    prog->offs[0].start = *startpos - PL_bostr;
2469    PL_reginput = *startpos;
2470    PL_reglastparen = &prog->lastparen;
2471    PL_reglastcloseparen = &prog->lastcloseparen;
2472    prog->lastparen = 0;
2473    prog->lastcloseparen = 0;
2474    PL_regsize = 0;
2475    PL_regoffs = prog->offs;
2476    if (PL_reg_start_tmpl <= prog->nparens) {
2477	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2478        if(PL_reg_start_tmp)
2479            Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2480        else
2481            Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2482    }
2483
2484    /* XXXX What this code is doing here?!!!  There should be no need
2485       to do this again and again, PL_reglastparen should take care of
2486       this!  --ilya*/
2487
2488    /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2489     * Actually, the code in regcppop() (which Ilya may be meaning by
2490     * PL_reglastparen), is not needed at all by the test suite
2491     * (op/regexp, op/pat, op/split), but that code is needed otherwise
2492     * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2493     * Meanwhile, this code *is* needed for the
2494     * above-mentioned test suite tests to succeed.  The common theme
2495     * on those tests seems to be returning null fields from matches.
2496     * --jhi updated by dapm */
2497#if 1
2498    if (prog->nparens) {
2499	regexp_paren_pair *pp = PL_regoffs;
2500	register I32 i;
2501	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2502	    ++pp;
2503	    pp->start = -1;
2504	    pp->end = -1;
2505	}
2506    }
2507#endif
2508    REGCP_SET(lastcp);
2509    if (regmatch(reginfo, progi->program + 1)) {
2510	PL_regoffs[0].end = PL_reginput - PL_bostr;
2511	return 1;
2512    }
2513    if (reginfo->cutpoint)
2514        *startpos= reginfo->cutpoint;
2515    REGCP_UNWIND(lastcp);
2516    return 0;
2517}
2518
2519
2520#define sayYES goto yes
2521#define sayNO goto no
2522#define sayNO_SILENT goto no_silent
2523
2524/* we dont use STMT_START/END here because it leads to
2525   "unreachable code" warnings, which are bogus, but distracting. */
2526#define CACHEsayNO \
2527    if (ST.cache_mask) \
2528       PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2529    sayNO
2530
2531/* this is used to determine how far from the left messages like
2532   'failed...' are printed. It should be set such that messages
2533   are inline with the regop output that created them.
2534*/
2535#define REPORT_CODE_OFF 32
2536
2537
2538/* Make sure there is a test for this +1 options in re_tests */
2539#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2540
2541#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2542#define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2543
2544#define SLAB_FIRST(s) (&(s)->states[0])
2545#define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2546
2547/* grab a new slab and return the first slot in it */
2548
2549STATIC regmatch_state *
2550S_push_slab(pTHX)
2551{
2552#if PERL_VERSION < 9 && !defined(PERL_CORE)
2553    dMY_CXT;
2554#endif
2555    regmatch_slab *s = PL_regmatch_slab->next;
2556    if (!s) {
2557	Newx(s, 1, regmatch_slab);
2558	s->prev = PL_regmatch_slab;
2559	s->next = NULL;
2560	PL_regmatch_slab->next = s;
2561    }
2562    PL_regmatch_slab = s;
2563    return SLAB_FIRST(s);
2564}
2565
2566
2567/* push a new state then goto it */
2568
2569#define PUSH_STATE_GOTO(state, node) \
2570    scan = node; \
2571    st->resume_state = state; \
2572    goto push_state;
2573
2574/* push a new state with success backtracking, then goto it */
2575
2576#define PUSH_YES_STATE_GOTO(state, node) \
2577    scan = node; \
2578    st->resume_state = state; \
2579    goto push_yes_state;
2580
2581
2582
2583/*
2584
2585regmatch() - main matching routine
2586
2587This is basically one big switch statement in a loop. We execute an op,
2588set 'next' to point the next op, and continue. If we come to a point which
2589we may need to backtrack to on failure such as (A|B|C), we push a
2590backtrack state onto the backtrack stack. On failure, we pop the top
2591state, and re-enter the loop at the state indicated. If there are no more
2592states to pop, we return failure.
2593
2594Sometimes we also need to backtrack on success; for example /A+/, where
2595after successfully matching one A, we need to go back and try to
2596match another one; similarly for lookahead assertions: if the assertion
2597completes successfully, we backtrack to the state just before the assertion
2598and then carry on.  In these cases, the pushed state is marked as
2599'backtrack on success too'. This marking is in fact done by a chain of
2600pointers, each pointing to the previous 'yes' state. On success, we pop to
2601the nearest yes state, discarding any intermediate failure-only states.
2602Sometimes a yes state is pushed just to force some cleanup code to be
2603called at the end of a successful match or submatch; e.g. (??{$re}) uses
2604it to free the inner regex.
2605
2606Note that failure backtracking rewinds the cursor position, while
2607success backtracking leaves it alone.
2608
2609A pattern is complete when the END op is executed, while a subpattern
2610such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2611ops trigger the "pop to last yes state if any, otherwise return true"
2612behaviour.
2613
2614A common convention in this function is to use A and B to refer to the two
2615subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2616the subpattern to be matched possibly multiple times, while B is the entire
2617rest of the pattern. Variable and state names reflect this convention.
2618
2619The states in the main switch are the union of ops and failure/success of
2620substates associated with with that op.  For example, IFMATCH is the op
2621that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2622'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2623successfully matched A and IFMATCH_A_fail is a state saying that we have
2624just failed to match A. Resume states always come in pairs. The backtrack
2625state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2626at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2627on success or failure.
2628
2629The struct that holds a backtracking state is actually a big union, with
2630one variant for each major type of op. The variable st points to the
2631top-most backtrack struct. To make the code clearer, within each
2632block of code we #define ST to alias the relevant union.
2633
2634Here's a concrete example of a (vastly oversimplified) IFMATCH
2635implementation:
2636
2637    switch (state) {
2638    ....
2639
2640#define ST st->u.ifmatch
2641
2642    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2643	ST.foo = ...; // some state we wish to save
2644	...
2645	// push a yes backtrack state with a resume value of
2646	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2647	// first node of A:
2648	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2649	// NOTREACHED
2650
2651    case IFMATCH_A: // we have successfully executed A; now continue with B
2652	next = B;
2653	bar = ST.foo; // do something with the preserved value
2654	break;
2655
2656    case IFMATCH_A_fail: // A failed, so the assertion failed
2657	...;   // do some housekeeping, then ...
2658	sayNO; // propagate the failure
2659
2660#undef ST
2661
2662    ...
2663    }
2664
2665For any old-timers reading this who are familiar with the old recursive
2666approach, the code above is equivalent to:
2667
2668    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2669    {
2670	int foo = ...
2671	...
2672	if (regmatch(A)) {
2673	    next = B;
2674	    bar = foo;
2675	    break;
2676	}
2677	...;   // do some housekeeping, then ...
2678	sayNO; // propagate the failure
2679    }
2680
2681The topmost backtrack state, pointed to by st, is usually free. If you
2682want to claim it, populate any ST.foo fields in it with values you wish to
2683save, then do one of
2684
2685	PUSH_STATE_GOTO(resume_state, node);
2686	PUSH_YES_STATE_GOTO(resume_state, node);
2687
2688which sets that backtrack state's resume value to 'resume_state', pushes a
2689new free entry to the top of the backtrack stack, then goes to 'node'.
2690On backtracking, the free slot is popped, and the saved state becomes the
2691new free state. An ST.foo field in this new top state can be temporarily
2692accessed to retrieve values, but once the main loop is re-entered, it
2693becomes available for reuse.
2694
2695Note that the depth of the backtrack stack constantly increases during the
2696left-to-right execution of the pattern, rather than going up and down with
2697the pattern nesting. For example the stack is at its maximum at Z at the
2698end of the pattern, rather than at X in the following:
2699
2700    /(((X)+)+)+....(Y)+....Z/
2701
2702The only exceptions to this are lookahead/behind assertions and the cut,
2703(?>A), which pop all the backtrack states associated with A before
2704continuing.
2705
2706Bascktrack state structs are allocated in slabs of about 4K in size.
2707PL_regmatch_state and st always point to the currently active state,
2708and PL_regmatch_slab points to the slab currently containing
2709PL_regmatch_state.  The first time regmatch() is called, the first slab is
2710allocated, and is never freed until interpreter destruction. When the slab
2711is full, a new one is allocated and chained to the end. At exit from
2712regmatch(), slabs allocated since entry are freed.
2713
2714*/
2715
2716
2717#define DEBUG_STATE_pp(pp)				    \
2718    DEBUG_STATE_r({					    \
2719	DUMP_EXEC_POS(locinput, scan, do_utf8);		    \
2720	PerlIO_printf(Perl_debug_log,			    \
2721	    "    %*s"pp" %s%s%s%s%s\n",			    \
2722	    depth*2, "",				    \
2723	    PL_reg_name[st->resume_state],                     \
2724	    ((st==yes_state||st==mark_state) ? "[" : ""),   \
2725	    ((st==yes_state) ? "Y" : ""),                   \
2726	    ((st==mark_state) ? "M" : ""),                  \
2727	    ((st==yes_state||st==mark_state) ? "]" : "")    \
2728	);                                                  \
2729    });
2730
2731
2732#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2733
2734#ifdef DEBUGGING
2735
2736STATIC void
2737S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2738    const char *start, const char *end, const char *blurb)
2739{
2740    const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2741
2742    PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2743
2744    if (!PL_colorset)
2745            reginitcolors();
2746    {
2747        RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2748            RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2749
2750        RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2751            start, end - start, 60);
2752
2753        PerlIO_printf(Perl_debug_log,
2754            "%s%s REx%s %s against %s\n",
2755		       PL_colors[4], blurb, PL_colors[5], s0, s1);
2756
2757        if (do_utf8||utf8_pat)
2758            PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2759                utf8_pat ? "pattern" : "",
2760                utf8_pat && do_utf8 ? " and " : "",
2761                do_utf8 ? "string" : ""
2762            );
2763    }
2764}
2765
2766STATIC void
2767S_dump_exec_pos(pTHX_ const char *locinput,
2768                      const regnode *scan,
2769                      const char *loc_regeol,
2770                      const char *loc_bostr,
2771                      const char *loc_reg_starttry,
2772                      const bool do_utf8)
2773{
2774    const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2775    const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2776    int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2777    /* The part of the string before starttry has one color
2778       (pref0_len chars), between starttry and current
2779       position another one (pref_len - pref0_len chars),
2780       after the current position the third one.
2781       We assume that pref0_len <= pref_len, otherwise we
2782       decrease pref0_len.  */
2783    int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2784	? (5 + taill) - l : locinput - loc_bostr;
2785    int pref0_len;
2786
2787    PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2788
2789    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2790	pref_len++;
2791    pref0_len = pref_len  - (locinput - loc_reg_starttry);
2792    if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2793	l = ( loc_regeol - locinput > (5 + taill) - pref_len
2794	      ? (5 + taill) - pref_len : loc_regeol - locinput);
2795    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2796	l--;
2797    if (pref0_len < 0)
2798	pref0_len = 0;
2799    if (pref0_len > pref_len)
2800	pref0_len = pref_len;
2801    {
2802	const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2803
2804	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2805	    (locinput - pref_len),pref0_len, 60, 4, 5);
2806
2807	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2808		    (locinput - pref_len + pref0_len),
2809		    pref_len - pref0_len, 60, 2, 3);
2810
2811	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2812		    locinput, loc_regeol - locinput, 10, 0, 1);
2813
2814	const STRLEN tlen=len0+len1+len2;
2815	PerlIO_printf(Perl_debug_log,
2816		    "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2817		    (IV)(locinput - loc_bostr),
2818		    len0, s0,
2819		    len1, s1,
2820		    (docolor ? "" : "> <"),
2821		    len2, s2,
2822		    (int)(tlen > 19 ? 0 :  19 - tlen),
2823		    "");
2824    }
2825}
2826
2827#endif
2828
2829/* reg_check_named_buff_matched()
2830 * Checks to see if a named buffer has matched. The data array of
2831 * buffer numbers corresponding to the buffer is expected to reside
2832 * in the regexp->data->data array in the slot stored in the ARG() of
2833 * node involved. Note that this routine doesn't actually care about the
2834 * name, that information is not preserved from compilation to execution.
2835 * Returns the index of the leftmost defined buffer with the given name
2836 * or 0 if non of the buffers matched.
2837 */
2838STATIC I32
2839S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2840{
2841    I32 n;
2842    RXi_GET_DECL(rex,rexi);
2843    SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2844    I32 *nums=(I32*)SvPVX(sv_dat);
2845
2846    PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2847
2848    for ( n=0; n<SvIVX(sv_dat); n++ ) {
2849        if ((I32)*PL_reglastparen >= nums[n] &&
2850            PL_regoffs[nums[n]].end != -1)
2851        {
2852            return nums[n];
2853        }
2854    }
2855    return 0;
2856}
2857
2858
2859/* free all slabs above current one  - called during LEAVE_SCOPE */
2860
2861STATIC void
2862S_clear_backtrack_stack(pTHX_ void *p)
2863{
2864    regmatch_slab *s = PL_regmatch_slab->next;
2865    PERL_UNUSED_ARG(p);
2866
2867    if (!s)
2868	return;
2869    PL_regmatch_slab->next = NULL;
2870    while (s) {
2871	regmatch_slab * const osl = s;
2872	s = s->next;
2873	Safefree(osl);
2874    }
2875}
2876
2877
2878#define SETREX(Re1,Re2) \
2879    if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2880    Re1 = (Re2)
2881
2882STATIC I32			/* 0 failure, 1 success */
2883S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2884{
2885#if PERL_VERSION < 9 && !defined(PERL_CORE)
2886    dMY_CXT;
2887#endif
2888    dVAR;
2889    register const bool do_utf8 = PL_reg_match_utf8;
2890    const U32 uniflags = UTF8_ALLOW_DEFAULT;
2891    REGEXP *rex_sv = reginfo->prog;
2892    regexp *rex = (struct regexp *)SvANY(rex_sv);
2893    RXi_GET_DECL(rex,rexi);
2894    I32	oldsave;
2895    /* the current state. This is a cached copy of PL_regmatch_state */
2896    register regmatch_state *st;
2897    /* cache heavy used fields of st in registers */
2898    register regnode *scan;
2899    register regnode *next;
2900    register U32 n = 0;	/* general value; init to avoid compiler warning */
2901    register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2902    register char *locinput = PL_reginput;
2903    register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2904
2905    bool result = 0;	    /* return value of S_regmatch */
2906    int depth = 0;	    /* depth of backtrack stack */
2907    U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2908    const U32 max_nochange_depth =
2909        (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2910        3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2911    regmatch_state *yes_state = NULL; /* state to pop to on success of
2912							    subpattern */
2913    /* mark_state piggy backs on the yes_state logic so that when we unwind
2914       the stack on success we can update the mark_state as we go */
2915    regmatch_state *mark_state = NULL; /* last mark state we have seen */
2916    regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2917    struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2918    U32 state_num;
2919    bool no_final = 0;      /* prevent failure from backtracking? */
2920    bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2921    char *startpoint = PL_reginput;
2922    SV *popmark = NULL;     /* are we looking for a mark? */
2923    SV *sv_commit = NULL;   /* last mark name seen in failure */
2924    SV *sv_yes_mark = NULL; /* last mark name we have seen
2925                               during a successfull match */
2926    U32 lastopen = 0;       /* last open we saw */
2927    bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2928    SV* const oreplsv = GvSV(PL_replgv);
2929    /* these three flags are set by various ops to signal information to
2930     * the very next op. They have a useful lifetime of exactly one loop
2931     * iteration, and are not preserved or restored by state pushes/pops
2932     */
2933    bool sw = 0;	    /* the condition value in (?(cond)a|b) */
2934    bool minmod = 0;	    /* the next "{n,m}" is a "{n,m}?" */
2935    int logical = 0;	    /* the following EVAL is:
2936				0: (?{...})
2937				1: (?(?{...})X|Y)
2938				2: (??{...})
2939			       or the following IFMATCH/UNLESSM is:
2940			        false: plain (?=foo)
2941				true:  used as a condition: (?(?=foo))
2942			    */
2943#ifdef DEBUGGING
2944    GET_RE_DEBUG_FLAGS_DECL;
2945#endif
2946
2947    PERL_ARGS_ASSERT_REGMATCH;
2948
2949    DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2950	    PerlIO_printf(Perl_debug_log,"regmatch start\n");
2951    }));
2952    /* on first ever call to regmatch, allocate first slab */
2953    if (!PL_regmatch_slab) {
2954	Newx(PL_regmatch_slab, 1, regmatch_slab);
2955	PL_regmatch_slab->prev = NULL;
2956	PL_regmatch_slab->next = NULL;
2957	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2958    }
2959
2960    oldsave = PL_savestack_ix;
2961    SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2962    SAVEVPTR(PL_regmatch_slab);
2963    SAVEVPTR(PL_regmatch_state);
2964
2965    /* grab next free state slot */
2966    st = ++PL_regmatch_state;
2967    if (st >  SLAB_LAST(PL_regmatch_slab))
2968	st = PL_regmatch_state = S_push_slab(aTHX);
2969
2970    /* Note that nextchr is a byte even in UTF */
2971    nextchr = UCHARAT(locinput);
2972    scan = prog;
2973    while (scan != NULL) {
2974
2975        DEBUG_EXECUTE_r( {
2976	    SV * const prop = sv_newmortal();
2977	    regnode *rnext=regnext(scan);
2978	    DUMP_EXEC_POS( locinput, scan, do_utf8 );
2979	    regprop(rex, prop, scan);
2980
2981	    PerlIO_printf(Perl_debug_log,
2982		    "%3"IVdf":%*s%s(%"IVdf")\n",
2983		    (IV)(scan - rexi->program), depth*2, "",
2984		    SvPVX_const(prop),
2985		    (PL_regkind[OP(scan)] == END || !rnext) ?
2986		        0 : (IV)(rnext - rexi->program));
2987	});
2988
2989	next = scan + NEXT_OFF(scan);
2990	if (next == scan)
2991	    next = NULL;
2992	state_num = OP(scan);
2993
2994      reenter_switch:
2995
2996	assert(PL_reglastparen == &rex->lastparen);
2997	assert(PL_reglastcloseparen == &rex->lastcloseparen);
2998	assert(PL_regoffs == rex->offs);
2999
3000	switch (state_num) {
3001	case BOL:
3002	    if (locinput == PL_bostr)
3003	    {
3004		/* reginfo->till = reginfo->bol; */
3005		break;
3006	    }
3007	    sayNO;
3008	case MBOL:
3009	    if (locinput == PL_bostr ||
3010		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
3011	    {
3012		break;
3013	    }
3014	    sayNO;
3015	case SBOL:
3016	    if (locinput == PL_bostr)
3017		break;
3018	    sayNO;
3019	case GPOS:
3020	    if (locinput == reginfo->ganch)
3021		break;
3022	    sayNO;
3023
3024	case KEEPS:
3025	    /* update the startpoint */
3026	    st->u.keeper.val = PL_regoffs[0].start;
3027	    PL_reginput = locinput;
3028	    PL_regoffs[0].start = locinput - PL_bostr;
3029	    PUSH_STATE_GOTO(KEEPS_next, next);
3030	    /*NOT-REACHED*/
3031	case KEEPS_next_fail:
3032	    /* rollback the start point change */
3033	    PL_regoffs[0].start = st->u.keeper.val;
3034	    sayNO_SILENT;
3035	    /*NOT-REACHED*/
3036	case EOL:
3037		goto seol;
3038	case MEOL:
3039	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
3040		sayNO;
3041	    break;
3042	case SEOL:
3043	  seol:
3044	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
3045		sayNO;
3046	    if (PL_regeol - locinput > 1)
3047		sayNO;
3048	    break;
3049	case EOS:
3050	    if (PL_regeol != locinput)
3051		sayNO;
3052	    break;
3053	case SANY:
3054	    if (!nextchr && locinput >= PL_regeol)
3055		sayNO;
3056 	    if (do_utf8) {
3057	        locinput += PL_utf8skip[nextchr];
3058		if (locinput > PL_regeol)
3059 		    sayNO;
3060 		nextchr = UCHARAT(locinput);
3061 	    }
3062 	    else
3063 		nextchr = UCHARAT(++locinput);
3064	    break;
3065	case CANY:
3066	    if (!nextchr && locinput >= PL_regeol)
3067		sayNO;
3068	    nextchr = UCHARAT(++locinput);
3069	    break;
3070	case REG_ANY:
3071	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
3072		sayNO;
3073	    if (do_utf8) {
3074		locinput += PL_utf8skip[nextchr];
3075		if (locinput > PL_regeol)
3076		    sayNO;
3077		nextchr = UCHARAT(locinput);
3078	    }
3079	    else
3080		nextchr = UCHARAT(++locinput);
3081	    break;
3082
3083#undef  ST
3084#define ST st->u.trie
3085        case TRIEC:
3086            /* In this case the charclass data is available inline so
3087               we can fail fast without a lot of extra overhead.
3088             */
3089            if (scan->flags == EXACT || !do_utf8) {
3090                if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
3091                    DEBUG_EXECUTE_r(
3092                        PerlIO_printf(Perl_debug_log,
3093                    	          "%*s  %sfailed to match trie start class...%s\n",
3094                    	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3095                    );
3096                    sayNO_SILENT;
3097                    /* NOTREACHED */
3098                }
3099            }
3100            /* FALL THROUGH */
3101	case TRIE:
3102	    {
3103                /* what type of TRIE am I? (utf8 makes this contextual) */
3104                DECL_TRIE_TYPE(scan);
3105
3106                /* what trie are we using right now */
3107		reg_trie_data * const trie
3108        	    = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
3109		HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
3110                U32 state = trie->startstate;
3111
3112        	if (trie->bitmap && trie_type != trie_utf8_fold &&
3113        	    !TRIE_BITMAP_TEST(trie,*locinput)
3114        	) {
3115        	    if (trie->states[ state ].wordnum) {
3116        	         DEBUG_EXECUTE_r(
3117                            PerlIO_printf(Perl_debug_log,
3118                        	          "%*s  %smatched empty string...%s\n",
3119                        	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3120                        );
3121        	        break;
3122        	    } else {
3123        	        DEBUG_EXECUTE_r(
3124                            PerlIO_printf(Perl_debug_log,
3125                        	          "%*s  %sfailed to match trie start class...%s\n",
3126                        	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3127                        );
3128        	        sayNO_SILENT;
3129        	   }
3130                }
3131
3132            {
3133		U8 *uc = ( U8* )locinput;
3134
3135		STRLEN len = 0;
3136		STRLEN foldlen = 0;
3137		U8 *uscan = (U8*)NULL;
3138		STRLEN bufflen=0;
3139		SV *sv_accept_buff = NULL;
3140		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
3141
3142	    	ST.accepted = 0; /* how many accepting states we have seen */
3143		ST.B = next;
3144		ST.jump = trie->jump;
3145		ST.me = scan;
3146	        /*
3147        	   traverse the TRIE keeping track of all accepting states
3148        	   we transition through until we get to a failing node.
3149        	*/
3150
3151		while ( state && uc <= (U8*)PL_regeol ) {
3152                    U32 base = trie->states[ state ].trans.base;
3153                    UV uvc = 0;
3154                    U16 charid;
3155                    /* We use charid to hold the wordnum as we don't use it
3156                       for charid until after we have done the wordnum logic.
3157                       We define an alias just so that the wordnum logic reads
3158                       more naturally. */
3159
3160#define got_wordnum charid
3161                    got_wordnum = trie->states[ state ].wordnum;
3162
3163		    if ( got_wordnum ) {
3164			if ( ! ST.accepted ) {
3165			    ENTER;
3166			    SAVETMPS; /* XXX is this necessary? dmq */
3167			    bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3168			    sv_accept_buff=newSV(bufflen *
3169					    sizeof(reg_trie_accepted) - 1);
3170			    SvCUR_set(sv_accept_buff, 0);
3171			    SvPOK_on(sv_accept_buff);
3172			    sv_2mortal(sv_accept_buff);
3173			    SAVETMPS;
3174			    ST.accept_buff =
3175				(reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3176			}
3177			do {
3178			    if (ST.accepted >= bufflen) {
3179				bufflen *= 2;
3180				ST.accept_buff =(reg_trie_accepted*)
3181				    SvGROW(sv_accept_buff,
3182				       	bufflen * sizeof(reg_trie_accepted));
3183			    }
3184			    SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3185				+ sizeof(reg_trie_accepted));
3186
3187
3188			    ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3189			    ST.accept_buff[ST.accepted].endpos = uc;
3190			    ++ST.accepted;
3191		        } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3192		    }
3193#undef got_wordnum
3194
3195		    DEBUG_TRIE_EXECUTE_r({
3196		                DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3197			        PerlIO_printf( Perl_debug_log,
3198			            "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3199			            2+depth * 2, "", PL_colors[4],
3200			            (UV)state, (UV)ST.accepted );
3201		    });
3202
3203		    if ( base ) {
3204			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3205					     uscan, len, uvc, charid, foldlen,
3206					     foldbuf, uniflags);
3207
3208			if (charid &&
3209			     (base + charid > trie->uniquecharcount )
3210			     && (base + charid - 1 - trie->uniquecharcount
3211				    < trie->lasttrans)
3212			     && trie->trans[base + charid - 1 -
3213				    trie->uniquecharcount].check == state)
3214			{
3215			    state = trie->trans[base + charid - 1 -
3216				trie->uniquecharcount ].next;
3217			}
3218			else {
3219			    state = 0;
3220			}
3221			uc += len;
3222
3223		    }
3224		    else {
3225			state = 0;
3226		    }
3227		    DEBUG_TRIE_EXECUTE_r(
3228		        PerlIO_printf( Perl_debug_log,
3229		            "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3230		            charid, uvc, (UV)state, PL_colors[5] );
3231		    );
3232		}
3233		if (!ST.accepted )
3234		   sayNO;
3235
3236		DEBUG_EXECUTE_r(
3237		    PerlIO_printf( Perl_debug_log,
3238			"%*s  %sgot %"IVdf" possible matches%s\n",
3239			REPORT_CODE_OFF + depth * 2, "",
3240			PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3241		);
3242	    }}
3243            goto trie_first_try; /* jump into the fail handler */
3244	    /* NOTREACHED */
3245	case TRIE_next_fail: /* we failed - try next alterative */
3246            if ( ST.jump) {
3247                REGCP_UNWIND(ST.cp);
3248	        for (n = *PL_reglastparen; n > ST.lastparen; n--)
3249		    PL_regoffs[n].end = -1;
3250	        *PL_reglastparen = n;
3251	    }
3252          trie_first_try:
3253            if (do_cutgroup) {
3254                do_cutgroup = 0;
3255                no_final = 0;
3256            }
3257
3258            if ( ST.jump) {
3259                ST.lastparen = *PL_reglastparen;
3260	        REGCP_SET(ST.cp);
3261            }
3262	    if ( ST.accepted == 1 ) {
3263		/* only one choice left - just continue */
3264		DEBUG_EXECUTE_r({
3265		    AV *const trie_words
3266			= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3267		    SV ** const tmp = av_fetch( trie_words,
3268		        ST.accept_buff[ 0 ].wordnum-1, 0 );
3269		    SV *sv= tmp ? sv_newmortal() : NULL;
3270
3271		    PerlIO_printf( Perl_debug_log,
3272			"%*s  %sonly one match left: #%d <%s>%s\n",
3273			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3274			ST.accept_buff[ 0 ].wordnum,
3275			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3276	                        PL_colors[0], PL_colors[1],
3277	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3278                            )
3279			: "not compiled under -Dr",
3280			PL_colors[5] );
3281		});
3282		PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3283		/* in this case we free tmps/leave before we call regmatch
3284		   as we wont be using accept_buff again. */
3285
3286		locinput = PL_reginput;
3287		nextchr = UCHARAT(locinput);
3288    		if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3289    		    scan = ST.B;
3290    		else
3291    		    scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3292		if (!has_cutgroup) {
3293		    FREETMPS;
3294		    LEAVE;
3295                } else {
3296                    ST.accepted--;
3297                    PUSH_YES_STATE_GOTO(TRIE_next, scan);
3298                }
3299
3300		continue; /* execute rest of RE */
3301	    }
3302
3303	    if ( !ST.accepted-- ) {
3304	        DEBUG_EXECUTE_r({
3305		    PerlIO_printf( Perl_debug_log,
3306			"%*s  %sTRIE failed...%s\n",
3307			REPORT_CODE_OFF+depth*2, "",
3308			PL_colors[4],
3309			PL_colors[5] );
3310		});
3311		FREETMPS;
3312		LEAVE;
3313		sayNO_SILENT;
3314		/*NOTREACHED*/
3315	    }
3316
3317	    /*
3318	       There are at least two accepting states left.  Presumably
3319	       the number of accepting states is going to be low,
3320	       typically two. So we simply scan through to find the one
3321	       with lowest wordnum.  Once we find it, we swap the last
3322	       state into its place and decrement the size. We then try to
3323	       match the rest of the pattern at the point where the word
3324	       ends. If we succeed, control just continues along the
3325	       regex; if we fail we return here to try the next accepting
3326	       state
3327	     */
3328
3329	    {
3330		U32 best = 0;
3331		U32 cur;
3332		for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3333		    DEBUG_TRIE_EXECUTE_r(
3334			PerlIO_printf( Perl_debug_log,
3335			    "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3336			    REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3337			    (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3338			    ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3339		    );
3340
3341		    if (ST.accept_buff[cur].wordnum <
3342			    ST.accept_buff[best].wordnum)
3343			best = cur;
3344		}
3345
3346		DEBUG_EXECUTE_r({
3347		    AV *const trie_words
3348			= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3349		    SV ** const tmp = av_fetch( trie_words,
3350		        ST.accept_buff[ best ].wordnum - 1, 0 );
3351		    regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3352		                    ST.B :
3353		                    ST.me + ST.jump[ST.accept_buff[best].wordnum];
3354		    SV *sv= tmp ? sv_newmortal() : NULL;
3355
3356		    PerlIO_printf( Perl_debug_log,
3357		        "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3358			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3359			ST.accept_buff[best].wordnum,
3360			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3361	                        PL_colors[0], PL_colors[1],
3362	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3363                            ) : "not compiled under -Dr",
3364			    REG_NODE_NUM(nextop),
3365			PL_colors[5] );
3366		});
3367
3368		if ( best<ST.accepted ) {
3369		    reg_trie_accepted tmp = ST.accept_buff[ best ];
3370		    ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3371		    ST.accept_buff[ ST.accepted ] = tmp;
3372		    best = ST.accepted;
3373		}
3374		PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3375		if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3376		    scan = ST.B;
3377		} else {
3378		    scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3379                }
3380                PUSH_YES_STATE_GOTO(TRIE_next, scan);
3381                /* NOTREACHED */
3382	    }
3383	    /* NOTREACHED */
3384        case TRIE_next:
3385	    /* we dont want to throw this away, see bug 57042*/
3386	    if (oreplsv != GvSV(PL_replgv))
3387		sv_setsv(oreplsv, GvSV(PL_replgv));
3388            FREETMPS;
3389	    LEAVE;
3390	    sayYES;
3391#undef  ST
3392
3393	case EXACT: {
3394	    char *s = STRING(scan);
3395	    ln = STR_LEN(scan);
3396	    if (do_utf8 != UTF) {
3397		/* The target and the pattern have differing utf8ness. */
3398		char *l = locinput;
3399		const char * const e = s + ln;
3400
3401		if (do_utf8) {
3402		    /* The target is utf8, the pattern is not utf8. */
3403		    while (s < e) {
3404			STRLEN ulen;
3405			if (l >= PL_regeol)
3406			     sayNO;
3407			if (NATIVE_TO_UNI(*(U8*)s) !=
3408			    utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3409					    uniflags))
3410			     sayNO;
3411			l += ulen;
3412			s ++;
3413		    }
3414		}
3415		else {
3416		    /* The target is not utf8, the pattern is utf8. */
3417		    while (s < e) {
3418			STRLEN ulen;
3419			if (l >= PL_regeol)
3420			    sayNO;
3421			if (NATIVE_TO_UNI(*((U8*)l)) !=
3422			    utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3423					   uniflags))
3424			    sayNO;
3425			s += ulen;
3426			l ++;
3427		    }
3428		}
3429		locinput = l;
3430		nextchr = UCHARAT(locinput);
3431		break;
3432	    }
3433	    /* The target and the pattern have the same utf8ness. */
3434	    /* Inline the first character, for speed. */
3435	    if (UCHARAT(s) != nextchr)
3436		sayNO;
3437	    if (PL_regeol - locinput < ln)
3438		sayNO;
3439	    if (ln > 1 && memNE(s, locinput, ln))
3440		sayNO;
3441	    locinput += ln;
3442	    nextchr = UCHARAT(locinput);
3443	    break;
3444	    }
3445	case EXACTFL:
3446	    PL_reg_flags |= RF_tainted;
3447	    /* FALL THROUGH */
3448	case EXACTF: {
3449	    char * const s = STRING(scan);
3450	    ln = STR_LEN(scan);
3451
3452	    if (do_utf8 || UTF) {
3453	      /* Either target or the pattern are utf8. */
3454		const char * const l = locinput;
3455		char *e = PL_regeol;
3456
3457		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3458			       l, &e, 0,  do_utf8)) {
3459		     /* One more case for the sharp s:
3460		      * pack("U0U*", 0xDF) =~ /ss/i,
3461		      * the 0xC3 0x9F are the UTF-8
3462		      * byte sequence for the U+00DF. */
3463
3464		     if (!(do_utf8 &&
3465		           toLOWER(s[0]) == 's' &&
3466			   ln >= 2 &&
3467			   toLOWER(s[1]) == 's' &&
3468			   (U8)l[0] == 0xC3 &&
3469			   e - l >= 2 &&
3470			   (U8)l[1] == 0x9F))
3471			  sayNO;
3472		}
3473		locinput = e;
3474		nextchr = UCHARAT(locinput);
3475		break;
3476	    }
3477
3478	    /* Neither the target and the pattern are utf8. */
3479
3480	    /* Inline the first character, for speed. */
3481	    if (UCHARAT(s) != nextchr &&
3482		UCHARAT(s) != ((OP(scan) == EXACTF)
3483			       ? PL_fold : PL_fold_locale)[nextchr])
3484		sayNO;
3485	    if (PL_regeol - locinput < ln)
3486		sayNO;
3487	    if (ln > 1 && (OP(scan) == EXACTF
3488			   ? ibcmp(s, locinput, ln)
3489			   : ibcmp_locale(s, locinput, ln)))
3490		sayNO;
3491	    locinput += ln;
3492	    nextchr = UCHARAT(locinput);
3493	    break;
3494	    }
3495	case BOUNDL:
3496	case NBOUNDL:
3497	    PL_reg_flags |= RF_tainted;
3498	    /* FALL THROUGH */
3499	case BOUND:
3500	case NBOUND:
3501	    /* was last char in word? */
3502	    if (do_utf8) {
3503		if (locinput == PL_bostr)
3504		    ln = '\n';
3505		else {
3506		    const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3507
3508		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3509		}
3510		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3511		    ln = isALNUM_uni(ln);
3512		    LOAD_UTF8_CHARCLASS_ALNUM();
3513		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3514		}
3515		else {
3516		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3517		    n = isALNUM_LC_utf8((U8*)locinput);
3518		}
3519	    }
3520	    else {
3521		ln = (locinput != PL_bostr) ?
3522		    UCHARAT(locinput - 1) : '\n';
3523		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3524		    ln = isALNUM(ln);
3525		    n = isALNUM(nextchr);
3526		}
3527		else {
3528		    ln = isALNUM_LC(ln);
3529		    n = isALNUM_LC(nextchr);
3530		}
3531	    }
3532	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3533				    OP(scan) == BOUNDL))
3534		    sayNO;
3535	    break;
3536	case ANYOF:
3537	    if (do_utf8) {
3538	        STRLEN inclasslen = PL_regeol - locinput;
3539
3540	        if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3541		    goto anyof_fail;
3542		if (locinput >= PL_regeol)
3543		    sayNO;
3544		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3545		nextchr = UCHARAT(locinput);
3546		break;
3547	    }
3548	    else {
3549		if (nextchr < 0)
3550		    nextchr = UCHARAT(locinput);
3551		if (!REGINCLASS(rex, scan, (U8*)locinput))
3552		    goto anyof_fail;
3553		if (!nextchr && locinput >= PL_regeol)
3554		    sayNO;
3555		nextchr = UCHARAT(++locinput);
3556		break;
3557	    }
3558	anyof_fail:
3559	    /* If we might have the case of the German sharp s
3560	     * in a casefolding Unicode character class. */
3561
3562	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3563		 locinput += SHARP_S_SKIP;
3564		 nextchr = UCHARAT(locinput);
3565	    }
3566	    else
3567		 sayNO;
3568	    break;
3569	/* Special char classes - The defines start on line 129 or so */
3570	CCC_TRY_AFF( ALNUM,  ALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3571	CCC_TRY_NEG(NALNUM, NALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3572
3573	CCC_TRY_AFF( SPACE,  SPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3574	CCC_TRY_NEG(NSPACE, NSPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3575
3576	CCC_TRY_AFF( DIGIT,  DIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3577	CCC_TRY_NEG(NDIGIT, NDIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3578
3579	case CLUMP: /* Match \X: logical Unicode character.  This is defined as
3580		       a Unicode extended Grapheme Cluster */
3581	    /* From http://www.unicode.org/reports/tr29 (5.2 version).  An
3582	      extended Grapheme Cluster is:
3583
3584	       CR LF
3585	       | Prepend* Begin Extend*
3586	       | .
3587
3588	       Begin is (Hangul-syllable | ! Control)
3589	       Extend is (Grapheme_Extend | Spacing_Mark)
3590	       Control is [ GCB_Control CR LF ]
3591
3592	       The discussion below shows how the code for CLUMP is derived
3593	       from this regex.  Note that most of these concepts are from
3594	       property values of the Grapheme Cluster Boundary (GCB) property.
3595	       No code point can have multiple property values for a given
3596	       property.  Thus a code point in Prepend can't be in Control, but
3597	       it must be in !Control.  This is why Control above includes
3598	       GCB_Control plus CR plus LF.  The latter two are used in the GCB
3599	       property separately, and so can't be in GCB_Control, even though
3600	       they logically are controls.  Control is not the same as gc=cc,
3601	       but includes format and other characters as well.
3602
3603	       The Unicode definition of Hangul-syllable is:
3604		   L+
3605		   | (L* ( ( V | LV ) V* | LVT ) T*)
3606		   | T+
3607		  )
3608	       Each of these is a value for the GCB property, and hence must be
3609	       disjoint, so the order they are tested is immaterial, so the
3610	       above can safely be changed to
3611		   T+
3612		   | L+
3613		   | (L* ( LVT | ( V | LV ) V*) T*)
3614
3615	       The last two terms can be combined like this:
3616		   L* ( L
3617		        | (( LVT | ( V | LV ) V*) T*))
3618
3619	       And refactored into this:
3620		   L* (L | LVT T* | V  V* T* | LV  V* T*)
3621
3622	       That means that if we have seen any L's at all we can quit
3623	       there, but if the next character is a LVT, a V or and LV we
3624	       should keep going.
3625
3626	       There is a subtlety with Prepend* which showed up in testing.
3627	       Note that the Begin, and only the Begin is required in:
3628	        | Prepend* Begin Extend*
3629	       Also, Begin contains '! Control'.  A Prepend must be a '!
3630	       Control', which means it must be a Begin.  What it comes down to
3631	       is that if we match Prepend* and then find no suitable Begin
3632	       afterwards, that if we backtrack the last Prepend, that one will
3633	       be a suitable Begin.
3634	    */
3635
3636	    if (locinput >= PL_regeol)
3637		sayNO;
3638	    if  (! do_utf8) {
3639
3640		/* Match either CR LF  or '.', as all the other possibilities
3641		 * require utf8 */
3642		locinput++;	    /* Match the . or CR */
3643		if (nextchr == '\r'
3644		    && locinput < PL_regeol
3645		    && UCHARAT(locinput) == '\n') locinput++;
3646	    }
3647	    else {
3648
3649		/* Utf8: See if is ( CR LF ); already know that locinput <
3650		 * PL_regeol, so locinput+1 is in bounds */
3651		if (nextchr == '\r' && UCHARAT(locinput + 1) == '\n') {
3652		    locinput += 2;
3653		}
3654		else {
3655		    /* In case have to backtrack to beginning, then match '.' */
3656		    char *starting = locinput;
3657
3658		    /* In case have to backtrack the last prepend */
3659		    char *previous_prepend = 0;
3660
3661		    LOAD_UTF8_CHARCLASS_GCB();
3662
3663		    /* Match (prepend)* */
3664		    while (locinput < PL_regeol
3665			   && swash_fetch(PL_utf8_X_prepend,
3666					  (U8*)locinput, do_utf8))
3667		    {
3668			previous_prepend = locinput;
3669			locinput += UTF8SKIP(locinput);
3670		    }
3671
3672		    /* As noted above, if we matched a prepend character, but
3673		     * the next thing won't match, back off the last prepend we
3674		     * matched, as it is guaranteed to match the begin */
3675		    if (previous_prepend
3676			&& (locinput >=  PL_regeol
3677			    || ! swash_fetch(PL_utf8_X_begin,
3678					     (U8*)locinput, do_utf8)))
3679		    {
3680			locinput = previous_prepend;
3681		    }
3682
3683		    /* Note that here we know PL_regeol > locinput, as we
3684		     * tested that upon input to this switch case, and if we
3685		     * moved locinput forward, we tested the result just above
3686		     * and it either passed, or we backed off so that it will
3687		     * now pass */
3688		    if (! swash_fetch(PL_utf8_X_begin, (U8*)locinput, do_utf8)) {
3689
3690			/* Here did not match the required 'Begin' in the
3691			 * second term.  So just match the very first
3692			 * character, the '.' of the final term of the regex */
3693			locinput = starting + UTF8SKIP(starting);
3694		    } else {
3695
3696			/* Here is the beginning of a character that can have
3697			 * an extender.  It is either a hangul syllable, or a
3698			 * non-control */
3699			if (swash_fetch(PL_utf8_X_non_hangul,
3700					(U8*)locinput, do_utf8))
3701			{
3702
3703			    /* Here not a Hangul syllable, must be a
3704			     * ('!  * Control') */
3705			    locinput += UTF8SKIP(locinput);
3706			} else {
3707
3708			    /* Here is a Hangul syllable.  It can be composed
3709			     * of several individual characters.  One
3710			     * possibility is T+ */
3711			    if (swash_fetch(PL_utf8_X_T,
3712					    (U8*)locinput, do_utf8))
3713			    {
3714				while (locinput < PL_regeol
3715					&& swash_fetch(PL_utf8_X_T,
3716							(U8*)locinput, do_utf8))
3717				{
3718				    locinput += UTF8SKIP(locinput);
3719				}
3720			    } else {
3721
3722				/* Here, not T+, but is a Hangul.  That means
3723				 * it is one of the others: L, LV, LVT or V,
3724				 * and matches:
3725				 * L* (L | LVT T* | V  V* T* | LV  V* T*) */
3726
3727				/* Match L*           */
3728				while (locinput < PL_regeol
3729					&& swash_fetch(PL_utf8_X_L,
3730							(U8*)locinput, do_utf8))
3731				{
3732				    locinput += UTF8SKIP(locinput);
3733				}
3734
3735				/* Here, have exhausted L*.  If the next
3736				 * character is not an LV, LVT nor V, it means
3737				 * we had to have at least one L, so matches L+
3738				 * in the original equation, we have a complete
3739				 * hangul syllable.  Are done. */
3740
3741				if (locinput < PL_regeol
3742				    && swash_fetch(PL_utf8_X_LV_LVT_V,
3743						    (U8*)locinput, do_utf8))
3744				{
3745
3746				    /* Otherwise keep going.  Must be LV, LVT
3747				     * or V.  See if LVT */
3748				    if (swash_fetch(PL_utf8_X_LVT,
3749						    (U8*)locinput, do_utf8))
3750				    {
3751					locinput += UTF8SKIP(locinput);
3752				    } else {
3753
3754					/* Must be  V or LV.  Take it, then
3755					 * match V*     */
3756					locinput += UTF8SKIP(locinput);
3757					while (locinput < PL_regeol
3758						&& swash_fetch(PL_utf8_X_V,
3759							 (U8*)locinput, do_utf8))
3760					{
3761					    locinput += UTF8SKIP(locinput);
3762					}
3763				    }
3764
3765				    /* And any of LV, LVT, or V can be followed
3766				     * by T*            */
3767				    while (locinput < PL_regeol
3768					   && swash_fetch(PL_utf8_X_T,
3769							   (U8*)locinput,
3770							   do_utf8))
3771				    {
3772					locinput += UTF8SKIP(locinput);
3773				    }
3774				}
3775			    }
3776			}
3777
3778			/* Match any extender */
3779			while (locinput < PL_regeol
3780				&& swash_fetch(PL_utf8_X_extend,
3781						(U8*)locinput, do_utf8))
3782			{
3783			    locinput += UTF8SKIP(locinput);
3784			}
3785		    }
3786		}
3787		if (locinput > PL_regeol) sayNO;
3788	    }
3789	    nextchr = UCHARAT(locinput);
3790	    break;
3791
3792	case NREFFL:
3793	{
3794	    char *s;
3795	    char type;
3796	    PL_reg_flags |= RF_tainted;
3797	    /* FALL THROUGH */
3798	case NREF:
3799	case NREFF:
3800	    type = OP(scan);
3801	    n = reg_check_named_buff_matched(rex,scan);
3802
3803            if ( n ) {
3804                type = REF + ( type - NREF );
3805                goto do_ref;
3806            } else {
3807                sayNO;
3808            }
3809            /* unreached */
3810	case REFFL:
3811	    PL_reg_flags |= RF_tainted;
3812	    /* FALL THROUGH */
3813        case REF:
3814	case REFF:
3815	    n = ARG(scan);  /* which paren pair */
3816	    type = OP(scan);
3817	  do_ref:
3818	    ln = PL_regoffs[n].start;
3819	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
3820	    if (*PL_reglastparen < n || ln == -1)
3821		sayNO;			/* Do not match unless seen CLOSEn. */
3822	    if (ln == PL_regoffs[n].end)
3823		break;
3824
3825	    s = PL_bostr + ln;
3826	    if (do_utf8 && type != REF) {	/* REF can do byte comparison */
3827		char *l = locinput;
3828		const char *e = PL_bostr + PL_regoffs[n].end;
3829		/*
3830		 * Note that we can't do the "other character" lookup trick as
3831		 * in the 8-bit case (no pun intended) because in Unicode we
3832		 * have to map both upper and title case to lower case.
3833		 */
3834		if (type == REFF) {
3835		    while (s < e) {
3836			STRLEN ulen1, ulen2;
3837			U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3838			U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3839
3840			if (l >= PL_regeol)
3841			    sayNO;
3842			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3843			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3844			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3845			    sayNO;
3846			s += ulen1;
3847			l += ulen2;
3848		    }
3849		}
3850		locinput = l;
3851		nextchr = UCHARAT(locinput);
3852		break;
3853	    }
3854
3855	    /* Inline the first character, for speed. */
3856	    if (UCHARAT(s) != nextchr &&
3857		(type == REF ||
3858		 (UCHARAT(s) != (type == REFF
3859				  ? PL_fold : PL_fold_locale)[nextchr])))
3860		sayNO;
3861	    ln = PL_regoffs[n].end - ln;
3862	    if (locinput + ln > PL_regeol)
3863		sayNO;
3864	    if (ln > 1 && (type == REF
3865			   ? memNE(s, locinput, ln)
3866			   : (type == REFF
3867			      ? ibcmp(s, locinput, ln)
3868			      : ibcmp_locale(s, locinput, ln))))
3869		sayNO;
3870	    locinput += ln;
3871	    nextchr = UCHARAT(locinput);
3872	    break;
3873	}
3874	case NOTHING:
3875	case TAIL:
3876	    break;
3877	case BACK:
3878	    break;
3879
3880#undef  ST
3881#define ST st->u.eval
3882	{
3883	    SV *ret;
3884	    REGEXP *re_sv;
3885            regexp *re;
3886            regexp_internal *rei;
3887            regnode *startpoint;
3888
3889	case GOSTART:
3890	case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3891	    if (cur_eval && cur_eval->locinput==locinput) {
3892                if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3893                    Perl_croak(aTHX_ "Infinite recursion in regex");
3894                if ( ++nochange_depth > max_nochange_depth )
3895                    Perl_croak(aTHX_
3896                        "Pattern subroutine nesting without pos change"
3897                        " exceeded limit in regex");
3898            } else {
3899                nochange_depth = 0;
3900            }
3901	    re_sv = rex_sv;
3902            re = rex;
3903            rei = rexi;
3904            (void)ReREFCNT_inc(rex_sv);
3905            if (OP(scan)==GOSUB) {
3906                startpoint = scan + ARG2L(scan);
3907                ST.close_paren = ARG(scan);
3908            } else {
3909                startpoint = rei->program+1;
3910                ST.close_paren = 0;
3911            }
3912            goto eval_recurse_doit;
3913            /* NOTREACHED */
3914        case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3915            if (cur_eval && cur_eval->locinput==locinput) {
3916		if ( ++nochange_depth > max_nochange_depth )
3917                    Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3918            } else {
3919                nochange_depth = 0;
3920            }
3921	    {
3922		/* execute the code in the {...} */
3923		dSP;
3924		SV ** const before = SP;
3925		OP_4tree * const oop = PL_op;
3926		COP * const ocurcop = PL_curcop;
3927		PAD *old_comppad;
3928		char *saved_regeol = PL_regeol;
3929
3930		n = ARG(scan);
3931		PL_op = (OP_4tree*)rexi->data->data[n];
3932		DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3933		    "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3934		PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3935		PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3936
3937                if (sv_yes_mark) {
3938                    SV *sv_mrk = get_sv("REGMARK", 1);
3939                    sv_setsv(sv_mrk, sv_yes_mark);
3940                }
3941
3942		CALLRUNOPS(aTHX);			/* Scalar context. */
3943		SPAGAIN;
3944		if (SP == before)
3945		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3946		else {
3947		    ret = POPs;
3948		    PUTBACK;
3949		}
3950
3951		PL_op = oop;
3952		PAD_RESTORE_LOCAL(old_comppad);
3953		PL_curcop = ocurcop;
3954		PL_regeol = saved_regeol;
3955		if (!logical) {
3956		    /* /(?{...})/ */
3957		    sv_setsv(save_scalar(PL_replgv), ret);
3958		    break;
3959		}
3960	    }
3961	    if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3962		logical = 0;
3963		{
3964		    /* extract RE object from returned value; compiling if
3965		     * necessary */
3966		    MAGIC *mg = NULL;
3967		    REGEXP *rx = NULL;
3968
3969		    if (SvROK(ret)) {
3970			SV *const sv = SvRV(ret);
3971
3972			if (SvTYPE(sv) == SVt_REGEXP) {
3973			    rx = (REGEXP*) sv;
3974			} else if (SvSMAGICAL(sv)) {
3975			    mg = mg_find(sv, PERL_MAGIC_qr);
3976			    assert(mg);
3977			}
3978		    } else if (SvTYPE(ret) == SVt_REGEXP) {
3979			rx = (REGEXP*) ret;
3980		    } else if (SvSMAGICAL(ret)) {
3981			if (SvGMAGICAL(ret)) {
3982			    /* I don't believe that there is ever qr magic
3983			       here.  */
3984			    assert(!mg_find(ret, PERL_MAGIC_qr));
3985			    sv_unmagic(ret, PERL_MAGIC_qr);
3986			}
3987			else {
3988			    mg = mg_find(ret, PERL_MAGIC_qr);
3989			    /* testing suggests mg only ends up non-NULL for
3990			       scalars who were upgraded and compiled in the
3991			       else block below. In turn, this is only
3992			       triggered in the "postponed utf8 string" tests
3993			       in t/op/pat.t  */
3994			}
3995		    }
3996
3997		    if (mg) {
3998			rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3999			assert(rx);
4000		    }
4001		    if (rx) {
4002			rx = reg_temp_copy(NULL, rx);
4003		    }
4004		    else {
4005			U32 pm_flags = 0;
4006			const I32 osize = PL_regsize;
4007
4008			if (DO_UTF8(ret)) {
4009			    assert (SvUTF8(ret));
4010			} else if (SvUTF8(ret)) {
4011			    /* Not doing UTF-8, despite what the SV says. Is
4012			       this only if we're trapped in use 'bytes'?  */
4013			    /* Make a copy of the octet sequence, but without
4014			       the flag on, as the compiler now honours the
4015			       SvUTF8 flag on ret.  */
4016			    STRLEN len;
4017			    const char *const p = SvPV(ret, len);
4018			    ret = newSVpvn_flags(p, len, SVs_TEMP);
4019			}
4020			rx = CALLREGCOMP(ret, pm_flags);
4021			if (!(SvFLAGS(ret)
4022			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
4023				 | SVs_GMG))) {
4024			    /* This isn't a first class regexp. Instead, it's
4025			       caching a regexp onto an existing, Perl visible
4026			       scalar.  */
4027			    sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
4028			}
4029			PL_regsize = osize;
4030		    }
4031		    re_sv = rx;
4032		    re = (struct regexp *)SvANY(rx);
4033		}
4034                RXp_MATCH_COPIED_off(re);
4035                re->subbeg = rex->subbeg;
4036                re->sublen = rex->sublen;
4037		rei = RXi_GET(re);
4038                DEBUG_EXECUTE_r(
4039                    debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
4040                        "Matching embedded");
4041		);
4042		startpoint = rei->program + 1;
4043               	ST.close_paren = 0; /* only used for GOSUB */
4044               	/* borrowed from regtry */
4045                if (PL_reg_start_tmpl <= re->nparens) {
4046                    PL_reg_start_tmpl = re->nparens*3/2 + 3;
4047                    if(PL_reg_start_tmp)
4048                        Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
4049                    else
4050                        Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
4051                }
4052
4053        eval_recurse_doit: /* Share code with GOSUB below this line */
4054		/* run the pattern returned from (??{...}) */
4055		ST.cp = regcppush(0);	/* Save *all* the positions. */
4056		REGCP_SET(ST.lastcp);
4057
4058		PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
4059
4060		/* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
4061		PL_reglastparen = &re->lastparen;
4062		PL_reglastcloseparen = &re->lastcloseparen;
4063		re->lastparen = 0;
4064		re->lastcloseparen = 0;
4065
4066		PL_reginput = locinput;
4067		PL_regsize = 0;
4068
4069		/* XXXX This is too dramatic a measure... */
4070		PL_reg_maxiter = 0;
4071
4072		ST.toggle_reg_flags = PL_reg_flags;
4073		if (RX_UTF8(re_sv))
4074		    PL_reg_flags |= RF_utf8;
4075		else
4076		    PL_reg_flags &= ~RF_utf8;
4077		ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
4078
4079		ST.prev_rex = rex_sv;
4080		ST.prev_curlyx = cur_curlyx;
4081		SETREX(rex_sv,re_sv);
4082		rex = re;
4083		rexi = rei;
4084		cur_curlyx = NULL;
4085		ST.B = next;
4086		ST.prev_eval = cur_eval;
4087		cur_eval = st;
4088		/* now continue from first node in postoned RE */
4089		PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
4090		/* NOTREACHED */
4091	    }
4092	    /* logical is 1,   /(?(?{...})X|Y)/ */
4093	    sw = (bool)SvTRUE(ret);
4094	    logical = 0;
4095	    break;
4096	}
4097
4098	case EVAL_AB: /* cleanup after a successful (??{A})B */
4099	    /* note: this is called twice; first after popping B, then A */
4100	    PL_reg_flags ^= ST.toggle_reg_flags;
4101	    ReREFCNT_dec(rex_sv);
4102	    SETREX(rex_sv,ST.prev_rex);
4103	    rex = (struct regexp *)SvANY(rex_sv);
4104	    rexi = RXi_GET(rex);
4105	    regcpblow(ST.cp);
4106	    cur_eval = ST.prev_eval;
4107	    cur_curlyx = ST.prev_curlyx;
4108
4109	    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4110	    PL_reglastparen = &rex->lastparen;
4111	    PL_reglastcloseparen = &rex->lastcloseparen;
4112	    /* also update PL_regoffs */
4113	    PL_regoffs = rex->offs;
4114
4115	    /* XXXX This is too dramatic a measure... */
4116	    PL_reg_maxiter = 0;
4117            if ( nochange_depth )
4118	        nochange_depth--;
4119	    sayYES;
4120
4121
4122	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
4123	    /* note: this is called twice; first after popping B, then A */
4124	    PL_reg_flags ^= ST.toggle_reg_flags;
4125	    ReREFCNT_dec(rex_sv);
4126	    SETREX(rex_sv,ST.prev_rex);
4127	    rex = (struct regexp *)SvANY(rex_sv);
4128	    rexi = RXi_GET(rex);
4129	    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4130	    PL_reglastparen = &rex->lastparen;
4131	    PL_reglastcloseparen = &rex->lastcloseparen;
4132
4133	    PL_reginput = locinput;
4134	    REGCP_UNWIND(ST.lastcp);
4135	    regcppop(rex);
4136	    cur_eval = ST.prev_eval;
4137	    cur_curlyx = ST.prev_curlyx;
4138	    /* XXXX This is too dramatic a measure... */
4139	    PL_reg_maxiter = 0;
4140	    if ( nochange_depth )
4141	        nochange_depth--;
4142	    sayNO_SILENT;
4143#undef ST
4144
4145	case OPEN:
4146	    n = ARG(scan);  /* which paren pair */
4147	    PL_reg_start_tmp[n] = locinput;
4148	    if (n > PL_regsize)
4149		PL_regsize = n;
4150            lastopen = n;
4151	    break;
4152	case CLOSE:
4153	    n = ARG(scan);  /* which paren pair */
4154	    PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
4155	    PL_regoffs[n].end = locinput - PL_bostr;
4156	    /*if (n > PL_regsize)
4157		PL_regsize = n;*/
4158	    if (n > *PL_reglastparen)
4159		*PL_reglastparen = n;
4160	    *PL_reglastcloseparen = n;
4161            if (cur_eval && cur_eval->u.eval.close_paren == n) {
4162	        goto fake_end;
4163	    }
4164	    break;
4165        case ACCEPT:
4166            if (ARG(scan)){
4167                regnode *cursor;
4168                for (cursor=scan;
4169                     cursor && OP(cursor)!=END;
4170                     cursor=regnext(cursor))
4171                {
4172                    if ( OP(cursor)==CLOSE ){
4173                        n = ARG(cursor);
4174                        if ( n <= lastopen ) {
4175                            PL_regoffs[n].start
4176				= PL_reg_start_tmp[n] - PL_bostr;
4177                            PL_regoffs[n].end = locinput - PL_bostr;
4178                            /*if (n > PL_regsize)
4179                            PL_regsize = n;*/
4180                            if (n > *PL_reglastparen)
4181                                *PL_reglastparen = n;
4182                            *PL_reglastcloseparen = n;
4183                            if ( n == ARG(scan) || (cur_eval &&
4184                                cur_eval->u.eval.close_paren == n))
4185                                break;
4186                        }
4187                    }
4188                }
4189            }
4190	    goto fake_end;
4191	    /*NOTREACHED*/
4192	case GROUPP:
4193	    n = ARG(scan);  /* which paren pair */
4194	    sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
4195	    break;
4196	case NGROUPP:
4197	    /* reg_check_named_buff_matched returns 0 for no match */
4198	    sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
4199	    break;
4200        case INSUBP:
4201            n = ARG(scan);
4202            sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
4203            break;
4204        case DEFINEP:
4205            sw = 0;
4206            break;
4207	case IFTHEN:
4208	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
4209	    if (sw)
4210		next = NEXTOPER(NEXTOPER(scan));
4211	    else {
4212		next = scan + ARG(scan);
4213		if (OP(next) == IFTHEN) /* Fake one. */
4214		    next = NEXTOPER(NEXTOPER(next));
4215	    }
4216	    break;
4217	case LOGICAL:
4218	    logical = scan->flags;
4219	    break;
4220
4221/*******************************************************************
4222
4223The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
4224pattern, where A and B are subpatterns. (For simple A, CURLYM or
4225STAR/PLUS/CURLY/CURLYN are used instead.)
4226
4227A*B is compiled as <CURLYX><A><WHILEM><B>
4228
4229On entry to the subpattern, CURLYX is called. This pushes a CURLYX
4230state, which contains the current count, initialised to -1. It also sets
4231cur_curlyx to point to this state, with any previous value saved in the
4232state block.
4233
4234CURLYX then jumps straight to the WHILEM op, rather than executing A,
4235since the pattern may possibly match zero times (i.e. it's a while {} loop
4236rather than a do {} while loop).
4237
4238Each entry to WHILEM represents a successful match of A. The count in the
4239CURLYX block is incremented, another WHILEM state is pushed, and execution
4240passes to A or B depending on greediness and the current count.
4241
4242For example, if matching against the string a1a2a3b (where the aN are
4243substrings that match /A/), then the match progresses as follows: (the
4244pushed states are interspersed with the bits of strings matched so far):
4245
4246    <CURLYX cnt=-1>
4247    <CURLYX cnt=0><WHILEM>
4248    <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4249    <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4250    <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4251    <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4252
4253(Contrast this with something like CURLYM, which maintains only a single
4254backtrack state:
4255
4256    <CURLYM cnt=0> a1
4257    a1 <CURLYM cnt=1> a2
4258    a1 a2 <CURLYM cnt=2> a3
4259    a1 a2 a3 <CURLYM cnt=3> b
4260)
4261
4262Each WHILEM state block marks a point to backtrack to upon partial failure
4263of A or B, and also contains some minor state data related to that
4264iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4265overall state, such as the count, and pointers to the A and B ops.
4266
4267This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4268must always point to the *current* CURLYX block, the rules are:
4269
4270When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4271and set cur_curlyx to point the new block.
4272
4273When popping the CURLYX block after a successful or unsuccessful match,
4274restore the previous cur_curlyx.
4275
4276When WHILEM is about to execute B, save the current cur_curlyx, and set it
4277to the outer one saved in the CURLYX block.
4278
4279When popping the WHILEM block after a successful or unsuccessful B match,
4280restore the previous cur_curlyx.
4281
4282Here's an example for the pattern (AI* BI)*BO
4283I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4284
4285cur_
4286curlyx backtrack stack
4287------ ---------------
4288NULL
4289CO     <CO prev=NULL> <WO>
4290CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4291CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4292NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4293
4294At this point the pattern succeeds, and we work back down the stack to
4295clean up, restoring as we go:
4296
4297CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4298CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4299CO     <CO prev=NULL> <WO>
4300NULL
4301
4302*******************************************************************/
4303
4304#define ST st->u.curlyx
4305
4306	case CURLYX:    /* start of /A*B/  (for complex A) */
4307	{
4308	    /* No need to save/restore up to this paren */
4309	    I32 parenfloor = scan->flags;
4310
4311	    assert(next); /* keep Coverity happy */
4312	    if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4313		next += ARG(next);
4314
4315	    /* XXXX Probably it is better to teach regpush to support
4316	       parenfloor > PL_regsize... */
4317	    if (parenfloor > (I32)*PL_reglastparen)
4318		parenfloor = *PL_reglastparen; /* Pessimization... */
4319
4320	    ST.prev_curlyx= cur_curlyx;
4321	    cur_curlyx = st;
4322	    ST.cp = PL_savestack_ix;
4323
4324	    /* these fields contain the state of the current curly.
4325	     * they are accessed by subsequent WHILEMs */
4326	    ST.parenfloor = parenfloor;
4327	    ST.min = ARG1(scan);
4328	    ST.max = ARG2(scan);
4329	    ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4330	    ST.B = next;
4331	    ST.minmod = minmod;
4332	    minmod = 0;
4333	    ST.count = -1;	/* this will be updated by WHILEM */
4334	    ST.lastloc = NULL;  /* this will be updated by WHILEM */
4335
4336	    PL_reginput = locinput;
4337	    PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4338	    /* NOTREACHED */
4339	}
4340
4341	case CURLYX_end: /* just finished matching all of A*B */
4342	    cur_curlyx = ST.prev_curlyx;
4343	    sayYES;
4344	    /* NOTREACHED */
4345
4346	case CURLYX_end_fail: /* just failed to match all of A*B */
4347	    regcpblow(ST.cp);
4348	    cur_curlyx = ST.prev_curlyx;
4349	    sayNO;
4350	    /* NOTREACHED */
4351
4352
4353#undef ST
4354#define ST st->u.whilem
4355
4356	case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4357	{
4358	    /* see the discussion above about CURLYX/WHILEM */
4359	    I32 n;
4360	    assert(cur_curlyx); /* keep Coverity happy */
4361	    n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4362	    ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4363	    ST.cache_offset = 0;
4364	    ST.cache_mask = 0;
4365
4366	    PL_reginput = locinput;
4367
4368	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4369		  "%*s  whilem: matched %ld out of %ld..%ld\n",
4370		  REPORT_CODE_OFF+depth*2, "", (long)n,
4371		  (long)cur_curlyx->u.curlyx.min,
4372		  (long)cur_curlyx->u.curlyx.max)
4373	    );
4374
4375	    /* First just match a string of min A's. */
4376
4377	    if (n < cur_curlyx->u.curlyx.min) {
4378		cur_curlyx->u.curlyx.lastloc = locinput;
4379		PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4380		/* NOTREACHED */
4381	    }
4382
4383	    /* If degenerate A matches "", assume A done. */
4384
4385	    if (locinput == cur_curlyx->u.curlyx.lastloc) {
4386		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4387		   "%*s  whilem: empty match detected, trying continuation...\n",
4388		   REPORT_CODE_OFF+depth*2, "")
4389		);
4390		goto do_whilem_B_max;
4391	    }
4392
4393	    /* super-linear cache processing */
4394
4395	    if (scan->flags) {
4396
4397		if (!PL_reg_maxiter) {
4398		    /* start the countdown: Postpone detection until we
4399		     * know the match is not *that* much linear. */
4400		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4401		    /* possible overflow for long strings and many CURLYX's */
4402		    if (PL_reg_maxiter < 0)
4403			PL_reg_maxiter = I32_MAX;
4404		    PL_reg_leftiter = PL_reg_maxiter;
4405		}
4406
4407		if (PL_reg_leftiter-- == 0) {
4408		    /* initialise cache */
4409		    const I32 size = (PL_reg_maxiter + 7)/8;
4410		    if (PL_reg_poscache) {
4411			if ((I32)PL_reg_poscache_size < size) {
4412			    Renew(PL_reg_poscache, size, char);
4413			    PL_reg_poscache_size = size;
4414			}
4415			Zero(PL_reg_poscache, size, char);
4416		    }
4417		    else {
4418			PL_reg_poscache_size = size;
4419			Newxz(PL_reg_poscache, size, char);
4420		    }
4421		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4422      "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4423			      PL_colors[4], PL_colors[5])
4424		    );
4425		}
4426
4427		if (PL_reg_leftiter < 0) {
4428		    /* have we already failed at this position? */
4429		    I32 offset, mask;
4430		    offset  = (scan->flags & 0xf) - 1
4431		  		+ (locinput - PL_bostr)  * (scan->flags>>4);
4432		    mask    = 1 << (offset % 8);
4433		    offset /= 8;
4434		    if (PL_reg_poscache[offset] & mask) {
4435			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4436			    "%*s  whilem: (cache) already tried at this position...\n",
4437			    REPORT_CODE_OFF+depth*2, "")
4438			);
4439			sayNO; /* cache records failure */
4440		    }
4441		    ST.cache_offset = offset;
4442		    ST.cache_mask   = mask;
4443		}
4444	    }
4445
4446	    /* Prefer B over A for minimal matching. */
4447
4448	    if (cur_curlyx->u.curlyx.minmod) {
4449		ST.save_curlyx = cur_curlyx;
4450		cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4451		ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4452		REGCP_SET(ST.lastcp);
4453		PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4454		/* NOTREACHED */
4455	    }
4456
4457	    /* Prefer A over B for maximal matching. */
4458
4459	    if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4460		ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4461		cur_curlyx->u.curlyx.lastloc = locinput;
4462		REGCP_SET(ST.lastcp);
4463		PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4464		/* NOTREACHED */
4465	    }
4466	    goto do_whilem_B_max;
4467	}
4468	/* NOTREACHED */
4469
4470	case WHILEM_B_min: /* just matched B in a minimal match */
4471	case WHILEM_B_max: /* just matched B in a maximal match */
4472	    cur_curlyx = ST.save_curlyx;
4473	    sayYES;
4474	    /* NOTREACHED */
4475
4476	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4477	    cur_curlyx = ST.save_curlyx;
4478	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4479	    cur_curlyx->u.curlyx.count--;
4480	    CACHEsayNO;
4481	    /* NOTREACHED */
4482
4483	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4484	    REGCP_UNWIND(ST.lastcp);
4485	    regcppop(rex);
4486	    /* FALL THROUGH */
4487	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4488	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4489	    cur_curlyx->u.curlyx.count--;
4490	    CACHEsayNO;
4491	    /* NOTREACHED */
4492
4493	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4494	    REGCP_UNWIND(ST.lastcp);
4495	    regcppop(rex);	/* Restore some previous $<digit>s? */
4496	    PL_reginput = locinput;
4497	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4498		"%*s  whilem: failed, trying continuation...\n",
4499		REPORT_CODE_OFF+depth*2, "")
4500	    );
4501	  do_whilem_B_max:
4502	    if (cur_curlyx->u.curlyx.count >= REG_INFTY
4503		&& ckWARN(WARN_REGEXP)
4504		&& !(PL_reg_flags & RF_warned))
4505	    {
4506		PL_reg_flags |= RF_warned;
4507		Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4508		     "Complex regular subexpression recursion",
4509		     REG_INFTY - 1);
4510	    }
4511
4512	    /* now try B */
4513	    ST.save_curlyx = cur_curlyx;
4514	    cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4515	    PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4516	    /* NOTREACHED */
4517
4518	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4519	    cur_curlyx = ST.save_curlyx;
4520	    REGCP_UNWIND(ST.lastcp);
4521	    regcppop(rex);
4522
4523	    if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4524		/* Maximum greed exceeded */
4525		if (cur_curlyx->u.curlyx.count >= REG_INFTY
4526		    && ckWARN(WARN_REGEXP)
4527		    && !(PL_reg_flags & RF_warned))
4528		{
4529		    PL_reg_flags |= RF_warned;
4530		    Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4531			"%s limit (%d) exceeded",
4532			"Complex regular subexpression recursion",
4533			REG_INFTY - 1);
4534		}
4535		cur_curlyx->u.curlyx.count--;
4536		CACHEsayNO;
4537	    }
4538
4539	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4540		"%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4541	    );
4542	    /* Try grabbing another A and see if it helps. */
4543	    PL_reginput = locinput;
4544	    cur_curlyx->u.curlyx.lastloc = locinput;
4545	    ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4546	    REGCP_SET(ST.lastcp);
4547	    PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4548	    /* NOTREACHED */
4549
4550#undef  ST
4551#define ST st->u.branch
4552
4553	case BRANCHJ:	    /*  /(...|A|...)/ with long next pointer */
4554	    next = scan + ARG(scan);
4555	    if (next == scan)
4556		next = NULL;
4557	    scan = NEXTOPER(scan);
4558	    /* FALL THROUGH */
4559
4560	case BRANCH:	    /*  /(...|A|...)/ */
4561	    scan = NEXTOPER(scan); /* scan now points to inner node */
4562	    ST.lastparen = *PL_reglastparen;
4563	    ST.next_branch = next;
4564	    REGCP_SET(ST.cp);
4565	    PL_reginput = locinput;
4566
4567	    /* Now go into the branch */
4568	    if (has_cutgroup) {
4569	        PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4570	    } else {
4571	        PUSH_STATE_GOTO(BRANCH_next, scan);
4572	    }
4573	    /* NOTREACHED */
4574        case CUTGROUP:
4575            PL_reginput = locinput;
4576            sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4577                MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4578            PUSH_STATE_GOTO(CUTGROUP_next,next);
4579            /* NOTREACHED */
4580        case CUTGROUP_next_fail:
4581            do_cutgroup = 1;
4582            no_final = 1;
4583            if (st->u.mark.mark_name)
4584                sv_commit = st->u.mark.mark_name;
4585            sayNO;
4586            /* NOTREACHED */
4587        case BRANCH_next:
4588            sayYES;
4589            /* NOTREACHED */
4590	case BRANCH_next_fail: /* that branch failed; try the next, if any */
4591	    if (do_cutgroup) {
4592	        do_cutgroup = 0;
4593	        no_final = 0;
4594	    }
4595	    REGCP_UNWIND(ST.cp);
4596	    for (n = *PL_reglastparen; n > ST.lastparen; n--)
4597		PL_regoffs[n].end = -1;
4598	    *PL_reglastparen = n;
4599	    /*dmq: *PL_reglastcloseparen = n; */
4600	    scan = ST.next_branch;
4601	    /* no more branches? */
4602	    if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4603	        DEBUG_EXECUTE_r({
4604		    PerlIO_printf( Perl_debug_log,
4605			"%*s  %sBRANCH failed...%s\n",
4606			REPORT_CODE_OFF+depth*2, "",
4607			PL_colors[4],
4608			PL_colors[5] );
4609		});
4610		sayNO_SILENT;
4611            }
4612	    continue; /* execute next BRANCH[J] op */
4613	    /* NOTREACHED */
4614
4615	case MINMOD:
4616	    minmod = 1;
4617	    break;
4618
4619#undef  ST
4620#define ST st->u.curlym
4621
4622	case CURLYM:	/* /A{m,n}B/ where A is fixed-length */
4623
4624	    /* This is an optimisation of CURLYX that enables us to push
4625	     * only a single backtracking state, no matter how many matches
4626	     * there are in {m,n}. It relies on the pattern being constant
4627	     * length, with no parens to influence future backrefs
4628	     */
4629
4630	    ST.me = scan;
4631	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4632
4633	    /* if paren positive, emulate an OPEN/CLOSE around A */
4634	    if (ST.me->flags) {
4635		U32 paren = ST.me->flags;
4636		if (paren > PL_regsize)
4637		    PL_regsize = paren;
4638		if (paren > *PL_reglastparen)
4639		    *PL_reglastparen = paren;
4640		scan += NEXT_OFF(scan); /* Skip former OPEN. */
4641	    }
4642	    ST.A = scan;
4643	    ST.B = next;
4644	    ST.alen = 0;
4645	    ST.count = 0;
4646	    ST.minmod = minmod;
4647	    minmod = 0;
4648	    ST.c1 = CHRTEST_UNINIT;
4649	    REGCP_SET(ST.cp);
4650
4651	    if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4652		goto curlym_do_B;
4653
4654	  curlym_do_A: /* execute the A in /A{m,n}B/  */
4655	    PL_reginput = locinput;
4656	    PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4657	    /* NOTREACHED */
4658
4659	case CURLYM_A: /* we've just matched an A */
4660	    locinput = st->locinput;
4661	    nextchr = UCHARAT(locinput);
4662
4663	    ST.count++;
4664	    /* after first match, determine A's length: u.curlym.alen */
4665	    if (ST.count == 1) {
4666		if (PL_reg_match_utf8) {
4667		    char *s = locinput;
4668		    while (s < PL_reginput) {
4669			ST.alen++;
4670			s += UTF8SKIP(s);
4671		    }
4672		}
4673		else {
4674		    ST.alen = PL_reginput - locinput;
4675		}
4676		if (ST.alen == 0)
4677		    ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4678	    }
4679	    DEBUG_EXECUTE_r(
4680		PerlIO_printf(Perl_debug_log,
4681			  "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4682			  (int)(REPORT_CODE_OFF+(depth*2)), "",
4683			  (IV) ST.count, (IV)ST.alen)
4684	    );
4685
4686	    locinput = PL_reginput;
4687
4688	    if (cur_eval && cur_eval->u.eval.close_paren &&
4689	        cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4690	        goto fake_end;
4691
4692	    {
4693		I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4694		if ( max == REG_INFTY || ST.count < max )
4695		    goto curlym_do_A; /* try to match another A */
4696	    }
4697	    goto curlym_do_B; /* try to match B */
4698
4699	case CURLYM_A_fail: /* just failed to match an A */
4700	    REGCP_UNWIND(ST.cp);
4701
4702	    if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4703	        || (cur_eval && cur_eval->u.eval.close_paren &&
4704	            cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4705		sayNO;
4706
4707	  curlym_do_B: /* execute the B in /A{m,n}B/  */
4708	    PL_reginput = locinput;
4709	    if (ST.c1 == CHRTEST_UNINIT) {
4710		/* calculate c1 and c2 for possible match of 1st char
4711		 * following curly */
4712		ST.c1 = ST.c2 = CHRTEST_VOID;
4713		if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4714		    regnode *text_node = ST.B;
4715		    if (! HAS_TEXT(text_node))
4716			FIND_NEXT_IMPT(text_node);
4717	            /* this used to be
4718
4719	                (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4720
4721	            	But the former is redundant in light of the latter.
4722
4723	            	if this changes back then the macro for
4724	            	IS_TEXT and friends need to change.
4725	             */
4726		    if (PL_regkind[OP(text_node)] == EXACT)
4727		    {
4728
4729			ST.c1 = (U8)*STRING(text_node);
4730			ST.c2 =
4731			    (IS_TEXTF(text_node))
4732			    ? PL_fold[ST.c1]
4733			    : (IS_TEXTFL(text_node))
4734				? PL_fold_locale[ST.c1]
4735				: ST.c1;
4736		    }
4737		}
4738	    }
4739
4740	    DEBUG_EXECUTE_r(
4741		PerlIO_printf(Perl_debug_log,
4742		    "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4743		    (int)(REPORT_CODE_OFF+(depth*2)),
4744		    "", (IV)ST.count)
4745		);
4746	    if (ST.c1 != CHRTEST_VOID
4747		    && UCHARAT(PL_reginput) != ST.c1
4748		    && UCHARAT(PL_reginput) != ST.c2)
4749	    {
4750		/* simulate B failing */
4751		DEBUG_OPTIMISE_r(
4752		    PerlIO_printf(Perl_debug_log,
4753		        "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4754		        (int)(REPORT_CODE_OFF+(depth*2)),"",
4755		        (IV)ST.c1,(IV)ST.c2
4756		));
4757		state_num = CURLYM_B_fail;
4758		goto reenter_switch;
4759	    }
4760
4761	    if (ST.me->flags) {
4762		/* mark current A as captured */
4763		I32 paren = ST.me->flags;
4764		if (ST.count) {
4765		    PL_regoffs[paren].start
4766			= HOPc(PL_reginput, -ST.alen) - PL_bostr;
4767		    PL_regoffs[paren].end = PL_reginput - PL_bostr;
4768		    /*dmq: *PL_reglastcloseparen = paren; */
4769		}
4770		else
4771		    PL_regoffs[paren].end = -1;
4772		if (cur_eval && cur_eval->u.eval.close_paren &&
4773		    cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4774		{
4775		    if (ST.count)
4776	                goto fake_end;
4777	            else
4778	                sayNO;
4779	        }
4780	    }
4781
4782	    PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4783	    /* NOTREACHED */
4784
4785	case CURLYM_B_fail: /* just failed to match a B */
4786	    REGCP_UNWIND(ST.cp);
4787	    if (ST.minmod) {
4788		I32 max = ARG2(ST.me);
4789		if (max != REG_INFTY && ST.count == max)
4790		    sayNO;
4791		goto curlym_do_A; /* try to match a further A */
4792	    }
4793	    /* backtrack one A */
4794	    if (ST.count == ARG1(ST.me) /* min */)
4795		sayNO;
4796	    ST.count--;
4797	    locinput = HOPc(locinput, -ST.alen);
4798	    goto curlym_do_B; /* try to match B */
4799
4800#undef ST
4801#define ST st->u.curly
4802
4803#define CURLY_SETPAREN(paren, success) \
4804    if (paren) { \
4805	if (success) { \
4806	    PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4807	    PL_regoffs[paren].end = locinput - PL_bostr; \
4808	    *PL_reglastcloseparen = paren; \
4809	} \
4810	else \
4811	    PL_regoffs[paren].end = -1; \
4812    }
4813
4814	case STAR:		/*  /A*B/ where A is width 1 */
4815	    ST.paren = 0;
4816	    ST.min = 0;
4817	    ST.max = REG_INFTY;
4818	    scan = NEXTOPER(scan);
4819	    goto repeat;
4820	case PLUS:		/*  /A+B/ where A is width 1 */
4821	    ST.paren = 0;
4822	    ST.min = 1;
4823	    ST.max = REG_INFTY;
4824	    scan = NEXTOPER(scan);
4825	    goto repeat;
4826	case CURLYN:		/*  /(A){m,n}B/ where A is width 1 */
4827	    ST.paren = scan->flags;	/* Which paren to set */
4828	    if (ST.paren > PL_regsize)
4829		PL_regsize = ST.paren;
4830	    if (ST.paren > *PL_reglastparen)
4831		*PL_reglastparen = ST.paren;
4832	    ST.min = ARG1(scan);  /* min to match */
4833	    ST.max = ARG2(scan);  /* max to match */
4834	    if (cur_eval && cur_eval->u.eval.close_paren &&
4835	        cur_eval->u.eval.close_paren == (U32)ST.paren) {
4836	        ST.min=1;
4837	        ST.max=1;
4838	    }
4839            scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4840	    goto repeat;
4841	case CURLY:		/*  /A{m,n}B/ where A is width 1 */
4842	    ST.paren = 0;
4843	    ST.min = ARG1(scan);  /* min to match */
4844	    ST.max = ARG2(scan);  /* max to match */
4845	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4846	  repeat:
4847	    /*
4848	    * Lookahead to avoid useless match attempts
4849	    * when we know what character comes next.
4850	    *
4851	    * Used to only do .*x and .*?x, but now it allows
4852	    * for )'s, ('s and (?{ ... })'s to be in the way
4853	    * of the quantifier and the EXACT-like node.  -- japhy
4854	    */
4855
4856	    if (ST.min > ST.max) /* XXX make this a compile-time check? */
4857		sayNO;
4858	    if (HAS_TEXT(next) || JUMPABLE(next)) {
4859		U8 *s;
4860		regnode *text_node = next;
4861
4862		if (! HAS_TEXT(text_node))
4863		    FIND_NEXT_IMPT(text_node);
4864
4865		if (! HAS_TEXT(text_node))
4866		    ST.c1 = ST.c2 = CHRTEST_VOID;
4867		else {
4868		    if ( PL_regkind[OP(text_node)] != EXACT ) {
4869			ST.c1 = ST.c2 = CHRTEST_VOID;
4870			goto assume_ok_easy;
4871		    }
4872		    else
4873			s = (U8*)STRING(text_node);
4874
4875                    /*  Currently we only get here when
4876
4877                        PL_rekind[OP(text_node)] == EXACT
4878
4879                        if this changes back then the macro for IS_TEXT and
4880                        friends need to change. */
4881		    if (!UTF) {
4882			ST.c2 = ST.c1 = *s;
4883			if (IS_TEXTF(text_node))
4884			    ST.c2 = PL_fold[ST.c1];
4885			else if (IS_TEXTFL(text_node))
4886			    ST.c2 = PL_fold_locale[ST.c1];
4887		    }
4888		    else { /* UTF */
4889			if (IS_TEXTF(text_node)) {
4890			     STRLEN ulen1, ulen2;
4891			     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4892			     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4893
4894			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4895			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4896#ifdef EBCDIC
4897			     ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4898						    ckWARN(WARN_UTF8) ?
4899                                                    0 : UTF8_ALLOW_ANY);
4900			     ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4901                                                    ckWARN(WARN_UTF8) ?
4902                                                    0 : UTF8_ALLOW_ANY);
4903#else
4904			     ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4905						    uniflags);
4906			     ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4907						    uniflags);
4908#endif
4909			}
4910			else {
4911			    ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4912						     uniflags);
4913			}
4914		    }
4915		}
4916	    }
4917	    else
4918		ST.c1 = ST.c2 = CHRTEST_VOID;
4919	assume_ok_easy:
4920
4921	    ST.A = scan;
4922	    ST.B = next;
4923	    PL_reginput = locinput;
4924	    if (minmod) {
4925		minmod = 0;
4926		if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4927		    sayNO;
4928		ST.count = ST.min;
4929		locinput = PL_reginput;
4930		REGCP_SET(ST.cp);
4931		if (ST.c1 == CHRTEST_VOID)
4932		    goto curly_try_B_min;
4933
4934		ST.oldloc = locinput;
4935
4936		/* set ST.maxpos to the furthest point along the
4937		 * string that could possibly match */
4938		if  (ST.max == REG_INFTY) {
4939		    ST.maxpos = PL_regeol - 1;
4940		    if (do_utf8)
4941			while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4942			    ST.maxpos--;
4943		}
4944		else if (do_utf8) {
4945		    int m = ST.max - ST.min;
4946		    for (ST.maxpos = locinput;
4947			 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4948			ST.maxpos += UTF8SKIP(ST.maxpos);
4949		}
4950		else {
4951		    ST.maxpos = locinput + ST.max - ST.min;
4952		    if (ST.maxpos >= PL_regeol)
4953			ST.maxpos = PL_regeol - 1;
4954		}
4955		goto curly_try_B_min_known;
4956
4957	    }
4958	    else {
4959		ST.count = regrepeat(rex, ST.A, ST.max, depth);
4960		locinput = PL_reginput;
4961		if (ST.count < ST.min)
4962		    sayNO;
4963		if ((ST.count > ST.min)
4964		    && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4965		{
4966		    /* A{m,n} must come at the end of the string, there's
4967		     * no point in backing off ... */
4968		    ST.min = ST.count;
4969		    /* ...except that $ and \Z can match before *and* after
4970		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4971		       We may back off by one in this case. */
4972		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4973			ST.min--;
4974		}
4975		REGCP_SET(ST.cp);
4976		goto curly_try_B_max;
4977	    }
4978	    /* NOTREACHED */
4979
4980
4981	case CURLY_B_min_known_fail:
4982	    /* failed to find B in a non-greedy match where c1,c2 valid */
4983	    if (ST.paren && ST.count)
4984		PL_regoffs[ST.paren].end = -1;
4985
4986	    PL_reginput = locinput;	/* Could be reset... */
4987	    REGCP_UNWIND(ST.cp);
4988	    /* Couldn't or didn't -- move forward. */
4989	    ST.oldloc = locinput;
4990	    if (do_utf8)
4991		locinput += UTF8SKIP(locinput);
4992	    else
4993		locinput++;
4994	    ST.count++;
4995	  curly_try_B_min_known:
4996	     /* find the next place where 'B' could work, then call B */
4997	    {
4998		int n;
4999		if (do_utf8) {
5000		    n = (ST.oldloc == locinput) ? 0 : 1;
5001		    if (ST.c1 == ST.c2) {
5002			STRLEN len;
5003			/* set n to utf8_distance(oldloc, locinput) */
5004			while (locinput <= ST.maxpos &&
5005			       utf8n_to_uvchr((U8*)locinput,
5006					      UTF8_MAXBYTES, &len,
5007					      uniflags) != (UV)ST.c1) {
5008			    locinput += len;
5009			    n++;
5010			}
5011		    }
5012		    else {
5013			/* set n to utf8_distance(oldloc, locinput) */
5014			while (locinput <= ST.maxpos) {
5015			    STRLEN len;
5016			    const UV c = utf8n_to_uvchr((U8*)locinput,
5017						  UTF8_MAXBYTES, &len,
5018						  uniflags);
5019			    if (c == (UV)ST.c1 || c == (UV)ST.c2)
5020				break;
5021			    locinput += len;
5022			    n++;
5023			}
5024		    }
5025		}
5026		else {
5027		    if (ST.c1 == ST.c2) {
5028			while (locinput <= ST.maxpos &&
5029			       UCHARAT(locinput) != ST.c1)
5030			    locinput++;
5031		    }
5032		    else {
5033			while (locinput <= ST.maxpos
5034			       && UCHARAT(locinput) != ST.c1
5035			       && UCHARAT(locinput) != ST.c2)
5036			    locinput++;
5037		    }
5038		    n = locinput - ST.oldloc;
5039		}
5040		if (locinput > ST.maxpos)
5041		    sayNO;
5042		/* PL_reginput == oldloc now */
5043		if (n) {
5044		    ST.count += n;
5045		    if (regrepeat(rex, ST.A, n, depth) < n)
5046			sayNO;
5047		}
5048		PL_reginput = locinput;
5049		CURLY_SETPAREN(ST.paren, ST.count);
5050		if (cur_eval && cur_eval->u.eval.close_paren &&
5051		    cur_eval->u.eval.close_paren == (U32)ST.paren) {
5052		    goto fake_end;
5053	        }
5054		PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
5055	    }
5056	    /* NOTREACHED */
5057
5058
5059	case CURLY_B_min_fail:
5060	    /* failed to find B in a non-greedy match where c1,c2 invalid */
5061	    if (ST.paren && ST.count)
5062		PL_regoffs[ST.paren].end = -1;
5063
5064	    REGCP_UNWIND(ST.cp);
5065	    /* failed -- move forward one */
5066	    PL_reginput = locinput;
5067	    if (regrepeat(rex, ST.A, 1, depth)) {
5068		ST.count++;
5069		locinput = PL_reginput;
5070		if (ST.count <= ST.max || (ST.max == REG_INFTY &&
5071			ST.count > 0)) /* count overflow ? */
5072		{
5073		  curly_try_B_min:
5074		    CURLY_SETPAREN(ST.paren, ST.count);
5075		    if (cur_eval && cur_eval->u.eval.close_paren &&
5076		        cur_eval->u.eval.close_paren == (U32)ST.paren) {
5077                        goto fake_end;
5078                    }
5079		    PUSH_STATE_GOTO(CURLY_B_min, ST.B);
5080		}
5081	    }
5082	    sayNO;
5083	    /* NOTREACHED */
5084
5085
5086	curly_try_B_max:
5087	    /* a successful greedy match: now try to match B */
5088            if (cur_eval && cur_eval->u.eval.close_paren &&
5089                cur_eval->u.eval.close_paren == (U32)ST.paren) {
5090                goto fake_end;
5091            }
5092	    {
5093		UV c = 0;
5094		if (ST.c1 != CHRTEST_VOID)
5095		    c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
5096					   UTF8_MAXBYTES, 0, uniflags)
5097				: (UV) UCHARAT(PL_reginput);
5098		/* If it could work, try it. */
5099		if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
5100		    CURLY_SETPAREN(ST.paren, ST.count);
5101		    PUSH_STATE_GOTO(CURLY_B_max, ST.B);
5102		    /* NOTREACHED */
5103		}
5104	    }
5105	    /* FALL THROUGH */
5106	case CURLY_B_max_fail:
5107	    /* failed to find B in a greedy match */
5108	    if (ST.paren && ST.count)
5109		PL_regoffs[ST.paren].end = -1;
5110
5111	    REGCP_UNWIND(ST.cp);
5112	    /*  back up. */
5113	    if (--ST.count < ST.min)
5114		sayNO;
5115	    PL_reginput = locinput = HOPc(locinput, -1);
5116	    goto curly_try_B_max;
5117
5118#undef ST
5119
5120	case END:
5121	    fake_end:
5122	    if (cur_eval) {
5123		/* we've just finished A in /(??{A})B/; now continue with B */
5124		I32 tmpix;
5125		st->u.eval.toggle_reg_flags
5126			    = cur_eval->u.eval.toggle_reg_flags;
5127		PL_reg_flags ^= st->u.eval.toggle_reg_flags;
5128
5129		st->u.eval.prev_rex = rex_sv;		/* inner */
5130		SETREX(rex_sv,cur_eval->u.eval.prev_rex);
5131		rex = (struct regexp *)SvANY(rex_sv);
5132		rexi = RXi_GET(rex);
5133		cur_curlyx = cur_eval->u.eval.prev_curlyx;
5134		ReREFCNT_inc(rex_sv);
5135		st->u.eval.cp = regcppush(0);	/* Save *all* the positions. */
5136
5137		/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
5138		PL_reglastparen = &rex->lastparen;
5139		PL_reglastcloseparen = &rex->lastcloseparen;
5140
5141		REGCP_SET(st->u.eval.lastcp);
5142		PL_reginput = locinput;
5143
5144		/* Restore parens of the outer rex without popping the
5145		 * savestack */
5146		tmpix = PL_savestack_ix;
5147		PL_savestack_ix = cur_eval->u.eval.lastcp;
5148		regcppop(rex);
5149		PL_savestack_ix = tmpix;
5150
5151		st->u.eval.prev_eval = cur_eval;
5152		cur_eval = cur_eval->u.eval.prev_eval;
5153		DEBUG_EXECUTE_r(
5154		    PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
5155				      REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
5156                if ( nochange_depth )
5157	            nochange_depth--;
5158
5159                PUSH_YES_STATE_GOTO(EVAL_AB,
5160			st->u.eval.prev_eval->u.eval.B); /* match B */
5161	    }
5162
5163	    if (locinput < reginfo->till) {
5164		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
5165				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
5166				      PL_colors[4],
5167				      (long)(locinput - PL_reg_starttry),
5168				      (long)(reginfo->till - PL_reg_starttry),
5169				      PL_colors[5]));
5170
5171		sayNO_SILENT;		/* Cannot match: too short. */
5172	    }
5173	    PL_reginput = locinput;	/* put where regtry can find it */
5174	    sayYES;			/* Success! */
5175
5176	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
5177	    DEBUG_EXECUTE_r(
5178	    PerlIO_printf(Perl_debug_log,
5179		"%*s  %ssubpattern success...%s\n",
5180		REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
5181	    PL_reginput = locinput;	/* put where regtry can find it */
5182	    sayYES;			/* Success! */
5183
5184#undef  ST
5185#define ST st->u.ifmatch
5186
5187	case SUSPEND:	/* (?>A) */
5188	    ST.wanted = 1;
5189	    PL_reginput = locinput;
5190	    goto do_ifmatch;
5191
5192	case UNLESSM:	/* -ve lookaround: (?!A), or with flags, (?<!A) */
5193	    ST.wanted = 0;
5194	    goto ifmatch_trivial_fail_test;
5195
5196	case IFMATCH:	/* +ve lookaround: (?=A), or with flags, (?<=A) */
5197	    ST.wanted = 1;
5198	  ifmatch_trivial_fail_test:
5199	    if (scan->flags) {
5200		char * const s = HOPBACKc(locinput, scan->flags);
5201		if (!s) {
5202		    /* trivial fail */
5203		    if (logical) {
5204			logical = 0;
5205			sw = 1 - (bool)ST.wanted;
5206		    }
5207		    else if (ST.wanted)
5208			sayNO;
5209		    next = scan + ARG(scan);
5210		    if (next == scan)
5211			next = NULL;
5212		    break;
5213		}
5214		PL_reginput = s;
5215	    }
5216	    else
5217		PL_reginput = locinput;
5218
5219	  do_ifmatch:
5220	    ST.me = scan;
5221	    ST.logical = logical;
5222	    logical = 0; /* XXX: reset state of logical once it has been saved into ST */
5223
5224	    /* execute body of (?...A) */
5225	    PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
5226	    /* NOTREACHED */
5227
5228	case IFMATCH_A_fail: /* body of (?...A) failed */
5229	    ST.wanted = !ST.wanted;
5230	    /* FALL THROUGH */
5231
5232	case IFMATCH_A: /* body of (?...A) succeeded */
5233	    if (ST.logical) {
5234		sw = (bool)ST.wanted;
5235	    }
5236	    else if (!ST.wanted)
5237		sayNO;
5238
5239	    if (OP(ST.me) == SUSPEND)
5240		locinput = PL_reginput;
5241	    else {
5242		locinput = PL_reginput = st->locinput;
5243		nextchr = UCHARAT(locinput);
5244	    }
5245	    scan = ST.me + ARG(ST.me);
5246	    if (scan == ST.me)
5247		scan = NULL;
5248	    continue; /* execute B */
5249
5250#undef ST
5251
5252	case LONGJMP:
5253	    next = scan + ARG(scan);
5254	    if (next == scan)
5255		next = NULL;
5256	    break;
5257	case COMMIT:
5258	    reginfo->cutpoint = PL_regeol;
5259	    /* FALLTHROUGH */
5260	case PRUNE:
5261	    PL_reginput = locinput;
5262	    if (!scan->flags)
5263	        sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5264	    PUSH_STATE_GOTO(COMMIT_next,next);
5265	    /* NOTREACHED */
5266	case COMMIT_next_fail:
5267	    no_final = 1;
5268	    /* FALLTHROUGH */
5269	case OPFAIL:
5270	    sayNO;
5271	    /* NOTREACHED */
5272
5273#define ST st->u.mark
5274        case MARKPOINT:
5275            ST.prev_mark = mark_state;
5276            ST.mark_name = sv_commit = sv_yes_mark
5277                = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5278            mark_state = st;
5279            ST.mark_loc = PL_reginput = locinput;
5280            PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5281            /* NOTREACHED */
5282        case MARKPOINT_next:
5283            mark_state = ST.prev_mark;
5284            sayYES;
5285            /* NOTREACHED */
5286        case MARKPOINT_next_fail:
5287            if (popmark && sv_eq(ST.mark_name,popmark))
5288            {
5289                if (ST.mark_loc > startpoint)
5290	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5291                popmark = NULL; /* we found our mark */
5292                sv_commit = ST.mark_name;
5293
5294                DEBUG_EXECUTE_r({
5295                        PerlIO_printf(Perl_debug_log,
5296		            "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5297		            REPORT_CODE_OFF+depth*2, "",
5298		            PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5299		});
5300            }
5301            mark_state = ST.prev_mark;
5302            sv_yes_mark = mark_state ?
5303                mark_state->u.mark.mark_name : NULL;
5304            sayNO;
5305            /* NOTREACHED */
5306        case SKIP:
5307            PL_reginput = locinput;
5308            if (scan->flags) {
5309                /* (*SKIP) : if we fail we cut here*/
5310                ST.mark_name = NULL;
5311                ST.mark_loc = locinput;
5312                PUSH_STATE_GOTO(SKIP_next,next);
5313            } else {
5314                /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5315                   otherwise do nothing.  Meaning we need to scan
5316                 */
5317                regmatch_state *cur = mark_state;
5318                SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5319
5320                while (cur) {
5321                    if ( sv_eq( cur->u.mark.mark_name,
5322                                find ) )
5323                    {
5324                        ST.mark_name = find;
5325                        PUSH_STATE_GOTO( SKIP_next, next );
5326                    }
5327                    cur = cur->u.mark.prev_mark;
5328                }
5329            }
5330            /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5331            break;
5332	case SKIP_next_fail:
5333	    if (ST.mark_name) {
5334	        /* (*CUT:NAME) - Set up to search for the name as we
5335	           collapse the stack*/
5336	        popmark = ST.mark_name;
5337	    } else {
5338	        /* (*CUT) - No name, we cut here.*/
5339	        if (ST.mark_loc > startpoint)
5340	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5341	        /* but we set sv_commit to latest mark_name if there
5342	           is one so they can test to see how things lead to this
5343	           cut */
5344                if (mark_state)
5345                    sv_commit=mark_state->u.mark.mark_name;
5346            }
5347            no_final = 1;
5348            sayNO;
5349            /* NOTREACHED */
5350#undef ST
5351        case FOLDCHAR:
5352            n = ARG(scan);
5353            if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5354                locinput += ln;
5355            } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5356                sayNO;
5357            } else  {
5358                U8 folded[UTF8_MAXBYTES_CASE+1];
5359                STRLEN foldlen;
5360                const char * const l = locinput;
5361                char *e = PL_regeol;
5362                to_uni_fold(n, folded, &foldlen);
5363
5364		if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5365                	       l, &e, 0,  do_utf8)) {
5366                        sayNO;
5367                }
5368                locinput = e;
5369            }
5370            nextchr = UCHARAT(locinput);
5371            break;
5372        case LNBREAK:
5373            if ((n=is_LNBREAK(locinput,do_utf8))) {
5374                locinput += n;
5375                nextchr = UCHARAT(locinput);
5376            } else
5377                sayNO;
5378            break;
5379
5380#define CASE_CLASS(nAmE)                              \
5381        case nAmE:                                    \
5382            if ((n=is_##nAmE(locinput,do_utf8))) {    \
5383                locinput += n;                        \
5384                nextchr = UCHARAT(locinput);          \
5385            } else                                    \
5386                sayNO;                                \
5387            break;                                    \
5388        case N##nAmE:                                 \
5389            if ((n=is_##nAmE(locinput,do_utf8))) {    \
5390                sayNO;                                \
5391            } else {                                  \
5392                locinput += UTF8SKIP(locinput);       \
5393                nextchr = UCHARAT(locinput);          \
5394            }                                         \
5395            break
5396
5397        CASE_CLASS(VERTWS);
5398        CASE_CLASS(HORIZWS);
5399#undef CASE_CLASS
5400
5401	default:
5402	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5403			  PTR2UV(scan), OP(scan));
5404	    Perl_croak(aTHX_ "regexp memory corruption");
5405
5406	} /* end switch */
5407
5408        /* switch break jumps here */
5409	scan = next; /* prepare to execute the next op and ... */
5410	continue;    /* ... jump back to the top, reusing st */
5411	/* NOTREACHED */
5412
5413      push_yes_state:
5414	/* push a state that backtracks on success */
5415	st->u.yes.prev_yes_state = yes_state;
5416	yes_state = st;
5417	/* FALL THROUGH */
5418      push_state:
5419	/* push a new regex state, then continue at scan  */
5420	{
5421	    regmatch_state *newst;
5422
5423	    DEBUG_STACK_r({
5424	        regmatch_state *cur = st;
5425	        regmatch_state *curyes = yes_state;
5426	        int curd = depth;
5427	        regmatch_slab *slab = PL_regmatch_slab;
5428                for (;curd > -1;cur--,curd--) {
5429                    if (cur < SLAB_FIRST(slab)) {
5430                	slab = slab->prev;
5431                	cur = SLAB_LAST(slab);
5432                    }
5433                    PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5434                        REPORT_CODE_OFF + 2 + depth * 2,"",
5435                        curd, PL_reg_name[cur->resume_state],
5436                        (curyes == cur) ? "yes" : ""
5437                    );
5438                    if (curyes == cur)
5439	                curyes = cur->u.yes.prev_yes_state;
5440                }
5441            } else
5442                DEBUG_STATE_pp("push")
5443            );
5444	    depth++;
5445	    st->locinput = locinput;
5446	    newst = st+1;
5447	    if (newst >  SLAB_LAST(PL_regmatch_slab))
5448		newst = S_push_slab(aTHX);
5449	    PL_regmatch_state = newst;
5450
5451	    locinput = PL_reginput;
5452	    nextchr = UCHARAT(locinput);
5453	    st = newst;
5454	    continue;
5455	    /* NOTREACHED */
5456	}
5457    }
5458
5459    /*
5460    * We get here only if there's trouble -- normally "case END" is
5461    * the terminating point.
5462    */
5463    Perl_croak(aTHX_ "corrupted regexp pointers");
5464    /*NOTREACHED*/
5465    sayNO;
5466
5467yes:
5468    if (yes_state) {
5469	/* we have successfully completed a subexpression, but we must now
5470	 * pop to the state marked by yes_state and continue from there */
5471	assert(st != yes_state);
5472#ifdef DEBUGGING
5473	while (st != yes_state) {
5474	    st--;
5475	    if (st < SLAB_FIRST(PL_regmatch_slab)) {
5476		PL_regmatch_slab = PL_regmatch_slab->prev;
5477		st = SLAB_LAST(PL_regmatch_slab);
5478	    }
5479	    DEBUG_STATE_r({
5480	        if (no_final) {
5481	            DEBUG_STATE_pp("pop (no final)");
5482	        } else {
5483	            DEBUG_STATE_pp("pop (yes)");
5484	        }
5485	    });
5486	    depth--;
5487	}
5488#else
5489	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5490	    || yes_state > SLAB_LAST(PL_regmatch_slab))
5491	{
5492	    /* not in this slab, pop slab */
5493	    depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5494	    PL_regmatch_slab = PL_regmatch_slab->prev;
5495	    st = SLAB_LAST(PL_regmatch_slab);
5496	}
5497	depth -= (st - yes_state);
5498#endif
5499	st = yes_state;
5500	yes_state = st->u.yes.prev_yes_state;
5501	PL_regmatch_state = st;
5502
5503        if (no_final) {
5504            locinput= st->locinput;
5505            nextchr = UCHARAT(locinput);
5506        }
5507	state_num = st->resume_state + no_final;
5508	goto reenter_switch;
5509    }
5510
5511    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5512			  PL_colors[4], PL_colors[5]));
5513
5514    if (PL_reg_eval_set) {
5515	/* each successfully executed (?{...}) block does the equivalent of
5516	 *   local $^R = do {...}
5517	 * When popping the save stack, all these locals would be undone;
5518	 * bypass this by setting the outermost saved $^R to the latest
5519	 * value */
5520	if (oreplsv != GvSV(PL_replgv))
5521	    sv_setsv(oreplsv, GvSV(PL_replgv));
5522    }
5523    result = 1;
5524    goto final_exit;
5525
5526no:
5527    DEBUG_EXECUTE_r(
5528	PerlIO_printf(Perl_debug_log,
5529            "%*s  %sfailed...%s\n",
5530            REPORT_CODE_OFF+depth*2, "",
5531            PL_colors[4], PL_colors[5])
5532	);
5533
5534no_silent:
5535    if (no_final) {
5536        if (yes_state) {
5537            goto yes;
5538        } else {
5539            goto final_exit;
5540        }
5541    }
5542    if (depth) {
5543	/* there's a previous state to backtrack to */
5544	st--;
5545	if (st < SLAB_FIRST(PL_regmatch_slab)) {
5546	    PL_regmatch_slab = PL_regmatch_slab->prev;
5547	    st = SLAB_LAST(PL_regmatch_slab);
5548	}
5549	PL_regmatch_state = st;
5550	locinput= st->locinput;
5551	nextchr = UCHARAT(locinput);
5552
5553	DEBUG_STATE_pp("pop");
5554	depth--;
5555	if (yes_state == st)
5556	    yes_state = st->u.yes.prev_yes_state;
5557
5558	state_num = st->resume_state + 1; /* failure = success + 1 */
5559	goto reenter_switch;
5560    }
5561    result = 0;
5562
5563  final_exit:
5564    if (rex->intflags & PREGf_VERBARG_SEEN) {
5565        SV *sv_err = get_sv("REGERROR", 1);
5566        SV *sv_mrk = get_sv("REGMARK", 1);
5567        if (result) {
5568            sv_commit = &PL_sv_no;
5569            if (!sv_yes_mark)
5570                sv_yes_mark = &PL_sv_yes;
5571        } else {
5572            if (!sv_commit)
5573                sv_commit = &PL_sv_yes;
5574            sv_yes_mark = &PL_sv_no;
5575        }
5576        sv_setsv(sv_err, sv_commit);
5577        sv_setsv(sv_mrk, sv_yes_mark);
5578    }
5579
5580    /* clean up; in particular, free all slabs above current one */
5581    LEAVE_SCOPE(oldsave);
5582
5583    return result;
5584}
5585
5586/*
5587 - regrepeat - repeatedly match something simple, report how many
5588 */
5589/*
5590 * [This routine now assumes that it will only match on things of length 1.
5591 * That was true before, but now we assume scan - reginput is the count,
5592 * rather than incrementing count on every character.  [Er, except utf8.]]
5593 */
5594STATIC I32
5595S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5596{
5597    dVAR;
5598    register char *scan;
5599    register I32 c;
5600    register char *loceol = PL_regeol;
5601    register I32 hardcount = 0;
5602    register bool do_utf8 = PL_reg_match_utf8;
5603#ifndef DEBUGGING
5604    PERL_UNUSED_ARG(depth);
5605#endif
5606
5607    PERL_ARGS_ASSERT_REGREPEAT;
5608
5609    scan = PL_reginput;
5610    if (max == REG_INFTY)
5611	max = I32_MAX;
5612    else if (max < loceol - scan)
5613	loceol = scan + max;
5614    switch (OP(p)) {
5615    case REG_ANY:
5616	if (do_utf8) {
5617	    loceol = PL_regeol;
5618	    while (scan < loceol && hardcount < max && *scan != '\n') {
5619		scan += UTF8SKIP(scan);
5620		hardcount++;
5621	    }
5622	} else {
5623	    while (scan < loceol && *scan != '\n')
5624		scan++;
5625	}
5626	break;
5627    case SANY:
5628        if (do_utf8) {
5629	    loceol = PL_regeol;
5630	    while (scan < loceol && hardcount < max) {
5631	        scan += UTF8SKIP(scan);
5632		hardcount++;
5633	    }
5634	}
5635	else
5636	    scan = loceol;
5637	break;
5638    case CANY:
5639	scan = loceol;
5640	break;
5641    case EXACT:		/* length of string is 1 */
5642	c = (U8)*STRING(p);
5643	while (scan < loceol && UCHARAT(scan) == c)
5644	    scan++;
5645	break;
5646    case EXACTF:	/* length of string is 1 */
5647	c = (U8)*STRING(p);
5648	while (scan < loceol &&
5649	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5650	    scan++;
5651	break;
5652    case EXACTFL:	/* length of string is 1 */
5653	PL_reg_flags |= RF_tainted;
5654	c = (U8)*STRING(p);
5655	while (scan < loceol &&
5656	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5657	    scan++;
5658	break;
5659    case ANYOF:
5660	if (do_utf8) {
5661	    loceol = PL_regeol;
5662	    while (hardcount < max && scan < loceol &&
5663		   reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5664		scan += UTF8SKIP(scan);
5665		hardcount++;
5666	    }
5667	} else {
5668	    while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5669		scan++;
5670	}
5671	break;
5672    case ALNUM:
5673	if (do_utf8) {
5674	    loceol = PL_regeol;
5675	    LOAD_UTF8_CHARCLASS_ALNUM();
5676	    while (hardcount < max && scan < loceol &&
5677		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5678		scan += UTF8SKIP(scan);
5679		hardcount++;
5680	    }
5681	} else {
5682	    while (scan < loceol && isALNUM(*scan))
5683		scan++;
5684	}
5685	break;
5686    case ALNUML:
5687	PL_reg_flags |= RF_tainted;
5688	if (do_utf8) {
5689	    loceol = PL_regeol;
5690	    while (hardcount < max && scan < loceol &&
5691		   isALNUM_LC_utf8((U8*)scan)) {
5692		scan += UTF8SKIP(scan);
5693		hardcount++;
5694	    }
5695	} else {
5696	    while (scan < loceol && isALNUM_LC(*scan))
5697		scan++;
5698	}
5699	break;
5700    case NALNUM:
5701	if (do_utf8) {
5702	    loceol = PL_regeol;
5703	    LOAD_UTF8_CHARCLASS_ALNUM();
5704	    while (hardcount < max && scan < loceol &&
5705		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5706		scan += UTF8SKIP(scan);
5707		hardcount++;
5708	    }
5709	} else {
5710	    while (scan < loceol && !isALNUM(*scan))
5711		scan++;
5712	}
5713	break;
5714    case NALNUML:
5715	PL_reg_flags |= RF_tainted;
5716	if (do_utf8) {
5717	    loceol = PL_regeol;
5718	    while (hardcount < max && scan < loceol &&
5719		   !isALNUM_LC_utf8((U8*)scan)) {
5720		scan += UTF8SKIP(scan);
5721		hardcount++;
5722	    }
5723	} else {
5724	    while (scan < loceol && !isALNUM_LC(*scan))
5725		scan++;
5726	}
5727	break;
5728    case SPACE:
5729	if (do_utf8) {
5730	    loceol = PL_regeol;
5731	    LOAD_UTF8_CHARCLASS_SPACE();
5732	    while (hardcount < max && scan < loceol &&
5733		   (*scan == ' ' ||
5734		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5735		scan += UTF8SKIP(scan);
5736		hardcount++;
5737	    }
5738	} else {
5739	    while (scan < loceol && isSPACE(*scan))
5740		scan++;
5741	}
5742	break;
5743    case SPACEL:
5744	PL_reg_flags |= RF_tainted;
5745	if (do_utf8) {
5746	    loceol = PL_regeol;
5747	    while (hardcount < max && scan < loceol &&
5748		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5749		scan += UTF8SKIP(scan);
5750		hardcount++;
5751	    }
5752	} else {
5753	    while (scan < loceol && isSPACE_LC(*scan))
5754		scan++;
5755	}
5756	break;
5757    case NSPACE:
5758	if (do_utf8) {
5759	    loceol = PL_regeol;
5760	    LOAD_UTF8_CHARCLASS_SPACE();
5761	    while (hardcount < max && scan < loceol &&
5762		   !(*scan == ' ' ||
5763		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5764		scan += UTF8SKIP(scan);
5765		hardcount++;
5766	    }
5767	} else {
5768	    while (scan < loceol && !isSPACE(*scan))
5769		scan++;
5770	}
5771	break;
5772    case NSPACEL:
5773	PL_reg_flags |= RF_tainted;
5774	if (do_utf8) {
5775	    loceol = PL_regeol;
5776	    while (hardcount < max && scan < loceol &&
5777		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5778		scan += UTF8SKIP(scan);
5779		hardcount++;
5780	    }
5781	} else {
5782	    while (scan < loceol && !isSPACE_LC(*scan))
5783		scan++;
5784	}
5785	break;
5786    case DIGIT:
5787	if (do_utf8) {
5788	    loceol = PL_regeol;
5789	    LOAD_UTF8_CHARCLASS_DIGIT();
5790	    while (hardcount < max && scan < loceol &&
5791		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5792		scan += UTF8SKIP(scan);
5793		hardcount++;
5794	    }
5795	} else {
5796	    while (scan < loceol && isDIGIT(*scan))
5797		scan++;
5798	}
5799	break;
5800    case NDIGIT:
5801	if (do_utf8) {
5802	    loceol = PL_regeol;
5803	    LOAD_UTF8_CHARCLASS_DIGIT();
5804	    while (hardcount < max && scan < loceol &&
5805		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5806		scan += UTF8SKIP(scan);
5807		hardcount++;
5808	    }
5809	} else {
5810	    while (scan < loceol && !isDIGIT(*scan))
5811		scan++;
5812	}
5813    case LNBREAK:
5814        if (do_utf8) {
5815	    loceol = PL_regeol;
5816	    while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5817		scan += c;
5818		hardcount++;
5819	    }
5820	} else {
5821	    /*
5822	      LNBREAK can match two latin chars, which is ok,
5823	      because we have a null terminated string, but we
5824	      have to use hardcount in this situation
5825	    */
5826	    while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5827		scan+=c;
5828		hardcount++;
5829	    }
5830	}
5831	break;
5832    case HORIZWS:
5833        if (do_utf8) {
5834	    loceol = PL_regeol;
5835	    while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5836		scan += c;
5837		hardcount++;
5838	    }
5839	} else {
5840	    while (scan < loceol && is_HORIZWS_latin1(scan))
5841		scan++;
5842	}
5843	break;
5844    case NHORIZWS:
5845        if (do_utf8) {
5846	    loceol = PL_regeol;
5847	    while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5848		scan += UTF8SKIP(scan);
5849		hardcount++;
5850	    }
5851	} else {
5852	    while (scan < loceol && !is_HORIZWS_latin1(scan))
5853		scan++;
5854
5855	}
5856	break;
5857    case VERTWS:
5858        if (do_utf8) {
5859	    loceol = PL_regeol;
5860	    while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5861		scan += c;
5862		hardcount++;
5863	    }
5864	} else {
5865	    while (scan < loceol && is_VERTWS_latin1(scan))
5866		scan++;
5867
5868	}
5869	break;
5870    case NVERTWS:
5871        if (do_utf8) {
5872	    loceol = PL_regeol;
5873	    while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5874		scan += UTF8SKIP(scan);
5875		hardcount++;
5876	    }
5877	} else {
5878	    while (scan < loceol && !is_VERTWS_latin1(scan))
5879		scan++;
5880
5881	}
5882	break;
5883
5884    default:		/* Called on something of 0 width. */
5885	break;		/* So match right here or not at all. */
5886    }
5887
5888    if (hardcount)
5889	c = hardcount;
5890    else
5891	c = scan - PL_reginput;
5892    PL_reginput = scan;
5893
5894    DEBUG_r({
5895	GET_RE_DEBUG_FLAGS_DECL;
5896	DEBUG_EXECUTE_r({
5897	    SV * const prop = sv_newmortal();
5898	    regprop(prog, prop, p);
5899	    PerlIO_printf(Perl_debug_log,
5900			"%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5901			REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5902	});
5903    });
5904
5905    return(c);
5906}
5907
5908
5909#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5910/*
5911- regclass_swash - prepare the utf8 swash
5912*/
5913
5914SV *
5915Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5916{
5917    dVAR;
5918    SV *sw  = NULL;
5919    SV *si  = NULL;
5920    SV *alt = NULL;
5921    RXi_GET_DECL(prog,progi);
5922    const struct reg_data * const data = prog ? progi->data : NULL;
5923
5924    PERL_ARGS_ASSERT_REGCLASS_SWASH;
5925
5926    if (data && data->count) {
5927	const U32 n = ARG(node);
5928
5929	if (data->what[n] == 's') {
5930	    SV * const rv = MUTABLE_SV(data->data[n]);
5931	    AV * const av = MUTABLE_AV(SvRV(rv));
5932	    SV **const ary = AvARRAY(av);
5933	    SV **a, **b;
5934
5935	    /* See the end of regcomp.c:S_regclass() for
5936	     * documentation of these array elements. */
5937
5938	    si = *ary;
5939	    a  = SvROK(ary[1]) ? &ary[1] : NULL;
5940	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5941
5942	    if (a)
5943		sw = *a;
5944	    else if (si && doinit) {
5945		sw = swash_init("utf8", "", si, 1, 0);
5946		(void)av_store(av, 1, sw);
5947	    }
5948	    if (b)
5949	        alt = *b;
5950	}
5951    }
5952
5953    if (listsvp)
5954	*listsvp = si;
5955    if (altsvp)
5956	*altsvp  = alt;
5957
5958    return sw;
5959}
5960#endif
5961
5962/*
5963 - reginclass - determine if a character falls into a character class
5964
5965  The n is the ANYOF regnode, the p is the target string, lenp
5966  is pointer to the maximum length of how far to go in the p
5967  (if the lenp is zero, UTF8SKIP(p) is used),
5968  do_utf8 tells whether the target string is in UTF-8.
5969
5970 */
5971
5972STATIC bool
5973S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5974{
5975    dVAR;
5976    const char flags = ANYOF_FLAGS(n);
5977    bool match = FALSE;
5978    UV c = *p;
5979    STRLEN len = 0;
5980    STRLEN plen;
5981
5982    PERL_ARGS_ASSERT_REGINCLASS;
5983
5984    if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5985	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5986		(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
5987		| UTF8_ALLOW_FFFF | UTF8_CHECK_ONLY);
5988		/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
5989		 * UTF8_ALLOW_FFFF */
5990	if (len == (STRLEN)-1)
5991	    Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5992    }
5993
5994    plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5995    if (do_utf8 || (flags & ANYOF_UNICODE)) {
5996        if (lenp)
5997	    *lenp = 0;
5998	if (do_utf8 && !ANYOF_RUNTIME(n)) {
5999	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
6000		match = TRUE;
6001	}
6002	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
6003	    match = TRUE;
6004	if (!match) {
6005	    AV *av;
6006	    SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
6007
6008	    if (sw) {
6009		U8 * utf8_p;
6010		if (do_utf8) {
6011		    utf8_p = (U8 *) p;
6012		} else {
6013		    STRLEN len = 1;
6014		    utf8_p = bytes_to_utf8(p, &len);
6015		}
6016		if (swash_fetch(sw, utf8_p, 1))
6017		    match = TRUE;
6018		else if (flags & ANYOF_FOLD) {
6019		    if (!match && lenp && av) {
6020		        I32 i;
6021			for (i = 0; i <= av_len(av); i++) {
6022			    SV* const sv = *av_fetch(av, i, FALSE);
6023			    STRLEN len;
6024			    const char * const s = SvPV_const(sv, len);
6025			    if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
6026			        *lenp = len;
6027				match = TRUE;
6028				break;
6029			    }
6030			}
6031		    }
6032		    if (!match) {
6033		        U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
6034
6035			STRLEN tmplen;
6036			to_utf8_fold(utf8_p, tmpbuf, &tmplen);
6037			if (swash_fetch(sw, tmpbuf, 1))
6038			    match = TRUE;
6039		    }
6040		}
6041
6042		/* If we allocated a string above, free it */
6043		if (! do_utf8) Safefree(utf8_p);
6044	    }
6045	}
6046	if (match && lenp && *lenp == 0)
6047	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
6048    }
6049    if (!match && c < 256) {
6050	if (ANYOF_BITMAP_TEST(n, c))
6051	    match = TRUE;
6052	else if (flags & ANYOF_FOLD) {
6053	    U8 f;
6054
6055	    if (flags & ANYOF_LOCALE) {
6056		PL_reg_flags |= RF_tainted;
6057		f = PL_fold_locale[c];
6058	    }
6059	    else
6060		f = PL_fold[c];
6061	    if (f != c && ANYOF_BITMAP_TEST(n, f))
6062		match = TRUE;
6063	}
6064
6065	if (!match && (flags & ANYOF_CLASS)) {
6066	    PL_reg_flags |= RF_tainted;
6067	    if (
6068		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
6069		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
6070		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
6071		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
6072		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
6073		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
6074		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
6075		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
6076		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
6077		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
6078		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
6079		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
6080		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
6081		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
6082		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
6083		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
6084		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
6085		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
6086		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
6087		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
6088		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
6089		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
6090		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
6091		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
6092		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
6093		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
6094		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
6095		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
6096		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
6097		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
6098		) /* How's that for a conditional? */
6099	    {
6100		match = TRUE;
6101	    }
6102	}
6103    }
6104
6105    return (flags & ANYOF_INVERT) ? !match : match;
6106}
6107
6108STATIC U8 *
6109S_reghop3(U8 *s, I32 off, const U8* lim)
6110{
6111    dVAR;
6112
6113    PERL_ARGS_ASSERT_REGHOP3;
6114
6115    if (off >= 0) {
6116	while (off-- && s < lim) {
6117	    /* XXX could check well-formedness here */
6118	    s += UTF8SKIP(s);
6119	}
6120    }
6121    else {
6122        while (off++ && s > lim) {
6123            s--;
6124            if (UTF8_IS_CONTINUED(*s)) {
6125                while (s > lim && UTF8_IS_CONTINUATION(*s))
6126                    s--;
6127	    }
6128            /* XXX could check well-formedness here */
6129	}
6130    }
6131    return s;
6132}
6133
6134#ifdef XXX_dmq
6135/* there are a bunch of places where we use two reghop3's that should
6136   be replaced with this routine. but since thats not done yet
6137   we ifdef it out - dmq
6138*/
6139STATIC U8 *
6140S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
6141{
6142    dVAR;
6143
6144    PERL_ARGS_ASSERT_REGHOP4;
6145
6146    if (off >= 0) {
6147        while (off-- && s < rlim) {
6148            /* XXX could check well-formedness here */
6149            s += UTF8SKIP(s);
6150        }
6151    }
6152    else {
6153        while (off++ && s > llim) {
6154            s--;
6155            if (UTF8_IS_CONTINUED(*s)) {
6156                while (s > llim && UTF8_IS_CONTINUATION(*s))
6157                    s--;
6158            }
6159            /* XXX could check well-formedness here */
6160        }
6161    }
6162    return s;
6163}
6164#endif
6165
6166STATIC U8 *
6167S_reghopmaybe3(U8* s, I32 off, const U8* lim)
6168{
6169    dVAR;
6170
6171    PERL_ARGS_ASSERT_REGHOPMAYBE3;
6172
6173    if (off >= 0) {
6174	while (off-- && s < lim) {
6175	    /* XXX could check well-formedness here */
6176	    s += UTF8SKIP(s);
6177	}
6178	if (off >= 0)
6179	    return NULL;
6180    }
6181    else {
6182        while (off++ && s > lim) {
6183            s--;
6184            if (UTF8_IS_CONTINUED(*s)) {
6185                while (s > lim && UTF8_IS_CONTINUATION(*s))
6186                    s--;
6187	    }
6188            /* XXX could check well-formedness here */
6189	}
6190	if (off <= 0)
6191	    return NULL;
6192    }
6193    return s;
6194}
6195
6196static void
6197restore_pos(pTHX_ void *arg)
6198{
6199    dVAR;
6200    regexp * const rex = (regexp *)arg;
6201    if (PL_reg_eval_set) {
6202	if (PL_reg_oldsaved) {
6203	    rex->subbeg = PL_reg_oldsaved;
6204	    rex->sublen = PL_reg_oldsavedlen;
6205#ifdef PERL_OLD_COPY_ON_WRITE
6206	    rex->saved_copy = PL_nrs;
6207#endif
6208	    RXp_MATCH_COPIED_on(rex);
6209	}
6210	PL_reg_magic->mg_len = PL_reg_oldpos;
6211	PL_reg_eval_set = 0;
6212	PL_curpm = PL_reg_oldcurpm;
6213    }
6214}
6215
6216STATIC void
6217S_to_utf8_substr(pTHX_ register regexp *prog)
6218{
6219    int i = 1;
6220
6221    PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
6222
6223    do {
6224	if (prog->substrs->data[i].substr
6225	    && !prog->substrs->data[i].utf8_substr) {
6226	    SV* const sv = newSVsv(prog->substrs->data[i].substr);
6227	    prog->substrs->data[i].utf8_substr = sv;
6228	    sv_utf8_upgrade(sv);
6229	    if (SvVALID(prog->substrs->data[i].substr)) {
6230		const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
6231		if (flags & FBMcf_TAIL) {
6232		    /* Trim the trailing \n that fbm_compile added last
6233		       time.  */
6234		    SvCUR_set(sv, SvCUR(sv) - 1);
6235		    /* Whilst this makes the SV technically "invalid" (as its
6236		       buffer is no longer followed by "\0") when fbm_compile()
6237		       adds the "\n" back, a "\0" is restored.  */
6238		}
6239		fbm_compile(sv, flags);
6240	    }
6241	    if (prog->substrs->data[i].substr == prog->check_substr)
6242		prog->check_utf8 = sv;
6243	}
6244    } while (i--);
6245}
6246
6247STATIC void
6248S_to_byte_substr(pTHX_ register regexp *prog)
6249{
6250    dVAR;
6251    int i = 1;
6252
6253    PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6254
6255    do {
6256	if (prog->substrs->data[i].utf8_substr
6257	    && !prog->substrs->data[i].substr) {
6258	    SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6259	    if (sv_utf8_downgrade(sv, TRUE)) {
6260		if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6261		    const U8 flags
6262			= BmFLAGS(prog->substrs->data[i].utf8_substr);
6263		    if (flags & FBMcf_TAIL) {
6264			/* Trim the trailing \n that fbm_compile added last
6265			   time.  */
6266			SvCUR_set(sv, SvCUR(sv) - 1);
6267		    }
6268		    fbm_compile(sv, flags);
6269		}
6270	    } else {
6271		SvREFCNT_dec(sv);
6272		sv = &PL_sv_undef;
6273	    }
6274	    prog->substrs->data[i].substr = sv;
6275	    if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6276		prog->check_substr = sv;
6277	}
6278    } while (i--);
6279}
6280
6281/*
6282 * Local variables:
6283 * c-indentation-style: bsd
6284 * c-basic-offset: 4
6285 * indent-tabs-mode: t
6286 * End:
6287 *
6288 * ex: set ts=8 sts=4 sw=4 noet:
6289 */
6290