1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8                       Written by Philip Hazel
9           Copyright (c) 1997-2014 University of Cambridge
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15    * Redistributions of source code must retain the above copyright notice,
16      this list of conditions and the following disclaimer.
17
18    * Redistributions in binary form must reproduce the above copyright
19      notice, this list of conditions and the following disclaimer in the
20      documentation and/or other materials provided with the distribution.
21
22    * Neither the name of the University of Cambridge nor the names of its
23      contributors may be used to endorse or promote products derived from
24      this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40/* This module contains pcre_exec(), the externally visible function that does
41pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42possible. There are also some static supporting functions. */
43
44#ifdef HAVE_CONFIG_H
45#include "config.h"
46#endif
47
48#define NLBLOCK md             /* Block containing newline information */
49#define PSSTART start_subject  /* Field containing processed string start */
50#define PSEND   end_subject    /* Field containing processed string end */
51
52#include "pcre_internal.h"
53
54/* Undefine some potentially clashing cpp symbols */
55
56#undef min
57#undef max
58
/* The md->capture_last field uses the lower 16 bits for the number of the last
captured substring (which can never be greater than 65535) and a bit in the top
half to mean "capture vector overflowed". This odd way of doing things was
implemented when it was realized that preserving and restoring the overflow bit
whenever the last capture number was saved/restored made for a neater
interface, and doing it this way saved on (a) another variable, which would
have increased the stack frame size (a big NO-NO in PCRE) and (b) another
separate set of save/restore instructions. The following defines are used in
implementing this. */

#define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
#define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
#define OVFLBIT     0x00010000    /* The bit that is set for overflow */

/* Values for setting in md->match_function_type to indicate two special types
of call to match(). We do it this way to save on using another stack variable,
as stack usage is to be discouraged. match() resets the field to 0 once it has
acted on MATCH_CBEGROUP. */

#define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
#define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */

/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Special internal returns from the match() function. Make them sufficiently
negative to avoid the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)
#define MATCH_ONCE         (-997)
/* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use a range. Each is returned by the handler of the
corresponding backtracking verb when the rest of the pattern after the verb
yields MATCH_NOMATCH. */
#define MATCH_COMMIT       (-996)  /* From OP_COMMIT */
#define MATCH_PRUNE        (-995)  /* From OP_PRUNE and OP_PRUNE_ARG */
#define MATCH_SKIP         (-994)  /* From OP_SKIP; position in md->start_match_ptr */
#define MATCH_SKIP_ARG     (-993)  /* From OP_SKIP_ARG; name in md->start_match_ptr */
#define MATCH_THEN         (-992)  /* From OP_THEN(_ARG); opcode address in md->start_match_ptr */
/* Inclusive bounds of the backtracking-verb range above; MATCH_COMMIT is the
most negative value and MATCH_THEN the least negative. */
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT

/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. It is also the size
of the stacksave array used by match(). */

#define REC_STACK_SAVE_MAX 30
107
/* Minimum and maximum repeat counts for the common repeat forms, one table
entry per form. In rep_max a value of 0 stands for "no upper limit".
NOTE(review): the index is presumably the repeat opcode's offset from the first
repeat opcode; the users of these tables are outside this view - confirm. */

static const char rep_min[] = {
  0, 0,      /* entries 0-1  */
  1, 1,      /* entries 2-3  */
  0, 0,      /* entries 4-5  */
  0, 0,      /* entries 6-7  */
  0,         /* entry   8    */
  1,         /* entry   9    */
  0          /* entry   10   */
};

static const char rep_max[] = {
  0, 0,      /* entries 0-1: unlimited */
  0, 0,      /* entries 2-3: unlimited */
  1, 1,      /* entries 4-5  */
  0, 0,      /* entries 6-7: unlimited */
  0,         /* entry   8:   unlimited */
  0,         /* entry   9:   unlimited */
  1          /* entry   10   */
};
112
113#ifdef PCRE_DEBUG
114/*************************************************
115*        Debugging function to print chars       *
116*************************************************/
117
118/* Print a sequence of chars in printable format, stopping at the end of the
119subject if the requested.
120
121Arguments:
122  p           points to characters
123  length      number to print
124  is_subject  TRUE if printing from within md->start_subject
125  md          pointer to matching data block, if is_subject is TRUE
126
127Returns:     nothing
128*/
129
130static void
131pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
132{
133pcre_uint32 c;
134BOOL utf = md->utf;
135if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
136while (length-- > 0)
137  if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
138}
139#endif
140
141
142
143/*************************************************
144*          Match a back-reference                *
145*************************************************/
146
147/* Normally, if a back reference hasn't been set, the length that is passed is
148negative, so the match always fails. However, in JavaScript compatibility mode,
149the length passed is zero. Note that in caseless UTF-8 mode, the number of
150subject bytes matched may be different to the number of reference bytes.
151
152Arguments:
153  offset      index into the offset vector
154  eptr        pointer into the subject
155  length      length of reference to be matched (number of bytes)
156  md          points to match data block
157  caseless    TRUE if caseless
158
159Returns:      >= 0 the number of subject bytes matched
160              -1 no match
161              -2 partial match; always given if at end subject
162*/
163
164static int
165match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
166  BOOL caseless)
167{
168PCRE_PUCHAR eptr_start = eptr;
169register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
170#if defined SUPPORT_UTF && defined SUPPORT_UCP
171BOOL utf = md->utf;
172#endif
173
174#ifdef PCRE_DEBUG
175if (eptr >= md->end_subject)
176  printf("matching subject <null>");
177else
178  {
179  printf("matching subject ");
180  pchars(eptr, length, TRUE, md);
181  }
182printf(" against backref ");
183pchars(p, length, FALSE, md);
184printf("\n");
185#endif
186
187/* Always fail if reference not set (and not JavaScript compatible - in that
188case the length is passed as zero). */
189
190if (length < 0) return -1;
191
192/* Separate the caseless case for speed. In UTF-8 mode we can only do this
193properly if Unicode properties are supported. Otherwise, we can check only
194ASCII characters. */
195
196if (caseless)
197  {
198#if defined SUPPORT_UTF && defined SUPPORT_UCP
199  if (utf)
200    {
201    /* Match characters up to the end of the reference. NOTE: the number of
202    data units matched may differ, because in UTF-8 there are some characters
203    whose upper and lower case versions code have different numbers of bytes.
204    For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
205    (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
206    sequence of two of the latter. It is important, therefore, to check the
207    length along the reference, not along the subject (earlier code did this
208    wrong). */
209
210    PCRE_PUCHAR endptr = p + length;
211    while (p < endptr)
212      {
213      pcre_uint32 c, d;
214      const ucd_record *ur;
215      if (eptr >= md->end_subject) return -2;   /* Partial match */
216      GETCHARINC(c, eptr);
217      GETCHARINC(d, p);
218      ur = GET_UCD(d);
219      if (c != d && c != d + ur->other_case)
220        {
221        const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
222        for (;;)
223          {
224          if (c < *pp) return -1;
225          if (c == *pp++) break;
226          }
227        }
228      }
229    }
230  else
231#endif
232
233  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
234  is no UCP support. */
235    {
236    while (length-- > 0)
237      {
238      pcre_uint32 cc, cp;
239      if (eptr >= md->end_subject) return -2;   /* Partial match */
240      cc = UCHAR21TEST(eptr);
241      cp = UCHAR21TEST(p);
242      if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
243      p++;
244      eptr++;
245      }
246    }
247  }
248
249/* In the caseful case, we can just compare the bytes, whether or not we
250are in UTF-8 mode. */
251
252else
253  {
254  while (length-- > 0)
255    {
256    if (eptr >= md->end_subject) return -2;   /* Partial match */
257    if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
258    }
259  }
260
261return (int)(eptr - eptr_start);
262}
263
264
265
266/***************************************************************************
267****************************************************************************
268                   RECURSION IN THE match() FUNCTION
269
270The match() function is highly recursive, though not every recursive call
271increases the recursive depth. Nevertheless, some regular expressions can cause
272it to recurse to a great depth. I was writing for Unix, so I just let it call
273itself recursively. This uses the stack for saving everything that has to be
274saved for a recursive call. On Unix, the stack can be large, and this works
275fine.
276
277It turns out that on some non-Unix-like systems there are problems with
278programs that use a lot of stack. (This despite the fact that every last chip
279has oodles of memory these days, and techniques for extending the stack have
280been known for decades.) So....
281
282There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
283calls by keeping local variables that need to be preserved in blocks of memory
obtained from malloc() instead of on the stack. Macros are used to
285achieve this so that the actual code doesn't look very different to what it
286always used to.
287
288The original heap-recursive code used longjmp(). However, it seems that this
289can be very slow on some operating systems. Following a suggestion from Stan
290Switzer, the use of longjmp() has been abolished, at the cost of having to
291provide a unique number for each call to RMATCH. There is no way of generating
292a sequence of numbers at compile time in C. I have given them names, to make
293them stand out more clearly.
294
295Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
296FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
297tests. Furthermore, not using longjmp() means that local dynamic variables
298don't have indeterminate values; this has meant that the frame size can be
299reduced because the result can be "passed back" by straight setting of the
300variable instead of being passed in the frame.
301****************************************************************************
302***************************************************************************/
303
/* Identifiers for the individual RMATCH call sites, numbered consecutively
from 1 to 67. When this list is changed, the code at HEAP_RETURN below must be
updated in sync. The first name on each row carries its value explicitly so
that the numbering is easy to check by eye. */

enum {
  RM1  =  1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11 = 11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  RM21 = 21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  RM31 = 31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41 = 41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  RM51 = 51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
  RM61 = 61, RM62, RM63, RM64, RM65, RM66, RM67
};
314
/* RMATCH performs a (possibly simulated) recursive call of match() and leaves
the result in the local variable rrc; RRETURN returns a value from the current
level. The arguments of RMATCH are:

  ra  subject pointer for the new level (eptr)
  rb  code pointer for the new level (ecode)
  rc  offset_top for the new level
  rd  the match data block (md)
  re  the eptrblock chain pointer (eptrb)
  rw  the unique call-site number (one of the RMxx values)

The current mstart is always passed on, and the recursion depth is always one
more than the current depth.

These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
actually used in this definition. */

#ifndef NO_RECURSE
#define REGISTER register

#ifdef PCRE_DEBUG
/* Debugging versions: as the production versions, but tracing each call and
return with the source line number. They expand to brace-enclosed blocks. */
#define RMATCH(ra,rb,rc,rd,re,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d\n", ra, __LINE__); \
  return ra; \
  }
#else
/* Production versions: a plain recursive call and a plain return. Note that
these expand to an expression and a return statement without a terminating
semicolon; the semicolon is supplied at the point of use. */
#define RMATCH(ra,rb,rc,rd,re,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
#define RRETURN(ra) return ra
#endif

#else


/* These versions of the macros manage a private stack on the heap. Note that
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
argument of match(), which never changes. */

/* REGISTER expands to nothing here, so match()'s eptr and ecode parameters are
not declared "register" in this configuration. */

#define REGISTER

/* "Call": take the frame that follows the current one in the chain, obtaining
a new one from PUBL(stack_malloc) and linking it in if there is none yet (on
allocation failure the current level returns PCRE_ERROR_NOMEMORY). Record the
call-site number rw in the current frame's Xwhere, fill in the new frame's
argument fields, make it the current frame, and jump to HEAP_RECURSE. The label
L_<rw> marks the place where execution continues after the "call" returns.
NOTE(review): the jump to that label is presumably made by the HEAP_RETURN code
using Xwhere; that code is outside this view - confirm. */

#define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
  heapframe *newframe = frame->Xnextframe;\
  if (newframe == NULL)\
    {\
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    newframe->Xnextframe = NULL;\
    frame->Xnextframe = newframe;\
    }\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xeptrb = re;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* "Return": step back to the previous frame. If there is one, pass the result
back in rrc and jump to HEAP_RETURN; otherwise this is the outermost level, so
really return from match(). The frame being left is not freed here; it stays
linked from its predecessor's Xnextframe, where RMATCH will find and reuse
it. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
385
386
/* Structure for remembering the local variables in a private frame. One frame
holds the state of one level of "recursion" in match() when NO_RECURSE is
defined. Within match(), each member named Xname is accessed through a macro
called name, so that the body of the function reads the same in both
configurations. */

typedef struct heapframe {
  /* Links in the chain of frames. RRETURN moves back along Xprevframe and
  really returns from match() when it is NULL; RMATCH moves forward along
  Xnextframe, allocating a new frame only when it is NULL. */

  struct heapframe *Xprevframe;
  struct heapframe *Xnextframe;

  /* Function arguments that may change */

  PCRE_PUCHAR Xeptr;
  const pcre_uchar *Xecode;
  PCRE_PUCHAR Xmstart;
  int Xoffset_top;
  eptrblock *Xeptrb;
  unsigned int Xrdepth;

  /* Function local variables */

  PCRE_PUCHAR Xcallpat;
#ifdef SUPPORT_UTF
  PCRE_PUCHAR Xcharptr;
#endif
  PCRE_PUCHAR Xdata;
  PCRE_PUCHAR Xnext;
  PCRE_PUCHAR Xpp;
  PCRE_PUCHAR Xprev;
  PCRE_PUCHAR Xsaved_eptr;

  recursion_info Xnew_recursive;

  BOOL Xcur_is_word;
  BOOL Xcondition;
  BOOL Xprev_is_word;

#ifdef SUPPORT_UCP
  int Xprop_type;
  unsigned int Xprop_value;
  int Xprop_fail_result;
  int Xoclength;
  pcre_uchar Xocchars[6];
#endif

  /* Xfc and Xfi exist only in this heap-frame version; when true recursion is
  used, fc and fi are simply other names for match()'s locals c and i. */

  int Xcodelink;
  int Xctype;
  unsigned int Xfc;
  int Xfi;
  int Xlength;
  int Xmax;
  int Xmin;
  unsigned int Xnumber;
  int Xoffset;
  unsigned int Xop;
  pcre_int32 Xsave_capture_last;
  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  int Xstacksave[REC_STACK_SAVE_MAX];

  /* Block that match() links onto the eptrb chain when it is entered with
  md->match_function_type set to MATCH_CBEGROUP. */

  eptrblock Xnewptrb;

  /* Where to jump back to: the call-site number (an RMxx value) stored by
  RMATCH in the calling frame just before it "recurses". */

  int Xwhere;

} heapframe;
449
450#endif
451
452
453/***************************************************************************
454***************************************************************************/
455
456
457
458/*************************************************
459*         Match from current position            *
460*************************************************/
461
462/* This function is called recursively in many circumstances. Whenever it
463returns a negative (error) response, the outer incarnation must also return the
464same response. */
465
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. They do nothing unless md->partial is
non-zero. We set the "hit end" flag (md->hitend) if the pointer is at the end
of the subject and also beyond md->start_used_ptr (i.e. something has been
inspected). For hard partial matching (md->partial greater than 1), we then
return PCRE_ERROR_PARTIAL immediately. The second one is used when we already
know we are past the end of the subject, so it omits the end_subject test.

Both macros use the variables eptr and md of the enclosing function and may
leave it via RRETURN. Each expands to a bare "if" statement with no "else", so
it must be used as a complete statement, and not directly before an "else". */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && \
      eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }
487
488
489/* Performance note: It might be tempting to extract commonly used fields from
490the md structure (e.g. utf, end_subject) into individual variables to improve
491performance. Tests using gcc on a SPARC disproved this; in the first case, it
492made performance worse.
493
494Arguments:
495   eptr        pointer to current character in subject
496   ecode       pointer to current position in compiled code
497   mstart      pointer to the current match start position (can be modified
498                 by encountering \K)
499   offset_top  current top pointer
500   md          pointer to "static" info for the match
501   eptrb       pointer to chain of blocks containing eptr at start of
502                 brackets - for testing for empty matches
503   rdepth      the recursion depth
504
505Returns:       MATCH_MATCH if matched            )  these values are >= 0
506               MATCH_NOMATCH if failed to match  )
507               a negative MATCH_xxx value for PRUNE, SKIP, etc
508               a negative PCRE_ERROR_xxx value if aborted by an error condition
509                 (e.g. stopped by repeated call or recursion limit)
510*/
511
512static int
513match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
514  PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
515  unsigned int rdepth)
516{
517/* These variables do not need to be preserved over recursion in this function,
518so they can be ordinary variables in all cases. Mark some of them with
519"register" because they are used a lot in loops. */
520
521register int  rrc;         /* Returns from recursive calls */
522register int  i;           /* Used for loops not involving calls to RMATCH() */
523register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
524register BOOL utf;         /* Local copy of UTF flag for speed */
525
526BOOL minimize, possessive; /* Quantifier options */
527BOOL caseless;
528int condcode;
529
530/* When recursion is not being used, all "local" variables that have to be
531preserved over calls to RMATCH() are part of a "frame". We set up the top-level
532frame on the stack here; subsequent instantiations are obtained from the heap
533whenever RMATCH() does a "recursion". See the macro definitions above. Putting
534the top-level on the stack rather than malloc-ing them all gives a performance
535boost in many cases where there is not much "recursion". */
536
537#ifdef NO_RECURSE
538heapframe *frame = (heapframe *)md->match_frames_base;
539
540/* Copy in the original argument variables */
541
542frame->Xeptr = eptr;
543frame->Xecode = ecode;
544frame->Xmstart = mstart;
545frame->Xoffset_top = offset_top;
546frame->Xeptrb = eptrb;
547frame->Xrdepth = rdepth;
548
/* This is where control jumps back to in order to effect "recursion" */
550
551HEAP_RECURSE:
552
553/* Macros make the argument variables come from the current frame */
554
555#define eptr               frame->Xeptr
556#define ecode              frame->Xecode
557#define mstart             frame->Xmstart
558#define offset_top         frame->Xoffset_top
559#define eptrb              frame->Xeptrb
560#define rdepth             frame->Xrdepth
561
562/* Ditto for the local variables */
563
564#ifdef SUPPORT_UTF
565#define charptr            frame->Xcharptr
566#endif
567#define callpat            frame->Xcallpat
568#define codelink           frame->Xcodelink
569#define data               frame->Xdata
570#define next               frame->Xnext
571#define pp                 frame->Xpp
572#define prev               frame->Xprev
573#define saved_eptr         frame->Xsaved_eptr
574
575#define new_recursive      frame->Xnew_recursive
576
577#define cur_is_word        frame->Xcur_is_word
578#define condition          frame->Xcondition
579#define prev_is_word       frame->Xprev_is_word
580
581#ifdef SUPPORT_UCP
582#define prop_type          frame->Xprop_type
583#define prop_value         frame->Xprop_value
584#define prop_fail_result   frame->Xprop_fail_result
585#define oclength           frame->Xoclength
586#define occhars            frame->Xocchars
587#endif
588
589#define ctype              frame->Xctype
590#define fc                 frame->Xfc
591#define fi                 frame->Xfi
592#define length             frame->Xlength
593#define max                frame->Xmax
594#define min                frame->Xmin
595#define number             frame->Xnumber
596#define offset             frame->Xoffset
597#define op                 frame->Xop
598#define save_capture_last  frame->Xsave_capture_last
599#define save_offset1       frame->Xsave_offset1
600#define save_offset2       frame->Xsave_offset2
601#define save_offset3       frame->Xsave_offset3
602#define stacksave          frame->Xstacksave
603
604#define newptrb            frame->Xnewptrb
605
606/* When recursion is being used, local variables are allocated on the stack and
607get preserved during recursion in the normal way. In this environment, fi and
608i, and fc and c, can be the same variables. */
609
610#else         /* NO_RECURSE not defined */
611#define fi i
612#define fc c
613
614/* Many of the following variables are used only in small blocks of the code.
615My normal style of coding would have declared them within each of those blocks.
616However, in order to accommodate the version of this code that uses an external
617"stack" implemented on the heap, it is easier to declare them all here, so the
618declarations can be cut out in a block. The only declarations within blocks
619below are for variables that do not have to be preserved over a recursive call
620to RMATCH(). */
621
622#ifdef SUPPORT_UTF
623const pcre_uchar *charptr;
624#endif
625const pcre_uchar *callpat;
626const pcre_uchar *data;
627const pcre_uchar *next;
628PCRE_PUCHAR       pp;
629const pcre_uchar *prev;
630PCRE_PUCHAR       saved_eptr;
631
632recursion_info new_recursive;
633
634BOOL cur_is_word;
635BOOL condition;
636BOOL prev_is_word;
637
638#ifdef SUPPORT_UCP
639int prop_type;
640unsigned int prop_value;
641int prop_fail_result;
642int oclength;
643pcre_uchar occhars[6];
644#endif
645
646int codelink;
647int ctype;
648int length;
649int max;
650int min;
651unsigned int number;
652int offset;
653unsigned int op;
654pcre_int32 save_capture_last;
655int save_offset1, save_offset2, save_offset3;
656int stacksave[REC_STACK_SAVE_MAX];
657
658eptrblock newptrb;
659
660/* There is a special fudge for calling match() in a way that causes it to
661measure the size of its basic stack frame when the stack is being used for
662recursion. The second argument (ecode) being NULL triggers this behaviour. It
663cannot normally ever be NULL. The return is the negated value of the frame
664size. */
665
666if (ecode == NULL)
667  {
668  if (rdepth == 0)
669    return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
670  else
671    {
672    int len = (char *)&rdepth - (char *)eptr;
673    return (len > 0)? -len : len;
674    }
675  }
676#endif     /* NO_RECURSE */
677
678/* To save space on the stack and in the heap frame, I have doubled up on some
679of the local variables that are used only in localised parts of the code, but
680still need to be preserved over recursive calls of match(). These macros define
681the alternative names that are used. */
682
683#define allow_zero    cur_is_word
684#define cbegroup      condition
685#define code_offset   codelink
686#define condassert    condition
687#define matched_once  prev_is_word
688#define foc           number
689#define save_mark     data
690
/* These statements are here to stop the compiler complaining about
uninitialized variables. */
693
694#ifdef SUPPORT_UCP
695prop_value = 0;
696prop_fail_result = 0;
697#endif
698
699
700/* This label is used for tail recursion, which is used in a few cases even
701when NO_RECURSE is not defined, in order to reduce the amount of stack that is
702used. Thanks to Ian Taylor for noticing this possibility and sending the
703original patch. */
704
705TAIL_RECURSE:
706
707/* OK, now we can get on with the real code of the function. Recursive calls
708are specified by the macro RMATCH and RRETURN is used to return. When
709NO_RECURSE is *not* defined, these just turn into a recursive call to match()
710and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
711defined). However, RMATCH isn't like a function call because it's quite a
712complicated macro. It has to be used in one particular way. This shouldn't,
713however, impact performance when true recursion is being used. */
714
715#ifdef SUPPORT_UTF
716utf = md->utf;       /* Local copy of the flag */
717#else
718utf = FALSE;
719#endif
720
721/* First check that we haven't called match() too many times, or that we
722haven't exceeded the recursive call limit. */
723
724if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
725if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
726
727/* At the start of a group with an unlimited repeat that may match an empty
728string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
729done this way to save having to use another function argument, which would take
730up space on the stack. See also MATCH_CONDASSERT below.
731
732When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
733such remembered pointers, to be checked when we hit the closing ket, in order
734to break infinite loops that match no characters. When match() is called in
735other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
736NOT be used with tail recursion, because the memory block that is used is on
737the stack, so a new one may be required for each match(). */
738
739if (md->match_function_type == MATCH_CBEGROUP)
740  {
741  newptrb.epb_saved_eptr = eptr;
742  newptrb.epb_prev = eptrb;
743  eptrb = &newptrb;
744  md->match_function_type = 0;
745  }
746
747/* Now start processing the opcodes. */
748
749for (;;)
750  {
751  minimize = possessive = FALSE;
752  op = *ecode;
753
754  switch(op)
755    {
756    case OP_MARK:
757    md->nomatch_mark = ecode + 2;
758    md->mark = NULL;    /* In case previously set by assertion */
759    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
760      eptrb, RM55);
761    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
762         md->mark == NULL) md->mark = ecode + 2;
763
764    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
765    argument, and we must check whether that argument matches this MARK's
766    argument. It is passed back in md->start_match_ptr (an overloading of that
767    variable). If it does match, we reset that variable to the current subject
768    position and return MATCH_SKIP. Otherwise, pass back the return code
769    unaltered. */
770
771    else if (rrc == MATCH_SKIP_ARG &&
772        STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
773      {
774      md->start_match_ptr = eptr;
775      RRETURN(MATCH_SKIP);
776      }
777    RRETURN(rrc);
778
779    case OP_FAIL:
780    RRETURN(MATCH_NOMATCH);
781
782    case OP_COMMIT:
783    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
784      eptrb, RM52);
785    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
786    RRETURN(MATCH_COMMIT);
787
788    case OP_PRUNE:
789    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
790      eptrb, RM51);
791    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
792    RRETURN(MATCH_PRUNE);
793
794    case OP_PRUNE_ARG:
795    md->nomatch_mark = ecode + 2;
796    md->mark = NULL;    /* In case previously set by assertion */
797    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
798      eptrb, RM56);
799    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
800         md->mark == NULL) md->mark = ecode + 2;
801    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
802    RRETURN(MATCH_PRUNE);
803
804    case OP_SKIP:
805    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
806      eptrb, RM53);
807    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
808    md->start_match_ptr = eptr;   /* Pass back current position */
809    RRETURN(MATCH_SKIP);
810
811    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
812    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
813    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
814    that failed and any that precede it (either they also failed, or were not
815    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
816    SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
817    set to the count of the one that failed. */
818
819    case OP_SKIP_ARG:
820    md->skip_arg_count++;
821    if (md->skip_arg_count <= md->ignore_skip_arg)
822      {
823      ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
824      break;
825      }
826    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
827      eptrb, RM57);
828    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829
830    /* Pass back the current skip name by overloading md->start_match_ptr and
831    returning the special MATCH_SKIP_ARG return code. This will either be
832    caught by a matching MARK, or get to the top, where it causes a rematch
833    with md->ignore_skip_arg set to the value of md->skip_arg_count. */
834
835    md->start_match_ptr = ecode + 2;
836    RRETURN(MATCH_SKIP_ARG);
837
838    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
839    the branch in which it occurs can be determined. Overload the start of
840    match pointer to do this. */
841
842    case OP_THEN:
843    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
844      eptrb, RM54);
845    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
846    md->start_match_ptr = ecode;
847    RRETURN(MATCH_THEN);
848
849    case OP_THEN_ARG:
850    md->nomatch_mark = ecode + 2;
851    md->mark = NULL;    /* In case previously set by assertion */
852    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
853      md, eptrb, RM58);
854    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
855         md->mark == NULL) md->mark = ecode + 2;
856    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
857    md->start_match_ptr = ecode;
858    RRETURN(MATCH_THEN);
859
860    /* Handle an atomic group that does not contain any capturing parentheses.
861    This can be handled like an assertion. Prior to 8.13, all atomic groups
862    were handled this way. In 8.13, the code was changed as below for ONCE, so
863    that backups pass through the group and thereby reset captured values.
864    However, this uses a lot more stack, so in 8.20, atomic groups that do not
865    contain any captures generate OP_ONCE_NC, which can be handled in the old,
866    less stack intensive way.
867
868    Check the alternative branches in turn - the matching won't pass the KET
869    for this kind of subpattern. If any one branch matches, we carry on as at
870    the end of a normal bracket, leaving the subject pointer, but resetting
871    the start-of-match value in case it was changed by \K. */
872
873    case OP_ONCE_NC:
874    prev = ecode;
875    saved_eptr = eptr;
876    save_mark = md->mark;
877    do
878      {
879      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
880      if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
881        {
882        mstart = md->start_match_ptr;
883        break;
884        }
885      if (rrc == MATCH_THEN)
886        {
887        next = ecode + GET(ecode,1);
888        if (md->start_match_ptr < next &&
889            (*ecode == OP_ALT || *next == OP_ALT))
890          rrc = MATCH_NOMATCH;
891        }
892
893      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
894      ecode += GET(ecode,1);
895      md->mark = save_mark;
896      }
897    while (*ecode == OP_ALT);
898
    /* If we hit the end of the group (which could be repeated), fail. */
900
901    if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
902
903    /* Continue as from after the group, updating the offsets high water
904    mark, since extracts may have been taken. */
905
906    do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
907
908    offset_top = md->end_offset_top;
909    eptr = md->end_match_ptr;
910
911    /* For a non-repeating ket, just continue at this level. This also
912    happens for a repeating ket if no characters were matched in the group.
913    This is the forcible breaking of infinite loops as implemented in Perl
914    5.005. */
915
916    if (*ecode == OP_KET || eptr == saved_eptr)
917      {
918      ecode += 1+LINK_SIZE;
919      break;
920      }
921
922    /* The repeating kets try the rest of the pattern or restart from the
923    preceding bracket, in the appropriate order. The second "call" of match()
924    uses tail recursion, to avoid using another stack frame. */
925
926    if (*ecode == OP_KETRMIN)
927      {
928      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
929      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
930      ecode = prev;
931      goto TAIL_RECURSE;
932      }
933    else  /* OP_KETRMAX */
934      {
935      RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
936      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
937      ecode += 1 + LINK_SIZE;
938      goto TAIL_RECURSE;
939      }
940    /* Control never gets here */
941
942    /* Handle a capturing bracket, other than those that are possessive with an
943    unlimited repeat. If there is space in the offset vector, save the current
944    subject position in the working slot at the top of the vector. We mustn't
945    change the current values of the data slot, because they may be set from a
946    previous iteration of this group, and be referred to by a reference inside
947    the group. A failure to match might occur after the group has succeeded,
948    if something later on doesn't match. For this reason, we need to restore
949    the working value and also the values of the final offsets, in case they
950    were set by a previous iteration of the same bracket.
951
952    If there isn't enough space in the offset vector, treat this as if it were
953    a non-capturing bracket. Don't worry about setting the flag for the error
954    case here; that is handled in the code for KET. */
955
956    case OP_CBRA:
957    case OP_SCBRA:
958    number = GET2(ecode, 1+LINK_SIZE);
959    offset = number << 1;
960
961#ifdef PCRE_DEBUG
962    printf("start bracket %d\n", number);
963    printf("subject=");
964    pchars(eptr, 16, TRUE, md);
965    printf("\n");
966#endif
967
968    if (offset < md->offset_max)
969      {
970      save_offset1 = md->offset_vector[offset];
971      save_offset2 = md->offset_vector[offset+1];
972      save_offset3 = md->offset_vector[md->offset_end - number];
973      save_capture_last = md->capture_last;
974      save_mark = md->mark;
975
976      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
977      md->offset_vector[md->offset_end - number] =
978        (int)(eptr - md->start_subject);
979
980      for (;;)
981        {
982        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
983        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
984          eptrb, RM1);
985        if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
986
987        /* If we backed up to a THEN, check whether it is within the current
988        branch by comparing the address of the THEN that is passed back with
989        the end of the branch. If it is within the current branch, and the
990        branch is one of two or more alternatives (it either starts or ends
991        with OP_ALT), we have reached the limit of THEN's action, so convert
992        the return code to NOMATCH, which will cause normal backtracking to
993        happen from now on. Otherwise, THEN is passed back to an outer
994        alternative. This implements Perl's treatment of parenthesized groups,
995        where a group not containing | does not affect the current alternative,
996        that is, (X) is NOT the same as (X|(*F)). */
997
998        if (rrc == MATCH_THEN)
999          {
1000          next = ecode + GET(ecode,1);
1001          if (md->start_match_ptr < next &&
1002              (*ecode == OP_ALT || *next == OP_ALT))
1003            rrc = MATCH_NOMATCH;
1004          }
1005
1006        /* Anything other than NOMATCH is passed back. */
1007
1008        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1009        md->capture_last = save_capture_last;
1010        ecode += GET(ecode, 1);
1011        md->mark = save_mark;
1012        if (*ecode != OP_ALT) break;
1013        }
1014
1015      DPRINTF(("bracket %d failed\n", number));
1016      md->offset_vector[offset] = save_offset1;
1017      md->offset_vector[offset+1] = save_offset2;
1018      md->offset_vector[md->offset_end - number] = save_offset3;
1019
1020      /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1021
1022      RRETURN(rrc);
1023      }
1024
1025    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1026    as a non-capturing bracket. */
1027
1028    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1029    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1030
1031    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1032
1033    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1034    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1035
1036    /* Non-capturing or atomic group, except for possessive with unlimited
1037    repeat and ONCE group with no captures. Loop for all the alternatives.
1038
1039    When we get to the final alternative within the brackets, we used to return
1040    the result of a recursive call to match() whatever happened so it was
1041    possible to reduce stack usage by turning this into a tail recursion,
1042    except in the case of a possibly empty group. However, now that there is
    the possibility of (*THEN) occurring in the final alternative, this
1044    optimization is no longer always possible.
1045
1046    We can optimize if we know there are no (*THEN)s in the pattern; at present
1047    this is the best that can be done.
1048
1049    MATCH_ONCE is returned when the end of an atomic group is successfully
1050    reached, but subsequent matching fails. It passes back up the tree (causing
1051    captured values to be reset) until the original atomic group level is
1052    reached. This is tested by comparing md->once_target with the start of the
1053    group. At this point, the return is converted into MATCH_NOMATCH so that
1054    previous backup points can be taken. */
1055
1056    case OP_ONCE:
1057    case OP_BRA:
1058    case OP_SBRA:
1059    DPRINTF(("start non-capturing bracket\n"));
1060
1061    for (;;)
1062      {
1063      if (op >= OP_SBRA || op == OP_ONCE)
1064        md->match_function_type = MATCH_CBEGROUP;
1065
1066      /* If this is not a possibly empty group, and there are no (*THEN)s in
1067      the pattern, and this is the final alternative, optimize as described
1068      above. */
1069
1070      else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1071        {
1072        ecode += PRIV(OP_lengths)[*ecode];
1073        goto TAIL_RECURSE;
1074        }
1075
1076      /* In all other cases, we have to make another call to match(). */
1077
1078      save_mark = md->mark;
1079      save_capture_last = md->capture_last;
1080      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1081        RM2);
1082
1083      /* See comment in the code for capturing groups above about handling
1084      THEN. */
1085
1086      if (rrc == MATCH_THEN)
1087        {
1088        next = ecode + GET(ecode,1);
1089        if (md->start_match_ptr < next &&
1090            (*ecode == OP_ALT || *next == OP_ALT))
1091          rrc = MATCH_NOMATCH;
1092        }
1093
1094      if (rrc != MATCH_NOMATCH)
1095        {
1096        if (rrc == MATCH_ONCE)
1097          {
1098          const pcre_uchar *scode = ecode;
1099          if (*scode != OP_ONCE)           /* If not at start, find it */
1100            {
1101            while (*scode == OP_ALT) scode += GET(scode, 1);
1102            scode -= GET(scode, 1);
1103            }
1104          if (md->once_target == scode) rrc = MATCH_NOMATCH;
1105          }
1106        RRETURN(rrc);
1107        }
1108      ecode += GET(ecode, 1);
1109      md->mark = save_mark;
1110      if (*ecode != OP_ALT) break;
1111      md->capture_last = save_capture_last;
1112      }
1113
1114    RRETURN(MATCH_NOMATCH);
1115
1116    /* Handle possessive capturing brackets with an unlimited repeat. We come
1117    here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1118    handled similarly to the normal case above. However, the matching is
1119    different. The end of these brackets will always be OP_KETRPOS, which
1120    returns MATCH_KETRPOS without going further in the pattern. By this means
1121    we can handle the group by iteration rather than recursion, thereby
1122    reducing the amount of stack needed. */
1123
1124    case OP_CBRAPOS:
1125    case OP_SCBRAPOS:
1126    allow_zero = FALSE;
1127
1128    POSSESSIVE_CAPTURE:
1129    number = GET2(ecode, 1+LINK_SIZE);
1130    offset = number << 1;
1131
1132#ifdef PCRE_DEBUG
1133    printf("start possessive bracket %d\n", number);
1134    printf("subject=");
1135    pchars(eptr, 16, TRUE, md);
1136    printf("\n");
1137#endif
1138
1139    if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
1140
1141    matched_once = FALSE;
1142    code_offset = (int)(ecode - md->start_code);
1143
1144    save_offset1 = md->offset_vector[offset];
1145    save_offset2 = md->offset_vector[offset+1];
1146    save_offset3 = md->offset_vector[md->offset_end - number];
1147    save_capture_last = md->capture_last;
1148
1149    DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1150
1151    /* Each time round the loop, save the current subject position for use
1152    when the group matches. For MATCH_MATCH, the group has matched, so we
1153    restart it with a new subject starting position, remembering that we had
1154    at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1155    usual. If we haven't matched any alternatives in any iteration, check to
1156    see if a previous iteration matched. If so, the group has matched;
1157    continue from afterwards. Otherwise it has failed; restore the previous
1158    capture values before returning NOMATCH. */
1159
1160    for (;;)
1161      {
1162      md->offset_vector[md->offset_end - number] =
1163        (int)(eptr - md->start_subject);
1164      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1165      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1166        eptrb, RM63);
1167      if (rrc == MATCH_KETRPOS)
1168        {
1169        offset_top = md->end_offset_top;
1170        ecode = md->start_code + code_offset;
1171        save_capture_last = md->capture_last;
1172        matched_once = TRUE;
1173        mstart = md->start_match_ptr;    /* In case \K changed it */
1174        if (eptr == md->end_match_ptr)   /* Matched an empty string */
1175          {
1176          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1177          break;
1178          }
1179        eptr = md->end_match_ptr;
1180        continue;
1181        }
1182
1183      /* See comment in the code for capturing groups above about handling
1184      THEN. */
1185
1186      if (rrc == MATCH_THEN)
1187        {
1188        next = ecode + GET(ecode,1);
1189        if (md->start_match_ptr < next &&
1190            (*ecode == OP_ALT || *next == OP_ALT))
1191          rrc = MATCH_NOMATCH;
1192        }
1193
1194      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1195      md->capture_last = save_capture_last;
1196      ecode += GET(ecode, 1);
1197      if (*ecode != OP_ALT) break;
1198      }
1199
1200    if (!matched_once)
1201      {
1202      md->offset_vector[offset] = save_offset1;
1203      md->offset_vector[offset+1] = save_offset2;
1204      md->offset_vector[md->offset_end - number] = save_offset3;
1205      }
1206
1207    if (allow_zero || matched_once)
1208      {
1209      ecode += 1 + LINK_SIZE;
1210      break;
1211      }
1212
1213    RRETURN(MATCH_NOMATCH);
1214
1215    /* Non-capturing possessive bracket with unlimited repeat. We come here
1216    from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1217    without the capturing complication. It is written out separately for speed
1218    and cleanliness. */
1219
1220    case OP_BRAPOS:
1221    case OP_SBRAPOS:
1222    allow_zero = FALSE;
1223
1224    POSSESSIVE_NON_CAPTURE:
1225    matched_once = FALSE;
1226    code_offset = (int)(ecode - md->start_code);
1227    save_capture_last = md->capture_last;
1228
1229    for (;;)
1230      {
1231      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1232      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1233        eptrb, RM48);
1234      if (rrc == MATCH_KETRPOS)
1235        {
1236        offset_top = md->end_offset_top;
1237        ecode = md->start_code + code_offset;
1238        matched_once = TRUE;
1239        mstart = md->start_match_ptr;   /* In case \K reset it */
1240        if (eptr == md->end_match_ptr)  /* Matched an empty string */
1241          {
1242          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1243          break;
1244          }
1245        eptr = md->end_match_ptr;
1246        continue;
1247        }
1248
1249      /* See comment in the code for capturing groups above about handling
1250      THEN. */
1251
1252      if (rrc == MATCH_THEN)
1253        {
1254        next = ecode + GET(ecode,1);
1255        if (md->start_match_ptr < next &&
1256            (*ecode == OP_ALT || *next == OP_ALT))
1257          rrc = MATCH_NOMATCH;
1258        }
1259
1260      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1261      ecode += GET(ecode, 1);
1262      if (*ecode != OP_ALT) break;
1263      md->capture_last = save_capture_last;
1264      }
1265
1266    if (matched_once || allow_zero)
1267      {
1268      ecode += 1 + LINK_SIZE;
1269      break;
1270      }
1271    RRETURN(MATCH_NOMATCH);
1272
1273    /* Control never reaches here. */
1274
1275    /* Conditional group: compilation checked that there are no more than two
1276    branches. If the condition is false, skipping the first branch takes us
1277    past the end of the item if there is only one branch, but that's exactly
1278    what we want. */
1279
1280    case OP_COND:
1281    case OP_SCOND:
1282
1283    /* The variable codelink will be added to ecode when the condition is
1284    false, to get to the second branch. Setting it to the offset to the ALT
1285    or KET, then incrementing ecode achieves this effect. We now have ecode
1286    pointing to the condition or callout. */
1287
1288    codelink = GET(ecode, 1);   /* Offset to the second branch */
1289    ecode += 1 + LINK_SIZE;     /* From this opcode */
1290
1291    /* Because of the way auto-callout works during compile, a callout item is
1292    inserted between OP_COND and an assertion condition. */
1293
1294    if (*ecode == OP_CALLOUT)
1295      {
1296      if (PUBL(callout) != NULL)
1297        {
1298        PUBL(callout_block) cb;
        cb.version          = 2;   /* Version 2 of the callout block */
1300        cb.callout_number   = ecode[1];
1301        cb.offset_vector    = md->offset_vector;
1302#if defined COMPILE_PCRE8
1303        cb.subject          = (PCRE_SPTR)md->start_subject;
1304#elif defined COMPILE_PCRE16
1305        cb.subject          = (PCRE_SPTR16)md->start_subject;
1306#elif defined COMPILE_PCRE32
1307        cb.subject          = (PCRE_SPTR32)md->start_subject;
1308#endif
1309        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1310        cb.start_match      = (int)(mstart - md->start_subject);
1311        cb.current_position = (int)(eptr - md->start_subject);
1312        cb.pattern_position = GET(ecode, 2);
1313        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1314        cb.capture_top      = offset_top/2;
1315        cb.capture_last     = md->capture_last & CAPLMASK;
1316        /* Internal change requires this for API compatibility. */
1317        if (cb.capture_last == 0) cb.capture_last = -1;
1318        cb.callout_data     = md->callout_data;
1319        cb.mark             = md->nomatch_mark;
1320        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1321        if (rrc < 0) RRETURN(rrc);
1322        }
1323
1324      /* Advance ecode past the callout, so it now points to the condition. We
1325      must adjust codelink so that the value of ecode+codelink is unchanged. */
1326
1327      ecode += PRIV(OP_lengths)[OP_CALLOUT];
1328      codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1329      }
1330
1331    /* Test the various possible conditions */
1332
1333    condition = FALSE;
1334    switch(condcode = *ecode)
1335      {
1336      case OP_RREF:         /* Numbered group recursion test */
1337      if (md->recursive != NULL)     /* Not recursing => FALSE */
1338        {
1339        unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
1340        condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1341        }
1342      break;
1343
1344      case OP_DNRREF:       /* Duplicate named group recursion test */
1345      if (md->recursive != NULL)
1346        {
1347        int count = GET2(ecode, 1 + IMM2_SIZE);
1348        pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1349        while (count-- > 0)
1350          {
1351          unsigned int recno = GET2(slot, 0);
1352          condition = recno == md->recursive->group_num;
1353          if (condition) break;
1354          slot += md->name_entry_size;
1355          }
1356        }
1357      break;
1358
1359      case OP_CREF:         /* Numbered group used test */
1360      offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
1361      condition = offset < offset_top && md->offset_vector[offset] >= 0;
1362      break;
1363
1364      case OP_DNCREF:      /* Duplicate named group used test */
1365        {
1366        int count = GET2(ecode, 1 + IMM2_SIZE);
1367        pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1368        while (count-- > 0)
1369          {
1370          offset = GET2(slot, 0) << 1;
1371          condition = offset < offset_top && md->offset_vector[offset] >= 0;
1372          if (condition) break;
1373          slot += md->name_entry_size;
1374          }
1375        }
1376      break;
1377
1378      case OP_DEF:     /* DEFINE - always false */
1379      case OP_FAIL:    /* From optimized (?!) condition */
1380      break;
1381
1382      /* The condition is an assertion. Call match() to evaluate it - setting
1383      md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
1384      of an assertion. */
1385
1386      default:
1387      md->match_function_type = MATCH_CONDASSERT;
1388      RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1389      if (rrc == MATCH_MATCH)
1390        {
1391        if (md->end_offset_top > offset_top)
1392          offset_top = md->end_offset_top;  /* Captures may have happened */
1393        condition = TRUE;
1394
1395        /* Advance ecode past the assertion to the start of the first branch,
1396        but adjust it so that the general choosing code below works. If the
1397        assertion has a quantifier that allows zero repeats we must skip over
1398        the BRAZERO. This is a lunatic thing to do, but somebody did! */
1399
1400        if (*ecode == OP_BRAZERO) ecode++;
1401        ecode += GET(ecode, 1);
1402        while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1403        ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1404        }
1405
1406      /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1407      assertion; it is therefore treated as NOMATCH. Any other return is an
1408      error. */
1409
1410      else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1411        {
1412        RRETURN(rrc);         /* Need braces because of following else */
1413        }
1414      break;
1415      }
1416
1417    /* Choose branch according to the condition */
1418
1419    ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1420
1421    /* We are now at the branch that is to be obeyed. As there is only one, we
1422    can use tail recursion to avoid using another stack frame, except when
1423    there is unlimited repeat of a possibly empty group. In the latter case, a
1424    recursive call to match() is always required, unless the second alternative
1425    doesn't exist, in which case we can just plough on. Note that, for
1426    compatibility with Perl, the | in a conditional group is NOT treated as
1427    creating two alternatives. If a THEN is encountered in the branch, it
1428    propagates out to the enclosing alternative (unless nested in a deeper set
1429    of alternatives, of course). */
1430
1431    if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
1432      {
1433      if (op != OP_SCOND)
1434        {
1435        goto TAIL_RECURSE;
1436        }
1437
1438      md->match_function_type = MATCH_CBEGROUP;
1439      RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1440      RRETURN(rrc);
1441      }
1442
1443     /* Condition false & no alternative; continue after the group. */
1444
1445    else
1446      {
1447      }
1448    break;
1449
1450
1451    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1452    to close any currently open capturing brackets. */
1453
1454    case OP_CLOSE:
1455    number = GET2(ecode, 1);   /* Must be less than 65536 */
1456    offset = number << 1;
1457
1458#ifdef PCRE_DEBUG
1459      printf("end bracket %d at *ACCEPT", number);
1460      printf("\n");
1461#endif
1462
1463    md->capture_last = (md->capture_last & OVFLMASK) | number;
1464    if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1465      {
1466      md->offset_vector[offset] =
1467        md->offset_vector[md->offset_end - number];
1468      md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1469
1470      /* If this group is at or above the current highwater mark, ensure that
1471      any groups between the current high water mark and this group are marked
1472      unset and then update the high water mark. */
1473
1474      if (offset >= offset_top)
1475        {
1476        register int *iptr = md->offset_vector + offset_top;
1477        register int *iend = md->offset_vector + offset;
1478        while (iptr < iend) *iptr++ = -1;
1479        offset_top = offset + 2;
1480        }
1481      }
1482    ecode += 1 + IMM2_SIZE;
1483    break;
1484
1485
1486    /* End of the pattern, either real or forced. */
1487
1488    case OP_END:
1489    case OP_ACCEPT:
1490    case OP_ASSERT_ACCEPT:
1491
1492    /* If we have matched an empty string, fail if not in an assertion and not
1493    in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1494    is set and we have matched at the start of the subject. In both cases,
1495    backtracking will then try other alternatives, if any. */
1496
1497    if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1498         md->recursive == NULL &&
1499         (md->notempty ||
1500           (md->notempty_atstart &&
1501             mstart == md->start_subject + md->start_offset)))
1502      RRETURN(MATCH_NOMATCH);
1503
1504    /* Otherwise, we have a match. */
1505
1506    md->end_match_ptr = eptr;           /* Record where we ended */
1507    md->end_offset_top = offset_top;    /* and how many extracts were taken */
1508    md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1509
1510    /* For some reason, the macros don't work properly if an expression is
1511    given as the argument to RRETURN when the heap is in use. */
1512
1513    rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1514    RRETURN(rrc);
1515
1516    /* Assertion brackets. Check the alternative branches in turn - the
1517    matching won't pass the KET for an assertion. If any one branch matches,
1518    the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1519    start of each branch to move the current point backwards, so the code at
1520    this level is identical to the lookahead case. When the assertion is part
1521    of a condition, we want to return immediately afterwards. The caller of
1522    this incarnation of the match() function will have set MATCH_CONDASSERT in
    md->match_function_type, and one of these opcodes will be the first opcode
1524    that is processed. We use a local variable that is preserved over calls to
1525    match() to remember this case. */
1526
1527    case OP_ASSERT:
1528    case OP_ASSERTBACK:
1529    save_mark = md->mark;
1530    if (md->match_function_type == MATCH_CONDASSERT)
1531      {
1532      condassert = TRUE;
1533      md->match_function_type = 0;
1534      }
1535    else condassert = FALSE;
1536
1537    /* Loop for each branch */
1538
1539    do
1540      {
1541      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1542
1543      /* A match means that the assertion is true; break out of the loop
1544      that matches its alternatives. */
1545
1546      if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1547        {
1548        mstart = md->start_match_ptr;   /* In case \K reset it */
1549        break;
1550        }
1551
1552      /* If not matched, restore the previous mark setting. */
1553
1554      md->mark = save_mark;
1555
1556      /* See comment in the code for capturing groups above about handling
1557      THEN. */
1558
1559      if (rrc == MATCH_THEN)
1560        {
1561        next = ecode + GET(ecode,1);
1562        if (md->start_match_ptr < next &&
1563            (*ecode == OP_ALT || *next == OP_ALT))
1564          rrc = MATCH_NOMATCH;
1565        }
1566
1567      /* Anything other than NOMATCH causes the entire assertion to fail,
1568      passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1569      uncaptured THEN, which means they take their normal effect. This
1570      consistent approach does not always have exactly the same effect as in
1571      Perl. */
1572
1573      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1574      ecode += GET(ecode, 1);
1575      }
1576    while (*ecode == OP_ALT);   /* Continue for next alternative */
1577
1578    /* If we have tried all the alternative branches, the assertion has
1579    failed. If not, we broke out after a match. */
1580
1581    if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1582
1583    /* If checking an assertion for a condition, return MATCH_MATCH. */
1584
1585    if (condassert) RRETURN(MATCH_MATCH);
1586
1587    /* Continue from after a successful assertion, updating the offsets high
1588    water mark, since extracts may have been taken during the assertion. */
1589
1590    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1591    ecode += 1 + LINK_SIZE;
1592    offset_top = md->end_offset_top;
1593    continue;
1594
1595    /* Negative assertion: all branches must fail to match for the assertion to
1596    succeed. */
1597
1598    case OP_ASSERT_NOT:
1599    case OP_ASSERTBACK_NOT:
1600    save_mark = md->mark;
1601    if (md->match_function_type == MATCH_CONDASSERT)
1602      {
1603      condassert = TRUE;
1604      md->match_function_type = 0;
1605      }
1606    else condassert = FALSE;
1607
1608    /* Loop for each alternative branch. */
1609
1610    do
1611      {
1612      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1613      md->mark = save_mark;   /* Always restore the mark setting */
1614
1615      switch(rrc)
1616        {
1617        case MATCH_MATCH:            /* A successful match means */
1618        case MATCH_ACCEPT:           /* the assertion has failed. */
1619        RRETURN(MATCH_NOMATCH);
1620
1621        case MATCH_NOMATCH:          /* Carry on with next branch */
1622        break;
1623
1624        /* See comment in the code for capturing groups above about handling
1625        THEN. */
1626
1627        case MATCH_THEN:
1628        next = ecode + GET(ecode,1);
1629        if (md->start_match_ptr < next &&
1630            (*ecode == OP_ALT || *next == OP_ALT))
1631          {
1632          rrc = MATCH_NOMATCH;
1633          break;
1634          }
1635        /* Otherwise fall through. */
1636
1637        /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1638        assertion to fail to match, without considering any more alternatives.
1639        Failing to match means the assertion is true. This is a consistent
1640        approach, but does not always have the same effect as in Perl. */
1641
1642        case MATCH_COMMIT:
1643        case MATCH_SKIP:
1644        case MATCH_SKIP_ARG:
1645        case MATCH_PRUNE:
1646        do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1647        goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1648
1649        /* Anything else is an error */
1650
1651        default:
1652        RRETURN(rrc);
1653        }
1654
1655      /* Continue with next branch */
1656
1657      ecode += GET(ecode,1);
1658      }
1659    while (*ecode == OP_ALT);
1660
1661    /* All branches in the assertion failed to match. */
1662
1663    NEG_ASSERT_TRUE:
1664    if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1665    ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1666    continue;
1667
1668    /* Move the subject pointer back. This occurs only at the start of
1669    each branch of a lookbehind assertion. If we are too close to the start to
1670    move back, this match function fails. When working with UTF-8 we move
1671    back a number of characters, not bytes. */
1672
1673    case OP_REVERSE:
1674#ifdef SUPPORT_UTF
1675    if (utf)
1676      {
1677      i = GET(ecode, 1);
1678      while (i-- > 0)
1679        {
1680        eptr--;
1681        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1682        BACKCHAR(eptr);
1683        }
1684      }
1685    else
1686#endif
1687
1688    /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1689
1690      {
1691      eptr -= GET(ecode, 1);
1692      if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1693      }
1694
1695    /* Save the earliest consulted character, then skip to next op code */
1696
1697    if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1698    ecode += 1 + LINK_SIZE;
1699    break;
1700
1701    /* The callout item calls an external function, if one is provided, passing
1702    details of the match so far. This is mainly for debugging, though the
1703    function is able to force a failure. */
1704
1705    case OP_CALLOUT:
1706    if (PUBL(callout) != NULL)
1707      {
1708      PUBL(callout_block) cb;
1709      cb.version          = 2;   /* Version 2 of the callout block */
1710      cb.callout_number   = ecode[1];
1711      cb.offset_vector    = md->offset_vector;
1712#if defined COMPILE_PCRE8
1713      cb.subject          = (PCRE_SPTR)md->start_subject;
1714#elif defined COMPILE_PCRE16
1715      cb.subject          = (PCRE_SPTR16)md->start_subject;
1716#elif defined COMPILE_PCRE32
1717      cb.subject          = (PCRE_SPTR32)md->start_subject;
1718#endif
1719      cb.subject_length   = (int)(md->end_subject - md->start_subject);
1720      cb.start_match      = (int)(mstart - md->start_subject);
1721      cb.current_position = (int)(eptr - md->start_subject);
1722      cb.pattern_position = GET(ecode, 2);
1723      cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1724      cb.capture_top      = offset_top/2;
1725      cb.capture_last     = md->capture_last & CAPLMASK;
1726      /* Internal change requires this for API compatibility. */
1727      if (cb.capture_last == 0) cb.capture_last = -1;
1728      cb.callout_data     = md->callout_data;
1729      cb.mark             = md->nomatch_mark;
1730      if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1731      if (rrc < 0) RRETURN(rrc);
1732      }
1733    ecode += 2 + 2*LINK_SIZE;
1734    break;
1735
1736    /* Recursion either matches the current regex, or some subexpression. The
1737    offset data is the offset to the starting bracket from the start of the
1738    whole pattern. (This is so that it works from duplicated subpatterns.)
1739
1740    The state of the capturing groups is preserved over recursion, and
1741    re-instated afterwards. We don't know how many are started and not yet
1742    finished (offset_top records the completed total) so we just have to save
1743    all the potential data. There may be up to 65535 such values, which is too
1744    large to put on the stack, but using malloc for small numbers seems
1745    expensive. As a compromise, the stack is used when there are no more than
1746    REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1747
1748    There are also other values that have to be saved. We use a chained
1749    sequence of blocks that actually live on the stack. Thanks to Robin Houston
1750    for the original version of this logic. It has, however, been hacked around
1751    a lot, so he is not to blame for the current way it works. */
1752
1753    case OP_RECURSE:
1754      {
1755      recursion_info *ri;
1756      unsigned int recno;
1757
1758      callpat = md->start_code + GET(ecode, 1);
1759      recno = (callpat == md->start_code)? 0 :
1760        GET2(callpat, 1 + LINK_SIZE);
1761
1762      /* Check for repeating a recursion without advancing the subject pointer.
1763      This should catch convoluted mutual recursions. (Some simple cases are
1764      caught at compile time.) */
1765
1766      for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1767        if (recno == ri->group_num && eptr == ri->subject_position)
1768          RRETURN(PCRE_ERROR_RECURSELOOP);
1769
1770      /* Add to "recursing stack" */
1771
1772      new_recursive.group_num = recno;
1773      new_recursive.saved_capture_last = md->capture_last;
1774      new_recursive.subject_position = eptr;
1775      new_recursive.prevrec = md->recursive;
1776      md->recursive = &new_recursive;
1777
1778      /* Where to continue from afterwards */
1779
1780      ecode += 1 + LINK_SIZE;
1781
1782      /* Now save the offset data */
1783
1784      new_recursive.saved_max = md->offset_end;
1785      if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1786        new_recursive.offset_save = stacksave;
1787      else
1788        {
1789        new_recursive.offset_save =
1790          (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1791        if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1792        }
1793      memcpy(new_recursive.offset_save, md->offset_vector,
1794            new_recursive.saved_max * sizeof(int));
1795
1796      /* OK, now we can do the recursion. After processing each alternative,
1797      restore the offset data and the last captured value. If there were nested
1798      recursions, md->recursive might be changed, so reset it before looping.
1799      */
1800
1801      DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1802      cbegroup = (*callpat >= OP_SBRA);
1803      do
1804        {
1805        if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1806        RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1807          md, eptrb, RM6);
1808        memcpy(md->offset_vector, new_recursive.offset_save,
1809            new_recursive.saved_max * sizeof(int));
1810        md->capture_last = new_recursive.saved_capture_last;
1811        md->recursive = new_recursive.prevrec;
1812        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1813          {
1814          DPRINTF(("Recursion matched\n"));
1815          if (new_recursive.offset_save != stacksave)
1816            (PUBL(free))(new_recursive.offset_save);
1817
1818          /* Set where we got to in the subject, and reset the start in case
1819          it was changed by \K. This *is* propagated back out of a recursion,
1820          for Perl compatibility. */
1821
1822          eptr = md->end_match_ptr;
1823          mstart = md->start_match_ptr;
1824          goto RECURSION_MATCHED;        /* Exit loop; end processing */
1825          }
1826
1827        /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1828        recursion; they cause a NOMATCH for the entire recursion. These codes
1829        are defined in a range that can be tested for. */
1830
1831        if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1832          {
1833          if (new_recursive.offset_save != stacksave)
1834            (PUBL(free))(new_recursive.offset_save);
1835          RRETURN(MATCH_NOMATCH);
1836          }
1837
1838        /* Any return code other than NOMATCH is an error. */
1839
1840        if (rrc != MATCH_NOMATCH)
1841          {
1842          DPRINTF(("Recursion gave error %d\n", rrc));
1843          if (new_recursive.offset_save != stacksave)
1844            (PUBL(free))(new_recursive.offset_save);
1845          RRETURN(rrc);
1846          }
1847
1848        md->recursive = &new_recursive;
1849        callpat += GET(callpat, 1);
1850        }
1851      while (*callpat == OP_ALT);
1852
1853      DPRINTF(("Recursion didn't match\n"));
1854      md->recursive = new_recursive.prevrec;
1855      if (new_recursive.offset_save != stacksave)
1856        (PUBL(free))(new_recursive.offset_save);
1857      RRETURN(MATCH_NOMATCH);
1858      }
1859
1860    RECURSION_MATCHED:
1861    break;
1862
1863    /* An alternation is the end of a branch; scan along to find the end of the
1864    bracketed group and go to there. */
1865
1866    case OP_ALT:
1867    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1868    break;
1869
1870    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1871    indicating that it may occur zero times. It may repeat infinitely, or not
1872    at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1873    with fixed upper repeat limits are compiled as a number of copies, with the
1874    optional ones preceded by BRAZERO or BRAMINZERO. */
1875
1876    case OP_BRAZERO:
1877    next = ecode + 1;
1878    RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1879    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1880    do next += GET(next, 1); while (*next == OP_ALT);
1881    ecode = next + 1 + LINK_SIZE;
1882    break;
1883
1884    case OP_BRAMINZERO:
1885    next = ecode + 1;
1886    do next += GET(next, 1); while (*next == OP_ALT);
1887    RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1888    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1889    ecode++;
1890    break;
1891
1892    case OP_SKIPZERO:
1893    next = ecode+1;
1894    do next += GET(next,1); while (*next == OP_ALT);
1895    ecode = next + 1 + LINK_SIZE;
1896    break;
1897
1898    /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1899    here; just jump to the group, with allow_zero set TRUE. */
1900
1901    case OP_BRAPOSZERO:
1902    op = *(++ecode);
1903    allow_zero = TRUE;
1904    if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1905      goto POSSESSIVE_NON_CAPTURE;
1906
1907    /* End of a group, repeated or non-repeating. */
1908
1909    case OP_KET:
1910    case OP_KETRMIN:
1911    case OP_KETRMAX:
1912    case OP_KETRPOS:
1913    prev = ecode - GET(ecode, 1);
1914
1915    /* If this was a group that remembered the subject start, in order to break
1916    infinite repeats of empty string matches, retrieve the subject start from
1917    the chain. Otherwise, set it NULL. */
1918
1919    if (*prev >= OP_SBRA || *prev == OP_ONCE)
1920      {
1921      saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1922      eptrb = eptrb->epb_prev;              /* Backup to previous group */
1923      }
1924    else saved_eptr = NULL;
1925
1926    /* If we are at the end of an assertion group or a non-capturing atomic
1927    group, stop matching and return MATCH_MATCH, but record the current high
1928    water mark for use by positive assertions. We also need to record the match
1929    start in case it was changed by \K. */
1930
1931    if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1932         *prev == OP_ONCE_NC)
1933      {
1934      md->end_match_ptr = eptr;      /* For ONCE_NC */
1935      md->end_offset_top = offset_top;
1936      md->start_match_ptr = mstart;
1937      RRETURN(MATCH_MATCH);         /* Sets md->mark */
1938      }
1939
1940    /* For capturing groups we have to check the group number back at the start
1941    and if necessary complete handling an extraction by setting the offsets and
1942    bumping the high water mark. Whole-pattern recursion is coded as a recurse
1943    into group 0, so it won't be picked up here. Instead, we catch it when the
1944    OP_END is reached. Other recursion is handled here. We just have to record
1945    the current subject position and start match pointer and give a MATCH
1946    return. */
1947
1948    if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1949        *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1950      {
1951      number = GET2(prev, 1+LINK_SIZE);
1952      offset = number << 1;
1953
1954#ifdef PCRE_DEBUG
1955      printf("end bracket %d", number);
1956      printf("\n");
1957#endif
1958
1959      /* Handle a recursively called group. */
1960
1961      if (md->recursive != NULL && md->recursive->group_num == number)
1962        {
1963        md->end_match_ptr = eptr;
1964        md->start_match_ptr = mstart;
1965        RRETURN(MATCH_MATCH);
1966        }
1967
1968      /* Deal with capturing */
1969
1970      md->capture_last = (md->capture_last & OVFLMASK) | number;
1971      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1972        {
1973        /* If offset is greater than offset_top, it means that we are
1974        "skipping" a capturing group, and that group's offsets must be marked
1975        unset. In earlier versions of PCRE, all the offsets were unset at the
1976        start of matching, but this doesn't work because atomic groups and
1977        assertions can cause a value to be set that should later be unset.
1978        Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1979        part of the atomic group, but this is not on the final matching path,
1980        so must be unset when 2 is set. (If there is no group 2, there is no
1981        problem, because offset_top will then be 2, indicating no capture.) */
1982
1983        if (offset > offset_top)
1984          {
1985          register int *iptr = md->offset_vector + offset_top;
1986          register int *iend = md->offset_vector + offset;
1987          while (iptr < iend) *iptr++ = -1;
1988          }
1989
1990        /* Now make the extraction */
1991
1992        md->offset_vector[offset] =
1993          md->offset_vector[md->offset_end - number];
1994        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1995        if (offset_top <= offset) offset_top = offset + 2;
1996        }
1997      }
1998
1999    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
2000    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
2001    at a time from the outer level, thus saving stack. This must precede the
2002    empty string test - in this case that test is done at the outer level. */
2003
2004    if (*ecode == OP_KETRPOS)
2005      {
2006      md->start_match_ptr = mstart;    /* In case \K reset it */
2007      md->end_match_ptr = eptr;
2008      md->end_offset_top = offset_top;
2009      RRETURN(MATCH_KETRPOS);
2010      }
2011
2012    /* For an ordinary non-repeating ket, just continue at this level. This
2013    also happens for a repeating ket if no characters were matched in the
2014    group. This is the forcible breaking of infinite loops as implemented in
2015    Perl 5.005. For a non-repeating atomic group that includes captures,
2016    establish a backup point by processing the rest of the pattern at a lower
2017    level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2018    original OP_ONCE level, thereby bypassing intermediate backup points, but
2019    resetting any captures that happened along the way. */
2020
2021    if (*ecode == OP_KET || eptr == saved_eptr)
2022      {
2023      if (*prev == OP_ONCE)
2024        {
2025        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
2026        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2027        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2028        RRETURN(MATCH_ONCE);
2029        }
2030      ecode += 1 + LINK_SIZE;    /* Carry on at this level */
2031      break;
2032      }
2033
2034    /* The normal repeating kets try the rest of the pattern or restart from
2035    the preceding bracket, in the appropriate order. In the second case, we can
2036    use tail recursion to avoid using another stack frame, unless we have
2037    an atomic group or an unlimited repeat of a group that can match an empty
2038    string. */
2039
2040    if (*ecode == OP_KETRMIN)
2041      {
2042      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2043      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2044      if (*prev == OP_ONCE)
2045        {
2046        RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2047        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2049        RRETURN(MATCH_ONCE);
2050        }
2051      if (*prev >= OP_SBRA)    /* Could match an empty string */
2052        {
2053        RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2054        RRETURN(rrc);
2055        }
2056      ecode = prev;
2057      goto TAIL_RECURSE;
2058      }
2059    else  /* OP_KETRMAX */
2060      {
2061      RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2062      if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2063      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2064      if (*prev == OP_ONCE)
2065        {
2066        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2067        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2068        md->once_target = prev;
2069        RRETURN(MATCH_ONCE);
2070        }
2071      ecode += 1 + LINK_SIZE;
2072      goto TAIL_RECURSE;
2073      }
2074    /* Control never gets here */
2075
2076    /* Not multiline mode: start of subject assertion, unless notbol. */
2077
2078    case OP_CIRC:
2079    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2080
2081    /* Start of subject assertion */
2082
2083    case OP_SOD:
2084    if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2085    ecode++;
2086    break;
2087
2088    /* Multiline mode: start of subject unless notbol, or after any newline. */
2089
2090    case OP_CIRCM:
2091    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2092    if (eptr != md->start_subject &&
2093        (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2094      RRETURN(MATCH_NOMATCH);
2095    ecode++;
2096    break;
2097
2098    /* Start of match assertion */
2099
2100    case OP_SOM:
2101    if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2102    ecode++;
2103    break;
2104
2105    /* Reset the start of match point */
2106
2107    case OP_SET_SOM:
2108    mstart = eptr;
2109    ecode++;
2110    break;
2111
2112    /* Multiline mode: assert before any newline, or before end of subject
2113    unless noteol is set. */
2114
2115    case OP_DOLLM:
2116    if (eptr < md->end_subject)
2117      {
2118      if (!IS_NEWLINE(eptr))
2119        {
2120        if (md->partial != 0 &&
2121            eptr + 1 >= md->end_subject &&
2122            NLBLOCK->nltype == NLTYPE_FIXED &&
2123            NLBLOCK->nllen == 2 &&
2124            UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2125          {
2126          md->hitend = TRUE;
2127          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2128          }
2129        RRETURN(MATCH_NOMATCH);
2130        }
2131      }
2132    else
2133      {
2134      if (md->noteol) RRETURN(MATCH_NOMATCH);
2135      SCHECK_PARTIAL();
2136      }
2137    ecode++;
2138    break;
2139
2140    /* Not multiline mode: assert before a terminating newline or before end of
2141    subject unless noteol is set. */
2142
2143    case OP_DOLL:
2144    if (md->noteol) RRETURN(MATCH_NOMATCH);
2145    if (!md->endonly) goto ASSERT_NL_OR_EOS;
2146
2147    /* ... else fall through for endonly */
2148
2149    /* End of subject assertion (\z) */
2150
2151    case OP_EOD:
2152    if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2153    SCHECK_PARTIAL();
2154    ecode++;
2155    break;
2156
2157    /* End of subject or ending \n assertion (\Z) */
2158
2159    case OP_EODN:
2160    ASSERT_NL_OR_EOS:
2161    if (eptr < md->end_subject &&
2162        (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2163      {
2164      if (md->partial != 0 &&
2165          eptr + 1 >= md->end_subject &&
2166          NLBLOCK->nltype == NLTYPE_FIXED &&
2167          NLBLOCK->nllen == 2 &&
2168          UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2169        {
2170        md->hitend = TRUE;
2171        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2172        }
2173      RRETURN(MATCH_NOMATCH);
2174      }
2175
2176    /* Either at end of string or \n before end. */
2177
2178    SCHECK_PARTIAL();
2179    ecode++;
2180    break;
2181
2182    /* Word boundary assertions */
2183
2184    case OP_NOT_WORD_BOUNDARY:
2185    case OP_WORD_BOUNDARY:
2186      {
2187
2188      /* Find out if the previous and current characters are "word" characters.
2189      It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2190      be "non-word" characters. Remember the earliest consulted character for
2191      partial matching. */
2192
2193#ifdef SUPPORT_UTF
2194      if (utf)
2195        {
2196        /* Get status of previous character */
2197
2198        if (eptr == md->start_subject) prev_is_word = FALSE; else
2199          {
2200          PCRE_PUCHAR lastptr = eptr - 1;
2201          BACKCHAR(lastptr);
2202          if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2203          GETCHAR(c, lastptr);
2204#ifdef SUPPORT_UCP
2205          if (md->use_ucp)
2206            {
2207            if (c == '_') prev_is_word = TRUE; else
2208              {
2209              int cat = UCD_CATEGORY(c);
2210              prev_is_word = (cat == ucp_L || cat == ucp_N);
2211              }
2212            }
2213          else
2214#endif
2215          prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2216          }
2217
2218        /* Get status of next character */
2219
2220        if (eptr >= md->end_subject)
2221          {
2222          SCHECK_PARTIAL();
2223          cur_is_word = FALSE;
2224          }
2225        else
2226          {
2227          GETCHAR(c, eptr);
2228#ifdef SUPPORT_UCP
2229          if (md->use_ucp)
2230            {
2231            if (c == '_') cur_is_word = TRUE; else
2232              {
2233              int cat = UCD_CATEGORY(c);
2234              cur_is_word = (cat == ucp_L || cat == ucp_N);
2235              }
2236            }
2237          else
2238#endif
2239          cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2240          }
2241        }
2242      else
2243#endif
2244
2245      /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2246      consistency with the behaviour of \w we do use it in this case. */
2247
2248        {
2249        /* Get status of previous character */
2250
2251        if (eptr == md->start_subject) prev_is_word = FALSE; else
2252          {
2253          if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2254#ifdef SUPPORT_UCP
2255          if (md->use_ucp)
2256            {
2257            c = eptr[-1];
2258            if (c == '_') prev_is_word = TRUE; else
2259              {
2260              int cat = UCD_CATEGORY(c);
2261              prev_is_word = (cat == ucp_L || cat == ucp_N);
2262              }
2263            }
2264          else
2265#endif
2266          prev_is_word = MAX_255(eptr[-1])
2267            && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2268          }
2269
2270        /* Get status of next character */
2271
2272        if (eptr >= md->end_subject)
2273          {
2274          SCHECK_PARTIAL();
2275          cur_is_word = FALSE;
2276          }
2277        else
2278#ifdef SUPPORT_UCP
2279        if (md->use_ucp)
2280          {
2281          c = *eptr;
2282          if (c == '_') cur_is_word = TRUE; else
2283            {
2284            int cat = UCD_CATEGORY(c);
2285            cur_is_word = (cat == ucp_L || cat == ucp_N);
2286            }
2287          }
2288        else
2289#endif
2290        cur_is_word = MAX_255(*eptr)
2291          && ((md->ctypes[*eptr] & ctype_word) != 0);
2292        }
2293
2294      /* Now see if the situation is what we want */
2295
2296      if ((*ecode++ == OP_WORD_BOUNDARY)?
2297           cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2298        RRETURN(MATCH_NOMATCH);
2299      }
2300    break;
2301
2302    /* Match any single character type except newline; have to take care with
2303    CRLF newlines and partial matching. */
2304
2305    case OP_ANY:
2306    if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2307    if (md->partial != 0 &&
2308        eptr + 1 >= md->end_subject &&
2309        NLBLOCK->nltype == NLTYPE_FIXED &&
2310        NLBLOCK->nllen == 2 &&
2311        UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2312      {
2313      md->hitend = TRUE;
2314      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2315      }
2316
2317    /* Fall through */
2318
2319    /* Match any single character whatsoever. */
2320
2321    case OP_ALLANY:
2322    if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2323      {                            /* not be updated before SCHECK_PARTIAL. */
2324      SCHECK_PARTIAL();
2325      RRETURN(MATCH_NOMATCH);
2326      }
2327    eptr++;
2328#ifdef SUPPORT_UTF
2329    if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2330#endif
2331    ecode++;
2332    break;
2333
2334    /* Match a single byte, even in UTF-8 mode. This opcode really does match
2335    any byte, even newline, independent of the setting of PCRE_DOTALL. */
2336
2337    case OP_ANYBYTE:
2338    if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2339      {                            /* not be updated before SCHECK_PARTIAL. */
2340      SCHECK_PARTIAL();
2341      RRETURN(MATCH_NOMATCH);
2342      }
2343    eptr++;
2344    ecode++;
2345    break;
2346
2347    case OP_NOT_DIGIT:
2348    if (eptr >= md->end_subject)
2349      {
2350      SCHECK_PARTIAL();
2351      RRETURN(MATCH_NOMATCH);
2352      }
2353    GETCHARINCTEST(c, eptr);
2354    if (
2355#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2356       c < 256 &&
2357#endif
2358       (md->ctypes[c] & ctype_digit) != 0
2359       )
2360      RRETURN(MATCH_NOMATCH);
2361    ecode++;
2362    break;
2363
2364    case OP_DIGIT:
2365    if (eptr >= md->end_subject)
2366      {
2367      SCHECK_PARTIAL();
2368      RRETURN(MATCH_NOMATCH);
2369      }
2370    GETCHARINCTEST(c, eptr);
2371    if (
2372#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2373       c > 255 ||
2374#endif
2375       (md->ctypes[c] & ctype_digit) == 0
2376       )
2377      RRETURN(MATCH_NOMATCH);
2378    ecode++;
2379    break;
2380
2381    case OP_NOT_WHITESPACE:
2382    if (eptr >= md->end_subject)
2383      {
2384      SCHECK_PARTIAL();
2385      RRETURN(MATCH_NOMATCH);
2386      }
2387    GETCHARINCTEST(c, eptr);
2388    if (
2389#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2390       c < 256 &&
2391#endif
2392       (md->ctypes[c] & ctype_space) != 0
2393       )
2394      RRETURN(MATCH_NOMATCH);
2395    ecode++;
2396    break;
2397
2398    case OP_WHITESPACE:
2399    if (eptr >= md->end_subject)
2400      {
2401      SCHECK_PARTIAL();
2402      RRETURN(MATCH_NOMATCH);
2403      }
2404    GETCHARINCTEST(c, eptr);
2405    if (
2406#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2407       c > 255 ||
2408#endif
2409       (md->ctypes[c] & ctype_space) == 0
2410       )
2411      RRETURN(MATCH_NOMATCH);
2412    ecode++;
2413    break;
2414
2415    case OP_NOT_WORDCHAR:
2416    if (eptr >= md->end_subject)
2417      {
2418      SCHECK_PARTIAL();
2419      RRETURN(MATCH_NOMATCH);
2420      }
2421    GETCHARINCTEST(c, eptr);
2422    if (
2423#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2424       c < 256 &&
2425#endif
2426       (md->ctypes[c] & ctype_word) != 0
2427       )
2428      RRETURN(MATCH_NOMATCH);
2429    ecode++;
2430    break;
2431
2432    case OP_WORDCHAR:
2433    if (eptr >= md->end_subject)
2434      {
2435      SCHECK_PARTIAL();
2436      RRETURN(MATCH_NOMATCH);
2437      }
2438    GETCHARINCTEST(c, eptr);
2439    if (
2440#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2441       c > 255 ||
2442#endif
2443       (md->ctypes[c] & ctype_word) == 0
2444       )
2445      RRETURN(MATCH_NOMATCH);
2446    ecode++;
2447    break;
2448
2449    case OP_ANYNL:
2450    if (eptr >= md->end_subject)
2451      {
2452      SCHECK_PARTIAL();
2453      RRETURN(MATCH_NOMATCH);
2454      }
2455    GETCHARINCTEST(c, eptr);
2456    switch(c)
2457      {
2458      default: RRETURN(MATCH_NOMATCH);
2459
2460      case CHAR_CR:
2461      if (eptr >= md->end_subject)
2462        {
2463        SCHECK_PARTIAL();
2464        }
2465      else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
2466      break;
2467
2468      case CHAR_LF:
2469      break;
2470
2471      case CHAR_VT:
2472      case CHAR_FF:
2473      case CHAR_NEL:
2474#ifndef EBCDIC
2475      case 0x2028:
2476      case 0x2029:
2477#endif  /* Not EBCDIC */
2478      if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2479      break;
2480      }
2481    ecode++;
2482    break;
2483
2484    case OP_NOT_HSPACE:
2485    if (eptr >= md->end_subject)
2486      {
2487      SCHECK_PARTIAL();
2488      RRETURN(MATCH_NOMATCH);
2489      }
2490    GETCHARINCTEST(c, eptr);
2491    switch(c)
2492      {
2493      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2494      default: break;
2495      }
2496    ecode++;
2497    break;
2498
2499    case OP_HSPACE:
2500    if (eptr >= md->end_subject)
2501      {
2502      SCHECK_PARTIAL();
2503      RRETURN(MATCH_NOMATCH);
2504      }
2505    GETCHARINCTEST(c, eptr);
2506    switch(c)
2507      {
2508      HSPACE_CASES: break;  /* Byte and multibyte cases */
2509      default: RRETURN(MATCH_NOMATCH);
2510      }
2511    ecode++;
2512    break;
2513
2514    case OP_NOT_VSPACE:
2515    if (eptr >= md->end_subject)
2516      {
2517      SCHECK_PARTIAL();
2518      RRETURN(MATCH_NOMATCH);
2519      }
2520    GETCHARINCTEST(c, eptr);
2521    switch(c)
2522      {
2523      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2524      default: break;
2525      }
2526    ecode++;
2527    break;
2528
2529    case OP_VSPACE:
2530    if (eptr >= md->end_subject)
2531      {
2532      SCHECK_PARTIAL();
2533      RRETURN(MATCH_NOMATCH);
2534      }
2535    GETCHARINCTEST(c, eptr);
2536    switch(c)
2537      {
2538      VSPACE_CASES: break;
2539      default: RRETURN(MATCH_NOMATCH);
2540      }
2541    ecode++;
2542    break;
2543
2544#ifdef SUPPORT_UCP
2545    /* Check the next character by Unicode property. We will get here only
2546    if the support is in the binary; otherwise a compile-time error occurs. */
2547
2548    case OP_PROP:
2549    case OP_NOTPROP:
2550    if (eptr >= md->end_subject)
2551      {
2552      SCHECK_PARTIAL();
2553      RRETURN(MATCH_NOMATCH);
2554      }
2555    GETCHARINCTEST(c, eptr);
2556      {
2557      const pcre_uint32 *cp;
2558      const ucd_record *prop = GET_UCD(c);
2559
2560      switch(ecode[1])
2561        {
2562        case PT_ANY:
2563        if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2564        break;
2565
2566        case PT_LAMP:
2567        if ((prop->chartype == ucp_Lu ||
2568             prop->chartype == ucp_Ll ||
2569             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2570          RRETURN(MATCH_NOMATCH);
2571        break;
2572
2573        case PT_GC:
2574        if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2575          RRETURN(MATCH_NOMATCH);
2576        break;
2577
2578        case PT_PC:
2579        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2580          RRETURN(MATCH_NOMATCH);
2581        break;
2582
2583        case PT_SC:
2584        if ((ecode[2] != prop->script) == (op == OP_PROP))
2585          RRETURN(MATCH_NOMATCH);
2586        break;
2587
2588        /* These are specials */
2589
2590        case PT_ALNUM:
2591        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2592             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2593          RRETURN(MATCH_NOMATCH);
2594        break;
2595
2596        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2597        which means that Perl space and POSIX space are now identical. PCRE
2598        was changed at release 8.34. */
2599
2600        case PT_SPACE:    /* Perl space */
2601        case PT_PXSPACE:  /* POSIX space */
2602        switch(c)
2603          {
2604          HSPACE_CASES:
2605          VSPACE_CASES:
2606          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2607          break;
2608
2609          default:
2610          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2611            (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2612          break;
2613          }
2614        break;
2615
2616        case PT_WORD:
2617        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2618             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2619             c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2620          RRETURN(MATCH_NOMATCH);
2621        break;
2622
2623        case PT_CLIST:
2624        cp = PRIV(ucd_caseless_sets) + ecode[2];
2625        for (;;)
2626          {
2627          if (c < *cp)
2628            { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2629          if (c == *cp++)
2630            { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2631          }
2632        break;
2633
2634        case PT_UCNC:
2635        if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2636             c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2637             c >= 0xe000) == (op == OP_NOTPROP))
2638          RRETURN(MATCH_NOMATCH);
2639        break;
2640
2641        /* This should never occur */
2642
2643        default:
2644        RRETURN(PCRE_ERROR_INTERNAL);
2645        }
2646
2647      ecode += 3;
2648      }
2649    break;
2650
2651    /* Match an extended Unicode sequence. We will get here only if the support
2652    is in the binary; otherwise a compile-time error occurs. */
2653
2654    case OP_EXTUNI:
2655    if (eptr >= md->end_subject)
2656      {
2657      SCHECK_PARTIAL();
2658      RRETURN(MATCH_NOMATCH);
2659      }
2660    else
2661      {
2662      int lgb, rgb;
2663      GETCHARINCTEST(c, eptr);
2664      lgb = UCD_GRAPHBREAK(c);
2665      while (eptr < md->end_subject)
2666        {
2667        int len = 1;
2668        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2669        rgb = UCD_GRAPHBREAK(c);
2670        if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2671        lgb = rgb;
2672        eptr += len;
2673        }
2674      }
2675    CHECK_PARTIAL();
2676    ecode++;
2677    break;
2678#endif  /* SUPPORT_UCP */
2679
2680
2681    /* Match a back reference, possibly repeatedly. Look past the end of the
2682    item to see if there is repeat information following. The code is similar
2683    to that for character classes, but repeated for efficiency. Then obey
2684    similar code to character type repeats - written out again for speed.
2685    However, if the referenced string is the empty string, always treat
2686    it as matched, any number of times (otherwise there could be infinite
2687    loops). If the reference is unset, there are two possibilities:
2688
2689    (a) In the default, Perl-compatible state, set the length negative;
2690    this ensures that every attempt at a match fails. We can't just fail
2691    here, because of the possibility of quantifiers with zero minima.
2692
2693    (b) If the JavaScript compatibility flag is set, set the length to zero
2694    so that the back reference matches an empty string.
2695
2696    Otherwise, set the length to the length of what was matched by the
2697    referenced subpattern.
2698
2699    The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2700    or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2701    and OP_DNREFI are used. In this case we must scan the list of groups to
2702    which the name refers, and use the first one that is set. */
2703
2704    case OP_DNREF:
2705    case OP_DNREFI:
2706    caseless = op == OP_DNREFI;
2707      {
2708      int count = GET2(ecode, 1+IMM2_SIZE);
2709      pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2710      ecode += 1 + 2*IMM2_SIZE;
2711
2712      /* Setting the default length first and initializing 'offset' avoids
2713      compiler warnings in the REF_REPEAT code. */
2714
2715      length = (md->jscript_compat)? 0 : -1;
2716      offset = 0;
2717
2718      while (count-- > 0)
2719        {
2720        offset = GET2(slot, 0) << 1;
2721        if (offset < offset_top && md->offset_vector[offset] >= 0)
2722          {
2723          length = md->offset_vector[offset+1] - md->offset_vector[offset];
2724          break;
2725          }
2726        slot += md->name_entry_size;
2727        }
2728      }
2729    goto REF_REPEAT;
2730
2731    case OP_REF:
2732    case OP_REFI:
2733    caseless = op == OP_REFI;
2734    offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2735    ecode += 1 + IMM2_SIZE;
2736    if (offset >= offset_top || md->offset_vector[offset] < 0)
2737      length = (md->jscript_compat)? 0 : -1;
2738    else
2739      length = md->offset_vector[offset+1] - md->offset_vector[offset];
2740
2741    /* Set up for repetition, or handle the non-repeated case */
2742
2743    REF_REPEAT:
2744    switch (*ecode)
2745      {
2746      case OP_CRSTAR:
2747      case OP_CRMINSTAR:
2748      case OP_CRPLUS:
2749      case OP_CRMINPLUS:
2750      case OP_CRQUERY:
2751      case OP_CRMINQUERY:
2752      c = *ecode++ - OP_CRSTAR;
2753      minimize = (c & 1) != 0;
2754      min = rep_min[c];                 /* Pick up values from tables; */
2755      max = rep_max[c];                 /* zero for max => infinity */
2756      if (max == 0) max = INT_MAX;
2757      break;
2758
2759      case OP_CRRANGE:
2760      case OP_CRMINRANGE:
2761      minimize = (*ecode == OP_CRMINRANGE);
2762      min = GET2(ecode, 1);
2763      max = GET2(ecode, 1 + IMM2_SIZE);
2764      if (max == 0) max = INT_MAX;
2765      ecode += 1 + 2 * IMM2_SIZE;
2766      break;
2767
2768      default:               /* No repeat follows */
2769      if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2770        {
2771        if (length == -2) eptr = md->end_subject;   /* Partial match */
2772        CHECK_PARTIAL();
2773        RRETURN(MATCH_NOMATCH);
2774        }
2775      eptr += length;
2776      continue;              /* With the main loop */
2777      }
2778
2779    /* Handle repeated back references. If the length of the reference is
2780    zero, just continue with the main loop. If the length is negative, it means
2781    the reference is unset in non-JavaScript-compatible mode. If the minimum is
2782    zero, we can continue at the same level without recursion. For any other
2783    minimum, carrying on will result in NOMATCH. */
2784
2785    if (length == 0) continue;
2786    if (length < 0 && min == 0) continue;
2787
2788    /* First, ensure the minimum number of matches are present. We get back
2789    the length of the reference string explicitly rather than passing the
2790    address of eptr, so that eptr can be a register variable. */
2791
2792    for (i = 1; i <= min; i++)
2793      {
2794      int slength;
2795      if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2796        {
2797        if (slength == -2) eptr = md->end_subject;   /* Partial match */
2798        CHECK_PARTIAL();
2799        RRETURN(MATCH_NOMATCH);
2800        }
2801      eptr += slength;
2802      }
2803
2804    /* If min = max, continue at the same level without recursion.
2805    They are not both allowed to be zero. */
2806
2807    if (min == max) continue;
2808
2809    /* If minimizing, keep trying and advancing the pointer */
2810
2811    if (minimize)
2812      {
2813      for (fi = min;; fi++)
2814        {
2815        int slength;
2816        RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2817        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2818        if (fi >= max) RRETURN(MATCH_NOMATCH);
2819        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2820          {
2821          if (slength == -2) eptr = md->end_subject;   /* Partial match */
2822          CHECK_PARTIAL();
2823          RRETURN(MATCH_NOMATCH);
2824          }
2825        eptr += slength;
2826        }
2827      /* Control never gets here */
2828      }
2829
2830    /* If maximizing, find the longest string and work backwards */
2831
2832    else
2833      {
2834      pp = eptr;
2835      for (i = min; i < max; i++)
2836        {
2837        int slength;
2838        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2839          {
2840          /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2841          the soft partial matching case. */
2842
2843          if (slength == -2 && md->partial != 0 &&
2844              md->end_subject > md->start_used_ptr)
2845            {
2846            md->hitend = TRUE;
2847            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2848            }
2849          break;
2850          }
2851        eptr += slength;
2852        }
2853
2854      while (eptr >= pp)
2855        {
2856        RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2857        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2858        eptr -= length;
2859        }
2860      RRETURN(MATCH_NOMATCH);
2861      }
2862    /* Control never gets here */
2863
2864    /* Match a bit-mapped character class, possibly repeatedly. This op code is
2865    used when all the characters in the class have values in the range 0-255,
2866    and either the matching is caseful, or the characters are in the range
2867    0-127 when UTF-8 processing is enabled. The only difference between
2868    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2869    encountered.
2870
2871    First, look past the end of the item to see if there is repeat information
2872    following. Then obey similar code to character type repeats - written out
2873    again for speed. */
2874
2875    case OP_NCLASS:
2876    case OP_CLASS:
2877      {
2878      /* The data variable is saved across frames, so the byte map needs to
2879      be stored there. */
2880#define BYTE_MAP ((pcre_uint8 *)data)
2881      data = ecode + 1;                /* Save for matching */
2882      ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2883
2884      switch (*ecode)
2885        {
2886        case OP_CRSTAR:
2887        case OP_CRMINSTAR:
2888        case OP_CRPLUS:
2889        case OP_CRMINPLUS:
2890        case OP_CRQUERY:
2891        case OP_CRMINQUERY:
2892        case OP_CRPOSSTAR:
2893        case OP_CRPOSPLUS:
2894        case OP_CRPOSQUERY:
2895        c = *ecode++ - OP_CRSTAR;
2896        if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
2897        else possessive = TRUE;
2898        min = rep_min[c];                 /* Pick up values from tables; */
2899        max = rep_max[c];                 /* zero for max => infinity */
2900        if (max == 0) max = INT_MAX;
2901        break;
2902
2903        case OP_CRRANGE:
2904        case OP_CRMINRANGE:
2905        case OP_CRPOSRANGE:
2906        minimize = (*ecode == OP_CRMINRANGE);
2907        possessive = (*ecode == OP_CRPOSRANGE);
2908        min = GET2(ecode, 1);
2909        max = GET2(ecode, 1 + IMM2_SIZE);
2910        if (max == 0) max = INT_MAX;
2911        ecode += 1 + 2 * IMM2_SIZE;
2912        break;
2913
2914        default:               /* No repeat follows */
2915        min = max = 1;
2916        break;
2917        }
2918
2919      /* First, ensure the minimum number of matches are present. */
2920
2921#ifdef SUPPORT_UTF
2922      if (utf)
2923        {
2924        for (i = 1; i <= min; i++)
2925          {
2926          if (eptr >= md->end_subject)
2927            {
2928            SCHECK_PARTIAL();
2929            RRETURN(MATCH_NOMATCH);
2930            }
2931          GETCHARINC(c, eptr);
2932          if (c > 255)
2933            {
2934            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2935            }
2936          else
2937            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2938          }
2939        }
2940      else
2941#endif
2942      /* Not UTF mode */
2943        {
2944        for (i = 1; i <= min; i++)
2945          {
2946          if (eptr >= md->end_subject)
2947            {
2948            SCHECK_PARTIAL();
2949            RRETURN(MATCH_NOMATCH);
2950            }
2951          c = *eptr++;
2952#ifndef COMPILE_PCRE8
2953          if (c > 255)
2954            {
2955            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2956            }
2957          else
2958#endif
2959            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2960          }
2961        }
2962
2963      /* If max == min we can continue with the main loop without the
2964      need to recurse. */
2965
2966      if (min == max) continue;
2967
2968      /* If minimizing, keep testing the rest of the expression and advancing
2969      the pointer while it matches the class. */
2970
2971      if (minimize)
2972        {
2973#ifdef SUPPORT_UTF
2974        if (utf)
2975          {
2976          for (fi = min;; fi++)
2977            {
2978            RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2979            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2980            if (fi >= max) RRETURN(MATCH_NOMATCH);
2981            if (eptr >= md->end_subject)
2982              {
2983              SCHECK_PARTIAL();
2984              RRETURN(MATCH_NOMATCH);
2985              }
2986            GETCHARINC(c, eptr);
2987            if (c > 255)
2988              {
2989              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2990              }
2991            else
2992              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2993            }
2994          }
2995        else
2996#endif
2997        /* Not UTF mode */
2998          {
2999          for (fi = min;; fi++)
3000            {
3001            RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
3002            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3003            if (fi >= max) RRETURN(MATCH_NOMATCH);
3004            if (eptr >= md->end_subject)
3005              {
3006              SCHECK_PARTIAL();
3007              RRETURN(MATCH_NOMATCH);
3008              }
3009            c = *eptr++;
3010#ifndef COMPILE_PCRE8
3011            if (c > 255)
3012              {
3013              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3014              }
3015            else
3016#endif
3017              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3018            }
3019          }
3020        /* Control never gets here */
3021        }
3022
3023      /* If maximizing, find the longest possible run, then work backwards. */
3024
3025      else
3026        {
3027        pp = eptr;
3028
3029#ifdef SUPPORT_UTF
3030        if (utf)
3031          {
3032          for (i = min; i < max; i++)
3033            {
3034            int len = 1;
3035            if (eptr >= md->end_subject)
3036              {
3037              SCHECK_PARTIAL();
3038              break;
3039              }
3040            GETCHARLEN(c, eptr, len);
3041            if (c > 255)
3042              {
3043              if (op == OP_CLASS) break;
3044              }
3045            else
3046              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3047            eptr += len;
3048            }
3049
3050          if (possessive) continue;    /* No backtracking */
3051
3052          for (;;)
3053            {
3054            RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
3055            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3056            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3057            BACKCHAR(eptr);
3058            }
3059          }
3060        else
3061#endif
3062          /* Not UTF mode */
3063          {
3064          for (i = min; i < max; i++)
3065            {
3066            if (eptr >= md->end_subject)
3067              {
3068              SCHECK_PARTIAL();
3069              break;
3070              }
3071            c = *eptr;
3072#ifndef COMPILE_PCRE8
3073            if (c > 255)
3074              {
3075              if (op == OP_CLASS) break;
3076              }
3077            else
3078#endif
3079              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3080            eptr++;
3081            }
3082
3083          if (possessive) continue;    /* No backtracking */
3084
3085          while (eptr >= pp)
3086            {
3087            RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3088            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3089            eptr--;
3090            }
3091          }
3092
3093        RRETURN(MATCH_NOMATCH);
3094        }
3095#undef BYTE_MAP
3096      }
3097    /* Control never gets here */
3098
3099
3100    /* Match an extended character class. In the 8-bit library, this opcode is
3101    encountered only when UTF-8 mode is supported. In the 16-bit and
3102    32-bit libraries, codepoints greater than 255 may be encountered even when
3103    UTF is not supported. */
3104
3105#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3106    case OP_XCLASS:
3107      {
3108      data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3109      ecode += GET(ecode, 1);                      /* Advance past the item */
3110
3111      switch (*ecode)
3112        {
3113        case OP_CRSTAR:
3114        case OP_CRMINSTAR:
3115        case OP_CRPLUS:
3116        case OP_CRMINPLUS:
3117        case OP_CRQUERY:
3118        case OP_CRMINQUERY:
3119        case OP_CRPOSSTAR:
3120        case OP_CRPOSPLUS:
3121        case OP_CRPOSQUERY:
3122        c = *ecode++ - OP_CRSTAR;
3123        if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3124        else possessive = TRUE;
3125        min = rep_min[c];                 /* Pick up values from tables; */
3126        max = rep_max[c];                 /* zero for max => infinity */
3127        if (max == 0) max = INT_MAX;
3128        break;
3129
3130        case OP_CRRANGE:
3131        case OP_CRMINRANGE:
3132        case OP_CRPOSRANGE:
3133        minimize = (*ecode == OP_CRMINRANGE);
3134        possessive = (*ecode == OP_CRPOSRANGE);
3135        min = GET2(ecode, 1);
3136        max = GET2(ecode, 1 + IMM2_SIZE);
3137        if (max == 0) max = INT_MAX;
3138        ecode += 1 + 2 * IMM2_SIZE;
3139        break;
3140
3141        default:               /* No repeat follows */
3142        min = max = 1;
3143        break;
3144        }
3145
3146      /* First, ensure the minimum number of matches are present. */
3147
3148      for (i = 1; i <= min; i++)
3149        {
3150        if (eptr >= md->end_subject)
3151          {
3152          SCHECK_PARTIAL();
3153          RRETURN(MATCH_NOMATCH);
3154          }
3155        GETCHARINCTEST(c, eptr);
3156        if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3157        }
3158
3159      /* If max == min we can continue with the main loop without the
3160      need to recurse. */
3161
3162      if (min == max) continue;
3163
3164      /* If minimizing, keep testing the rest of the expression and advancing
3165      the pointer while it matches the class. */
3166
3167      if (minimize)
3168        {
3169        for (fi = min;; fi++)
3170          {
3171          RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3172          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3173          if (fi >= max) RRETURN(MATCH_NOMATCH);
3174          if (eptr >= md->end_subject)
3175            {
3176            SCHECK_PARTIAL();
3177            RRETURN(MATCH_NOMATCH);
3178            }
3179          GETCHARINCTEST(c, eptr);
3180          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3181          }
3182        /* Control never gets here */
3183        }
3184
3185      /* If maximizing, find the longest possible run, then work backwards. */
3186
3187      else
3188        {
3189        pp = eptr;
3190        for (i = min; i < max; i++)
3191          {
3192          int len = 1;
3193          if (eptr >= md->end_subject)
3194            {
3195            SCHECK_PARTIAL();
3196            break;
3197            }
3198#ifdef SUPPORT_UTF
3199          GETCHARLENTEST(c, eptr, len);
3200#else
3201          c = *eptr;
3202#endif
3203          if (!PRIV(xclass)(c, data, utf)) break;
3204          eptr += len;
3205          }
3206
3207        if (possessive) continue;    /* No backtracking */
3208
3209        for(;;)
3210          {
3211          RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3212          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3213          if (eptr-- == pp) break;        /* Stop if tried at original pos */
3214#ifdef SUPPORT_UTF
3215          if (utf) BACKCHAR(eptr);
3216#endif
3217          }
3218        RRETURN(MATCH_NOMATCH);
3219        }
3220
3221      /* Control never gets here */
3222      }
3223#endif    /* End of XCLASS */
3224
3225    /* Match a single character, casefully */
3226
3227    case OP_CHAR:
3228#ifdef SUPPORT_UTF
3229    if (utf)
3230      {
3231      length = 1;
3232      ecode++;
3233      GETCHARLEN(fc, ecode, length);
3234      if (length > md->end_subject - eptr)
3235        {
3236        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3237        RRETURN(MATCH_NOMATCH);
3238        }
3239      while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
3240      }
3241    else
3242#endif
3243    /* Not UTF mode */
3244      {
3245      if (md->end_subject - eptr < 1)
3246        {
3247        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3248        RRETURN(MATCH_NOMATCH);
3249        }
3250      if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3251      ecode += 2;
3252      }
3253    break;
3254
3255    /* Match a single character, caselessly. If we are at the end of the
3256    subject, give up immediately. */
3257
3258    case OP_CHARI:
3259    if (eptr >= md->end_subject)
3260      {
3261      SCHECK_PARTIAL();
3262      RRETURN(MATCH_NOMATCH);
3263      }
3264
3265#ifdef SUPPORT_UTF
3266    if (utf)
3267      {
3268      length = 1;
3269      ecode++;
3270      GETCHARLEN(fc, ecode, length);
3271
3272      /* If the pattern character's value is < 128, we have only one byte, and
3273      we know that its other case must also be one byte long, so we can use the
3274      fast lookup table. We know that there is at least one byte left in the
3275      subject. */
3276
3277      if (fc < 128)
3278        {
3279        pcre_uint32 cc = UCHAR21(eptr);
3280        if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3281        ecode++;
3282        eptr++;
3283        }
3284
3285      /* Otherwise we must pick up the subject character. Note that we cannot
3286      use the value of "length" to check for sufficient bytes left, because the
3287      other case of the character may have more or fewer bytes.  */
3288
3289      else
3290        {
3291        pcre_uint32 dc;
3292        GETCHARINC(dc, eptr);
3293        ecode += length;
3294
3295        /* If we have Unicode property support, we can use it to test the other
3296        case of the character, if there is one. */
3297
3298        if (fc != dc)
3299          {
3300#ifdef SUPPORT_UCP
3301          if (dc != UCD_OTHERCASE(fc))
3302#endif
3303            RRETURN(MATCH_NOMATCH);
3304          }
3305        }
3306      }
3307    else
3308#endif   /* SUPPORT_UTF */
3309
3310    /* Not UTF mode */
3311      {
3312      if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3313          != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3314      eptr++;
3315      ecode += 2;
3316      }
3317    break;
3318
3319    /* Match a single character repeatedly. */
3320
3321    case OP_EXACT:
3322    case OP_EXACTI:
3323    min = max = GET2(ecode, 1);
3324    ecode += 1 + IMM2_SIZE;
3325    goto REPEATCHAR;
3326
3327    case OP_POSUPTO:
3328    case OP_POSUPTOI:
3329    possessive = TRUE;
3330    /* Fall through */
3331
3332    case OP_UPTO:
3333    case OP_UPTOI:
3334    case OP_MINUPTO:
3335    case OP_MINUPTOI:
3336    min = 0;
3337    max = GET2(ecode, 1);
3338    minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3339    ecode += 1 + IMM2_SIZE;
3340    goto REPEATCHAR;
3341
3342    case OP_POSSTAR:
3343    case OP_POSSTARI:
3344    possessive = TRUE;
3345    min = 0;
3346    max = INT_MAX;
3347    ecode++;
3348    goto REPEATCHAR;
3349
3350    case OP_POSPLUS:
3351    case OP_POSPLUSI:
3352    possessive = TRUE;
3353    min = 1;
3354    max = INT_MAX;
3355    ecode++;
3356    goto REPEATCHAR;
3357
3358    case OP_POSQUERY:
3359    case OP_POSQUERYI:
3360    possessive = TRUE;
3361    min = 0;
3362    max = 1;
3363    ecode++;
3364    goto REPEATCHAR;
3365
3366    case OP_STAR:
3367    case OP_STARI:
3368    case OP_MINSTAR:
3369    case OP_MINSTARI:
3370    case OP_PLUS:
3371    case OP_PLUSI:
3372    case OP_MINPLUS:
3373    case OP_MINPLUSI:
3374    case OP_QUERY:
3375    case OP_QUERYI:
3376    case OP_MINQUERY:
3377    case OP_MINQUERYI:
3378    c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3379    minimize = (c & 1) != 0;
3380    min = rep_min[c];                 /* Pick up values from tables; */
3381    max = rep_max[c];                 /* zero for max => infinity */
3382    if (max == 0) max = INT_MAX;
3383
3384    /* Common code for all repeated single-character matches. We first check
3385    for the minimum number of characters. If the minimum equals the maximum, we
3386    are done. Otherwise, if minimizing, check the rest of the pattern for a
3387    match; if there isn't one, advance up to the maximum, one character at a
3388    time.
3389
3390    If maximizing, advance up to the maximum number of matching characters,
3391    until eptr is past the end of the maximum run. If possessive, we are
3392    then done (no backing up). Otherwise, match at this position; anything
3393    other than no match is immediately returned. For nomatch, back up one
3394    character, unless we are matching \R and the last thing matched was
3395    \r\n, in which case, back up two bytes. When we reach the first optional
3396    character position, we can save stack by doing a tail recurse.
3397
3398    The various UTF/non-UTF and caseful/caseless cases are handled separately,
3399    for speed. */
3400
3401    REPEATCHAR:
3402#ifdef SUPPORT_UTF
3403    if (utf)
3404      {
3405      length = 1;
3406      charptr = ecode;
3407      GETCHARLEN(fc, ecode, length);
3408      ecode += length;
3409
3410      /* Handle multibyte character matching specially here. There is
3411      support for caseless matching if UCP support is present. */
3412
3413      if (length > 1)
3414        {
3415#ifdef SUPPORT_UCP
3416        pcre_uint32 othercase;
3417        if (op >= OP_STARI &&     /* Caseless */
3418            (othercase = UCD_OTHERCASE(fc)) != fc)
3419          oclength = PRIV(ord2utf)(othercase, occhars);
3420        else oclength = 0;
3421#endif  /* SUPPORT_UCP */
3422
3423        for (i = 1; i <= min; i++)
3424          {
3425          if (eptr <= md->end_subject - length &&
3426            memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3427#ifdef SUPPORT_UCP
3428          else if (oclength > 0 &&
3429                   eptr <= md->end_subject - oclength &&
3430                   memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3431#endif  /* SUPPORT_UCP */
3432          else
3433            {
3434            CHECK_PARTIAL();
3435            RRETURN(MATCH_NOMATCH);
3436            }
3437          }
3438
3439        if (min == max) continue;
3440
3441        if (minimize)
3442          {
3443          for (fi = min;; fi++)
3444            {
3445            RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3446            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3447            if (fi >= max) RRETURN(MATCH_NOMATCH);
3448            if (eptr <= md->end_subject - length &&
3449              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3450#ifdef SUPPORT_UCP
3451            else if (oclength > 0 &&
3452                     eptr <= md->end_subject - oclength &&
3453                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3454#endif  /* SUPPORT_UCP */
3455            else
3456              {
3457              CHECK_PARTIAL();
3458              RRETURN(MATCH_NOMATCH);
3459              }
3460            }
3461          /* Control never gets here */
3462          }
3463
3464        else  /* Maximize */
3465          {
3466          pp = eptr;
3467          for (i = min; i < max; i++)
3468            {
3469            if (eptr <= md->end_subject - length &&
3470                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3471#ifdef SUPPORT_UCP
3472            else if (oclength > 0 &&
3473                     eptr <= md->end_subject - oclength &&
3474                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3475#endif  /* SUPPORT_UCP */
3476            else
3477              {
3478              CHECK_PARTIAL();
3479              break;
3480              }
3481            }
3482
3483          if (possessive) continue;    /* No backtracking */
3484          for(;;)
3485            {
3486            if (eptr <= pp) goto TAIL_RECURSE;
3487            RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3488            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3489#ifdef SUPPORT_UCP
3490            eptr--;
3491            BACKCHAR(eptr);
3492#else   /* without SUPPORT_UCP */
3493            eptr -= length;
3494#endif  /* SUPPORT_UCP */
3495            }
3496          }
3497        /* Control never gets here */
3498        }
3499
3500      /* If the length of a UTF-8 character is 1, we fall through here, and
3501      obey the code as for non-UTF-8 characters below, though in this case the
3502      value of fc will always be < 128. */
3503      }
3504    else
3505#endif  /* SUPPORT_UTF */
3506      /* When not in UTF-8 mode, load a single-byte character. */
3507      fc = *ecode++;
3508
3509    /* The value of fc at this point is always one character, though we may
3510    or may not be in UTF mode. The code is duplicated for the caseless and
3511    caseful cases, for speed, since matching characters is likely to be quite
3512    common. First, ensure the minimum number of matches are present. If min =
3513    max, continue at the same level without recursing. Otherwise, if
3514    minimizing, keep trying the rest of the expression and advancing one
3515    matching character if failing, up to the maximum. Alternatively, if
3516    maximizing, find the maximum number of characters and work backwards. */
3517
3518    DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3519      max, (char *)eptr));
3520
3521    if (op >= OP_STARI)  /* Caseless */
3522      {
3523#ifdef COMPILE_PCRE8
3524      /* fc must be < 128 if UTF is enabled. */
3525      foc = md->fcc[fc];
3526#else
3527#ifdef SUPPORT_UTF
3528#ifdef SUPPORT_UCP
3529      if (utf && fc > 127)
3530        foc = UCD_OTHERCASE(fc);
3531#else
3532      if (utf && fc > 127)
3533        foc = fc;
3534#endif /* SUPPORT_UCP */
3535      else
3536#endif /* SUPPORT_UTF */
3537        foc = TABLE_GET(fc, md->fcc, fc);
3538#endif /* COMPILE_PCRE8 */
3539
3540      for (i = 1; i <= min; i++)
3541        {
3542        pcre_uint32 cc;                 /* Faster than pcre_uchar */
3543        if (eptr >= md->end_subject)
3544          {
3545          SCHECK_PARTIAL();
3546          RRETURN(MATCH_NOMATCH);
3547          }
3548        cc = UCHAR21TEST(eptr);
3549        if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3550        eptr++;
3551        }
3552      if (min == max) continue;
3553      if (minimize)
3554        {
3555        for (fi = min;; fi++)
3556          {
3557          pcre_uint32 cc;               /* Faster than pcre_uchar */
3558          RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3559          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3560          if (fi >= max) RRETURN(MATCH_NOMATCH);
3561          if (eptr >= md->end_subject)
3562            {
3563            SCHECK_PARTIAL();
3564            RRETURN(MATCH_NOMATCH);
3565            }
3566          cc = UCHAR21TEST(eptr);
3567          if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3568          eptr++;
3569          }
3570        /* Control never gets here */
3571        }
3572      else  /* Maximize */
3573        {
3574        pp = eptr;
3575        for (i = min; i < max; i++)
3576          {
3577          pcre_uint32 cc;               /* Faster than pcre_uchar */
3578          if (eptr >= md->end_subject)
3579            {
3580            SCHECK_PARTIAL();
3581            break;
3582            }
3583          cc = UCHAR21TEST(eptr);
3584          if (fc != cc && foc != cc) break;
3585          eptr++;
3586          }
3587        if (possessive) continue;       /* No backtracking */
3588        for (;;)
3589          {
3590          if (eptr == pp) goto TAIL_RECURSE;
3591          RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3592          eptr--;
3593          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3594          }
3595        /* Control never gets here */
3596        }
3597      }
3598
3599    /* Caseful comparisons (includes all multi-byte characters) */
3600
3601    else
3602      {
3603      for (i = 1; i <= min; i++)
3604        {
3605        if (eptr >= md->end_subject)
3606          {
3607          SCHECK_PARTIAL();
3608          RRETURN(MATCH_NOMATCH);
3609          }
3610        if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3611        }
3612
3613      if (min == max) continue;
3614
3615      if (minimize)
3616        {
3617        for (fi = min;; fi++)
3618          {
3619          RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3620          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3621          if (fi >= max) RRETURN(MATCH_NOMATCH);
3622          if (eptr >= md->end_subject)
3623            {
3624            SCHECK_PARTIAL();
3625            RRETURN(MATCH_NOMATCH);
3626            }
3627          if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3628          }
3629        /* Control never gets here */
3630        }
3631      else  /* Maximize */
3632        {
3633        pp = eptr;
3634        for (i = min; i < max; i++)
3635          {
3636          if (eptr >= md->end_subject)
3637            {
3638            SCHECK_PARTIAL();
3639            break;
3640            }
3641          if (fc != UCHAR21TEST(eptr)) break;
3642          eptr++;
3643          }
3644        if (possessive) continue;    /* No backtracking */
3645        for (;;)
3646          {
3647          if (eptr == pp) goto TAIL_RECURSE;
3648          RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3649          eptr--;
3650          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3651          }
3652        /* Control never gets here */
3653        }
3654      }
3655    /* Control never gets here */
3656
3657    /* Match a negated single character. In UTF mode both the pattern
3658    character and the subject character can be multibyte. */
3659
3660    case OP_NOT:                      /* Negated single character, caseful */
3661    case OP_NOTI:                     /* Same, but the other-case form is rejected too */
3662    if (eptr >= md->end_subject)      /* No subject character left to test */
3663      {
3664      SCHECK_PARTIAL();               /* Partial-match check (macro defined elsewhere) */
3665      RRETURN(MATCH_NOMATCH);
3666      }
3667#ifdef SUPPORT_UTF
3668    if (utf)
3669      {
3670      register pcre_uint32 ch, och;   /* Pattern character and its other-case form */
3671
3672      ecode++;                        /* Step over the opcode itself */
3673      GETCHARINC(ch, ecode);          /* Pattern character; ecode moves past it */
3674      GETCHARINC(c, eptr);            /* Subject character; eptr moves past it */
3675
3676      if (op == OP_NOT)
3677        {
3678        if (ch == c) RRETURN(MATCH_NOMATCH);   /* Subject holds the excluded character */
3679        }
3680      else                            /* OP_NOTI */
3681        {
3682#ifdef SUPPORT_UCP
3683        if (ch > 127)
3684          och = UCD_OTHERCASE(ch);    /* Above 127 the other case comes from UCD_OTHERCASE */
3685#else
3686        if (ch > 127)
3687          och = ch;                   /* No UCP support: och is just ch above 127 */
3688#endif /* SUPPORT_UCP */
3689        else
3690          och = TABLE_GET(ch, md->fcc, ch);   /* ch <= 127: other case looked up in md->fcc */
3691        if (ch == c || och == c) RRETURN(MATCH_NOMATCH);   /* Either case form is excluded */
3692        }
3693      }
3694    else
3695#endif
3696      {
3697      register pcre_uint32 ch = ecode[1];   /* Pattern character is the unit after the opcode */
3698      c = *eptr++;                    /* Consume one subject unit */
3699      if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3700        RRETURN(MATCH_NOMATCH);       /* Excluded character (or, for OP_NOTI, its md->fcc form) */
3701      ecode += 2;                     /* Skip the opcode and the one-unit character */
3702      }
3703    break;
3704
3705    /* Match a negated single one-byte character repeatedly. This is almost a
3706    repeat of the code for a repeated single character, but I haven't found a
3707    nice way of commoning these up that doesn't require a test of the
3708    positive/negative option for each character match. Maybe that wouldn't add
3709    very much to the time taken, but character matching *is* what this is all
3710    about... */
3711
3712    case OP_NOTEXACT:                 /* Negated character, fixed repeat count */
3713    case OP_NOTEXACTI:
3714    min = max = GET2(ecode, 1);       /* Count operand read at offset 1 from the opcode */
3715    ecode += 1 + IMM2_SIZE;           /* Presumably skips the opcode plus that operand */
3716    goto REPEATNOTCHAR;
3717
3718    case OP_NOTUPTO:                  /* Negated character, zero up to a stated maximum */
3719    case OP_NOTUPTOI:
3720    case OP_NOTMINUPTO:
3721    case OP_NOTMINUPTOI:
3722    min = 0;
3723    max = GET2(ecode, 1);
3724    minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;   /* True only for the MIN opcodes */
3725    ecode += 1 + IMM2_SIZE;
3726    goto REPEATNOTCHAR;
3727
3728    case OP_NOTPOSSTAR:               /* Possessive, zero or more */
3729    case OP_NOTPOSSTARI:
3730    possessive = TRUE;
3731    min = 0;
3732    max = INT_MAX;                    /* INT_MAX stands for "no upper limit" */
3733    ecode++;                          /* No count operand: step over the opcode only */
3734    goto REPEATNOTCHAR;
3735
3736    case OP_NOTPOSPLUS:               /* Possessive, one or more */
3737    case OP_NOTPOSPLUSI:
3738    possessive = TRUE;
3739    min = 1;
3740    max = INT_MAX;
3741    ecode++;
3742    goto REPEATNOTCHAR;
3743
3744    case OP_NOTPOSQUERY:              /* Possessive, zero or one */
3745    case OP_NOTPOSQUERYI:
3746    possessive = TRUE;
3747    min = 0;
3748    max = 1;
3749    ecode++;
3750    goto REPEATNOTCHAR;
3751
3752    case OP_NOTPOSUPTO:               /* Possessive, zero up to a stated maximum */
3753    case OP_NOTPOSUPTOI:
3754    possessive = TRUE;
3755    min = 0;
3756    max = GET2(ecode, 1);
3757    ecode += 1 + IMM2_SIZE;
3758    goto REPEATNOTCHAR;
3759
3760    case OP_NOTSTAR:                  /* Table-driven group; falls through to REPEATNOTCHAR */
3761    case OP_NOTSTARI:
3762    case OP_NOTMINSTAR:
3763    case OP_NOTMINSTARI:
3764    case OP_NOTPLUS:
3765    case OP_NOTPLUSI:
3766    case OP_NOTMINPLUS:
3767    case OP_NOTMINPLUSI:
3768    case OP_NOTQUERY:
3769    case OP_NOTQUERYI:
3770    case OP_NOTMINQUERY:
3771    case OP_NOTMINQUERYI:
3772    c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);   /* Offset from the first opcode of the caseless or caseful group */
3773    minimize = (c & 1) != 0;          /* Odd offsets select minimizing */
3774    min = rep_min[c];                 /* Pick up values from tables; */
3775    max = rep_max[c];                 /* zero for max => infinity */
3776    if (max == 0) max = INT_MAX;
3777
3778    /* Common code for all repeated negated single-character matches. */
3779
3780    REPEATNOTCHAR:                    /* Shared body for every repeated negated-character opcode */
3781    GETCHARINCTEST(fc, ecode);        /* fc = the character that must NOT appear; ecode moves past it */
3782
3783    /* The code is duplicated for the caseless and caseful cases, for speed,
3784    since matching characters is likely to be quite common. First, ensure the
3785    minimum number of matches are present. If min = max, continue at the same
3786    level without recursing. Otherwise, if minimizing, keep trying the rest of
3787    the expression and advancing one matching character if failing, up to the
3788    maximum. Alternatively, if maximizing, find the maximum number of
3789    characters and work backwards. */
3790
3791    DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3792      max, (char *)eptr));
3793
3794    if (op >= OP_NOTSTARI)     /* Caseless */
3795      {
3796#ifdef SUPPORT_UTF
3797#ifdef SUPPORT_UCP
3798      if (utf && fc > 127)
3799        foc = UCD_OTHERCASE(fc);      /* Other case from UCD_OTHERCASE above 127 */
3800#else
3801      if (utf && fc > 127)
3802        foc = fc;                     /* No UCP support: foc is just fc above 127 */
3803#endif /* SUPPORT_UCP */
3804      else
3805#endif /* SUPPORT_UTF */
3806        foc = TABLE_GET(fc, md->fcc, fc);   /* Otherwise looked up in md->fcc */
3807
3808#ifdef SUPPORT_UTF
3809      if (utf)
3810        {
3811        register pcre_uint32 d;       /* Current subject character */
3812        for (i = 1; i <= min; i++)    /* Mandatory repeats */
3813          {
3814          if (eptr >= md->end_subject)
3815            {
3816            SCHECK_PARTIAL();
3817            RRETURN(MATCH_NOMATCH);   /* Subject ran out before min was reached */
3818            }
3819          GETCHARINC(d, eptr);        /* Read one character and advance eptr */
3820          if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);   /* Excluded char in either case */
3821          }
3822        }
3823      else
3824#endif  /* SUPPORT_UTF */
3825      /* Not UTF mode */
3826        {
3827        for (i = 1; i <= min; i++)    /* Mandatory repeats, one unit per character */
3828          {
3829          if (eptr >= md->end_subject)
3830            {
3831            SCHECK_PARTIAL();
3832            RRETURN(MATCH_NOMATCH);
3833            }
3834          if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3835          eptr++;
3836          }
3837        }
3838
3839      if (min == max) continue;       /* Exact count reached: carry on without recursing */
3840
3841      if (minimize)
3842        {
3843#ifdef SUPPORT_UTF
3844        if (utf)
3845          {
3846          register pcre_uint32 d;
3847          for (fi = min;; fi++)       /* fi counts the repeats taken so far */
3848            {
3849            RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);   /* Try the rest of the pattern first */
3850            if (rrc != MATCH_NOMATCH) RRETURN(rrc);             /* Any other result is passed straight up */
3851            if (fi >= max) RRETURN(MATCH_NOMATCH);              /* Repeat limit reached */
3852            if (eptr >= md->end_subject)
3853              {
3854              SCHECK_PARTIAL();
3855              RRETURN(MATCH_NOMATCH);
3856              }
3857            GETCHARINC(d, eptr);      /* Take one more character, then loop to retry */
3858            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3859            }
3860          }
3861        else
3862#endif  /*SUPPORT_UTF */
3863        /* Not UTF mode */
3864          {
3865          for (fi = min;; fi++)
3866            {
3867            RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);   /* Try the rest of the pattern first */
3868            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3869            if (fi >= max) RRETURN(MATCH_NOMATCH);
3870            if (eptr >= md->end_subject)
3871              {
3872              SCHECK_PARTIAL();
3873              RRETURN(MATCH_NOMATCH);
3874              }
3875            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3876            eptr++;                   /* Take one more unit, then loop to retry */
3877            }
3878          }
3879        /* Control never gets here */
3880        }
3881
3882      /* Maximize case */
3883
3884      else
3885        {
3886        pp = eptr;                    /* Position after the mandatory repeats; backing up stops here */
3887
3888#ifdef SUPPORT_UTF
3889        if (utf)
3890          {
3891          register pcre_uint32 d;
3892          for (i = min; i < max; i++) /* Consume as many further characters as allowed */
3893            {
3894            int len = 1;              /* Length of the character at eptr, used to advance below */
3895            if (eptr >= md->end_subject)
3896              {
3897              SCHECK_PARTIAL();
3898              break;                  /* End of subject only ends the greedy scan */
3899              }
3900            GETCHARLEN(d, eptr, len); /* Read the character; eptr itself is advanced two lines down */
3901            if (fc == d || (unsigned int)foc == d) break;   /* Excluded character stops the scan */
3902            eptr += len;
3903            }
3904          if (possessive) continue;    /* No backtracking */
3905          for(;;)                     /* Give characters back one at a time */
3906            {
3907            if (eptr <= pp) goto TAIL_RECURSE;   /* Nothing left to give back; label is outside this section */
3908            RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3909            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910            eptr--;
3911            BACKCHAR(eptr);           /* Presumably moves eptr back to the start of that character */
3912            }
3913          }
3914        else
3915#endif  /* SUPPORT_UTF */
3916        /* Not UTF mode */
3917          {
3918          for (i = min; i < max; i++) /* Consume as many further units as allowed */
3919            {
3920            if (eptr >= md->end_subject)
3921              {
3922              SCHECK_PARTIAL();
3923              break;
3924              }
3925            if (fc == *eptr || foc == *eptr) break;
3926            eptr++;
3927            }
3928          if (possessive) continue;    /* No backtracking */
3929          for (;;)                    /* Give units back one at a time */
3930            {
3931            if (eptr == pp) goto TAIL_RECURSE;   /* Back at the floor; label is outside this section */
3932            RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3933            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3934            eptr--;
3935            }
3936          }
3937        /* Control never gets here */
3938        }
3939      }
3940
3941    /* Caseful comparisons */
3942
3943    else
3944      {
3945#ifdef SUPPORT_UTF
3946      if (utf)
3947        {
3948        register pcre_uint32 d;       /* Current subject character */
3949        for (i = 1; i <= min; i++)    /* Mandatory repeats */
3950          {
3951          if (eptr >= md->end_subject)
3952            {
3953            SCHECK_PARTIAL();
3954            RRETURN(MATCH_NOMATCH);
3955            }
3956          GETCHARINC(d, eptr);
3957          if (fc == d) RRETURN(MATCH_NOMATCH);   /* Only the exact character is excluded */
3958          }
3959        }
3960      else
3961#endif
3962      /* Not UTF mode */
3963        {
3964        for (i = 1; i <= min; i++)
3965          {
3966          if (eptr >= md->end_subject)
3967            {
3968            SCHECK_PARTIAL();
3969            RRETURN(MATCH_NOMATCH);
3970            }
3971          if (fc == *eptr++) RRETURN(MATCH_NOMATCH);   /* Compare, then advance */
3972          }
3973        }
3974
3975      if (min == max) continue;       /* Exact count reached: carry on without recursing */
3976
3977      if (minimize)
3978        {
3979#ifdef SUPPORT_UTF
3980        if (utf)
3981          {
3982          register pcre_uint32 d;
3983          for (fi = min;; fi++)       /* fi counts the repeats taken so far */
3984            {
3985            RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);   /* Try the rest of the pattern first */
3986            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3987            if (fi >= max) RRETURN(MATCH_NOMATCH);              /* Repeat limit reached */
3988            if (eptr >= md->end_subject)
3989              {
3990              SCHECK_PARTIAL();
3991              RRETURN(MATCH_NOMATCH);
3992              }
3993            GETCHARINC(d, eptr);
3994            if (fc == d) RRETURN(MATCH_NOMATCH);
3995            }
3996          }
3997        else
3998#endif
3999        /* Not UTF mode */
4000          {
4001          for (fi = min;; fi++)
4002            {
4003            RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);   /* Try the rest of the pattern first */
4004            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4005            if (fi >= max) RRETURN(MATCH_NOMATCH);
4006            if (eptr >= md->end_subject)
4007              {
4008              SCHECK_PARTIAL();
4009              RRETURN(MATCH_NOMATCH);
4010              }
4011            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4012            }
4013          }
4014        /* Control never gets here */
4015        }
4016
4017      /* Maximize case */
4018
4019      else
4020        {
4021        pp = eptr;                    /* Position after the mandatory repeats; backing up stops here */
4022
4023#ifdef SUPPORT_UTF
4024        if (utf)
4025          {
4026          register pcre_uint32 d;
4027          for (i = min; i < max; i++) /* Consume as many further characters as allowed */
4028            {
4029            int len = 1;              /* Length of the character at eptr, used to advance below */
4030            if (eptr >= md->end_subject)
4031              {
4032              SCHECK_PARTIAL();
4033              break;
4034              }
4035            GETCHARLEN(d, eptr, len); /* Read the character; eptr itself is advanced two lines down */
4036            if (fc == d) break;       /* Excluded character stops the scan */
4037            eptr += len;
4038            }
4039          if (possessive) continue;    /* No backtracking */
4040          for(;;)                     /* Give characters back one at a time */
4041            {
4042            if (eptr <= pp) goto TAIL_RECURSE;   /* Nothing left to give back; label is outside this section */
4043            RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4044            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4045            eptr--;
4046            BACKCHAR(eptr);           /* Presumably moves eptr back to the start of that character */
4047            }
4048          }
4049        else
4050#endif
4051        /* Not UTF mode */
4052          {
4053          for (i = min; i < max; i++) /* Consume as many further units as allowed */
4054            {
4055            if (eptr >= md->end_subject)
4056              {
4057              SCHECK_PARTIAL();
4058              break;
4059              }
4060            if (fc == *eptr) break;
4061            eptr++;
4062            }
4063          if (possessive) continue;    /* No backtracking */
4064          for (;;)                    /* Give units back one at a time */
4065            {
4066            if (eptr == pp) goto TAIL_RECURSE;   /* Back at the floor; label is outside this section */
4067            RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4068            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4069            eptr--;
4070            }
4071          }
4072        /* Control never gets here */
4073        }
4074      }
4075    /* Control never gets here */
4076
4077    /* Match a single character type repeatedly; several different opcodes
4078    share code. This is very similar to the code for single characters, but we
4079    repeat it in the interests of efficiency. */
4080
4081    case OP_TYPEEXACT:                /* Character type, fixed repeat count */
4082    min = max = GET2(ecode, 1);       /* Count operand read at offset 1 from the opcode */
4083    minimize = TRUE;                  /* NOTE(review): min == max here, so this flag is presumably never consulted - confirm */
4084    ecode += 1 + IMM2_SIZE;           /* Presumably skips the opcode plus that operand */
4085    goto REPEATTYPE;
4086
4087    case OP_TYPEUPTO:                 /* Character type, zero up to a stated maximum */
4088    case OP_TYPEMINUPTO:
4089    min = 0;
4090    max = GET2(ecode, 1);
4091    minimize = *ecode == OP_TYPEMINUPTO;   /* True only for the MIN opcode */
4092    ecode += 1 + IMM2_SIZE;
4093    goto REPEATTYPE;
4094
4095    case OP_TYPEPOSSTAR:              /* Possessive, zero or more */
4096    possessive = TRUE;
4097    min = 0;
4098    max = INT_MAX;                    /* INT_MAX stands for "no upper limit" */
4099    ecode++;                          /* No count operand: step over the opcode only */
4100    goto REPEATTYPE;
4101
4102    case OP_TYPEPOSPLUS:              /* Possessive, one or more */
4103    possessive = TRUE;
4104    min = 1;
4105    max = INT_MAX;
4106    ecode++;
4107    goto REPEATTYPE;
4108
4109    case OP_TYPEPOSQUERY:             /* Possessive, zero or one */
4110    possessive = TRUE;
4111    min = 0;
4112    max = 1;
4113    ecode++;
4114    goto REPEATTYPE;
4115
4116    case OP_TYPEPOSUPTO:              /* Possessive, zero up to a stated maximum */
4117    possessive = TRUE;
4118    min = 0;
4119    max = GET2(ecode, 1);
4120    ecode += 1 + IMM2_SIZE;
4121    goto REPEATTYPE;
4122
4123    case OP_TYPESTAR:                 /* Table-driven group; falls through to REPEATTYPE */
4124    case OP_TYPEMINSTAR:
4125    case OP_TYPEPLUS:
4126    case OP_TYPEMINPLUS:
4127    case OP_TYPEQUERY:
4128    case OP_TYPEMINQUERY:
4129    c = *ecode++ - OP_TYPESTAR;       /* Offset from OP_TYPESTAR, used as the table index */
4130    minimize = (c & 1) != 0;          /* Odd offsets select minimizing */
4131    min = rep_min[c];                 /* Pick up values from tables; */
4132    max = rep_max[c];                 /* zero for max => infinity */
4133    if (max == 0) max = INT_MAX;
4134
4135    /* Common code for all repeated single character type matches. Note that
4136    in UTF-8 mode, '.' matches a character of any length, but for the other
4137    character types, the valid characters are all one-byte long. */
4138
4139    REPEATTYPE:
4140    ctype = *ecode++;      /* Code for the character type */
4141
4142#ifdef SUPPORT_UCP
4143    if (ctype == OP_PROP || ctype == OP_NOTPROP)
4144      {
4145      prop_fail_result = ctype == OP_NOTPROP;
4146      prop_type = *ecode++;
4147      prop_value = *ecode++;
4148      }
4149    else prop_type = -1;
4150#endif
4151
4152    /* First, ensure the minimum number of matches are present. Use inline
4153    code for maximizing the speed, and do the type test once at the start
4154    (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4155    is tidier. Also separate the UCP code, which can be the same for both UTF-8
4156    and single-bytes. */
4157
4158    if (min > 0)
4159      {
4160#ifdef SUPPORT_UCP
4161      if (prop_type >= 0)
4162        {
4163        switch(prop_type)
4164          {
4165          case PT_ANY:
4166          if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4167          for (i = 1; i <= min; i++)
4168            {
4169            if (eptr >= md->end_subject)
4170              {
4171              SCHECK_PARTIAL();
4172              RRETURN(MATCH_NOMATCH);
4173              }
4174            GETCHARINCTEST(c, eptr);
4175            }
4176          break;
4177
4178          case PT_LAMP:
4179          for (i = 1; i <= min; i++)
4180            {
4181            int chartype;
4182            if (eptr >= md->end_subject)
4183              {
4184              SCHECK_PARTIAL();
4185              RRETURN(MATCH_NOMATCH);
4186              }
4187            GETCHARINCTEST(c, eptr);
4188            chartype = UCD_CHARTYPE(c);
4189            if ((chartype == ucp_Lu ||
4190                 chartype == ucp_Ll ||
4191                 chartype == ucp_Lt) == prop_fail_result)
4192              RRETURN(MATCH_NOMATCH);
4193            }
4194          break;
4195
4196          case PT_GC:
4197          for (i = 1; i <= min; i++)
4198            {
4199            if (eptr >= md->end_subject)
4200              {
4201              SCHECK_PARTIAL();
4202              RRETURN(MATCH_NOMATCH);
4203              }
4204            GETCHARINCTEST(c, eptr);
4205            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4206              RRETURN(MATCH_NOMATCH);
4207            }
4208          break;
4209
4210          case PT_PC:
4211          for (i = 1; i <= min; i++)
4212            {
4213            if (eptr >= md->end_subject)
4214              {
4215              SCHECK_PARTIAL();
4216              RRETURN(MATCH_NOMATCH);
4217              }
4218            GETCHARINCTEST(c, eptr);
4219            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4220              RRETURN(MATCH_NOMATCH);
4221            }
4222          break;
4223
4224          case PT_SC:
4225          for (i = 1; i <= min; i++)
4226            {
4227            if (eptr >= md->end_subject)
4228              {
4229              SCHECK_PARTIAL();
4230              RRETURN(MATCH_NOMATCH);
4231              }
4232            GETCHARINCTEST(c, eptr);
4233            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4234              RRETURN(MATCH_NOMATCH);
4235            }
4236          break;
4237
4238          case PT_ALNUM:
4239          for (i = 1; i <= min; i++)
4240            {
4241            int category;
4242            if (eptr >= md->end_subject)
4243              {
4244              SCHECK_PARTIAL();
4245              RRETURN(MATCH_NOMATCH);
4246              }
4247            GETCHARINCTEST(c, eptr);
4248            category = UCD_CATEGORY(c);
4249            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4250              RRETURN(MATCH_NOMATCH);
4251            }
4252          break;
4253
4254          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4255          which means that Perl space and POSIX space are now identical. PCRE
4256          was changed at release 8.34. */
4257
4258          case PT_SPACE:    /* Perl space */
4259          case PT_PXSPACE:  /* POSIX space */
4260          for (i = 1; i <= min; i++)
4261            {
4262            if (eptr >= md->end_subject)
4263              {
4264              SCHECK_PARTIAL();
4265              RRETURN(MATCH_NOMATCH);
4266              }
4267            GETCHARINCTEST(c, eptr);
4268            switch(c)
4269              {
4270              HSPACE_CASES:
4271              VSPACE_CASES:
4272              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4273              break;
4274
4275              default:
4276              if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4277                RRETURN(MATCH_NOMATCH);
4278              break;
4279              }
4280            }
4281          break;
4282
4283          case PT_WORD:
4284          for (i = 1; i <= min; i++)
4285            {
4286            int category;
4287            if (eptr >= md->end_subject)
4288              {
4289              SCHECK_PARTIAL();
4290              RRETURN(MATCH_NOMATCH);
4291              }
4292            GETCHARINCTEST(c, eptr);
4293            category = UCD_CATEGORY(c);
4294            if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4295                   == prop_fail_result)
4296              RRETURN(MATCH_NOMATCH);
4297            }
4298          break;
4299
4300          case PT_CLIST:
4301          for (i = 1; i <= min; i++)
4302            {
4303            const pcre_uint32 *cp;
4304            if (eptr >= md->end_subject)
4305              {
4306              SCHECK_PARTIAL();
4307              RRETURN(MATCH_NOMATCH);
4308              }
4309            GETCHARINCTEST(c, eptr);
4310            cp = PRIV(ucd_caseless_sets) + prop_value;
4311            for (;;)
4312              {
4313              if (c < *cp)
4314                { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4315              if (c == *cp++)
4316                { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4317              }
4318            }
4319          break;
4320
4321          case PT_UCNC:
4322          for (i = 1; i <= min; i++)
4323            {
4324            if (eptr >= md->end_subject)
4325              {
4326              SCHECK_PARTIAL();
4327              RRETURN(MATCH_NOMATCH);
4328              }
4329            GETCHARINCTEST(c, eptr);
4330            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4331                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4332                 c >= 0xe000) == prop_fail_result)
4333              RRETURN(MATCH_NOMATCH);
4334            }
4335          break;
4336
4337          /* This should not occur */
4338
4339          default:
4340          RRETURN(PCRE_ERROR_INTERNAL);
4341          }
4342        }
4343
4344      /* Match extended Unicode sequences. We will get here only if the
4345      support is in the binary; otherwise a compile-time error occurs. */
4346
4347      else if (ctype == OP_EXTUNI)
4348        {
4349        for (i = 1; i <= min; i++)
4350          {
4351          if (eptr >= md->end_subject)
4352            {
4353            SCHECK_PARTIAL();
4354            RRETURN(MATCH_NOMATCH);
4355            }
4356          else
4357            {
4358            int lgb, rgb;
4359            GETCHARINCTEST(c, eptr);
4360            lgb = UCD_GRAPHBREAK(c);
4361           while (eptr < md->end_subject)
4362              {
4363              int len = 1;
4364              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4365              rgb = UCD_GRAPHBREAK(c);
4366              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4367              lgb = rgb;
4368              eptr += len;
4369              }
4370            }
4371          CHECK_PARTIAL();
4372          }
4373        }
4374
4375      else
4376#endif     /* SUPPORT_UCP */
4377
4378/* Handle all other cases when the coding is UTF-8 */
4379
4380#ifdef SUPPORT_UTF
4381      if (utf) switch(ctype)
4382        {
4383        case OP_ANY:
4384        for (i = 1; i <= min; i++)
4385          {
4386          if (eptr >= md->end_subject)
4387            {
4388            SCHECK_PARTIAL();
4389            RRETURN(MATCH_NOMATCH);
4390            }
4391          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4392          if (md->partial != 0 &&
4393              eptr + 1 >= md->end_subject &&
4394              NLBLOCK->nltype == NLTYPE_FIXED &&
4395              NLBLOCK->nllen == 2 &&
4396              UCHAR21(eptr) == NLBLOCK->nl[0])
4397            {
4398            md->hitend = TRUE;
4399            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4400            }
4401          eptr++;
4402          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4403          }
4404        break;
4405
4406        case OP_ALLANY:
4407        for (i = 1; i <= min; i++)
4408          {
4409          if (eptr >= md->end_subject)
4410            {
4411            SCHECK_PARTIAL();
4412            RRETURN(MATCH_NOMATCH);
4413            }
4414          eptr++;
4415          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4416          }
4417        break;
4418
4419        case OP_ANYBYTE:
4420        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4421        eptr += min;
4422        break;
4423
4424        case OP_ANYNL:
4425        for (i = 1; i <= min; i++)
4426          {
4427          if (eptr >= md->end_subject)
4428            {
4429            SCHECK_PARTIAL();
4430            RRETURN(MATCH_NOMATCH);
4431            }
4432          GETCHARINC(c, eptr);
4433          switch(c)
4434            {
4435            default: RRETURN(MATCH_NOMATCH);
4436
4437            case CHAR_CR:
4438            if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4439            break;
4440
4441            case CHAR_LF:
4442            break;
4443
4444            case CHAR_VT:
4445            case CHAR_FF:
4446            case CHAR_NEL:
4447#ifndef EBCDIC
4448            case 0x2028:
4449            case 0x2029:
4450#endif  /* Not EBCDIC */
4451            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4452            break;
4453            }
4454          }
4455        break;
4456
4457        case OP_NOT_HSPACE:
4458        for (i = 1; i <= min; i++)
4459          {
4460          if (eptr >= md->end_subject)
4461            {
4462            SCHECK_PARTIAL();
4463            RRETURN(MATCH_NOMATCH);
4464            }
4465          GETCHARINC(c, eptr);
4466          switch(c)
4467            {
4468            HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4469            default: break;
4470            }
4471          }
4472        break;
4473
4474        case OP_HSPACE:
4475        for (i = 1; i <= min; i++)
4476          {
4477          if (eptr >= md->end_subject)
4478            {
4479            SCHECK_PARTIAL();
4480            RRETURN(MATCH_NOMATCH);
4481            }
4482          GETCHARINC(c, eptr);
4483          switch(c)
4484            {
4485            HSPACE_CASES: break;  /* Byte and multibyte cases */
4486            default: RRETURN(MATCH_NOMATCH);
4487            }
4488          }
4489        break;
4490
4491        case OP_NOT_VSPACE:
4492        for (i = 1; i <= min; i++)
4493          {
4494          if (eptr >= md->end_subject)
4495            {
4496            SCHECK_PARTIAL();
4497            RRETURN(MATCH_NOMATCH);
4498            }
4499          GETCHARINC(c, eptr);
4500          switch(c)
4501            {
4502            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4503            default: break;
4504            }
4505          }
4506        break;
4507
4508        case OP_VSPACE:
4509        for (i = 1; i <= min; i++)
4510          {
4511          if (eptr >= md->end_subject)
4512            {
4513            SCHECK_PARTIAL();
4514            RRETURN(MATCH_NOMATCH);
4515            }
4516          GETCHARINC(c, eptr);
4517          switch(c)
4518            {
4519            VSPACE_CASES: break;
4520            default: RRETURN(MATCH_NOMATCH);
4521            }
4522          }
4523        break;
4524
4525        case OP_NOT_DIGIT:
4526        for (i = 1; i <= min; i++)
4527          {
4528          if (eptr >= md->end_subject)
4529            {
4530            SCHECK_PARTIAL();
4531            RRETURN(MATCH_NOMATCH);
4532            }
4533          GETCHARINC(c, eptr);
4534          if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4535            RRETURN(MATCH_NOMATCH);
4536          }
4537        break;
4538
4539        case OP_DIGIT:
4540        for (i = 1; i <= min; i++)
4541          {
4542          pcre_uint32 cc;
4543          if (eptr >= md->end_subject)
4544            {
4545            SCHECK_PARTIAL();
4546            RRETURN(MATCH_NOMATCH);
4547            }
4548          cc = UCHAR21(eptr);
4549          if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4550            RRETURN(MATCH_NOMATCH);
4551          eptr++;
4552          /* No need to skip more bytes - we know it's a 1-byte character */
4553          }
4554        break;
4555
4556        case OP_NOT_WHITESPACE:
4557        for (i = 1; i <= min; i++)
4558          {
4559          pcre_uint32 cc;
4560          if (eptr >= md->end_subject)
4561            {
4562            SCHECK_PARTIAL();
4563            RRETURN(MATCH_NOMATCH);
4564            }
4565          cc = UCHAR21(eptr);
4566          if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4567            RRETURN(MATCH_NOMATCH);
4568          eptr++;
4569          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4570          }
4571        break;
4572
4573        case OP_WHITESPACE:
4574        for (i = 1; i <= min; i++)
4575          {
4576          pcre_uint32 cc;
4577          if (eptr >= md->end_subject)
4578            {
4579            SCHECK_PARTIAL();
4580            RRETURN(MATCH_NOMATCH);
4581            }
4582          cc = UCHAR21(eptr);
4583          if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4584            RRETURN(MATCH_NOMATCH);
4585          eptr++;
4586          /* No need to skip more bytes - we know it's a 1-byte character */
4587          }
4588        break;
4589
4590        case OP_NOT_WORDCHAR:
4591        for (i = 1; i <= min; i++)
4592          {
4593          pcre_uint32 cc;
4594          if (eptr >= md->end_subject)
4595            {
4596            SCHECK_PARTIAL();
4597            RRETURN(MATCH_NOMATCH);
4598            }
4599          cc = UCHAR21(eptr);
4600          if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4601            RRETURN(MATCH_NOMATCH);
4602          eptr++;
4603          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4604          }
4605        break;
4606
4607        case OP_WORDCHAR:
4608        for (i = 1; i <= min; i++)
4609          {
4610          pcre_uint32 cc;
4611          if (eptr >= md->end_subject)
4612            {
4613            SCHECK_PARTIAL();
4614            RRETURN(MATCH_NOMATCH);
4615            }
4616          cc = UCHAR21(eptr);
4617          if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4618            RRETURN(MATCH_NOMATCH);
4619          eptr++;
4620          /* No need to skip more bytes - we know it's a 1-byte character */
4621          }
4622        break;
4623
4624        default:
4625        RRETURN(PCRE_ERROR_INTERNAL);
4626        }  /* End switch(ctype) */
4627
4628      else
4629#endif     /* SUPPORT_UTF */
4630
4631      /* Code for the non-UTF-8 case for minimum matching of operators other
4632      than OP_PROP and OP_NOTPROP. */
4633
4634      switch(ctype)
4635        {
4636        case OP_ANY:
4637        for (i = 1; i <= min; i++)
4638          {
4639          if (eptr >= md->end_subject)
4640            {
4641            SCHECK_PARTIAL();
4642            RRETURN(MATCH_NOMATCH);
4643            }
4644          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4645          if (md->partial != 0 &&
4646              eptr + 1 >= md->end_subject &&
4647              NLBLOCK->nltype == NLTYPE_FIXED &&
4648              NLBLOCK->nllen == 2 &&
4649              *eptr == NLBLOCK->nl[0])
4650            {
4651            md->hitend = TRUE;
4652            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4653            }
4654          eptr++;
4655          }
4656        break;
4657
4658        case OP_ALLANY:
4659        if (eptr > md->end_subject - min)
4660          {
4661          SCHECK_PARTIAL();
4662          RRETURN(MATCH_NOMATCH);
4663          }
4664        eptr += min;
4665        break;
4666
4667        case OP_ANYBYTE:
4668        if (eptr > md->end_subject - min)
4669          {
4670          SCHECK_PARTIAL();
4671          RRETURN(MATCH_NOMATCH);
4672          }
4673        eptr += min;
4674        break;
4675
4676        case OP_ANYNL:
4677        for (i = 1; i <= min; i++)
4678          {
4679          if (eptr >= md->end_subject)
4680            {
4681            SCHECK_PARTIAL();
4682            RRETURN(MATCH_NOMATCH);
4683            }
4684          switch(*eptr++)
4685            {
4686            default: RRETURN(MATCH_NOMATCH);
4687
4688            case CHAR_CR:
4689            if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4690            break;
4691
4692            case CHAR_LF:
4693            break;
4694
4695            case CHAR_VT:
4696            case CHAR_FF:
4697            case CHAR_NEL:
4698#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4699            case 0x2028:
4700            case 0x2029:
4701#endif
4702            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4703            break;
4704            }
4705          }
4706        break;
4707
4708        case OP_NOT_HSPACE:
4709        for (i = 1; i <= min; i++)
4710          {
4711          if (eptr >= md->end_subject)
4712            {
4713            SCHECK_PARTIAL();
4714            RRETURN(MATCH_NOMATCH);
4715            }
4716          switch(*eptr++)
4717            {
4718            default: break;
4719            HSPACE_BYTE_CASES:
4720#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4721            HSPACE_MULTIBYTE_CASES:
4722#endif
4723            RRETURN(MATCH_NOMATCH);
4724            }
4725          }
4726        break;
4727
4728        case OP_HSPACE:
4729        for (i = 1; i <= min; i++)
4730          {
4731          if (eptr >= md->end_subject)
4732            {
4733            SCHECK_PARTIAL();
4734            RRETURN(MATCH_NOMATCH);
4735            }
4736          switch(*eptr++)
4737            {
4738            default: RRETURN(MATCH_NOMATCH);
4739            HSPACE_BYTE_CASES:
4740#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4741            HSPACE_MULTIBYTE_CASES:
4742#endif
4743            break;
4744            }
4745          }
4746        break;
4747
4748        case OP_NOT_VSPACE:
4749        for (i = 1; i <= min; i++)
4750          {
4751          if (eptr >= md->end_subject)
4752            {
4753            SCHECK_PARTIAL();
4754            RRETURN(MATCH_NOMATCH);
4755            }
4756          switch(*eptr++)
4757            {
4758            VSPACE_BYTE_CASES:
4759#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4760            VSPACE_MULTIBYTE_CASES:
4761#endif
4762            RRETURN(MATCH_NOMATCH);
4763            default: break;
4764            }
4765          }
4766        break;
4767
4768        case OP_VSPACE:
4769        for (i = 1; i <= min; i++)
4770          {
4771          if (eptr >= md->end_subject)
4772            {
4773            SCHECK_PARTIAL();
4774            RRETURN(MATCH_NOMATCH);
4775            }
4776          switch(*eptr++)
4777            {
4778            default: RRETURN(MATCH_NOMATCH);
4779            VSPACE_BYTE_CASES:
4780#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4781            VSPACE_MULTIBYTE_CASES:
4782#endif
4783            break;
4784            }
4785          }
4786        break;
4787
4788        case OP_NOT_DIGIT:
4789        for (i = 1; i <= min; i++)
4790          {
4791          if (eptr >= md->end_subject)
4792            {
4793            SCHECK_PARTIAL();
4794            RRETURN(MATCH_NOMATCH);
4795            }
4796          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4797            RRETURN(MATCH_NOMATCH);
4798          eptr++;
4799          }
4800        break;
4801
4802        case OP_DIGIT:
4803        for (i = 1; i <= min; i++)
4804          {
4805          if (eptr >= md->end_subject)
4806            {
4807            SCHECK_PARTIAL();
4808            RRETURN(MATCH_NOMATCH);
4809            }
4810          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4811            RRETURN(MATCH_NOMATCH);
4812          eptr++;
4813          }
4814        break;
4815
4816        case OP_NOT_WHITESPACE:
4817        for (i = 1; i <= min; i++)
4818          {
4819          if (eptr >= md->end_subject)
4820            {
4821            SCHECK_PARTIAL();
4822            RRETURN(MATCH_NOMATCH);
4823            }
4824          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4825            RRETURN(MATCH_NOMATCH);
4826          eptr++;
4827          }
4828        break;
4829
4830        case OP_WHITESPACE:
4831        for (i = 1; i <= min; i++)
4832          {
4833          if (eptr >= md->end_subject)
4834            {
4835            SCHECK_PARTIAL();
4836            RRETURN(MATCH_NOMATCH);
4837            }
4838          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4839            RRETURN(MATCH_NOMATCH);
4840          eptr++;
4841          }
4842        break;
4843
4844        case OP_NOT_WORDCHAR:
4845        for (i = 1; i <= min; i++)
4846          {
4847          if (eptr >= md->end_subject)
4848            {
4849            SCHECK_PARTIAL();
4850            RRETURN(MATCH_NOMATCH);
4851            }
4852          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4853            RRETURN(MATCH_NOMATCH);
4854          eptr++;
4855          }
4856        break;
4857
4858        case OP_WORDCHAR:
4859        for (i = 1; i <= min; i++)
4860          {
4861          if (eptr >= md->end_subject)
4862            {
4863            SCHECK_PARTIAL();
4864            RRETURN(MATCH_NOMATCH);
4865            }
4866          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4867            RRETURN(MATCH_NOMATCH);
4868          eptr++;
4869          }
4870        break;
4871
4872        default:
4873        RRETURN(PCRE_ERROR_INTERNAL);
4874        }
4875      }
4876
4877    /* If min = max, continue at the same level without recursing */
4878
4879    if (min == max) continue;
4880
4881    /* If minimizing, we have to test the rest of the pattern before each
4882    subsequent match. Again, separate the UTF-8 case for speed, and also
4883    separate the UCP cases. */
4884
4885    if (minimize)
4886      {
4887#ifdef SUPPORT_UCP
4888      if (prop_type >= 0)
4889        {
4890        switch(prop_type)
4891          {
4892          case PT_ANY:
4893          for (fi = min;; fi++)
4894            {
4895            RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4896            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4897            if (fi >= max) RRETURN(MATCH_NOMATCH);
4898            if (eptr >= md->end_subject)
4899              {
4900              SCHECK_PARTIAL();
4901              RRETURN(MATCH_NOMATCH);
4902              }
4903            GETCHARINCTEST(c, eptr);
4904            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4905            }
4906          /* Control never gets here */
4907
4908          case PT_LAMP:
4909          for (fi = min;; fi++)
4910            {
4911            int chartype;
4912            RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4913            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4914            if (fi >= max) RRETURN(MATCH_NOMATCH);
4915            if (eptr >= md->end_subject)
4916              {
4917              SCHECK_PARTIAL();
4918              RRETURN(MATCH_NOMATCH);
4919              }
4920            GETCHARINCTEST(c, eptr);
4921            chartype = UCD_CHARTYPE(c);
4922            if ((chartype == ucp_Lu ||
4923                 chartype == ucp_Ll ||
4924                 chartype == ucp_Lt) == prop_fail_result)
4925              RRETURN(MATCH_NOMATCH);
4926            }
4927          /* Control never gets here */
4928
4929          case PT_GC:
4930          for (fi = min;; fi++)
4931            {
4932            RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4933            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4934            if (fi >= max) RRETURN(MATCH_NOMATCH);
4935            if (eptr >= md->end_subject)
4936              {
4937              SCHECK_PARTIAL();
4938              RRETURN(MATCH_NOMATCH);
4939              }
4940            GETCHARINCTEST(c, eptr);
4941            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4942              RRETURN(MATCH_NOMATCH);
4943            }
4944          /* Control never gets here */
4945
4946          case PT_PC:
4947          for (fi = min;; fi++)
4948            {
4949            RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4950            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4951            if (fi >= max) RRETURN(MATCH_NOMATCH);
4952            if (eptr >= md->end_subject)
4953              {
4954              SCHECK_PARTIAL();
4955              RRETURN(MATCH_NOMATCH);
4956              }
4957            GETCHARINCTEST(c, eptr);
4958            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4959              RRETURN(MATCH_NOMATCH);
4960            }
4961          /* Control never gets here */
4962
4963          case PT_SC:
4964          for (fi = min;; fi++)
4965            {
4966            RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4967            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4968            if (fi >= max) RRETURN(MATCH_NOMATCH);
4969            if (eptr >= md->end_subject)
4970              {
4971              SCHECK_PARTIAL();
4972              RRETURN(MATCH_NOMATCH);
4973              }
4974            GETCHARINCTEST(c, eptr);
4975            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4976              RRETURN(MATCH_NOMATCH);
4977            }
4978          /* Control never gets here */
4979
4980          case PT_ALNUM:
4981          for (fi = min;; fi++)
4982            {
4983            int category;
4984            RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4985            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4986            if (fi >= max) RRETURN(MATCH_NOMATCH);
4987            if (eptr >= md->end_subject)
4988              {
4989              SCHECK_PARTIAL();
4990              RRETURN(MATCH_NOMATCH);
4991              }
4992            GETCHARINCTEST(c, eptr);
4993            category = UCD_CATEGORY(c);
4994            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4995              RRETURN(MATCH_NOMATCH);
4996            }
4997          /* Control never gets here */
4998
4999          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5000          which means that Perl space and POSIX space are now identical. PCRE
5001          was changed at release 8.34. */
5002
5003          case PT_SPACE:    /* Perl space */
5004          case PT_PXSPACE:  /* POSIX space */
5005          for (fi = min;; fi++)
5006            {
5007            RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
5008            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5009            if (fi >= max) RRETURN(MATCH_NOMATCH);
5010            if (eptr >= md->end_subject)
5011              {
5012              SCHECK_PARTIAL();
5013              RRETURN(MATCH_NOMATCH);
5014              }
5015            GETCHARINCTEST(c, eptr);
5016            switch(c)
5017              {
5018              HSPACE_CASES:
5019              VSPACE_CASES:
5020              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5021              break;
5022
5023              default:
5024              if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5025                RRETURN(MATCH_NOMATCH);
5026              break;
5027              }
5028            }
5029          /* Control never gets here */
5030
5031          case PT_WORD:
5032          for (fi = min;; fi++)
5033            {
5034            int category;
5035            RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
5036            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5037            if (fi >= max) RRETURN(MATCH_NOMATCH);
5038            if (eptr >= md->end_subject)
5039              {
5040              SCHECK_PARTIAL();
5041              RRETURN(MATCH_NOMATCH);
5042              }
5043            GETCHARINCTEST(c, eptr);
5044            category = UCD_CATEGORY(c);
5045            if ((category == ucp_L ||
5046                 category == ucp_N ||
5047                 c == CHAR_UNDERSCORE)
5048                   == prop_fail_result)
5049              RRETURN(MATCH_NOMATCH);
5050            }
5051          /* Control never gets here */
5052
5053          case PT_CLIST:
5054          for (fi = min;; fi++)
5055            {
5056            const pcre_uint32 *cp;
5057            RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5058            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5059            if (fi >= max) RRETURN(MATCH_NOMATCH);
5060            if (eptr >= md->end_subject)
5061              {
5062              SCHECK_PARTIAL();
5063              RRETURN(MATCH_NOMATCH);
5064              }
5065            GETCHARINCTEST(c, eptr);
5066            cp = PRIV(ucd_caseless_sets) + prop_value;
5067            for (;;)
5068              {
5069              if (c < *cp)
5070                { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5071              if (c == *cp++)
5072                { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5073              }
5074            }
5075          /* Control never gets here */
5076
5077          case PT_UCNC:
5078          for (fi = min;; fi++)
5079            {
5080            RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5081            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5082            if (fi >= max) RRETURN(MATCH_NOMATCH);
5083            if (eptr >= md->end_subject)
5084              {
5085              SCHECK_PARTIAL();
5086              RRETURN(MATCH_NOMATCH);
5087              }
5088            GETCHARINCTEST(c, eptr);
5089            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5090                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5091                 c >= 0xe000) == prop_fail_result)
5092              RRETURN(MATCH_NOMATCH);
5093            }
5094          /* Control never gets here */
5095
5096          /* This should never occur */
5097          default:
5098          RRETURN(PCRE_ERROR_INTERNAL);
5099          }
5100        }
5101
5102      /* Match extended Unicode sequences. We will get here only if the
5103      support is in the binary; otherwise a compile-time error occurs. */
5104
5105      else if (ctype == OP_EXTUNI)
5106        {
5107        for (fi = min;; fi++)
5108          {
5109          RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5110          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5111          if (fi >= max) RRETURN(MATCH_NOMATCH);
5112          if (eptr >= md->end_subject)
5113            {
5114            SCHECK_PARTIAL();
5115            RRETURN(MATCH_NOMATCH);
5116            }
5117          else
5118            {
5119            int lgb, rgb;
5120            GETCHARINCTEST(c, eptr);
5121            lgb = UCD_GRAPHBREAK(c);
5122            while (eptr < md->end_subject)
5123              {
5124              int len = 1;
5125              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5126              rgb = UCD_GRAPHBREAK(c);
5127              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5128              lgb = rgb;
5129              eptr += len;
5130              }
5131            }
5132          CHECK_PARTIAL();
5133          }
5134        }
5135      else
5136#endif     /* SUPPORT_UCP */
5137
5138#ifdef SUPPORT_UTF
5139      if (utf)
5140        {
5141        for (fi = min;; fi++)
5142          {
5143          RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5144          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5145          if (fi >= max) RRETURN(MATCH_NOMATCH);
5146          if (eptr >= md->end_subject)
5147            {
5148            SCHECK_PARTIAL();
5149            RRETURN(MATCH_NOMATCH);
5150            }
5151          if (ctype == OP_ANY && IS_NEWLINE(eptr))
5152            RRETURN(MATCH_NOMATCH);
5153          GETCHARINC(c, eptr);
5154          switch(ctype)
5155            {
5156            case OP_ANY:               /* This is the non-NL case */
5157            if (md->partial != 0 &&    /* Take care with CRLF partial */
5158                eptr >= md->end_subject &&
5159                NLBLOCK->nltype == NLTYPE_FIXED &&
5160                NLBLOCK->nllen == 2 &&
5161                c == NLBLOCK->nl[0])
5162              {
5163              md->hitend = TRUE;
5164              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5165              }
5166            break;
5167
5168            case OP_ALLANY:
5169            case OP_ANYBYTE:
5170            break;
5171
5172            case OP_ANYNL:
5173            switch(c)
5174              {
5175              default: RRETURN(MATCH_NOMATCH);
5176              case CHAR_CR:
5177              if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5178              break;
5179
5180              case CHAR_LF:
5181              break;
5182
5183              case CHAR_VT:
5184              case CHAR_FF:
5185              case CHAR_NEL:
5186#ifndef EBCDIC
5187              case 0x2028:
5188              case 0x2029:
5189#endif  /* Not EBCDIC */
5190              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5191              break;
5192              }
5193            break;
5194
5195            case OP_NOT_HSPACE:
5196            switch(c)
5197              {
5198              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5199              default: break;
5200              }
5201            break;
5202
5203            case OP_HSPACE:
5204            switch(c)
5205              {
5206              HSPACE_CASES: break;
5207              default: RRETURN(MATCH_NOMATCH);
5208              }
5209            break;
5210
5211            case OP_NOT_VSPACE:
5212            switch(c)
5213              {
5214              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5215              default: break;
5216              }
5217            break;
5218
5219            case OP_VSPACE:
5220            switch(c)
5221              {
5222              VSPACE_CASES: break;
5223              default: RRETURN(MATCH_NOMATCH);
5224              }
5225            break;
5226
5227            case OP_NOT_DIGIT:
5228            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5229              RRETURN(MATCH_NOMATCH);
5230            break;
5231
5232            case OP_DIGIT:
5233            if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5234              RRETURN(MATCH_NOMATCH);
5235            break;
5236
5237            case OP_NOT_WHITESPACE:
5238            if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5239              RRETURN(MATCH_NOMATCH);
5240            break;
5241
5242            case OP_WHITESPACE:
5243            if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5244              RRETURN(MATCH_NOMATCH);
5245            break;
5246
5247            case OP_NOT_WORDCHAR:
5248            if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5249              RRETURN(MATCH_NOMATCH);
5250            break;
5251
5252            case OP_WORDCHAR:
5253            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5254              RRETURN(MATCH_NOMATCH);
5255            break;
5256
5257            default:
5258            RRETURN(PCRE_ERROR_INTERNAL);
5259            }
5260          }
5261        }
5262      else
5263#endif
5264      /* Not UTF mode */
5265        {
5266        for (fi = min;; fi++)
5267          {
5268          RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5269          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5270          if (fi >= max) RRETURN(MATCH_NOMATCH);
5271          if (eptr >= md->end_subject)
5272            {
5273            SCHECK_PARTIAL();
5274            RRETURN(MATCH_NOMATCH);
5275            }
5276          if (ctype == OP_ANY && IS_NEWLINE(eptr))
5277            RRETURN(MATCH_NOMATCH);
5278          c = *eptr++;
5279          switch(ctype)
5280            {
5281            case OP_ANY:               /* This is the non-NL case */
5282            if (md->partial != 0 &&    /* Take care with CRLF partial */
5283                eptr >= md->end_subject &&
5284                NLBLOCK->nltype == NLTYPE_FIXED &&
5285                NLBLOCK->nllen == 2 &&
5286                c == NLBLOCK->nl[0])
5287              {
5288              md->hitend = TRUE;
5289              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5290              }
5291            break;
5292
5293            case OP_ALLANY:
5294            case OP_ANYBYTE:
5295            break;
5296
5297            case OP_ANYNL:
5298            switch(c)
5299              {
5300              default: RRETURN(MATCH_NOMATCH);
5301              case CHAR_CR:
5302              if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5303              break;
5304
5305              case CHAR_LF:
5306              break;
5307
5308              case CHAR_VT:
5309              case CHAR_FF:
5310              case CHAR_NEL:
5311#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5312              case 0x2028:
5313              case 0x2029:
5314#endif
5315              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5316              break;
5317              }
5318            break;
5319
5320            case OP_NOT_HSPACE:
5321            switch(c)
5322              {
5323              default: break;
5324              HSPACE_BYTE_CASES:
5325#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5326              HSPACE_MULTIBYTE_CASES:
5327#endif
5328              RRETURN(MATCH_NOMATCH);
5329              }
5330            break;
5331
5332            case OP_HSPACE:
5333            switch(c)
5334              {
5335              default: RRETURN(MATCH_NOMATCH);
5336              HSPACE_BYTE_CASES:
5337#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5338              HSPACE_MULTIBYTE_CASES:
5339#endif
5340              break;
5341              }
5342            break;
5343
5344            case OP_NOT_VSPACE:
5345            switch(c)
5346              {
5347              default: break;
5348              VSPACE_BYTE_CASES:
5349#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5350              VSPACE_MULTIBYTE_CASES:
5351#endif
5352              RRETURN(MATCH_NOMATCH);
5353              }
5354            break;
5355
5356            case OP_VSPACE:
5357            switch(c)
5358              {
5359              default: RRETURN(MATCH_NOMATCH);
5360              VSPACE_BYTE_CASES:
5361#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5362              VSPACE_MULTIBYTE_CASES:
5363#endif
5364              break;
5365              }
5366            break;
5367
5368            case OP_NOT_DIGIT:
5369            if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5370            break;
5371
5372            case OP_DIGIT:
5373            if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5374            break;
5375
5376            case OP_NOT_WHITESPACE:
5377            if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5378            break;
5379
5380            case OP_WHITESPACE:
5381            if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5382            break;
5383
5384            case OP_NOT_WORDCHAR:
5385            if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5386            break;
5387
5388            case OP_WORDCHAR:
5389            if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5390            break;
5391
5392            default:
5393            RRETURN(PCRE_ERROR_INTERNAL);
5394            }
5395          }
5396        }
5397      /* Control never gets here */
5398      }
5399
5400    /* If maximizing, it is worth using inline code for speed, doing the type
5401    test once at the start (i.e. keep it out of the loop). Again, keep the
5402    UTF-8 and UCP stuff separate. */
5403
5404    else
5405      {
5406      pp = eptr;  /* Remember where we started */
5407
5408#ifdef SUPPORT_UCP
5409      if (prop_type >= 0)
5410        {
5411        switch(prop_type)
5412          {
5413          case PT_ANY:
5414          for (i = min; i < max; i++)
5415            {
5416            int len = 1;
5417            if (eptr >= md->end_subject)
5418              {
5419              SCHECK_PARTIAL();
5420              break;
5421              }
5422            GETCHARLENTEST(c, eptr, len);
5423            if (prop_fail_result) break;
5424            eptr+= len;
5425            }
5426          break;
5427
5428          case PT_LAMP:
5429          for (i = min; i < max; i++)
5430            {
5431            int chartype;
5432            int len = 1;
5433            if (eptr >= md->end_subject)
5434              {
5435              SCHECK_PARTIAL();
5436              break;
5437              }
5438            GETCHARLENTEST(c, eptr, len);
5439            chartype = UCD_CHARTYPE(c);
5440            if ((chartype == ucp_Lu ||
5441                 chartype == ucp_Ll ||
5442                 chartype == ucp_Lt) == prop_fail_result)
5443              break;
5444            eptr+= len;
5445            }
5446          break;
5447
5448          case PT_GC:
5449          for (i = min; i < max; i++)
5450            {
5451            int len = 1;
5452            if (eptr >= md->end_subject)
5453              {
5454              SCHECK_PARTIAL();
5455              break;
5456              }
5457            GETCHARLENTEST(c, eptr, len);
5458            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5459            eptr+= len;
5460            }
5461          break;
5462
5463          case PT_PC:
5464          for (i = min; i < max; i++)
5465            {
5466            int len = 1;
5467            if (eptr >= md->end_subject)
5468              {
5469              SCHECK_PARTIAL();
5470              break;
5471              }
5472            GETCHARLENTEST(c, eptr, len);
5473            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5474            eptr+= len;
5475            }
5476          break;
5477
5478          case PT_SC:
5479          for (i = min; i < max; i++)
5480            {
5481            int len = 1;
5482            if (eptr >= md->end_subject)
5483              {
5484              SCHECK_PARTIAL();
5485              break;
5486              }
5487            GETCHARLENTEST(c, eptr, len);
5488            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5489            eptr+= len;
5490            }
5491          break;
5492
5493          case PT_ALNUM:
5494          for (i = min; i < max; i++)
5495            {
5496            int category;
5497            int len = 1;
5498            if (eptr >= md->end_subject)
5499              {
5500              SCHECK_PARTIAL();
5501              break;
5502              }
5503            GETCHARLENTEST(c, eptr, len);
5504            category = UCD_CATEGORY(c);
5505            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5506              break;
5507            eptr+= len;
5508            }
5509          break;
5510
5511          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5512          which means that Perl space and POSIX space are now identical. PCRE
5513          was changed at release 8.34. */
5514
5515          case PT_SPACE:    /* Perl space */
5516          case PT_PXSPACE:  /* POSIX space */
5517          for (i = min; i < max; i++)
5518            {
5519            int len = 1;
5520            if (eptr >= md->end_subject)
5521              {
5522              SCHECK_PARTIAL();
5523              break;
5524              }
5525            GETCHARLENTEST(c, eptr, len);
5526            switch(c)
5527              {
5528              HSPACE_CASES:
5529              VSPACE_CASES:
5530              if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
5531              break;
5532
5533              default:
5534              if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5535                goto ENDLOOP99;   /* Break the loop */
5536              break;
5537              }
5538            eptr+= len;
5539            }
5540          ENDLOOP99:
5541          break;
5542
5543          case PT_WORD:
5544          for (i = min; i < max; i++)
5545            {
5546            int category;
5547            int len = 1;
5548            if (eptr >= md->end_subject)
5549              {
5550              SCHECK_PARTIAL();
5551              break;
5552              }
5553            GETCHARLENTEST(c, eptr, len);
5554            category = UCD_CATEGORY(c);
5555            if ((category == ucp_L || category == ucp_N ||
5556                 c == CHAR_UNDERSCORE) == prop_fail_result)
5557              break;
5558            eptr+= len;
5559            }
5560          break;
5561
5562          case PT_CLIST:
5563          for (i = min; i < max; i++)
5564            {
5565            const pcre_uint32 *cp;
5566            int len = 1;
5567            if (eptr >= md->end_subject)
5568              {
5569              SCHECK_PARTIAL();
5570              break;
5571              }
5572            GETCHARLENTEST(c, eptr, len);
5573            cp = PRIV(ucd_caseless_sets) + prop_value;
5574            for (;;)
5575              {
5576              if (c < *cp)
5577                { if (prop_fail_result) break; else goto GOT_MAX; }
5578              if (c == *cp++)
5579                { if (prop_fail_result) goto GOT_MAX; else break; }
5580              }
5581            eptr += len;
5582            }
5583          GOT_MAX:
5584          break;
5585
5586          case PT_UCNC:
5587          for (i = min; i < max; i++)
5588            {
5589            int len = 1;
5590            if (eptr >= md->end_subject)
5591              {
5592              SCHECK_PARTIAL();
5593              break;
5594              }
5595            GETCHARLENTEST(c, eptr, len);
5596            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5597                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5598                 c >= 0xe000) == prop_fail_result)
5599              break;
5600            eptr += len;
5601            }
5602          break;
5603
5604          default:
5605          RRETURN(PCRE_ERROR_INTERNAL);
5606          }
5607
5608        /* eptr is now past the end of the maximum run */
5609
5610        if (possessive) continue;    /* No backtracking */
5611        for(;;)
5612          {
5613          if (eptr <= pp) goto TAIL_RECURSE;
5614          RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5615          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5616          eptr--;
5617          if (utf) BACKCHAR(eptr);
5618          }
5619        }
5620
5621      /* Match extended Unicode grapheme clusters. We will get here only if the
5622      support is in the binary; otherwise a compile-time error occurs. */
5623
5624      else if (ctype == OP_EXTUNI)
5625        {
5626        for (i = min; i < max; i++)
5627          {
5628          if (eptr >= md->end_subject)
5629            {
5630            SCHECK_PARTIAL();
5631            break;
5632            }
5633          else
5634            {
5635            int lgb, rgb;
5636            GETCHARINCTEST(c, eptr);
5637            lgb = UCD_GRAPHBREAK(c);
5638            while (eptr < md->end_subject)
5639              {
5640              int len = 1;
5641              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5642              rgb = UCD_GRAPHBREAK(c);
5643              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5644              lgb = rgb;
5645              eptr += len;
5646              }
5647            }
5648          CHECK_PARTIAL();
5649          }
5650
5651        /* eptr is now past the end of the maximum run */
5652
5653        if (possessive) continue;    /* No backtracking */
5654
5655        /* We use <= pp rather than == pp to detect the start of the run while
5656        backtracking because the use of \C in UTF mode can cause BACKCHAR to
5657        move back past pp. This is just palliative; the use of \C in UTF mode
5658        is fraught with danger. */
5659
5660        for(;;)
5661          {
5662          int lgb, rgb;
5663          PCRE_PUCHAR fptr;
5664
5665          if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5666          RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5667          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5668
5669          /* Backtracking over an extended grapheme cluster involves inspecting
5670          the previous two characters (if present) to see if a break is
5671          permitted between them. */
5672
5673          eptr--;
5674          if (!utf) c = *eptr; else
5675            {
5676            BACKCHAR(eptr);
5677            GETCHAR(c, eptr);
5678            }
5679          rgb = UCD_GRAPHBREAK(c);
5680
5681          for (;;)
5682            {
5683            if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5684            fptr = eptr - 1;
5685            if (!utf) c = *fptr; else
5686              {
5687              BACKCHAR(fptr);
5688              GETCHAR(c, fptr);
5689              }
5690            lgb = UCD_GRAPHBREAK(c);
5691            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5692            eptr = fptr;
5693            rgb = lgb;
5694            }
5695          }
5696        }
5697
5698      else
5699#endif   /* SUPPORT_UCP */
5700
5701#ifdef SUPPORT_UTF
5702      if (utf)
5703        {
5704        switch(ctype)
5705          {
5706          case OP_ANY:
5707          for (i = min; i < max; i++)
5708            {
5709            if (eptr >= md->end_subject)
5710              {
5711              SCHECK_PARTIAL();
5712              break;
5713              }
5714            if (IS_NEWLINE(eptr)) break;
5715            if (md->partial != 0 &&    /* Take care with CRLF partial */
5716                eptr + 1 >= md->end_subject &&
5717                NLBLOCK->nltype == NLTYPE_FIXED &&
5718                NLBLOCK->nllen == 2 &&
5719                UCHAR21(eptr) == NLBLOCK->nl[0])
5720              {
5721              md->hitend = TRUE;
5722              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5723              }
5724            eptr++;
5725            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5726            }
5727          break;
5728
5729          case OP_ALLANY:
5730          if (max < INT_MAX)
5731            {
5732            for (i = min; i < max; i++)
5733              {
5734              if (eptr >= md->end_subject)
5735                {
5736                SCHECK_PARTIAL();
5737                break;
5738                }
5739              eptr++;
5740              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5741              }
5742            }
5743          else
5744            {
5745            eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5746            SCHECK_PARTIAL();
5747            }
5748          break;
5749
5750          /* The byte case is the same as non-UTF8 */
5751
5752          case OP_ANYBYTE:
5753          c = max - min;
5754          if (c > (unsigned int)(md->end_subject - eptr))
5755            {
5756            eptr = md->end_subject;
5757            SCHECK_PARTIAL();
5758            }
5759          else eptr += c;
5760          break;
5761
5762          case OP_ANYNL:
5763          for (i = min; i < max; i++)
5764            {
5765            int len = 1;
5766            if (eptr >= md->end_subject)
5767              {
5768              SCHECK_PARTIAL();
5769              break;
5770              }
5771            GETCHARLEN(c, eptr, len);
5772            if (c == CHAR_CR)
5773              {
5774              if (++eptr >= md->end_subject) break;
5775              if (UCHAR21(eptr) == CHAR_LF) eptr++;
5776              }
5777            else
5778              {
5779              if (c != CHAR_LF &&
5780                  (md->bsr_anycrlf ||
5781                   (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5782#ifndef EBCDIC
5783                    && c != 0x2028 && c != 0x2029
5784#endif  /* Not EBCDIC */
5785                    )))
5786                break;
5787              eptr += len;
5788              }
5789            }
5790          break;
5791
5792          case OP_NOT_HSPACE:
5793          case OP_HSPACE:
5794          for (i = min; i < max; i++)
5795            {
5796            BOOL gotspace;
5797            int len = 1;
5798            if (eptr >= md->end_subject)
5799              {
5800              SCHECK_PARTIAL();
5801              break;
5802              }
5803            GETCHARLEN(c, eptr, len);
5804            switch(c)
5805              {
5806              HSPACE_CASES: gotspace = TRUE; break;
5807              default: gotspace = FALSE; break;
5808              }
5809            if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5810            eptr += len;
5811            }
5812          break;
5813
5814          case OP_NOT_VSPACE:
5815          case OP_VSPACE:
5816          for (i = min; i < max; i++)
5817            {
5818            BOOL gotspace;
5819            int len = 1;
5820            if (eptr >= md->end_subject)
5821              {
5822              SCHECK_PARTIAL();
5823              break;
5824              }
5825            GETCHARLEN(c, eptr, len);
5826            switch(c)
5827              {
5828              VSPACE_CASES: gotspace = TRUE; break;
5829              default: gotspace = FALSE; break;
5830              }
5831            if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5832            eptr += len;
5833            }
5834          break;
5835
5836          case OP_NOT_DIGIT:
5837          for (i = min; i < max; i++)
5838            {
5839            int len = 1;
5840            if (eptr >= md->end_subject)
5841              {
5842              SCHECK_PARTIAL();
5843              break;
5844              }
5845            GETCHARLEN(c, eptr, len);
5846            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5847            eptr+= len;
5848            }
5849          break;
5850
5851          case OP_DIGIT:
5852          for (i = min; i < max; i++)
5853            {
5854            int len = 1;
5855            if (eptr >= md->end_subject)
5856              {
5857              SCHECK_PARTIAL();
5858              break;
5859              }
5860            GETCHARLEN(c, eptr, len);
5861            if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5862            eptr+= len;
5863            }
5864          break;
5865
5866          case OP_NOT_WHITESPACE:
5867          for (i = min; i < max; i++)
5868            {
5869            int len = 1;
5870            if (eptr >= md->end_subject)
5871              {
5872              SCHECK_PARTIAL();
5873              break;
5874              }
5875            GETCHARLEN(c, eptr, len);
5876            if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5877            eptr+= len;
5878            }
5879          break;
5880
5881          case OP_WHITESPACE:
5882          for (i = min; i < max; i++)
5883            {
5884            int len = 1;
5885            if (eptr >= md->end_subject)
5886              {
5887              SCHECK_PARTIAL();
5888              break;
5889              }
5890            GETCHARLEN(c, eptr, len);
5891            if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5892            eptr+= len;
5893            }
5894          break;
5895
5896          case OP_NOT_WORDCHAR:
5897          for (i = min; i < max; i++)
5898            {
5899            int len = 1;
5900            if (eptr >= md->end_subject)
5901              {
5902              SCHECK_PARTIAL();
5903              break;
5904              }
5905            GETCHARLEN(c, eptr, len);
5906            if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5907            eptr+= len;
5908            }
5909          break;
5910
5911          case OP_WORDCHAR:
5912          for (i = min; i < max; i++)
5913            {
5914            int len = 1;
5915            if (eptr >= md->end_subject)
5916              {
5917              SCHECK_PARTIAL();
5918              break;
5919              }
5920            GETCHARLEN(c, eptr, len);
5921            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5922            eptr+= len;
5923            }
5924          break;
5925
5926          default:
5927          RRETURN(PCRE_ERROR_INTERNAL);
5928          }
5929
5930        if (possessive) continue;    /* No backtracking */
5931        for(;;)
5932          {
5933          if (eptr <= pp) goto TAIL_RECURSE;
5934          RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5935          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5936          eptr--;
5937          BACKCHAR(eptr);
5938          if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
5939              UCHAR21(eptr - 1) == CHAR_CR) eptr--;
5940          }
5941        }
5942      else
5943#endif  /* SUPPORT_UTF */
5944      /* Not UTF mode */
5945        {
5946        switch(ctype)
5947          {
5948          case OP_ANY:
5949          for (i = min; i < max; i++)
5950            {
5951            if (eptr >= md->end_subject)
5952              {
5953              SCHECK_PARTIAL();
5954              break;
5955              }
5956            if (IS_NEWLINE(eptr)) break;
5957            if (md->partial != 0 &&    /* Take care with CRLF partial */
5958                eptr + 1 >= md->end_subject &&
5959                NLBLOCK->nltype == NLTYPE_FIXED &&
5960                NLBLOCK->nllen == 2 &&
5961                *eptr == NLBLOCK->nl[0])
5962              {
5963              md->hitend = TRUE;
5964              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5965              }
5966            eptr++;
5967            }
5968          break;
5969
5970          case OP_ALLANY:
5971          case OP_ANYBYTE:
5972          c = max - min;
5973          if (c > (unsigned int)(md->end_subject - eptr))
5974            {
5975            eptr = md->end_subject;
5976            SCHECK_PARTIAL();
5977            }
5978          else eptr += c;
5979          break;
5980
5981          case OP_ANYNL:
5982          for (i = min; i < max; i++)
5983            {
5984            if (eptr >= md->end_subject)
5985              {
5986              SCHECK_PARTIAL();
5987              break;
5988              }
5989            c = *eptr;
5990            if (c == CHAR_CR)
5991              {
5992              if (++eptr >= md->end_subject) break;
5993              if (*eptr == CHAR_LF) eptr++;
5994              }
5995            else
5996              {
5997              if (c != CHAR_LF && (md->bsr_anycrlf ||
5998                 (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5999#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6000                 && c != 0x2028 && c != 0x2029
6001#endif
6002                 ))) break;
6003              eptr++;
6004              }
6005            }
6006          break;
6007
6008          case OP_NOT_HSPACE:
6009          for (i = min; i < max; i++)
6010            {
6011            if (eptr >= md->end_subject)
6012              {
6013              SCHECK_PARTIAL();
6014              break;
6015              }
6016            switch(*eptr)
6017              {
6018              default: eptr++; break;
6019              HSPACE_BYTE_CASES:
6020#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6021              HSPACE_MULTIBYTE_CASES:
6022#endif
6023              goto ENDLOOP00;
6024              }
6025            }
6026          ENDLOOP00:
6027          break;
6028
6029          case OP_HSPACE:
6030          for (i = min; i < max; i++)
6031            {
6032            if (eptr >= md->end_subject)
6033              {
6034              SCHECK_PARTIAL();
6035              break;
6036              }
6037            switch(*eptr)
6038              {
6039              default: goto ENDLOOP01;
6040              HSPACE_BYTE_CASES:
6041#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6042              HSPACE_MULTIBYTE_CASES:
6043#endif
6044              eptr++; break;
6045              }
6046            }
6047          ENDLOOP01:
6048          break;
6049
6050          case OP_NOT_VSPACE:
6051          for (i = min; i < max; i++)
6052            {
6053            if (eptr >= md->end_subject)
6054              {
6055              SCHECK_PARTIAL();
6056              break;
6057              }
6058            switch(*eptr)
6059              {
6060              default: eptr++; break;
6061              VSPACE_BYTE_CASES:
6062#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6063              VSPACE_MULTIBYTE_CASES:
6064#endif
6065              goto ENDLOOP02;
6066              }
6067            }
6068          ENDLOOP02:
6069          break;
6070
6071          case OP_VSPACE:
6072          for (i = min; i < max; i++)
6073            {
6074            if (eptr >= md->end_subject)
6075              {
6076              SCHECK_PARTIAL();
6077              break;
6078              }
6079            switch(*eptr)
6080              {
6081              default: goto ENDLOOP03;
6082              VSPACE_BYTE_CASES:
6083#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6084              VSPACE_MULTIBYTE_CASES:
6085#endif
6086              eptr++; break;
6087              }
6088            }
6089          ENDLOOP03:
6090          break;
6091
6092          case OP_NOT_DIGIT:
6093          for (i = min; i < max; i++)
6094            {
6095            if (eptr >= md->end_subject)
6096              {
6097              SCHECK_PARTIAL();
6098              break;
6099              }
6100            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6101            eptr++;
6102            }
6103          break;
6104
6105          case OP_DIGIT:
6106          for (i = min; i < max; i++)
6107            {
6108            if (eptr >= md->end_subject)
6109              {
6110              SCHECK_PARTIAL();
6111              break;
6112              }
6113            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6114            eptr++;
6115            }
6116          break;
6117
6118          case OP_NOT_WHITESPACE:
6119          for (i = min; i < max; i++)
6120            {
6121            if (eptr >= md->end_subject)
6122              {
6123              SCHECK_PARTIAL();
6124              break;
6125              }
6126            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6127            eptr++;
6128            }
6129          break;
6130
6131          case OP_WHITESPACE:
6132          for (i = min; i < max; i++)
6133            {
6134            if (eptr >= md->end_subject)
6135              {
6136              SCHECK_PARTIAL();
6137              break;
6138              }
6139            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6140            eptr++;
6141            }
6142          break;
6143
6144          case OP_NOT_WORDCHAR:
6145          for (i = min; i < max; i++)
6146            {
6147            if (eptr >= md->end_subject)
6148              {
6149              SCHECK_PARTIAL();
6150              break;
6151              }
6152            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6153            eptr++;
6154            }
6155          break;
6156
6157          case OP_WORDCHAR:
6158          for (i = min; i < max; i++)
6159            {
6160            if (eptr >= md->end_subject)
6161              {
6162              SCHECK_PARTIAL();
6163              break;
6164              }
6165            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6166            eptr++;
6167            }
6168          break;
6169
6170          default:
6171          RRETURN(PCRE_ERROR_INTERNAL);
6172          }
6173
6174        if (possessive) continue;    /* No backtracking */
6175        for (;;)
6176          {
6177          if (eptr == pp) goto TAIL_RECURSE;
6178          RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6179          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6180          eptr--;
6181          if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6182              eptr[-1] == CHAR_CR) eptr--;
6183          }
6184        }
6185
6186      /* Control never gets here */
6187      }
6188
6189    /* There's been some horrible disaster. Arrival here can only mean there is
6190    something seriously wrong in the code above or the OP_xxx definitions. */
6191
6192    default:
6193    DPRINTF(("Unknown opcode %d\n", *ecode));
6194    RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6195    }
6196
6197  /* Do not stick any code in here without much thought; it is assumed
6198  that "continue" in the code above comes out to here to repeat the main
6199  loop. */
6200
6201  }             /* End of main loop */
6202/* Control never reaches here */
6203
6204
6205/* When compiling to use the heap rather than the stack for recursive calls to
6206match(), the RRETURN() macro jumps here. The number that is saved in
6207frame->Xwhere indicates which label we actually want to return to. */
6208
6209#ifdef NO_RECURSE
6210#define LBL(val) case val: goto L_RM##val;
6211HEAP_RETURN:
6212switch (frame->Xwhere)
6213  {
6214  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6215  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6216  LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6217  LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6218  LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6219  LBL(65) LBL(66)
6220#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6221  LBL(20) LBL(21)
6222#endif
6223#ifdef SUPPORT_UTF
6224  LBL(16) LBL(18)
6225  LBL(22) LBL(23) LBL(28) LBL(30)
6226  LBL(32) LBL(34) LBL(42) LBL(46)
6227#ifdef SUPPORT_UCP
6228  LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6229  LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6230#endif  /* SUPPORT_UCP */
6231#endif  /* SUPPORT_UTF */
6232  default:
6233  DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6234  return PCRE_ERROR_INTERNAL;
6235  }
6236#undef LBL
6237#endif  /* NO_RECURSE */
6238}
6239
6240
6241/***************************************************************************
6242****************************************************************************
6243                   RECURSION IN THE match() FUNCTION
6244
6245Undefine all the macros that were defined above to handle this. */
6246
#ifdef NO_RECURSE
/* The names below are undefined only in NO_RECURSE builds (presumably they
are mapped onto heap frame fields by definitions earlier in the file, which
are not visible here - confirm). They have to be removed because code that
follows match(), such as the exec function, uses some of them (for example
"length" and "flags") as ordinary identifiers. */

#undef eptr
#undef ecode
#undef mstart
#undef offset_top
#undef eptrb
#undef flags

#undef callpat
#undef charptr
#undef data
#undef next
#undef pp
#undef prev
#undef saved_eptr

#undef new_recursive

#undef cur_is_word
#undef condition
#undef prev_is_word

#undef ctype
#undef length
#undef max
#undef min
#undef number
#undef offset
#undef op
#undef save_capture_last
#undef save_offset1
#undef save_offset2
#undef save_offset3
#undef stacksave

#undef newptrb

#endif  /* NO_RECURSE */

/* These two are defined as macros in both cases, so they are undefined
unconditionally, outside the NO_RECURSE guard. */

#undef fc
#undef fi
6290
6291/***************************************************************************
6292***************************************************************************/
6293
6294
6295#ifdef NO_RECURSE
6296/*************************************************
6297*          Release allocated heap frames         *
6298*************************************************/
6299
6300/* This function releases all the allocated frames. The base frame is on the
6301machine stack, and so must not be freed.
6302
6303Argument: the address of the base frame
6304Returns:  nothing
6305*/
6306
6307static void
6308release_match_heapframes (heapframe *frame_base)
6309{
6310heapframe *nextframe = frame_base->Xnextframe;
6311while (nextframe != NULL)
6312  {
6313  heapframe *oldframe = nextframe;
6314  nextframe = nextframe->Xnextframe;
6315  (PUBL(stack_free))(oldframe);
6316  }
6317}
6318#endif
6319
6320
6321/*************************************************
6322*         Execute a Regular Expression           *
6323*************************************************/
6324
6325/* This function applies a compiled re to a subject string and picks out
6326portions of the string if it matches. Two elements in the vector are set for
6327each substring: the offsets to the start and end of the substring.
6328
6329Arguments:
6330  argument_re     points to the compiled expression
6331  extra_data      points to extra data or is NULL
6332  subject         points to the subject string
6333  length          length of subject string (may contain binary zeros)
6334  start_offset    where to start in the subject string
6335  options         option bits
6336  offsets         points to a vector of ints to be filled in with offsets
6337  offsetcount     the number of elements in the vector
6338
6339Returns:          > 0 => success; value is the number of elements filled in
6340                  = 0 => success, but offsets is not big enough
6341                   -1 => failed to match
6342                 < -1 => some kind of unexpected problem
6343*/
6344
6345#if defined COMPILE_PCRE8
6346PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6347pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6348  PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6349  int offsetcount)
6350#elif defined COMPILE_PCRE16
6351PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6352pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6353  PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6354  int offsetcount)
6355#elif defined COMPILE_PCRE32
6356PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6357pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6358  PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6359  int offsetcount)
6360#endif
6361{
6362int rc, ocount, arg_offset_max;
6363int newline;
6364BOOL using_temporary_offsets = FALSE;
6365BOOL anchored;
6366BOOL startline;
6367BOOL firstline;
6368BOOL utf;
6369BOOL has_first_char = FALSE;
6370BOOL has_req_char = FALSE;
6371pcre_uchar first_char = 0;
6372pcre_uchar first_char2 = 0;
6373pcre_uchar req_char = 0;
6374pcre_uchar req_char2 = 0;
6375match_data match_block;
6376match_data *md = &match_block;
6377const pcre_uint8 *tables;
6378const pcre_uint8 *start_bits = NULL;
6379PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6380PCRE_PUCHAR end_subject;
6381PCRE_PUCHAR start_partial = NULL;
6382PCRE_PUCHAR match_partial = NULL;
6383PCRE_PUCHAR req_char_ptr = start_match - 1;
6384
6385const pcre_study_data *study;
6386const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6387
6388#ifdef NO_RECURSE
6389heapframe frame_zero;
6390frame_zero.Xprevframe = NULL;            /* Marks the top level */
6391frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6392md->match_frames_base = &frame_zero;
6393#endif
6394
6395/* Check for the special magic call that measures the size of the stack used
6396per recursive call of match(). Without the funny casting for sizeof, a Windows
6397compiler gave this error: "unary minus operator applied to unsigned type,
6398result still unsigned". Hopefully the cast fixes that. */
6399
6400if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6401    start_offset == -999)
6402#ifdef NO_RECURSE
6403  return -((int)sizeof(heapframe));
6404#else
6405  return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6406#endif
6407
6408/* Plausibility checks */
6409
6410if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6411if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6412  return PCRE_ERROR_NULL;
6413if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6414if (length < 0) return PCRE_ERROR_BADLENGTH;
6415if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6416
6417/* Check that the first field in the block is the magic number. If it is not,
6418return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6419REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6420means that the pattern is likely compiled with different endianness. */
6421
6422if (re->magic_number != MAGIC_NUMBER)
6423  return re->magic_number == REVERSED_MAGIC_NUMBER?
6424    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6425if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6426
6427/* These two settings are used in the code for checking a UTF-8 string that
6428follows immediately afterwards. Other values in the md block are used only
6429during "normal" pcre_exec() processing, not when the JIT support is in use,
6430so they are set up later. */
6431
6432/* PCRE_UTF16 has the same value as PCRE_UTF8. */
6433utf = md->utf = (re->options & PCRE_UTF8) != 0;
6434md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6435              ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6436
6437/* Check a UTF-8 string if required. Pass back the character offset and error
6438code for an invalid string if a results vector is available. */
6439
6440#ifdef SUPPORT_UTF
6441if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6442  {
6443  int erroroffset;
6444  int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6445  if (errorcode != 0)
6446    {
6447    if (offsetcount >= 2)
6448      {
6449      offsets[0] = erroroffset;
6450      offsets[1] = errorcode;
6451      }
6452#if defined COMPILE_PCRE8
6453    return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6454      PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6455#elif defined COMPILE_PCRE16
6456    return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6457      PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6458#elif defined COMPILE_PCRE32
6459    return PCRE_ERROR_BADUTF32;
6460#endif
6461    }
6462#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6463  /* Check that a start_offset points to the start of a UTF character. */
6464  if (start_offset > 0 && start_offset < length &&
6465      NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6466    return PCRE_ERROR_BADUTF8_OFFSET;
6467#endif
6468  }
6469#endif
6470
/* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at
compile time for the JIT code to be usable. Fall back to the normal code path
if an unsupported flag is set. */
6475
6476#ifdef SUPPORT_JIT
6477if (extra_data != NULL
6478    && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6479                             PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6480    && extra_data->executable_jit != NULL
6481    && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
6482  {
6483  rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6484       start_offset, options, offsets, offsetcount);
6485
  /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
  matching mode is not compiled. In this case we simply fall back to the
  interpreter. */
6488
6489  if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6490  }
6491#endif
6492
6493/* Carry on with non-JIT matching. This information is for finding all the
6494numbers associated with a given name, for condition testing. */
6495
6496md->name_table = (pcre_uchar *)re + re->name_table_offset;
6497md->name_count = re->name_count;
6498md->name_entry_size = re->name_entry_size;
6499
6500/* Fish out the optional data from the extra_data structure, first setting
6501the default values. */
6502
6503study = NULL;
6504md->match_limit = MATCH_LIMIT;
6505md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6506md->callout_data = NULL;
6507
6508/* The table pointer is always in native byte order. */
6509
6510tables = re->tables;
6511
6512/* The two limit values override the defaults, whatever their value. */
6513
6514if (extra_data != NULL)
6515  {
6516  unsigned long int flags = extra_data->flags;
6517  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6518    study = (const pcre_study_data *)extra_data->study_data;
6519  if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6520    md->match_limit = extra_data->match_limit;
6521  if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6522    md->match_limit_recursion = extra_data->match_limit_recursion;
6523  if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6524    md->callout_data = extra_data->callout_data;
6525  if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6526  }
6527
6528/* Limits in the regex override only if they are smaller. */
6529
6530if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
6531  md->match_limit = re->limit_match;
6532
6533if ((re->flags & PCRE_RLSET) != 0 &&
6534    re->limit_recursion < md->match_limit_recursion)
6535  md->match_limit_recursion = re->limit_recursion;
6536
6537/* If the exec call supplied NULL for tables, use the inbuilt ones. This
6538is a feature that makes it possible to save compiled regex and re-use them
6539in other programs later. */
6540
6541if (tables == NULL) tables = PRIV(default_tables);
6542
6543/* Set up other data */
6544
6545anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6546startline = (re->flags & PCRE_STARTLINE) != 0;
6547firstline = (re->options & PCRE_FIRSTLINE) != 0;
6548
6549/* The code starts after the real_pcre block and the capture name table. */
6550
6551md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6552  re->name_count * re->name_entry_size;
6553
6554md->start_subject = (PCRE_PUCHAR)subject;
6555md->start_offset = start_offset;
6556md->end_subject = md->start_subject + length;
6557end_subject = md->end_subject;
6558
6559md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6560md->use_ucp = (re->options & PCRE_UCP) != 0;
6561md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6562md->ignore_skip_arg = 0;
6563
6564/* Some options are unpacked into BOOL variables in the hope that testing
6565them will be faster than individual option bits. */
6566
6567md->notbol = (options & PCRE_NOTBOL) != 0;
6568md->noteol = (options & PCRE_NOTEOL) != 0;
6569md->notempty = (options & PCRE_NOTEMPTY) != 0;
6570md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6571
6572md->hitend = FALSE;
6573md->mark = md->nomatch_mark = NULL;     /* In case never set */
6574
6575md->recursive = NULL;                   /* No recursion at top level */
6576md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6577
6578md->lcc = tables + lcc_offset;
6579md->fcc = tables + fcc_offset;
6580md->ctypes = tables + ctypes_offset;
6581
6582/* Handle different \R options. */
6583
6584switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6585  {
6586  case 0:
6587  if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6588    md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6589  else
6590#ifdef BSR_ANYCRLF
6591  md->bsr_anycrlf = TRUE;
6592#else
6593  md->bsr_anycrlf = FALSE;
6594#endif
6595  break;
6596
6597  case PCRE_BSR_ANYCRLF:
6598  md->bsr_anycrlf = TRUE;
6599  break;
6600
6601  case PCRE_BSR_UNICODE:
6602  md->bsr_anycrlf = FALSE;
6603  break;
6604
6605  default: return PCRE_ERROR_BADNEWLINE;
6606  }
6607
6608/* Handle different types of newline. The three bits give eight cases. If
6609nothing is set at run time, whatever was used at compile time applies. */
6610
6611switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6612        (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6613  {
6614  case 0: newline = NEWLINE; break;   /* Compile-time default */
6615  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6616  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6617  case PCRE_NEWLINE_CR+
6618       PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6619  case PCRE_NEWLINE_ANY: newline = -1; break;
6620  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6621  default: return PCRE_ERROR_BADNEWLINE;
6622  }
6623
6624if (newline == -2)
6625  {
6626  md->nltype = NLTYPE_ANYCRLF;
6627  }
6628else if (newline < 0)
6629  {
6630  md->nltype = NLTYPE_ANY;
6631  }
6632else
6633  {
6634  md->nltype = NLTYPE_FIXED;
6635  if (newline > 255)
6636    {
6637    md->nllen = 2;
6638    md->nl[0] = (newline >> 8) & 255;
6639    md->nl[1] = newline & 255;
6640    }
6641  else
6642    {
6643    md->nllen = 1;
6644    md->nl[0] = newline;
6645    }
6646  }
6647
6648/* Partial matching was originally supported only for a restricted set of
6649regexes; from release 8.00 there are no restrictions, but the bits are still
6650defined (though never set). So there's no harm in leaving this code. */
6651
6652if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6653  return PCRE_ERROR_BADPARTIAL;
6654
6655/* If the expression has got more back references than the offsets supplied can
6656hold, we get a temporary chunk of working store to use during the matching.
6657Otherwise, we can use the vector supplied, rounding down its size to a multiple
6658of 3. */
6659
6660ocount = offsetcount - (offsetcount % 3);
6661arg_offset_max = (2*ocount)/3;
6662
6663if (re->top_backref > 0 && re->top_backref >= ocount/3)
6664  {
6665  ocount = re->top_backref * 3 + 3;
6666  md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6667  if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6668  using_temporary_offsets = TRUE;
6669  DPRINTF(("Got memory to hold back references\n"));
6670  }
6671else md->offset_vector = offsets;
6672md->offset_end = ocount;
6673md->offset_max = (2*ocount)/3;
6674md->capture_last = 0;
6675
6676/* Reset the working variable associated with each extraction. These should
6677never be used unless previously set, but they get saved and restored, and so we
6678initialize them to avoid reading uninitialized locations. Also, unset the
6679offsets for the matched string. This is really just for tidiness with callouts,
6680in case they inspect these fields. */
6681
6682if (md->offset_vector != NULL)
6683  {
6684  register int *iptr = md->offset_vector + ocount;
6685  register int *iend = iptr - re->top_bracket;
6686  if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6687  while (--iptr >= iend) *iptr = -1;
6688  if (offsetcount > 0) md->offset_vector[0] = -1;
6689  if (offsetcount > 1) md->offset_vector[1] = -1;
6690  }
6691
6692/* Set up the first character to match, if available. The first_char value is
6693never set for an anchored regular expression, but the anchoring may be forced
6694at run time, so we have to test for anchoring. The first char may be unset for
6695an unanchored pattern, of course. If there's no first char and the pattern was
6696studied, there may be a bitmap of possible first characters. */
6697
6698if (!anchored)
6699  {
6700  if ((re->flags & PCRE_FIRSTSET) != 0)
6701    {
6702    has_first_char = TRUE;
6703    first_char = first_char2 = (pcre_uchar)(re->first_char);
6704    if ((re->flags & PCRE_FCH_CASELESS) != 0)
6705      {
6706      first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6707#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6708      if (utf && first_char > 127)
6709        first_char2 = UCD_OTHERCASE(first_char);
6710#endif
6711      }
6712    }
6713  else
6714    if (!startline && study != NULL &&
6715      (study->flags & PCRE_STUDY_MAPPED) != 0)
6716        start_bits = study->start_bits;
6717  }
6718
6719/* For anchored or unanchored matches, there may be a "last known required
6720character" set. */
6721
6722if ((re->flags & PCRE_REQCHSET) != 0)
6723  {
6724  has_req_char = TRUE;
6725  req_char = req_char2 = (pcre_uchar)(re->req_char);
6726  if ((re->flags & PCRE_RCH_CASELESS) != 0)
6727    {
6728    req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6729#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6730    if (utf && req_char > 127)
6731      req_char2 = UCD_OTHERCASE(req_char);
6732#endif
6733    }
6734  }
6735
6736
6737/* ==========================================================================*/
6738
6739/* Loop for handling unanchored repeated matching attempts; for anchored regexes
6740the loop runs just once. */
6741
6742for(;;)
6743  {
6744  PCRE_PUCHAR save_end_subject = end_subject;
6745  PCRE_PUCHAR new_start_match;
6746
6747  /* If firstline is TRUE, the start of the match is constrained to the first
6748  line of a multiline string. That is, the match must be before or at the first
6749  newline. Implement this by temporarily adjusting end_subject so that we stop
6750  scanning at a newline. If the match fails at the newline, later code breaks
6751  this loop. */
6752
6753  if (firstline)
6754    {
6755    PCRE_PUCHAR t = start_match;
6756#ifdef SUPPORT_UTF
6757    if (utf)
6758      {
6759      while (t < md->end_subject && !IS_NEWLINE(t))
6760        {
6761        t++;
6762        ACROSSCHAR(t < end_subject, *t, t++);
6763        }
6764      }
6765    else
6766#endif
6767    while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6768    end_subject = t;
6769    }
6770
6771  /* There are some optimizations that avoid running the match if a known
6772  starting point is not found, or if a known later character is not present.
6773  However, there is an option that disables these, for testing and for ensuring
6774  that all callouts do actually occur. The option can be set in the regex by
6775  (*NO_START_OPT) or passed in match-time options. */
6776
6777  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6778    {
6779    /* Advance to a unique first char if there is one. */
6780
6781    if (has_first_char)
6782      {
6783      pcre_uchar smc;
6784
6785      if (first_char != first_char2)
6786        while (start_match < end_subject &&
6787          (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
6788          start_match++;
6789      else
6790        while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
6791          start_match++;
6792      }
6793
6794    /* Or to just after a linebreak for a multiline match */
6795
6796    else if (startline)
6797      {
6798      if (start_match > md->start_subject + start_offset)
6799        {
6800#ifdef SUPPORT_UTF
6801        if (utf)
6802          {
6803          while (start_match < end_subject && !WAS_NEWLINE(start_match))
6804            {
6805            start_match++;
6806            ACROSSCHAR(start_match < end_subject, *start_match,
6807              start_match++);
6808            }
6809          }
6810        else
6811#endif
6812        while (start_match < end_subject && !WAS_NEWLINE(start_match))
6813          start_match++;
6814
6815        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6816        and we are now at a LF, advance the match position by one more character.
6817        */
6818
6819        if (start_match[-1] == CHAR_CR &&
6820             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6821             start_match < end_subject &&
6822             UCHAR21TEST(start_match) == CHAR_NL)
6823          start_match++;
6824        }
6825      }
6826
6827    /* Or to a non-unique first byte after study */
6828
6829    else if (start_bits != NULL)
6830      {
6831      while (start_match < end_subject)
6832        {
6833        register pcre_uint32 c = UCHAR21TEST(start_match);
6834#ifndef COMPILE_PCRE8
6835        if (c > 255) c = 255;
6836#endif
6837        if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
6838        start_match++;
6839        }
6840      }
6841    }   /* Starting optimizations */
6842
6843  /* Restore fudged end_subject */
6844
6845  end_subject = save_end_subject;
6846
6847  /* The following two optimizations are disabled for partial matching or if
6848  disabling is explicitly requested. */
6849
6850  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6851    {
6852    /* If the pattern was studied, a minimum subject length may be set. This is
6853    a lower bound; there may be no string of exactly that length that matches
6854    the pattern. Although the value is, strictly, in characters, we treat it as
6855    bytes to avoid spending too much time in this optimization. */
6856
6857    if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6858        (pcre_uint32)(end_subject - start_match) < study->minlength)
6859      {
6860      rc = MATCH_NOMATCH;
6861      break;
6862      }
6863
6864    /* If req_char is set, we know that that character must appear in the
6865    subject for the match to succeed. If the first character is set, req_char
6866    must be later in the subject; otherwise the test starts at the match point.
6867    This optimization can save a huge amount of backtracking in patterns with
6868    nested unlimited repeats that aren't going to match. Writing separate code
6869    for cased/caseless versions makes it go faster, as does using an
6870    autoincrement and backing off on a match.
6871
6872    HOWEVER: when the subject string is very, very long, searching to its end
6873    can take a long time, and give bad performance on quite ordinary patterns.
6874    This showed up when somebody was matching something like /^\d+C/ on a
6875    32-megabyte string... so we don't do this when the string is sufficiently
6876    long. */
6877
6878    if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6879      {
6880      register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6881
6882      /* We don't need to repeat the search if we haven't yet reached the
6883      place we found it at last time. */
6884
6885      if (p > req_char_ptr)
6886        {
6887        if (req_char != req_char2)
6888          {
6889          while (p < end_subject)
6890            {
6891            register pcre_uint32 pp = UCHAR21INCTEST(p);
6892            if (pp == req_char || pp == req_char2) { p--; break; }
6893            }
6894          }
6895        else
6896          {
6897          while (p < end_subject)
6898            {
6899            if (UCHAR21INCTEST(p) == req_char) { p--; break; }
6900            }
6901          }
6902
6903        /* If we can't find the required character, break the matching loop,
6904        forcing a match failure. */
6905
6906        if (p >= end_subject)
6907          {
6908          rc = MATCH_NOMATCH;
6909          break;
6910          }
6911
6912        /* If we have found the required character, save the point where we
6913        found it, so that we don't search again next time round the loop if
6914        the start hasn't passed this character yet. */
6915
6916        req_char_ptr = p;
6917        }
6918      }
6919    }
6920
6921#ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6922  printf(">>>> Match against: ");
6923  pchars(start_match, end_subject - start_match, TRUE, md);
6924  printf("\n");
6925#endif
6926
6927  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6928  first starting point for which a partial match was found. */
6929
6930  md->start_match_ptr = start_match;
6931  md->start_used_ptr = start_match;
6932  md->match_call_count = 0;
6933  md->match_function_type = 0;
6934  md->end_offset_top = 0;
6935  md->skip_arg_count = 0;
6936  rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6937  if (md->hitend && start_partial == NULL)
6938    {
6939    start_partial = md->start_used_ptr;
6940    match_partial = start_match;
6941    }
6942
6943  switch(rc)
6944    {
6945    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6946    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6947    entirely. The only way we can do that is to re-do the match at the same
6948    point, with a flag to force SKIP with an argument to be ignored. Just
6949    treating this case as NOMATCH does not work because it does not check other
6950    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6951
6952    case MATCH_SKIP_ARG:
6953    new_start_match = start_match;
6954    md->ignore_skip_arg = md->skip_arg_count;
6955    break;
6956
6957    /* SKIP passes back the next starting point explicitly, but if it is no
6958    greater than the match we have just done, treat it as NOMATCH. */
6959
6960    case MATCH_SKIP:
6961    if (md->start_match_ptr > start_match)
6962      {
6963      new_start_match = md->start_match_ptr;
6964      break;
6965      }
6966    /* Fall through */
6967
6968    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6969    exactly like PRUNE. Clear the ignore-SKIP-with-argument setting. */
6970
6971    case MATCH_NOMATCH:
6972    case MATCH_PRUNE:
6973    case MATCH_THEN:
6974    md->ignore_skip_arg = 0;
6975    new_start_match = start_match + 1;
6976#ifdef SUPPORT_UTF
6977    if (utf)
6978      ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6979        new_start_match++);
6980#endif
6981    break;
6982
6983    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6984
6985    case MATCH_COMMIT:
6986    rc = MATCH_NOMATCH;
6987    goto ENDLOOP;
6988
6989    /* Any other return is either a match, or some kind of error. */
6990
6991    default:
6992    goto ENDLOOP;
6993    }
6994
6995  /* Control reaches here for the various types of "no match at this point"
6996  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6997
6998  rc = MATCH_NOMATCH;
6999
7000  /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
7001  newline in the subject (though it may continue over the newline). Therefore,
7002  if we have just failed to match, starting at a newline, do not continue. */
7003
7004  if (firstline && IS_NEWLINE(start_match)) break;
7005
7006  /* Advance to new matching position */
7007
7008  start_match = new_start_match;
7009
7010  /* Break the loop if the pattern is anchored or if we have passed the end of
7011  the subject. */
7012
7013  if (anchored || start_match > end_subject) break;
7014
7015  /* If we have just passed a CR and we are now at a LF, and the pattern does
7016  not contain any explicit matches for \r or \n, and the newline option is CRLF
7017  or ANY or ANYCRLF, advance the match position by one more character. In
7018  normal matching start_match will always be greater than the first position at
7019  this stage, but a failed *SKIP can cause a return at the same point, which is
7020  why the first test exists. */
7021
7022  if (start_match > (PCRE_PUCHAR)subject + start_offset &&
7023      start_match[-1] == CHAR_CR &&
7024      start_match < end_subject &&
7025      *start_match == CHAR_NL &&
7026      (re->flags & PCRE_HASCRORLF) == 0 &&
7027        (md->nltype == NLTYPE_ANY ||
7028         md->nltype == NLTYPE_ANYCRLF ||
7029         md->nllen == 2))
7030    start_match++;
7031
7032  md->mark = NULL;   /* Reset for start of next match attempt */
7033  }                  /* End of for(;;) "bumpalong" loop */
7034
7035/* ==========================================================================*/
7036
7037/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7038conditions is true:
7039
7040(1) The pattern is anchored or the match was failed by (*COMMIT);
7041
7042(2) We are past the end of the subject;
7043
7044(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7045    this option requests that a match occur at or before the first newline in
7046    the subject.
7047
7048When we have a match and the offset vector is big enough to deal with any
7049backreferences, captured substring offsets will already be set up. In the case
7050where we had to get some local store to hold offsets for backreference
7051processing, copy those that we can. In this case there need not be overflow if
7052certain parts of the pattern were not used, even though there are more
7053capturing parentheses than vector slots. */
7054
7055ENDLOOP:
7056
7057if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7058  {
7059  if (using_temporary_offsets)
7060    {
7061    if (arg_offset_max >= 4)
7062      {
7063      memcpy(offsets + 2, md->offset_vector + 2,
7064        (arg_offset_max - 2) * sizeof(int));
7065      DPRINTF(("Copied offsets from temporary memory\n"));
7066      }
7067    if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7068    DPRINTF(("Freeing temporary memory\n"));
7069    (PUBL(free))(md->offset_vector);
7070    }
7071
7072  /* Set the return code to the number of captured strings, or 0 if there were
7073  too many to fit into the vector. */
7074
7075  rc = ((md->capture_last & OVFLBIT) != 0 &&
7076         md->end_offset_top >= arg_offset_max)?
7077    0 : md->end_offset_top/2;
7078
7079  /* If there is space in the offset vector, set any unused pairs at the end of
7080  the pattern to -1 for backwards compatibility. It is documented that this
7081  happens. In earlier versions, the whole set of potential capturing offsets
7082  was set to -1 each time round the loop, but this is handled differently now.
7083  "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7084  those at the end that need unsetting here. We can't just unset them all at
7085  the start of the whole thing because they may get set in one branch that is
7086  not the final matching branch. */
7087
7088  if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7089    {
7090    register int *iptr, *iend;
7091    int resetcount = 2 + re->top_bracket * 2;
7092    if (resetcount > offsetcount) resetcount = offsetcount;
7093    iptr = offsets + md->end_offset_top;
7094    iend = offsets + resetcount;
7095    while (iptr < iend) *iptr++ = -1;
7096    }
7097
7098  /* If there is space, set up the whole thing as substring 0. The value of
7099  md->start_match_ptr might be modified if \K was encountered on the successful
7100  matching path. */
7101
7102  if (offsetcount < 2) rc = 0; else
7103    {
7104    offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7105    offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7106    }
7107
7108  /* Return MARK data if requested */
7109
7110  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7111    *(extra_data->mark) = (pcre_uchar *)md->mark;
7112  DPRINTF((">>>> returning %d\n", rc));
7113#ifdef NO_RECURSE
7114  release_match_heapframes(&frame_zero);
7115#endif
7116  return rc;
7117  }
7118
7119/* Control gets here if there has been an error, or if the overall match
7120attempt has failed at all permitted starting positions. */
7121
7122if (using_temporary_offsets)
7123  {
7124  DPRINTF(("Freeing temporary memory\n"));
7125  (PUBL(free))(md->offset_vector);
7126  }
7127
7128/* For anything other than nomatch or partial match, just return the code. */
7129
7130if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7131  {
7132  DPRINTF((">>>> error: returning %d\n", rc));
7133#ifdef NO_RECURSE
7134  release_match_heapframes(&frame_zero);
7135#endif
7136  return rc;
7137  }
7138
7139/* Handle partial matches - disable any mark data */
7140
7141if (match_partial != NULL)
7142  {
7143  DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7144  md->mark = NULL;
7145  if (offsetcount > 1)
7146    {
7147    offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7148    offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7149    if (offsetcount > 2)
7150      offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7151    }
7152  rc = PCRE_ERROR_PARTIAL;
7153  }
7154
7155/* This is the classic nomatch case */
7156
7157else
7158  {
7159  DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7160  rc = PCRE_ERROR_NOMATCH;
7161  }
7162
7163/* Return the MARK data if it has been requested. */
7164
7165if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7166  *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7167#ifdef NO_RECURSE
7168  release_match_heapframes(&frame_zero);
7169#endif
7170return rc;
7171}
7172
7173/* End of pcre_exec.c */
7174